コード例 #1
0
        /// <summary>
        /// Return a new "state" DStream where the state for each key is updated by applying
        /// the given function on the previous state of the key and the new values of the key.
        /// </summary>
        /// <typeparam name="K">Type of the key.</typeparam>
        /// <typeparam name="V">Type of the values arriving on the source DStream.</typeparam>
        /// <typeparam name="S">Type of the state kept for each key.</typeparam>
        /// <param name="self">Source DStream of key/value pairs.</param>
        /// <param name="updateFunc">State update function - IEnumerable[K, [newValues, oldState]] => IEnumerable[K, newState]</param>
        /// <param name="numPartitions">Number of partitions; a value of zero or less selects the SparkContext's DefaultParallelism.</param>
        /// <returns>A DStream of key/state pairs wrapping the proxy returned by CreateCSharpStateDStream.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="self"/> or <paramref name="updateFunc"/> is null.</exception>
        public static DStream<KeyValuePair<K, S>> UpdateStateByKey<K, V, S>(this DStream<KeyValuePair<K, V>> self,
                                                                            Func<IEnumerable<KeyValuePair<K, Tuple<IEnumerable<V>, S>>>, IEnumerable<KeyValuePair<K, S>>> updateFunc,
                                                                            int numPartitions = 0)
        {
            if (self == null)
            {
                throw new ArgumentNullException("self");
            }

            if (updateFunc == null)
            {
                throw new ArgumentNullException("updateFunc");
            }

            if (numPartitions <= 0)
            {
                numPartitions = self.streamingContext.SparkContext.DefaultParallelism;
            }

            // Read the flag once so every decision below is made against the same value.
            bool pipelinable = self.Piplinable;

            // Direct cast instead of "as": if the stream claims to be pipelinable but is not a
            // TransformedDStream, fail with a descriptive InvalidCastException rather than a
            // NullReferenceException on ".func".
            Func<double, RDD<dynamic>, RDD<dynamic>> prevFunc = pipelinable
                ? ((TransformedDStream<KeyValuePair<K, V>>)self).func
                : null;

            Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>> func =
                new UpdateStateByKeysHelper<K, V, S>(updateFunc, prevFunc, numPartitions).Execute;

            // NOTE(review): BinaryFormatter is obsolete in current .NET; the payload format is
            // presumably what the receiving side of CreateCSharpStateDStream expects, so it is kept.
            byte[] serializedFunc;
            using (var stream = new MemoryStream())
            {
                new BinaryFormatter().Serialize(stream, func);
                serializedFunc = stream.ToArray();
            }

            // When pipelinable, attach to the previous DStream proxy and its serialized mode,
            // since the pending transformation travels inside the serialized function (prevFunc).
            var stateDStreamProxy = SparkCLREnvironment.SparkCLRProxy.StreamingContextProxy.CreateCSharpStateDStream(
                pipelinable ? self.prevDStreamProxy : self.DStreamProxy,
                serializedFunc,
                self.serializedMode.ToString(),
                (pipelinable ? self.prevSerializedMode : self.serializedMode).ToString());

            return new DStream<KeyValuePair<K, S>>(stateDStreamProxy, self.streamingContext);
        }
コード例 #2
0
        /// <summary>
        /// Return a new "state" DStream where the state for each key is updated by applying
        /// the given function on the previous state of the key and the new values of the key.
        /// </summary>
        /// <typeparam name="K">Type of the key.</typeparam>
        /// <typeparam name="V">Type of the values arriving on the source DStream.</typeparam>
        /// <typeparam name="S">Type of the state kept for each key.</typeparam>
        /// <param name="self">Source DStream of key/value tuples.</param>
        /// <param name="updateFunc">State update function - (pid, IEnumerable[K, [newValues, oldState]]) => IEnumerable[K, newState]</param>
        /// <param name="initialState">Initial state value of each key; may be null (the default).</param>
        /// <param name="numPartitions">Number of partitions; a value of zero or less selects the SparkContext's DefaultParallelism.</param>
        /// <returns>A DStream of key/state tuples wrapping the proxy returned by CreateCSharpStateDStream.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="self"/> or <paramref name="updateFunc"/> is null.</exception>
        public static DStream<Tuple<K, S>> UpdateStateByKey<K, V, S>(this DStream<Tuple<K, V>> self,
                                                                     Func<int, IEnumerable<Tuple<K, Tuple<IEnumerable<V>, S>>>, IEnumerable<Tuple<K, S>>> updateFunc,
                                                                     RDD<Tuple<K, S>> initialState = null, int numPartitions = 0)
        {
            if (self == null)
            {
                throw new ArgumentNullException("self");
            }

            if (updateFunc == null)
            {
                throw new ArgumentNullException("updateFunc");
            }

            if (numPartitions <= 0)
            {
                numPartitions = self.streamingContext.SparkContext.DefaultParallelism;
            }

            // completes pipelinable dstream by adding the last pipelinable operation
            // before transforming to CSharpStateDStream so that UpdateStateByKey's
            // parallel job covers all pipelinable operations before shuffling
            var keyedStream = self.Transform(new AddShuffleKeyHelper<K, V>(numPartitions).Execute);

            Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>> func =
                new UpdateStateByKeysHelper<K, V, S>(updateFunc, initialState, numPartitions).Execute;

            // NOTE(review): BinaryFormatter is obsolete in current .NET; the payload format is
            // presumably what the receiving side of CreateCSharpStateDStream expects, so it is kept.
            byte[] serializedFunc;
            using (var stream = new MemoryStream())
            {
                new BinaryFormatter().Serialize(stream, func);
                serializedFunc = stream.ToArray();
            }

            // The same serialized mode is passed for both mode arguments, as in the original call.
            var stateDStreamProxy = SparkCLREnvironment.SparkCLRProxy.StreamingContextProxy.CreateCSharpStateDStream(
                keyedStream.DStreamProxy,
                serializedFunc,
                "CSharpStateDStream",
                keyedStream.serializedMode.ToString(),
                keyedStream.serializedMode.ToString());

            return new DStream<Tuple<K, S>>(stateDStreamProxy, self.streamingContext);
        }