Ejemplo n.º 1
0
        /// <summary>
        /// Return a new "state" DStream where the state for each key is updated by applying
        /// the given function on the previous state of the key and the new values of the key.
        /// </summary>
        /// <typeparam name="K">Type of the keys in the source DStream.</typeparam>
        /// <typeparam name="V">Type of the values in the source DStream.</typeparam>
        /// <typeparam name="S">Type of the state kept for each key.</typeparam>
        /// <typeparam name="M">Type of the mapped data elements emitted by the state function.</typeparam>
        /// <param name="self">The key/value DStream the state update is applied to.</param>
        /// <param name="stateSpec">
        /// Specification handed to the state helper. When its <c>numPartitions</c> is not positive,
        /// it is replaced by a spec using the SparkContext's default parallelism.
        /// </param>
        /// <returns>
        /// A <c>MapWithStateDStream</c> built from the mapped-data stream and the stream of
        /// (key, state) snapshots.
        /// </returns>
        public static MapWithStateDStream<K, V, S, M> MapWithState<K, V, S, M>(this DStream<Tuple<K, V>> self, StateSpec<K, V, S, M> stateSpec)
        {
            // Fall back to the context's default parallelism when no partition count was configured.
            if (stateSpec.numPartitions <= 0)
            {
                stateSpec = stateSpec.NumPartitions(self.streamingContext.SparkContext.DefaultParallelism);
            }

            // When the source stream is pipelinable, its pending transformation is handed to the helper
            // so it can be combined with the state update.
            // NOTE(review): assumes a pipelinable stream is always a TransformedDStream - confirm.
            Func<double, RDD<dynamic>, RDD<dynamic>> prevFunc = self.Piplinable ? (self as TransformedDStream<Tuple<K, V>>).func : null;

            Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>> func = new MapWithStateHelper<K, V, S, M>(prevFunc, stateSpec).Execute;

            // Serialize the state function to a byte array. The stream is disposed as soon as
            // the bytes have been copied out instead of being left for the garbage collector.
            byte[] serializedFunc;
            using (var stream = new MemoryStream())
            {
                new BinaryFormatter().Serialize(stream, func);
                serializedFunc = stream.ToArray();
            }

            var stateDStreamProxy = SparkCLREnvironment.SparkCLRProxy.StreamingContextProxy.CreateCSharpStateDStream(
                self.Piplinable ? self.prevDStreamProxy : self.DStreamProxy,
                serializedFunc,
                "CSharpStateDStream",
                self.serializedMode.ToString(),
                (self.Piplinable ? self.prevSerializedMode : self.serializedMode).ToString());

            var mapWithStateDStream = new DStream<MapWithStateRDDRecord<K, S, M>>(stateDStreamProxy, self.streamingContext);

            // Each record carries both the mapped output and the state map; split them into two streams.
            DStream<M> mappedDataDStream = mapWithStateDStream.FlatMap(r => r.mappedData);
            DStream<Tuple<K, S>> snapshotsDStream = mapWithStateDStream.FlatMap(
                r => r.stateMap.Select(entry => new Tuple<K, S>(entry.Key, entry.Value.state)));

            return new MapWithStateDStream<K, V, S, M>(mappedDataDStream, snapshotsDStream);
        }
 /// <summary>
 /// Produces a new DStream by running a flat-map function over the value of every
 /// key-value pair in this DStream; keys are left unchanged.
 /// </summary>
 /// <typeparam name="K">Type of the keys.</typeparam>
 /// <typeparam name="V">Type of the values in the source DStream.</typeparam>
 /// <typeparam name="U">Type of the elements produced by <paramref name="func"/>.</typeparam>
 /// <param name="self">The key-value DStream to transform.</param>
 /// <param name="func">Function that expands a single value into a sequence of new values.</param>
 /// <returns>A DStream of key-value pairs carrying the produced values.</returns>
 public static DStream<KeyValuePair<K, U>> FlatMapValues<K, V, U>(this DStream<KeyValuePair<K, V>> self, Func<V, IEnumerable<U>> func)
 {
     // The helper wraps the value-level function so it can be applied to whole pairs.
     var valuesHelper = new FlatMapValuesHelper<K, V, U>(func);

     // NOTE(review): the second argument is presumably the "preserves partitioning" flag - confirm against FlatMap.
     return self.FlatMap(valuesHelper.Execute, true);
 }