        /// <summary>
        /// Return a new "state" DStream where the state for each key is updated by applying
        /// the given function to the previous state of the key and the new values of the key.
        /// </summary>
        /// <typeparam name="K">Type of the key</typeparam>
        /// <typeparam name="V">Type of the values</typeparam>
        /// <typeparam name="S">Type of the state</typeparam>
        /// <param name="self">The DStream of (key, value) pairs to maintain state over</param>
        /// <param name="updateFunc">State update function - (pid, IEnumerable[K, [newValues, oldState]]) => IEnumerable[K, newState]</param>
        /// <param name="initialState">Initial state value of each key</param>
        /// <param name="numPartitions">Number of partitions; when &lt;= 0, defaults to the SparkContext's default parallelism</param>
        /// <returns>A new "state" DStream of (key, state) pairs</returns>
        public static DStream<Tuple<K, S>> UpdateStateByKey<K, V, S>(this DStream<Tuple<K, V>> self,
                                                                     Func<int, IEnumerable<Tuple<K, Tuple<IEnumerable<V>, S>>>, IEnumerable<Tuple<K, S>>> updateFunc,
                                                                     RDD<Tuple<K, S>> initialState = null, int numPartitions = 0)
        {
            if (numPartitions <= 0)
            {
                numPartitions = self.streamingContext.SparkContext.DefaultParallelism;
            }

            // complete the pipelinable DStream by adding the last pipelinable operation
            // (shuffle-key assignment) before transforming to CSharpStateDStream, so that
            // UpdateStateByKey's parallel job covers all pipelinable operations before shuffling
            var ds = self.Transform(new AddShuffleKeyHelper<K, V>(numPartitions).Execute);

            Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>> func = new UpdateStateByKeysHelper<K, V, S>(updateFunc, initialState, numPartitions).Execute;

            // serialize the state update function so it can be passed through the proxy
            // to the CSharpStateDStream created below
            var formatter = new BinaryFormatter();
            var stream = new MemoryStream();

            formatter.Serialize(stream, func);

            return new DStream<Tuple<K, S>>(SparkCLREnvironment.SparkCLRProxy.StreamingContextProxy.CreateCSharpStateDStream(
                                                ds.DStreamProxy,
                                                stream.ToArray(),
                                                "CSharpStateDStream",
                                                ds.serializedMode.ToString(),
                                                ds.serializedMode.ToString()),
                                            self.streamingContext);
        }
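
        // Usage sketch for UpdateStateByKey: a running word count (illustrative names;
        // assumes a DStream<Tuple<string, int>> "wordCounts" built upstream and
        // "using System.Linq;"). Per key, Item2.Item1 holds the batch's new values and
        // Item2.Item2 the previous state (assumed here to start at default(int), i.e. 0,
        // for keys not seen before):
        //
        //     var runningCounts = wordCounts.UpdateStateByKey<string, int, int>(
        //         (pid, entries) => entries.Select(e =>
        //             new Tuple<string, int>(e.Item1, e.Item2.Item1.Sum() + e.Item2.Item2)));
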
        /// <summary>
        /// Return a new DStream in which each RDD is partitioned by key into numPartitions partitions.
        /// </summary>
        /// <typeparam name="K">Type of the key</typeparam>
        /// <typeparam name="V">Type of the values</typeparam>
        /// <param name="self">The DStream of (key, value) pairs to repartition</param>
        /// <param name="numPartitions">Number of partitions; when &lt;= 0, defaults to the SparkContext's default parallelism</param>
        /// <returns>A new DStream whose RDDs are partitioned into numPartitions partitions</returns>
        public static DStream<KeyValuePair<K, V>> PartitionBy<K, V>(this DStream<KeyValuePair<K, V>> self, int numPartitions = 0)
        {
            if (numPartitions <= 0)
            {
                numPartitions = self.streamingContext.SparkContext.DefaultParallelism;
            }

            return self.Transform<KeyValuePair<K, V>>(new PartitionByHelper<K, V>(numPartitions).Execute);
        }
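
        // Usage sketch for PartitionBy (illustrative name; assumes a
        // DStream<KeyValuePair<string, int>> "pairs"): repartition each RDD in the
        // stream into 4 partitions by key.
        //
        //     var repartitioned = pairs.PartitionBy(4);
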
        /// <summary>
        /// Return a new DStream by applying combineByKey to each RDD.
        /// </summary>
        /// <typeparam name="K">Type of the key</typeparam>
        /// <typeparam name="V">Type of the values</typeparam>
        /// <typeparam name="C">Type of the combined (aggregated) values</typeparam>
        /// <param name="self">The DStream of (key, value) pairs to aggregate</param>
        /// <param name="createCombiner">Function that produces the initial combiner value</param>
        /// <param name="mergeValue">Function that merges a value into a combiner</param>
        /// <param name="mergeCombiners">Function that merges two combiners</param>
        /// <param name="numPartitions">Number of partitions; when &lt;= 0, defaults to the SparkContext's default parallelism</param>
        /// <returns>A new DStream of (key, combined value) pairs</returns>
        public static DStream<KeyValuePair<K, C>> CombineByKey<K, V, C>(
            this DStream<KeyValuePair<K, V>> self,
            Func<C> createCombiner,
            Func<C, V, C> mergeValue,
            Func<C, C, C> mergeCombiners,
            int numPartitions = 0)
        {
            if (numPartitions <= 0)
            {
                numPartitions = self.streamingContext.SparkContext.DefaultParallelism;
            }

            return self.Transform<KeyValuePair<K, C>>(new CombineByKeyHelper<K, V, C>(createCombiner, mergeValue, mergeCombiners, numPartitions).Execute);
        }
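
        // Usage sketch for CombineByKey: per-key (sum, count) aggregation (illustrative
        // names; assumes a DStream<KeyValuePair<string, int>> "scores"). createCombiner
        // yields the empty accumulator, mergeValue folds one value in, and mergeCombiners
        // merges partial accumulators across partitions:
        //
        //     var sumAndCount = scores.CombineByKey<string, int, Tuple<int, int>>(
        //         () => new Tuple<int, int>(0, 0),
        //         (acc, v) => new Tuple<int, int>(acc.Item1 + v, acc.Item2 + 1),
        //         (a, b) => new Tuple<int, int>(a.Item1 + b.Item1, a.Item2 + b.Item2));
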
        /// <summary>
        /// Return a new DStream by applying groupByKey on each RDD.
        /// </summary>
        /// <typeparam name="K">Type of the key</typeparam>
        /// <typeparam name="V">Type of the values</typeparam>
        /// <param name="self">The DStream of (key, value) pairs to group</param>
        /// <param name="numPartitions">Number of partitions to group into</param>
        /// <returns>A new DStream of (key, list of values) pairs</returns>
        public static DStream<KeyValuePair<K, List<V>>> GroupByKey<K, V>(this DStream<KeyValuePair<K, V>> self, int numPartitions = 0)
        {
            return self.Transform<KeyValuePair<K, List<V>>>(new GroupByKeyHelper<K, V>(numPartitions).Execute);
        }
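
        // Usage sketch for GroupByKey (illustrative name; assumes a
        // DStream<KeyValuePair<string, int>> "events"): collect all values per key
        // within each RDD into a list.
        //
        //     DStream<KeyValuePair<string, List<int>>> grouped = events.GroupByKey(4);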