示例#1
0
        /// <summary>
        /// Sample that wraps a seed RDD in a <c>ConstantInputDStream</c> and asserts that
        /// every RDD handed to <c>ForeachRDD</c> has the seed's element count and partition count.
        /// </summary>
        /// <remarks>
        /// Starts the streaming context and then blocks in <c>AwaitTermination</c>.
        /// </remarks>
        internal static void DStreamConstantDStreamSample()
        {
            // Single source of truth for both the seed RDD and the assertions below,
            // so the two cannot drift apart.
            const int count      = 100;
            const int partitions = 2;

            var sc  = SparkCLRSamples.SparkContext;
            // NOTE(review): the unit of the batch-interval argument (2) is not visible from here;
            // confirm it against the StreamingContext constructor in use.
            var ssc = new StreamingContext(sc, 2);

            // create the RDD that the constant stream is built from
            var seedRDD = sc.Parallelize(Enumerable.Range(0, count), partitions);
            var dstream = new ConstantInputDStream<int>(seedRDD, ssc);

            dstream.ForeachRDD((time, rdd) =>
            {
                long batchCount   = rdd.Count();
                int numPartitions = rdd.GetNumPartitions();

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Count: " + batchCount);
                Console.WriteLine("Partitions: " + numPartitions);

                // Each batch is expected to match the seed RDD exactly.
                Assert.AreEqual(count, batchCount);
                Assert.AreEqual(partitions, numPartitions);
            });

            ssc.Start();
            ssc.AwaitTermination();
        }
示例#2
0
        /// <summary>
        /// Checks the <c>ConstantInputDStream</c> constructor: a null RDD must raise
        /// <see cref="ArgumentNullException"/>, and a valid RDD must produce a stream
        /// whose <c>streamingContext</c> is the context it was constructed with.
        /// </summary>
        public void TestConstantInputDStream()
        {
            var sparkContext = new SparkContext("", "");
            var seedRdd      = sparkContext.Parallelize(Enumerable.Range(0, 10), 1);
            var context      = new StreamingContext(sparkContext, 1000L);

            // Null seed RDD: construction is expected to fail.
            Assert.Throws<ArgumentNullException>(() =>
            {
                new ConstantInputDStream<int>(null, context);
            });

            // Valid seed RDD: construction succeeds and the stream keeps its context.
            var stream = new ConstantInputDStream<int>(seedRdd, context);
            Assert.IsNotNull(stream);
            Assert.AreEqual(context, stream.streamingContext);
        }
示例#3
0
        /// <summary>
        /// Sample that keys the numbers 0..99 by <c>n % numPartitions</c>, sums them per key with
        /// <c>ReduceByKeyAndWindow</c>, and asserts the per-key sums observed in every batch.
        /// </summary>
        /// <remarks>
        /// Uses <c>count</c> and <c>slideDuration</c>, which are declared outside this method
        /// (not visible here). Starts the streaming context and then blocks in
        /// <c>AwaitTermination</c>.
        /// </remarks>
        private static void DStreamReduceByKeyAndWindowSample()
        {
            // Reset the batch counter that the ForeachRDD callback increments.
            count = 0;

            const long batchIntervalMs = 2000; // batch interval is in milliseconds
            const int  windowDuration  = 26;   // window duration in seconds
            const int  numPartitions   = 2;

            var sc  = SparkCLRSamples.SparkContext;
            var ssc = new StreamingContext(sc, batchIntervalMs);

            // create the RDD
            var seedRDD = sc.Parallelize(Enumerable.Range(0, 100), numPartitions);
            var numbers = new ConstantInputDStream<int>(seedRDD, ssc);

            // Key 0 collects the even numbers, key 1 the odd numbers (numPartitions is 2).
            var pairs   = numbers.Map(n => new Tuple<int, int>(n % numPartitions, n));

            // Addition is the reduce function; subtraction is supplied as its counterpart
            // (presumably the inverse used when batches leave the window - confirm against the API).
            var reduced = pairs.ReduceByKeyAndWindow(
                (int x, int y) => (x + y),
                (int x, int y) => (x - y),
                windowDuration,
                slideDuration,
                numPartitions
                );

            reduced.ForeachRDD((time, rdd) =>
            {
                count++;
                var taken      = rdd.Collect();
                int partitions = rdd.GetNumPartitions();

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Batch: " + count);
                Console.WriteLine("Count: " + taken.Length);
                Console.WriteLine("Partitions: " + partitions);

                // Expected value first, actual second, so failure messages read correctly.
                Assert.AreEqual(2, taken.Length);
                Assert.AreEqual(numPartitions, partitions);

                // Span used for the expected sum: count * slideDuration while the window is still
                // filling, capped at windowDuration once count exceeds windowDuration / slideDuration.
                var coveredSeconds  = count > windowDuration / slideDuration ? windowDuration : count * slideDuration;
                // Number of batch intervals that fit into that span.
                var batchesInWindow = coveredSeconds / (batchIntervalMs / 1000);

                foreach (Tuple<int, int> sum in taken)
                {
                    Console.WriteLine("Key: {0}, Value: {1}", sum.Item1, sum.Item2);

                    // One batch contributes 2450 to the even key (0 + 2 + ... + 98)
                    // and 2500 to the odd key (1 + 3 + ... + 99).
                    var expectedSum = batchesInWindow * (sum.Item1 == 0 ? 2450 : 2500);
                    Assert.AreEqual(expectedSum, sum.Item2);
                }
            });

            ssc.Start();
            ssc.AwaitTermination();
        }
示例#4
0
        /// <summary>
        /// Checks the <c>ConstantInputDStream</c> constructor: a null RDD must raise
        /// <see cref="ArgumentNullException"/>, and a valid RDD must produce a stream
        /// whose <c>streamingContext</c> is the context it was constructed with.
        /// </summary>
        public void TestConstantInputDStream()
        {
            var sparkContext = new SparkContext("", "");
            var seedRdd      = sparkContext.Parallelize(Enumerable.Range(0, 10), 1);
            var context      = new StreamingContext(sparkContext, 1);

            // Null seed RDD: construction is expected to fail.
            Assert.Throws<ArgumentNullException>(() =>
            {
                new ConstantInputDStream<int>(null, context);
            });

            // Valid seed RDD: construction succeeds and the stream keeps its context.
            var stream = new ConstantInputDStream<int>(seedRdd, context);
            Assert.IsNotNull(stream);
            Assert.AreEqual(context, stream.streamingContext);
        }
示例#5
0
        /// <summary>
        /// Sample that wraps a seed RDD in a <c>ConstantInputDStream</c> and asserts that
        /// every RDD handed to <c>ForeachRDD</c> has the seed's element count and partition count.
        /// </summary>
        /// <remarks>
        /// Starts the streaming context and then blocks in <c>AwaitTermination</c>.
        /// </remarks>
        internal static void DStreamConstantDStreamSample()
        {
            // Single source of truth for both the seed RDD and the assertions below,
            // so the two cannot drift apart.
            const int count = 100;
            const int partitions = 2;

            var sc = SparkCLRSamples.SparkContext;
            // NOTE(review): 2000L is presumably a batch interval in milliseconds; confirm
            // against the StreamingContext constructor in use.
            var ssc = new StreamingContext(sc, 2000L);

            // create the RDD that the constant stream is built from
            var seedRDD = sc.Parallelize(Enumerable.Range(0, count), partitions);
            var dstream = new ConstantInputDStream<int>(seedRDD, ssc);

            dstream.ForeachRDD((time, rdd) =>
            {
                long batchCount = rdd.Count();
                int numPartitions = rdd.GetNumPartitions();

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Count: " + batchCount);
                Console.WriteLine("Partitions: " + numPartitions);

                // Each batch is expected to match the seed RDD exactly.
                Assert.AreEqual(count, batchCount);
                Assert.AreEqual(partitions, numPartitions);
            });

            ssc.Start();
            ssc.AwaitTermination();
        }
示例#6
0
        /// <summary>
        /// Sample that keys the numbers 0..99 by <c>n % numPartitions</c>, sums them per key with
        /// <c>ReduceByKeyAndWindow</c>, and asserts the per-key sums observed in every batch.
        /// </summary>
        /// <remarks>
        /// Uses <c>count</c> and <c>slideDuration</c>, which are declared outside this method
        /// (not visible here). Starts the streaming context and then blocks in
        /// <c>AwaitTermination</c>.
        /// </remarks>
        private static void DStreamReduceByKeyAndWindowSample()
        {
            // Reset the batch counter that the ForeachRDD callback increments.
            count = 0;

            const long batchIntervalMs = 2000; // batch interval is in milliseconds
            const int windowDuration = 26;     // window duration in seconds
            const int numPartitions = 2;

            var sc = SparkCLRSamples.SparkContext;
            var ssc = new StreamingContext(sc, batchIntervalMs);

            // create the RDD
            var seedRDD = sc.Parallelize(Enumerable.Range(0, 100), numPartitions);
            var numbers = new ConstantInputDStream<int>(seedRDD, ssc);

            // Key 0 collects the even numbers, key 1 the odd numbers (numPartitions is 2).
            var pairs = numbers.Map(n => new KeyValuePair<int, int>(n % numPartitions, n));

            // Addition is the reduce function; subtraction is supplied as its counterpart
            // (presumably the inverse used when batches leave the window - confirm against the API).
            var reduced = pairs.ReduceByKeyAndWindow(
                    (int x, int y) => (x + y),
                    (int x, int y) => (x - y),
                    windowDuration,
                    slideDuration,
                    numPartitions
                );

            reduced.ForeachRDD((time, rdd) =>
            {
                count++;
                var taken = rdd.Collect();
                int partitions = rdd.GetNumPartitions();

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Batch: " + count);
                Console.WriteLine("Count: " + taken.Length);
                Console.WriteLine("Partitions: " + partitions);

                // Expected value first, actual second, so failure messages read correctly.
                Assert.AreEqual(2, taken.Length);
                Assert.AreEqual(numPartitions, partitions);

                // Span used for the expected sum: count * slideDuration while the window is still
                // filling, capped at windowDuration once count exceeds windowDuration / slideDuration.
                var coveredSeconds = count > windowDuration / slideDuration ? windowDuration : count * slideDuration;
                // Number of batch intervals that fit into that span.
                var batchesInWindow = coveredSeconds / (batchIntervalMs / 1000);

                foreach (KeyValuePair<int, int> sum in taken)
                {
                    Console.WriteLine("Key: {0}, Value: {1}", sum.Key, sum.Value);

                    // One batch contributes 2450 to the even key (0 + 2 + ... + 98)
                    // and 2500 to the odd key (1 + 3 + ... + 99).
                    var expectedSum = batchesInWindow * (sum.Key == 0 ? 2450 : 2500);
                    Assert.AreEqual(expectedSum, sum.Value);
                }
            });

            ssc.Start();
            ssc.AwaitTermination();
        }