internal static void DStreamConstantDStreamSample()
{
    var sc = SparkCLRSamples.SparkContext;
    var ssc = new StreamingContext(sc, 2000L); // batch interval in milliseconds

    const int count = 100;
    const int partitions = 2;

    // create the seed RDD that the ConstantInputDStream replays on every batch
    var seedRDD = sc.Parallelize(Enumerable.Range(0, count), partitions);
    var dstream = new ConstantInputDStream<int>(seedRDD, ssc);

    dstream.ForeachRDD((time, rdd) =>
    {
        long batchCount = rdd.Count();
        int numPartitions = rdd.GetNumPartitions();

        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Time: {0}", time);
        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Count: " + batchCount);
        Console.WriteLine("Partitions: " + numPartitions);

        // every batch must contain the unchanged seed RDD
        Assert.AreEqual(count, batchCount);
        Assert.AreEqual(partitions, numPartitions);
    });

    ssc.Start();
    ssc.AwaitTermination();
}
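// A ConstantInputDStream never exhausts its input, so AwaitTermination() above
// blocks until the job is killed externally. A minimal sketch of bounding the
// sample's runtime instead -- assuming the Mobius build in use exposes
// AwaitTerminationOrTimeout (mirroring Spark's awaitTerminationOrTimeout, with
// the timeout in milliseconds). The method name DStreamConstantDStreamTimeoutSample
// and the 10000 ms value are illustrative only, not part of the Mobius samples.
internal static void DStreamConstantDStreamTimeoutSample(StreamingContext ssc)
{
    ssc.Start();
    ssc.AwaitTerminationOrTimeout(10000); // wait at most 10 seconds for termination
    ssc.Stop();                           // then shut the streaming context down
}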
public void TestConstantInputDStream()
{
    var sc = new SparkContext("", "");
    var rdd = sc.Parallelize(Enumerable.Range(0, 10), 1);
    var ssc = new StreamingContext(sc, 1000L);

    // test when rdd is null
    Assert.Throws<ArgumentNullException>(() => new ConstantInputDStream<int>(null, ssc));

    var constantInputDStream = new ConstantInputDStream<int>(rdd, ssc);
    Assert.IsNotNull(constantInputDStream);
    Assert.AreEqual(ssc, constantInputDStream.streamingContext);
}
private static void DStreamReduceByKeyAndWindowSample()
{
    // 'count' (batch counter) and 'slideDuration' (slide interval in seconds)
    // are class-level members defined outside this snippet
    count = 0;

    const long batchIntervalMs = 2000; // batch interval in milliseconds
    const int windowDuration = 26;     // window duration in seconds
    const int numPartitions = 2;

    var sc = SparkCLRSamples.SparkContext;
    var ssc = new StreamingContext(sc, batchIntervalMs);

    // create the seed RDD; keying by n % 2 puts the evens under key 0 and the odds under key 1
    var seedRDD = sc.Parallelize(Enumerable.Range(0, 100), numPartitions);
    var numbers = new ConstantInputDStream<int>(seedRDD, ssc);
    var pairs = numbers.Map(n => new Tuple<int, int>(n % numPartitions, n));
    var reduced = pairs.ReduceByKeyAndWindow(
        (int x, int y) => x + y, // add values entering the window
        (int x, int y) => x - y, // inverse function: subtract values leaving the window
        windowDuration,
        slideDuration,
        numPartitions
    );

    reduced.ForeachRDD((time, rdd) =>
    {
        count++;
        var taken = rdd.Collect();
        int partitions = rdd.GetNumPartitions();

        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Time: {0}", time);
        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Batch: " + count);
        Console.WriteLine("Count: " + taken.Length);
        Console.WriteLine("Partitions: " + partitions);

        Assert.AreEqual(2, taken.Length);
        Assert.AreEqual(numPartitions, partitions);

        foreach (object record in taken)
        {
            Tuple<int, int> sum = (Tuple<int, int>)record;
            Console.WriteLine("Key: {0}, Value: {1}", sum.Item1, sum.Item2);

            // once enough batches have elapsed to fill the window, the per-key sums
            // stabilize at windowDuration / (batchIntervalMs / 1000) * 2450 for the
            // even key and * 2500 for the odd key
            Assert.AreEqual(
                (count > windowDuration / slideDuration ? windowDuration : count * slideDuration)
                    / (batchIntervalMs / 1000) * (sum.Item1 == 0 ? 2450 : 2500),
                sum.Item2);
        }
    });

    ssc.Start();
    ssc.AwaitTermination();
}
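// Sanity check for the 2450/2500 constants asserted above: they are the parity
// sums of 0..99. Standalone and illustrative only; the method name
// VerifyWindowBaseSums is not part of the Mobius sample.
internal static void VerifyWindowBaseSums()
{
    // key 0 collects the even numbers in 0..99, key 1 the odd numbers
    var sums = Enumerable.Range(0, 100)
        .GroupBy(n => n % 2)
        .ToDictionary(g => g.Key, g => g.Sum());

    Assert.AreEqual(2450, sums[0]); // 0 + 2 + ... + 98 = 49 * 50
    Assert.AreEqual(2500, sums[1]); // 1 + 3 + ... + 99 = 50 * 50
}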