public void TestStreamingContext() { var ssc = new StreamingContext(new SparkContext("", ""), 1); Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy)); ssc.Start(); ssc.Remember(1); ssc.Checkpoint(Path.GetTempPath()); var textFile = ssc.TextFileStream(Path.GetTempPath()); Assert.IsNotNull(textFile.DStreamProxy); var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345); Assert.IsNotNull(socketStream.DStreamProxy); var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>()); Assert.IsNotNull(kafkaStream.DStreamProxy); var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>()); Assert.IsNotNull(directKafkaStream.DStreamProxy); var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStreamWithRepartition(ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>(), 10); Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy); var union = ssc.Union(textFile, socketStream); Assert.IsNotNull(union.DStreamProxy); ssc.AwaitTermination(); ssc.Stop(); }
public void TestStreamingContext() { var ssc = new StreamingContext(new SparkContext("", ""), 1000); Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy)); ssc.Start(); ssc.Remember(1000); ssc.Checkpoint(Path.GetTempPath()); var textFile = ssc.TextFileStream(Path.GetTempPath()); Assert.IsNotNull(textFile.DStreamProxy); var socketStream = ssc.SocketTextStream("127.0.0.1", 12345); Assert.IsNotNull(socketStream.DStreamProxy); var kafkaStream = ssc.KafkaStream("127.0.0.1:2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>()); Assert.IsNotNull(kafkaStream.DStreamProxy); var directKafkaStream = ssc.DirectKafkaStream(new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>()); Assert.IsNotNull(directKafkaStream.DStreamProxy); var union = ssc.Union(textFile, socketStream); Assert.IsNotNull(union.DStreamProxy); ssc.AwaitTermination(); ssc.Stop(); }
internal static void DStreamConstantDStreamSample()
{
    var sc = SparkCLRSamples.SparkContext;
    var ssc = new StreamingContext(sc, 2000L); // batch interval is in milliseconds

    const int count = 100;
    const int partitions = 2;

    // create the seed RDD that the ConstantInputDStream replays on every batch
    var seedRDD = sc.Parallelize(Enumerable.Range(0, count), partitions);
    var dstream = new ConstantInputDStream<int>(seedRDD, ssc);

    dstream.ForeachRDD((time, rdd) =>
    {
        long batchCount = rdd.Count();
        int numPartitions = rdd.GetNumPartitions();

        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Time: {0}", time);
        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Count: " + batchCount);
        Console.WriteLine("Partitions: " + numPartitions);

        // every batch replays the same 100-element, 2-partition seed RDD
        Assert.AreEqual(count, batchCount);
        Assert.AreEqual(partitions, numPartitions);
    });

    ssc.Start();
    ssc.AwaitTermination();
}
private static void DStreamReduceByKeyAndWindowSample()
{
    count = 0; // 'count' is a static int field on the containing class that tracks processed batches

    const long batchIntervalMs = 2000; // batch interval is in milliseconds
    const int windowDuration = 26;     // window duration in seconds
    const int slideDuration = 6;       // slide duration in seconds (assumed value; declared outside this snippet in the original)
    const int numPartitions = 2;

    var sc = SparkCLRSamples.SparkContext;
    var ssc = new StreamingContext(sc, batchIntervalMs);

    // create the seed RDD that the ConstantInputDStream replays on every batch
    var seedRDD = sc.Parallelize(Enumerable.Range(0, 100), numPartitions);
    var numbers = new ConstantInputDStream<int>(seedRDD, ssc);
    var pairs = numbers.Map(n => new KeyValuePair<int, int>(n % numPartitions, n));
    var reduced = pairs.ReduceByKeyAndWindow(
        (int x, int y) => x + y, // associative reduce
        (int x, int y) => x - y, // inverse reduce, applied as old batches leave the window
        windowDuration,
        slideDuration,
        numPartitions);

    reduced.ForeachRDD((time, rdd) =>
    {
        count++;
        var taken = rdd.Collect();
        int partitions = rdd.GetNumPartitions();

        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Time: {0}", time);
        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Batch: " + count);
        Console.WriteLine("Count: " + taken.Length);
        Console.WriteLine("Partitions: " + partitions);

        Assert.AreEqual(2, taken.Length);
        Assert.AreEqual(numPartitions, partitions);

        foreach (object record in taken)
        {
            KeyValuePair<int, int> sum = (KeyValuePair<int, int>)record;
            Console.WriteLine("Key: {0}, Value: {1}", sum.Key, sum.Value);
            // once the window is full (after windowDuration / slideDuration slides), the
            // even/odd sums stabilize at (windowDuration / batch seconds) * 2450 and * 2500
            Assert.AreEqual(
                (count > windowDuration / slideDuration ? windowDuration : count * slideDuration)
                    / (batchIntervalMs / 1000) * (sum.Key == 0 ? 2450 : 2500),
                sum.Value);
        }
    });

    ssc.Start();
    ssc.AwaitTermination();
}
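// The expected values in the assertion above follow from simple arithmetic over the
// seed range: keys are n % 2, so key 0 collects the even numbers in [0, 100) and
// key 1 the odd ones, and a full 26-second window over 2-second batches holds 13
// batches. A minimal standalone sketch verifying those constants (the method name
// is hypothetical, not part of the original samples):
internal static void VerifyWindowedSumConstants()
{
    var evenSum = Enumerable.Range(0, 100).Where(n => n % 2 == 0).Sum(); // 0 + 2 + ... + 98 = 2450
    var oddSum = Enumerable.Range(0, 100).Where(n => n % 2 == 1).Sum();  // 1 + 3 + ... + 99 = 2500
    var batchesPerFullWindow = 26 / 2;                                   // windowDuration / batch seconds = 13
    Console.WriteLine("steady-state sums: {0}, {1}",
        batchesPerFullWindow * evenSum,  // 13 * 2450 for key 0
        batchesPerFullWindow * oddSum);  // 13 * 2500 for key 1
}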
internal static void DStreamCSharpInputSample()
{
    const int numPartitions = 5;

    var sc = SparkCLRSamples.SparkContext;
    var ssc = new StreamingContext(sc, 2000L); // batch interval is in milliseconds

    // the user-supplied function is invoked for each partition id on every batch
    // and returns that partition's records
    var inputDStream = CSharpInputDStreamUtils.CreateStream<string>(
        ssc,
        numPartitions,
        (double time, int pid) =>
        {
            var list = new List<string> { string.Format("PluggableInputDStream-{0}-{1}", pid, time) };
            return list.AsEnumerable();
        });

    inputDStream.ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        int partitions = rdd.GetNumPartitions();

        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Time: {0}", time);
        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Count: " + taken.Length);
        Console.WriteLine("Partitions: " + partitions);

        // one record per partition: "PluggableInputDStream-0-<time>" ... "PluggableInputDStream-4-<time>"
        foreach (object record in taken)
        {
            Console.WriteLine(record);
        }
    });

    ssc.Start();
    ssc.AwaitTermination();
}
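// A minimal sketch showing that the same CreateStream overload generalizes over the
// element type: here each of two partitions emits ten ints per batch. The method
// name and constants are hypothetical, not part of the original samples.
internal static void DStreamCSharpInputNumbersSketch()
{
    var ssc = new StreamingContext(SparkCLRSamples.SparkContext, 2000L); // 2-second batches
    var numbers = CSharpInputDStreamUtils.CreateStream<int>(
        ssc,
        2, // number of partitions
        (double time, int pid) => Enumerable.Range(pid * 10, 10)); // ten ints per partition per batch
    numbers.ForeachRDD((time, rdd) => Console.WriteLine("batch count: {0}", rdd.Count())); // expect 20
    ssc.Start();
    ssc.AwaitTermination();
}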
public void TestStreamingContext() { var ssc = new StreamingContext(new SparkContext("", ""), 1000L); Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy)); ssc.Start(); ssc.Remember(1000L); ssc.Checkpoint(Path.GetTempPath()); var textFile = ssc.TextFileStream(Path.GetTempPath()); Assert.IsNotNull(textFile.DStreamProxy); var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345); Assert.IsNotNull(socketStream.DStreamProxy); var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>()); Assert.IsNotNull(kafkaStream.DStreamProxy); var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>()); Assert.IsNotNull(directKafkaStream.DStreamProxy); ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numPartitions.testTopic3", "10"); var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>()); Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy); var directKafkaStreamWithRepartitionAndReadFunc = KafkaUtils.CreateDirectStream( ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>(), (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; }); Assert.IsNotNull(directKafkaStreamWithRepartitionAndReadFunc); ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numReceivers", "10"); var directKafkaReceiver = KafkaUtils.CreateDirectStream( ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>(), (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; }); Assert.IsNotNull(directKafkaReceiver.DStreamProxy); var union = ssc.Union(textFile, socketStream); Assert.IsNotNull(union.DStreamProxy); ssc.AwaitTermination(); ssc.Stop(); }