/// <summary>
/// Streaming sample: builds (or recovers from checkpoint) a DStream pipeline over a
/// text-file directory, counts words, self-joins the counts, tracks running state via
/// UpdateStateByKey, and prints up to 10 records per batch until ~100 batches have run.
/// </summary>
internal static void DStreamTextFileSamples()
{
    count = 0;
    string dataDir = SparkCLRSamples.Configuration.SampleDataLocation;
    string checkpointDir = Path.Combine(dataDir, "checkpoint");
    SparkContext sparkContext = SparkCLRSamples.SparkContext;
    var broadcastVar = sparkContext.Broadcast<int>(0);

    // GetOrCreate either recovers the context from the checkpoint directory or
    // invokes the factory below to build a fresh one.
    StreamingContext ssc = StreamingContext.GetOrCreate(checkpointDir, () =>
    {
        StreamingContext streamingContext = new StreamingContext(sparkContext, 2000);
        streamingContext.Checkpoint(checkpointDir);

        var lines = streamingContext.TextFileStream(Path.Combine(dataDir, "test"));
        lines = streamingContext.Union(lines, lines);
        var words = lines.FlatMap(l => l.Split(' '));
        var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

        // since operations like ReduceByKey, Join and UpdateStateByKey are
        // separate dstream transformations defined in CSharpDStream.scala
        // an extra CSharpRDD is introduced in between these operations
        var wordCounts = pairs.ReduceByKey((x, y) => x + y);
        var joined = wordCounts.Join(wordCounts, 2);
        var state = joined.UpdateStateByKey<string, Tuple<int, int>, int>(
            new UpdateStateHelper(broadcastVar).Execute);

        state.ForeachRDD((time, rdd) =>
        {
            // there's chance rdd.Take conflicts with ssc.Stop
            if (stopFileServer)
            {
                return;
            }

            object[] taken = rdd.Take(10);
            Console.WriteLine("-------------------------------------------");
            Console.WriteLine("Time: {0}", time);
            Console.WriteLine("-------------------------------------------");
            foreach (object record in taken)
            {
                Console.WriteLine(record);
            }
            Console.WriteLine();

            // Stop after roughly 100 batches have been observed.
            stopFileServer = count++ > 100;
        });

        return streamingContext;
    });

    ssc.Start();
    StartFileServer(dataDir, "words.txt", 100);
    ssc.AwaitTermination();
    ssc.Stop();
}
/// <summary>
/// Verifies that SparkContext.Broadcast creates a broadcast variable whose value and
/// broadcast id come back intact, and that the proxy's ReadBroadcastFromFile is
/// invoked exactly once.
/// </summary>
public void TestBroadcast()
{
    // Arrange
    long expectedBroadcastId = 100L;
    var proxyMock = new Mock<ISparkContextProxy>();
    // Moq binds the out parameter to the current value of expectedBroadcastId.
    proxyMock.Setup(p => p.ReadBroadcastFromFile(It.IsAny<string>(), out expectedBroadcastId));
    var sparkContext = new SparkContext(proxyMock.Object, null);
    const string expectedValue = "broadcastvar1";

    // Act
    var broadcastVar = sparkContext.Broadcast(expectedValue);

    // Assert
    Assert.IsNotNull(broadcastVar);
    Assert.AreEqual(expectedValue, broadcastVar.Value);
    Assert.AreEqual(expectedBroadcastId, broadcastVar.broadcastId);
    proxyMock.Verify(
        p => p.ReadBroadcastFromFile(It.IsAny<string>(), out expectedBroadcastId),
        Times.Once);
}
/// <summary>
/// Streaming sample: builds (or recovers from checkpoint) a text-file DStream pipeline
/// that counts words, performs a windowed self-join, and maintains running state seeded
/// from an initial-state RDD; the first processed batch is asserted against expected
/// per-word counts and then the file server is signalled to stop.
/// </summary>
internal static void DStreamTextFileSample()
{
    count = 0;
    string dataDir = SparkCLRSamples.Configuration.SampleDataLocation;
    string checkpointDir = Path.Combine(dataDir, "checkpoint");
    SparkContext sparkContext = SparkCLRSamples.SparkContext;
    var broadcastVar = sparkContext.Broadcast<int>(0);

    // GetOrCreate either recovers the context from the checkpoint directory or
    // invokes the factory below to build a fresh one.
    StreamingContext ssc = StreamingContext.GetOrCreate(checkpointDir, () =>
    {
        StreamingContext streamingContext = new StreamingContext(sparkContext, 2000L); // batch interval is in milliseconds
        streamingContext.Checkpoint(checkpointDir);

        var lines = streamingContext.TextFileStream(Path.Combine(dataDir, "test"));
        lines = streamingContext.Union(lines, lines);
        var words = lines.FlatMap(l => l.Split(' '));
        var pairs = words.Map(w => new Tuple<string, int>(w, 1));

        // since operations like ReduceByKey, Join and UpdateStateByKey are
        // separate dstream transformations defined in CSharpDStream.scala
        // an extra CSharpRDD is introduced in between these operations
        var wordCounts = pairs.ReduceByKey((x, y) => x + y);
        var joined = wordCounts.Window(2, 2).Join(wordCounts, 2);

        // Seed UpdateStateByKey with pre-existing counts for "AAA" and "BBB".
        var initialStateRdd = sparkContext.Parallelize(new[]
        {
            new Tuple<string, int>("AAA", 88),
            new Tuple<string, int>("BBB", 88)
        });
        var state = joined.UpdateStateByKey(new UpdateStateHelper(broadcastVar).Execute, initialStateRdd);

        state.ForeachRDD((time, rdd) =>
        {
            // there's chance rdd.Take conflicts with ssc.Stop
            if (stopFileServer)
            {
                return;
            }

            object[] taken = rdd.Take(10);
            Console.WriteLine("-------------------------------------------");
            Console.WriteLine("Time: {0}", time);
            Console.WriteLine("-------------------------------------------");
            foreach (object record in taken)
            {
                Console.WriteLine(record);

                var countByWord = (Tuple<string, int>)record;
                // Words from the sample sentence accumulate to 92; the seeded keys stay at 88.
                bool isSampleWord = countByWord.Item1 == "The"
                    || countByWord.Item1 == "lazy"
                    || countByWord.Item1 == "dog";
                Assert.AreEqual(countByWord.Item2, isSampleWord ? 92 : 88);
            }
            Console.WriteLine();

            stopFileServer = true;
        });

        return streamingContext;
    });

    StartFileServer(ssc, dataDir, "words.txt");
    ssc.Start();
    ssc.AwaitTermination();
}