// Exercises the map/reduce-style DStream operations (FlatMap, Filter, Repartition,
// Count, CountByValue, the windowed counts, Union and Glom) on a StreamingContext whose
// proxy is expected to be a MockStreamingContextProxy. The record counts asserted below
// depend on the data supplied by that proxy, which is defined outside this method.
public void TestDStreamMapReduce()
{
    var ssc = new StreamingContext(new SparkContext("", ""), 1);
    Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

    var lines = ssc.TextFileStream(Path.GetTempPath());
    Assert.IsNotNull(lines.DStreamProxy);

    var words = lines.FlatMap(l => l.Split(' ')).Filter(w => w != "The").Repartition(1);

    // These calls only need to complete; their return values are not inspected here.
    words.Slice(DateTime.MinValue, DateTime.MaxValue);
    words.Cache();
    words.Checkpoint(1);
    words.Window(1, 1);

    // Assert.AreEqual takes (expected, actual): the expected constant goes first so
    // that a failure message reports the two values the right way round.
    words.Count().ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(1, taken.Length);
        Assert.AreEqual(178, (int)taken[0]);
    });

    words.CountByValue().ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(8, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, long> countByWord = (KeyValuePair<string, long>)record;
            Assert.AreEqual(
                countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22,
                countByWord.Value);
        }
    });

    words.CountByValueAndWindow(1, 1).ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(8, taken[0]);
    });

    words.CountByWindow(1).ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(1, taken.Length);
        Assert.AreEqual(356, (int)taken[0]);
    });

    words.Union(words).ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(356, taken.Length);
    });

    words.Glom().ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(1, taken.Length);

        // Check the cast result explicitly so a wrong element type fails as an
        // assertion rather than as a NullReferenceException.
        var partition = taken[0] as string[];
        Assert.IsNotNull(partition);
        Assert.AreEqual(178, partition.Length);
    });
}
// Smoke test for StreamingContext: starts a context, then creates text-file, socket,
// Kafka (receiver, direct, direct-with-repartition) and union streams, checking that
// each one exposes a non-null DStreamProxy before awaiting termination and stopping.
public void TestStreamingContext()
{
    var sparkContext = new SparkContext("", "");
    var ssc = new StreamingContext(sparkContext, 1);
    Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

    ssc.Start();
    ssc.Remember(1);
    ssc.Checkpoint(Path.GetTempPath());

    // File- and socket-backed streams.
    var fileStream = ssc.TextFileStream(Path.GetTempPath());
    Assert.IsNotNull(fileStream.DStreamProxy);

    string loopbackHost = IPAddress.Loopback.ToString();
    var socketStream = ssc.SocketTextStream(loopbackHost, 12345);
    Assert.IsNotNull(socketStream.DStreamProxy);

    // Kafka streams.
    var receiverStream = KafkaUtils.CreateStream(
        ssc,
        IPAddress.Loopback + ":2181",
        "testGroupId",
        new Dictionary<string, int> { { "testTopic1", 1 } },
        new Dictionary<string, string>());
    Assert.IsNotNull(receiverStream.DStreamProxy);

    var directStream = KafkaUtils.CreateDirectStream(
        ssc,
        new List<string> { "testTopic2" },
        new Dictionary<string, string>(),
        new Dictionary<string, long>());
    Assert.IsNotNull(directStream.DStreamProxy);

    var repartitionedDirectStream = KafkaUtils.CreateDirectStreamWithRepartition(
        ssc,
        new List<string> { "testTopic3" },
        new Dictionary<string, string>(),
        new Dictionary<string, long>(),
        10);
    Assert.IsNotNull(repartitionedDirectStream.DStreamProxy);

    // Union of two existing streams.
    var merged = ssc.Union(fileStream, socketStream);
    Assert.IsNotNull(merged.DStreamProxy);

    ssc.AwaitTermination();
    ssc.Stop();
}
// Streaming word-count sample over text files dropped into "<sample data>/test".
// The pipeline (union, word split, ReduceByKey, windowed Join, UpdateStateByKey) is
// built inside the GetOrCreate factory, and the first batch seen by the ForeachRDD
// callback is printed and checked before the stopFileServer flag is set.
internal static void DStreamTextFileSample()
{
    count = 0;

    string directory = SparkCLRSamples.Configuration.SampleDataLocation;
    string checkpointPath = Path.Combine(directory, "checkpoint");

    SparkContext sc = SparkCLRSamples.SparkContext;
    var b = sc.Broadcast<int>(0);

    StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
        () =>
        {
            StreamingContext context = new StreamingContext(sc, 2);
            context.Checkpoint(checkpointPath);

            var lines = context.TextFileStream(Path.Combine(directory, "test"));
            lines = context.Union(lines, lines);
            var words = lines.FlatMap(l => l.Split(' '));
            var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

            // Since operations like ReduceByKey, Join and UpdateStateByKey are
            // separate dstream transformations defined in CSharpDStream.scala,
            // an extra CSharpRDD is introduced in between these operations.
            var wordCounts = pairs.ReduceByKey((x, y) => x + y);
            var join = wordCounts.Window(2, 2).Join(wordCounts, 2);
            var state = join.UpdateStateByKey<string, Tuple<int, int>, int>(new UpdateStateHelper(b).Execute);

            state.ForeachRDD((time, rdd) =>
            {
                // There is a chance that rdd.Take conflicts with ssc.Stop.
                if (stopFileServer)
                {
                    return;
                }

                object[] taken = rdd.Take(10);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");

                foreach (object record in taken)
                {
                    Console.WriteLine(record);

                    var countByWord = (KeyValuePair<string, int>)record;

                    // Assert.AreEqual takes (expected, actual); the expected count goes
                    // first so a failure message labels the two values correctly.
                    Assert.AreEqual(
                        countByWord.Key == "The" || countByWord.Key == "lazy" || countByWord.Key == "dog" ? 92 : 88,
                        countByWord.Value);
                }

                Console.WriteLine();

                stopFileServer = true;
            });

            return context;
        });

    StartFileServer(ssc, directory, "words.txt");

    ssc.Start();
    ssc.AwaitTermination();
}
// Smoke test for StreamingContext using the instance-level Kafka factory methods:
// every created stream (text file, socket, Kafka, direct Kafka, union) must expose a
// non-null DStreamProxy.
public void TestStreamingContext()
{
    var sparkContext = new SparkContext("", "");
    var ssc = new StreamingContext(sparkContext, 1000);
    Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

    ssc.Start();
    ssc.Remember(1000);
    ssc.Checkpoint(Path.GetTempPath());

    var fileStream = ssc.TextFileStream(Path.GetTempPath());
    Assert.IsNotNull(fileStream.DStreamProxy);

    var socketStream = ssc.SocketTextStream("127.0.0.1", 12345);
    Assert.IsNotNull(socketStream.DStreamProxy);

    var receiverStream = ssc.KafkaStream(
        "127.0.0.1:2181",
        "testGroupId",
        new Dictionary<string, int> { { "testTopic1", 1 } },
        new Dictionary<string, string>());
    Assert.IsNotNull(receiverStream.DStreamProxy);

    var directStream = ssc.DirectKafkaStream(
        new List<string> { "testTopic2" },
        new Dictionary<string, string>(),
        new Dictionary<string, long>());
    Assert.IsNotNull(directStream.DStreamProxy);

    var merged = ssc.Union(fileStream, socketStream);
    Assert.IsNotNull(merged.DStreamProxy);

    ssc.AwaitTermination();
    ssc.Stop();
}
// Checkpointed streaming word-count sample. The pipeline is built inside the
// GetOrCreate factory; each batch prints up to ten records until the shared
// stopFileServer flag is set (once the batch counter has passed 100).
internal static void DStreamTextFileSamples()
{
    count = 0;

    string directory = SparkCLRSamples.Configuration.SampleDataLocation;
    string checkpointPath = Path.Combine(directory, "checkpoint");

    StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
        () =>
        {
            SparkContext sc = SparkCLRSamples.SparkContext;
            StreamingContext context = new StreamingContext(sc, 2000);
            context.Checkpoint(checkpointPath);

            var source = context.TextFileStream(Path.Combine(directory, "test"));
            var lines = context.Union(source, source);
            var pairs = lines
                .FlatMap(l => l.Split(' '))
                .Map(w => new KeyValuePair<string, int>(w, 1));

            // ReduceByKey, Join and UpdateStateByKey are separate dstream
            // transformations defined in CSharpDStream.scala, so an extra
            // CSharpRDD is introduced in between these operations.
            var wordCounts = pairs.ReduceByKey((x, y) => x + y);
            var join = wordCounts.Join(wordCounts, 2);
            var state = join.UpdateStateByKey<string, Tuple<int, int>, int>((vs, s) => vs.Sum(x => x.Item1 + x.Item2) + s);

            state.ForeachRDD((time, rdd) =>
            {
                // rdd.Take may conflict with ssc.Stop, so skip once stopping was requested.
                if (stopFileServer)
                {
                    return;
                }

                object[] taken = rdd.Take(10);

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                for (int i = 0; i < taken.Length; i++)
                {
                    Console.WriteLine(taken[i]);
                }
                Console.WriteLine();

                stopFileServer = count++ > 100;
            });

            return context;
        });

    ssc.Start();

    StartFileServer(directory, "words.txt", 100);

    ssc.AwaitTermination();
    ssc.Stop();
}
// Exercises pair-DStream transformations (PartitionBy + ReduceByKey, GroupByKey and
// ReduceByKeyAndWindow) on a StreamingContext expected to be backed by
// MockStreamingContextProxy. The per-word counts asserted below depend on the data
// that proxy supplies, which is defined outside this method.
public void TestDStreamTransform()
{
    var ssc = new StreamingContext(new SparkContext("", ""), 1000);
    Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

    var lines = ssc.TextFileStream(Path.GetTempPath());
    Assert.IsNotNull(lines.DStreamProxy);

    var words = lines.FlatMap(l => l.Split(' '));
    var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

    // Assert.AreEqual takes (expected, actual): expected values are passed first so
    // that failure messages label the two values correctly.
    var wordCounts = pairs.PartitionBy().ReduceByKey((x, y) => x + y);
    wordCounts.ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(9, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, int> countByWord = (KeyValuePair<string, int>)record;
            Assert.AreEqual(
                countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22,
                countByWord.Value);
        }
    });

    var wordLists = pairs.GroupByKey();
    wordLists.ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(9, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, List<int>> countByWord = (KeyValuePair<string, List<int>>)record;
            Assert.AreEqual(
                countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22,
                countByWord.Value.Count);
        }
    });

    var wordCountsByWindow = pairs.ReduceByKeyAndWindow((x, y) => x + y, (x, y) => x - y, 1);
    wordCountsByWindow.ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(9, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, int> countByWord = (KeyValuePair<string, int>)record;
            Assert.AreEqual(
                countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 46 : 44,
                countByWord.Value);
        }
    });
}
// Moq-based variant of the DStream transform test. The mocked proxies deserialize the
// function bytes handed to CallForeachRDD / CreateCSharpDStream and invoke them
// immediately against an RDD built on _mockRddProxy, so the FlatMap -> Map ->
// ReduceByKey chain and the ForeachRDD assertions run inside this method.
public void TestDStreamTransform_Moq()
{
    // Arrange
    var mockDStreamProxy = new Mock<IDStreamProxy>();
    _mockStreamingContextProxy
        .Setup(m => m.TextFileStream(It.Is<string>(d => d == Path.GetTempPath())))
        .Returns(mockDStreamProxy.Object);

    mockDStreamProxy
        .Setup(m => m.CallForeachRDD(It.IsAny<byte[]>(), It.IsAny<string>()))
        .Callback<byte[], string>((func, deserializer) =>
        {
            Action<double, RDD<dynamic>> f;

            // Dispose the stream once the delegate has been materialized.
            using (var stream = new MemoryStream(func))
            {
                f = (Action<double, RDD<dynamic>>)new BinaryFormatter().Deserialize(stream);
            }

            f(DateTime.UtcNow.Ticks, new RDD<dynamic>(_mockRddProxy.Object, new SparkContext("", "")));
        });

    // Carries the RDD proxy produced by one transformation into the next one.
    IRDDProxy functionedRddProxy = null;

    mockDStreamProxy.Setup(m => m.AsJavaDStream()).Returns(mockDStreamProxy.Object);

    _mockSparkCLRProxy
        .Setup(m => m.StreamingContextProxy.CreateCSharpDStream(It.IsAny<IDStreamProxy>(), It.IsAny<byte[]>(), It.IsAny<string>()))
        .Returns<IDStreamProxy, byte[], string>((jdstream, func, deserializer) =>
        {
            Func<double, RDD<dynamic>, RDD<dynamic>> f;
            using (var stream = new MemoryStream(func))
            {
                f = (Func<double, RDD<dynamic>, RDD<dynamic>>)new BinaryFormatter().Deserialize(stream);
            }

            RDD<dynamic> rdd = f(
                DateTime.UtcNow.Ticks,
                new RDD<dynamic>(functionedRddProxy ?? _mockRddProxy.Object, new SparkContext("", "")));
            functionedRddProxy = rdd.RddProxy;
            return mockDStreamProxy.Object;
        });

    // Act
    var lines = _streamingContext.TextFileStream(Path.GetTempPath());
    var words = lines.FlatMap(l => l.Split(' '));
    var pairs = words.Map(w => new Tuple<string, int>(w, 1));
    var wordCounts = pairs.ReduceByKey((x, y) => x + y);

    // Assert
    // Assert.AreEqual takes (expected, actual); expected values are passed first.
    wordCounts.ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(9, taken.Length);

        foreach (object record in taken)
        {
            Tuple<string, int> countByWord = (Tuple<string, int>)record;
            Assert.AreEqual(
                countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 23 : 22,
                countByWord.Item2);
        }
    });

    // Use Verify to check that the mocked method was invoked.
    mockDStreamProxy.Verify(m => m.CallForeachRDD(It.IsAny<byte[]>(), It.IsAny<string>()));
}
// Smoke test for StreamingContext and the KafkaUtils factories. Besides the plain
// text-file, socket and Kafka streams it creates direct Kafka streams after setting
// the "spark.mobius.streaming.kafka.*" configuration keys below, with and without a
// read function, and finishes with a union of two streams.
public void TestStreamingContext()
{
    var sparkContext = new SparkContext("", "");
    var ssc = new StreamingContext(sparkContext, 1000L);
    Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

    ssc.Start();
    ssc.Remember(1000L);
    ssc.Checkpoint(Path.GetTempPath());

    var fileStream = ssc.TextFileStream(Path.GetTempPath());
    Assert.IsNotNull(fileStream.DStreamProxy);

    string loopbackHost = IPAddress.Loopback.ToString();
    var socketStream = ssc.SocketTextStream(loopbackHost, 12345);
    Assert.IsNotNull(socketStream.DStreamProxy);

    var receiverStream = KafkaUtils.CreateStream(
        ssc,
        IPAddress.Loopback + ":2181",
        "testGroupId",
        new Dictionary<string, int> { { "testTopic1", 1 } },
        new Dictionary<string, string>());
    Assert.IsNotNull(receiverStream.DStreamProxy);

    var directStream = KafkaUtils.CreateDirectStream(
        ssc,
        new List<string> { "testTopic2" },
        new Dictionary<string, string>(),
        new Dictionary<string, long>());
    Assert.IsNotNull(directStream.DStreamProxy);

    // Direct stream created after a per-topic partition count has been configured.
    ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numPartitions.testTopic3", "10");
    var repartitionedStream = KafkaUtils.CreateDirectStream(
        ssc,
        new List<string> { "testTopic3" },
        new Dictionary<string, string>(),
        new Dictionary<string, long>());
    Assert.IsNotNull(repartitionedStream.DStreamProxy);

    // Same topic again, this time with a pass-through read function. Only the stream
    // object itself is checked here, not its DStreamProxy.
    var repartitionedStreamWithReadFunc = KafkaUtils.CreateDirectStream(
        ssc,
        new List<string> { "testTopic3" },
        new Dictionary<string, string>(),
        new Dictionary<string, long>(),
        (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; });
    Assert.IsNotNull(repartitionedStreamWithReadFunc);

    // And once more after the receiver count has been configured.
    ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numReceivers", "10");
    var receiverBackedStream = KafkaUtils.CreateDirectStream(
        ssc,
        new List<string> { "testTopic3" },
        new Dictionary<string, string>(),
        new Dictionary<string, long>(),
        (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; });
    Assert.IsNotNull(receiverBackedStream.DStreamProxy);

    var merged = ssc.Union(fileStream, socketStream);
    Assert.IsNotNull(merged.DStreamProxy);

    ssc.AwaitTermination();
    ssc.Stop();
}
// Streaming word-count sample without GetOrCreate: the checkpoint directory is set on
// the SparkContext, the pipeline is built directly, and the method polls the shared
// stopFileServer flag before stopping the context.
internal static void DStreamTextFileSamples()
{
    SparkContext sc = SparkCLRSamples.SparkContext;
    string directory = SparkCLRSamples.Configuration.SampleDataLocation;
    sc.SetCheckpointDir(directory);

    StreamingContext ssc = new StreamingContext(sc, 2000);

    var pairs = ssc
        .TextFileStream(Path.Combine(directory, "test"))
        .FlatMap(l => l.Split(' '))
        .Map(w => new KeyValuePair<string, int>(w, 1));

    var wordCounts = pairs.ReduceByKey((x, y) => x + y);
    var join = wordCounts.Join(wordCounts, 2);
    var state = join.UpdateStateByKey<string, Tuple<int, int>, int>((vs, s) => vs.Sum(x => x.Item1 + x.Item2) + s);

    state.ForeachRDD((time, rdd) =>
    {
        // rdd.Take may conflict with ssc.Stop, so skip once stopping was requested.
        if (stopFileServer)
        {
            return;
        }

        object[] taken = rdd.Take(10);

        Console.WriteLine("-------------------------------------------");
        Console.WriteLine("Time: {0}", time);
        Console.WriteLine("-------------------------------------------");
        for (int i = 0; i < taken.Length; i++)
        {
            Console.WriteLine(taken[i]);
        }
        Console.WriteLine();

        stopFileServer = count++ > 3;
    });

    ssc.Start();

    StartFileServer(directory, "words.txt", 100);

    // Poll once per second until the ForeachRDD callback sets the stop flag.
    while (!stopFileServer)
    {
        System.Threading.Thread.Sleep(1000);
    }

    // Give ForeachRDD time to complete so that ssc.Stop() can finish gracefully.
    System.Threading.Thread.Sleep(2000);

    ssc.Stop();
}
// Entry point of the HdfsWordCount example.
// args[0] is the checkpoint directory and args[1] the input directory; with fewer
// than two arguments a usage line is printed and the method returns. Otherwise a
// StreamingContext is obtained via GetOrCreate, and each batch prints up to ten
// word-count records.
static void Main(string[] args)
{
    if (args.Length < 2)
    {
        Console.WriteLine("Usage: HdfsWordCount <checkpointDirectory> <inputDirectory>");
        return;
    }

    string checkpointPath = args[0];
    string inputDir = args[1];

    StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
        () =>
        {
            var sparkConf = new SparkConf();
            sparkConf.SetAppName("HdfsWordCount");
            var sc = new SparkContext(sparkConf);

            StreamingContext context = new StreamingContext(sc, 30000);
            context.Checkpoint(checkpointPath);

            var wordCounts = context
                .TextFileStream(inputDir)
                .FlatMap(l => l.Split(' '))
                .Map(w => new KeyValuePair<string, int>(w, 1))
                .ReduceByKey((x, y) => x + y);

            wordCounts.ForeachRDD((time, rdd) =>
            {
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");

                object[] taken = rdd.Take(10);
                for (int i = 0; i < taken.Length; i++)
                {
                    Console.WriteLine(taken[i]);
                }

                Console.WriteLine();
            });

            return context;
        });

    ssc.Start();
    ssc.AwaitTermination();
    ssc.Stop();
}
// Same setup as the StreamingContext smoke test (text-file, socket and union streams),
// but ends with AwaitTerminationOrTimeout(3000) instead of AwaitTermination().
public void TestStreamingAwaitTimeout()
{
    var sparkContext = new SparkContext("", "");
    var ssc = new StreamingContext(sparkContext, 1000L);
    Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

    ssc.Start();
    ssc.Remember(1000L);
    ssc.Checkpoint(Path.GetTempPath());

    var fileStream = ssc.TextFileStream(Path.GetTempPath());
    Assert.IsNotNull(fileStream.DStreamProxy);

    string loopbackHost = IPAddress.Loopback.ToString();
    var socketStream = ssc.SocketTextStream(loopbackHost, 12345);
    Assert.IsNotNull(socketStream.DStreamProxy);

    var merged = ssc.Union(fileStream, socketStream);
    Assert.IsNotNull(merged.DStreamProxy);

    ssc.AwaitTerminationOrTimeout(3000);
    ssc.Stop();
}
// MapWithState sample: word counts from "<sample data>/test1" are folded into per-word
// state that starts from two seeded entries and has a 30 second timeout; every batch
// prints the state snapshots.
internal static void DStreamMapWithStateSample()
{
    string directory = SparkCLRSamples.Configuration.SampleDataLocation;
    string checkpointPath = Path.Combine(directory, "checkpoint");

    StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
        () =>
        {
            SparkContext sc = SparkCLRSamples.SparkContext;
            StreamingContext context = new StreamingContext(sc, 10000);
            context.Checkpoint(checkpointPath);

            var source = context.TextFileStream(Path.Combine(directory, "test1"));
            var lines = context.Union(source, source);
            var wordCounts = lines
                .FlatMap(l => l.Split(' '))
                .Map(w => new KeyValuePair<string, int>(w, 1))
                .ReduceByKey((x, y) => x + y);

            // Seed state handed to the StateSpec below.
            var initialState = sc.Parallelize(
                new[]
                {
                    new KeyValuePair<string, int>("NOT_A_WORD", 1024),
                    new KeyValuePair<string, int>("dog", 10000),
                },
                1);

            var stateSpec = new StateSpec<string, int, int, KeyValuePair<string, int>>((word, count, state) =>
            {
                // A timing-out entry is reported with its stored value and left untouched.
                if (state.IsTimingOut())
                {
                    Console.WriteLine("Found timing out word: {0}", word);
                    return new KeyValuePair<string, int>(word, state.Get());
                }

                var previous = state.Exists() ? state.Get() : 0;
                var total = previous + count;

                state.Update(total);
                Console.WriteLine("word: {0}, count: {1}", word, total);

                return new KeyValuePair<string, int>(word, total);
            })
            .NumPartitions(1)
            .InitialState(initialState)
            .Timeout(TimeSpan.FromSeconds(30));

            var snapshots = wordCounts.MapWithState(stateSpec).StateSnapshots();

            snapshots.ForeachRDD((double time, RDD<dynamic> rdd) =>
            {
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Snapshots @ Time: {0}", time);
                Console.WriteLine("-------------------------------------------");

                foreach (KeyValuePair<string, int> entry in rdd.Collect())
                {
                    Console.WriteLine("[{0}, {1}]", entry.Key, entry.Value);
                }

                Console.WriteLine();
            });

            return context;
        });

    ssc.Start();

    StartFileServer(directory, "words.txt", 100);

    ssc.AwaitTermination();
    ssc.Stop();
}
// Exercises GroupByKey -> FlatMapValues -> MapValues -> ReduceByKey and
// UpdateStateByKey on a StreamingContext expected to be backed by
// MockStreamingContextProxy. The per-word counts asserted below depend on the data
// that proxy supplies, which is defined outside this method.
public void TestDStreamUpdateStateByKey()
{
    var ssc = new StreamingContext(new SparkContext("", ""), 1);
    Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

    var lines = ssc.TextFileStream(Path.GetTempPath());
    Assert.IsNotNull(lines.DStreamProxy);

    var words = lines.FlatMap(l => l.Split(' '));
    var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

    // Assert.AreEqual takes (expected, actual): expected values are passed first so
    // that failure messages label the two values correctly.
    var doubleCounts = pairs.GroupByKey().FlatMapValues(vs => vs).MapValues(v => 2 * v).ReduceByKey((x, y) => x + y);
    doubleCounts.ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(9, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, int> countByWord = (KeyValuePair<string, int>)record;
            Assert.AreEqual(
                countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 2 * 23 : 2 * 22,
                countByWord.Value);
        }
    });

    // Disable pipelining into UpdateStateByKey: it relies on checkpointing, which the
    // mock proxy doesn't support.
    pairs.Cache();

    var state = pairs.UpdateStateByKey<string, int, int>((v, s) => s + (v as List<int>).Count);
    state.ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(9, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, int> countByWord = (KeyValuePair<string, int>)record;
            Assert.AreEqual(
                countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 24 : 23,
                countByWord.Value);
        }
    });
}
// Exercises GroupWith and the four join flavours (inner, left outer, right outer, full
// outer) between two filtered views of the same word-count stream: "left" drops the
// keys "quick" and "lazy", "right" drops "brown". The per-word counts asserted below
// depend on the data supplied by the mock proxy, which is defined outside this method.
public void TestDStreamJoin()
{
    var ssc = new StreamingContext(new SparkContext("", ""), 1);
    Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

    var lines = ssc.TextFileStream(Path.GetTempPath());
    Assert.IsNotNull(lines.DStreamProxy);

    var words = lines.FlatMap(l => l.Split(' '));
    var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));
    var wordCounts = pairs.ReduceByKey((x, y) => x + y);

    var left = wordCounts.Filter(x => x.Key != "quick" && x.Key != "lazy");
    var right = wordCounts.Filter(x => x.Key != "brown");

    // Assert.AreEqual takes (expected, actual): expected values are passed first so
    // that failure messages label the two values correctly.
    var groupWith = left.GroupWith(right);
    groupWith.ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(9, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, Tuple<List<int>, List<int>>> countByWord = (KeyValuePair<string, Tuple<List<int>, List<int>>>)record;

            if (countByWord.Key == "quick" || countByWord.Key == "lazy")
            {
                // Keys removed from the left side have no left-hand values.
                Assert.AreEqual(0, countByWord.Value.Item1.Count);
            }
            else if (countByWord.Key == "brown")
            {
                // The key removed from the right side has no right-hand values.
                Assert.AreEqual(0, countByWord.Value.Item2.Count);
            }
            else
            {
                Assert.AreEqual(
                    countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22,
                    countByWord.Value.Item1[0]);
                Assert.AreEqual(
                    countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22,
                    countByWord.Value.Item2[0]);
            }
        }
    });

    var innerJoin = left.Join(right);
    innerJoin.ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(6, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, Tuple<int, int>> countByWord = (KeyValuePair<string, Tuple<int, int>>)record;
            Assert.AreEqual(
                countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22,
                countByWord.Value.Item1);
            Assert.AreEqual(
                countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22,
                countByWord.Value.Item2);
        }
    });

    var leftOuterJoin = left.LeftOuterJoin(right);
    leftOuterJoin.ForeachRDD((time, rdd) =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(7, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, Tuple<int, Option<int>>> countByWord = (KeyValuePair<string, Tuple<int, Option<int>>>)record;
            Assert.AreEqual(
                countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22,
                countByWord.Value.Item1);

            // "brown" is missing on the right, so its optional value must be undefined.
            Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog"
                ? countByWord.Value.Item2.IsDefined == true && countByWord.Value.Item2.GetValue() == 23
                : (countByWord.Key == "brown"
                    ? countByWord.Value.Item2.IsDefined == false
                    : countByWord.Value.Item2.IsDefined == true && countByWord.Value.Item2.GetValue() == 22));
        }
    });

    var rightOuterJoin = left.RightOuterJoin(right);
    rightOuterJoin.ForeachRDD(rdd =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(8, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, Tuple<Option<int>, int>> countByWord = (KeyValuePair<string, Tuple<Option<int>, int>>)record;

            // "quick" and "lazy" are missing on the left, so their optional value must be undefined.
            Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog"
                ? countByWord.Value.Item1.IsDefined == true && countByWord.Value.Item1.GetValue() == 23
                : (countByWord.Key == "quick" || countByWord.Key == "lazy"
                    ? countByWord.Value.Item1.IsDefined == false
                    : countByWord.Value.Item1.IsDefined == true && countByWord.Value.Item1.GetValue() == 22));

            Assert.AreEqual(
                countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22,
                countByWord.Value.Item2);
        }
    });

    var fullOuterJoin = left.FullOuterJoin(right);
    fullOuterJoin.ForeachRDD(rdd =>
    {
        var taken = rdd.Collect();
        Assert.AreEqual(9, taken.Length);

        foreach (object record in taken)
        {
            KeyValuePair<string, Tuple<Option<int>, Option<int>>> countByWord = (KeyValuePair<string, Tuple<Option<int>, Option<int>>>)record;

            Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog"
                ? countByWord.Value.Item1.IsDefined == true && countByWord.Value.Item1.GetValue() == 23
                : (countByWord.Key == "quick" || countByWord.Key == "lazy"
                    ? countByWord.Value.Item1.IsDefined == false
                    : countByWord.Value.Item1.IsDefined == true && countByWord.Value.Item1.GetValue() == 22));

            Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy"
                ? countByWord.Value.Item2.IsDefined == true && countByWord.Value.Item2.GetValue() == 23
                : (countByWord.Key == "brown"
                    ? countByWord.Value.Item2.IsDefined == false
                    : countByWord.Value.Item2.IsDefined == true && countByWord.Value.Item2.GetValue() == 22));
        }
    });
}