Example 1
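Unit test exercising DStream map/reduce operations (FlatMap, Filter, Repartition, Slice, Cache, Checkpoint, Window) and the Count, CountByValue, CountByValueAndWindow, CountByWindow, Union, and Glom outputs, each asserted inside a ForeachRDD callback against the mock streaming context proxy.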
        public void TestDStreamMapReduce()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            var lines = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(lines.DStreamProxy);

            var words = lines.FlatMap(l => l.Split(' ')).Filter(w => w != "The").Repartition(1);

            words.Slice(DateTime.MinValue, DateTime.MaxValue);
            words.Cache();
            words.Checkpoint(1);
            words.Window(1, 1);

            words.Count().ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 1);
                Assert.AreEqual((int)taken[0], 178);
            });

            words.CountByValue().ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 8);

                foreach (object record in taken)
                {
                    KeyValuePair<string, long> countByWord = (KeyValuePair<string, long>)record;
                    Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22);
                }
            });

            words.CountByValueAndWindow(1, 1).ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken[0], 8);
            });

            words.CountByWindow(1).ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 1);
                Assert.AreEqual((int)taken[0], 356);
            });

            words.Union(words).ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 356);
            });

            words.Glom().ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 1);
                Assert.AreEqual((taken[0] as string[]).Length, 178);
            });
        }
Example 2
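Unit test for the StreamingContext lifecycle and input sources: Start, Remember, and Checkpoint, followed by text file, socket, and Kafka streams (receiver-based and direct, via KafkaUtils) and a Union of two streams.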
        public void TestStreamingContext()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStreamWithRepartition(ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>(), 10);
            Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Example 3
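Sample that builds a checkpoint-recoverable job through StreamingContext.GetOrCreate: a text file stream is unioned with itself, word counts are windowed and joined, and running state is kept with UpdateStateByKey backed by a broadcast variable.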
        internal static void DStreamTextFileSample()
        {
            count = 0;

            string directory = SparkCLRSamples.Configuration.SampleDataLocation;
            string checkpointPath = Path.Combine(directory, "checkpoint");

            SparkContext sc = SparkCLRSamples.SparkContext;
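            // broadcast variable captured by the UpdateStateHelper closure passed to UpdateStateByKey below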
            var b = sc.Broadcast<int>(0);

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
                () =>
                {

                    StreamingContext context = new StreamingContext(sc, 2);
                    context.Checkpoint(checkpointPath);

                    var lines = context.TextFileStream(Path.Combine(directory, "test"));
                    lines = context.Union(lines, lines);
                    var words = lines.FlatMap(l => l.Split(' '));
                    var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

                    // since operations like ReduceByKey, Join and UpdateStateByKey are
                    // separate DStream transformations defined in CSharpDStream.scala,
                    // an extra CSharpRDD is introduced between these operations
                    var wordCounts = pairs.ReduceByKey((x, y) => x + y);
                    var join = wordCounts.Window(2, 2).Join(wordCounts, 2);
                    var state = join.UpdateStateByKey<string, Tuple<int, int>, int>(new UpdateStateHelper(b).Execute);

                    state.ForeachRDD((time, rdd) =>
                    {
                        // there's a chance rdd.Take conflicts with ssc.Stop
                        if (stopFileServer)
                            return;

                        object[] taken = rdd.Take(10);
                        Console.WriteLine("-------------------------------------------");
                        Console.WriteLine("Time: {0}", time);
                        Console.WriteLine("-------------------------------------------");
                        foreach (object record in taken)
                        {
                            Console.WriteLine(record);
                            
                            var countByWord = (KeyValuePair<string, int>)record;
                            Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "lazy" || countByWord.Key == "dog" ? 92 : 88);
                        }
                        Console.WriteLine();

                        stopFileServer = true;
                    });

                    return context;
                });

            StartFileServer(ssc, directory, "words.txt");

            ssc.Start();

            ssc.AwaitTermination();
        }
Example 4
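Variant of the StreamingContext test that calls the instance methods KafkaStream and DirectKafkaStream instead of KafkaUtils, with the intervals given in milliseconds.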
        public void TestStreamingContext()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream("127.0.0.1", 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = ssc.KafkaStream("127.0.0.1:2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = ssc.DirectKafkaStream(new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Example 5
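Sample similar to Example 3 but with an inline UpdateStateByKey lambda; the driver stops feeding the file server after roughly 100 batches.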
        internal static void DStreamTextFileSamples()
        {
            count = 0;

            string directory = SparkCLRSamples.Configuration.SampleDataLocation;
            string checkpointPath = Path.Combine(directory, "checkpoint");

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
                () =>
                {
                    SparkContext sc = SparkCLRSamples.SparkContext;
                    StreamingContext context = new StreamingContext(sc, 2000);
                    context.Checkpoint(checkpointPath);

                    var lines = context.TextFileStream(Path.Combine(directory, "test"));
                    lines = context.Union(lines, lines);
                    var words = lines.FlatMap(l => l.Split(' '));
                    var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

                    // since operations like ReduceByKey, Join and UpdateStateByKey are
                    // separate DStream transformations defined in CSharpDStream.scala,
                    // an extra CSharpRDD is introduced between these operations
                    var wordCounts = pairs.ReduceByKey((x, y) => x + y);
                    var join = wordCounts.Join(wordCounts, 2);
                    var state = join.UpdateStateByKey<string, Tuple<int, int>, int>((vs, s) => vs.Sum(x => x.Item1 + x.Item2) + s);

                    state.ForeachRDD((time, rdd) =>
                    {
                        // there's a chance rdd.Take conflicts with ssc.Stop
                        if (stopFileServer)
                            return;

                        object[] taken = rdd.Take(10);
                        Console.WriteLine("-------------------------------------------");
                        Console.WriteLine("Time: {0}", time);
                        Console.WriteLine("-------------------------------------------");
                        foreach (object record in taken)
                        {
                            Console.WriteLine(record);
                        }
                        Console.WriteLine();

                        stopFileServer = count++ > 100;
                    });

                    return context;
                });

            ssc.Start();

            StartFileServer(directory, "words.txt", 100);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Example 6
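Unit test for pair-DStream transformations: PartitionBy followed by ReduceByKey, GroupByKey, and ReduceByKeyAndWindow with an inverse reduce function, each verified through ForeachRDD assertions.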
        public void TestDStreamTransform()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            var lines = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(lines.DStreamProxy);

            var words = lines.FlatMap(l => l.Split(' '));

            var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

            var wordCounts = pairs.PartitionBy().ReduceByKey((x, y) => x + y);

            wordCounts.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 9);

                foreach (object record in taken)
                {
                    KeyValuePair<string, int> countByWord = (KeyValuePair<string, int>)record;
                    Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22);
                }
            });

            var wordLists = pairs.GroupByKey();

            wordLists.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 9);

                foreach (object record in taken)
                {
                    KeyValuePair<string, List<int>> countByWord = (KeyValuePair<string, List<int>>)record;
                    Assert.AreEqual(countByWord.Value.Count, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22);
                }
            });

            // ReduceByKeyAndWindow takes a reduce function plus an inverse ("subtract") function so that
            // batches sliding out of the window can be subtracted from the running counts incrementally
            var wordCountsByWindow = pairs.ReduceByKeyAndWindow((x, y) => x + y, (x, y) => x - y, 1);

            wordCountsByWindow.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 9);

                foreach (object record in taken)
                {
                    KeyValuePair<string, int> countByWord = (KeyValuePair<string, int>)record;
                    Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 46 : 44);
                }
            });
        }
Example 7
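Moq-based variant of the transform test: it stubs IDStreamProxy and the streaming context proxy, deserializes the serialized user delegates with BinaryFormatter to invoke them directly, and verifies that CallForeachRDD was called.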
        public void TestDStreamTransform_Moq()
        {
            // Arrange
            var mockDStreamProxy = new Mock<IDStreamProxy>();

            _mockStreamingContextProxy.Setup(m => m.TextFileStream(It.Is<string>(d => d == Path.GetTempPath()))).Returns(mockDStreamProxy.Object);

            // when CallForeachRDD is invoked, deserialize the serialized user delegate and run it against a mock RDD
            mockDStreamProxy.Setup(m => m.CallForeachRDD(It.IsAny<byte[]>(), It.IsAny<string>())).Callback<byte[], string>(
                (func, deserializer) =>
            {
                Action<double, RDD<dynamic>> f = (Action<double, RDD<dynamic>>)new BinaryFormatter().Deserialize(new MemoryStream(func));
                f(DateTime.UtcNow.Ticks, new RDD<dynamic>(_mockRddProxy.Object, new SparkContext("", "")));
            });


            IRDDProxy functionedRddProxy = null;

            mockDStreamProxy.Setup(m => m.AsJavaDStream()).Returns(mockDStreamProxy.Object);

            _mockSparkCLRProxy.Setup(m => m.StreamingContextProxy.CreateCSharpDStream(It.IsAny<IDStreamProxy>(), It.IsAny<byte[]>(), It.IsAny<string>()))
            .Returns<IDStreamProxy, byte[], string>((jdstream, func, deserializer) =>
            {
                Func<double, RDD<dynamic>, RDD<dynamic>> f = (Func<double, RDD<dynamic>, RDD<dynamic>>)new BinaryFormatter().Deserialize(new MemoryStream(func));
                RDD<dynamic> rdd = f(DateTime.UtcNow.Ticks,
                                     new RDD<dynamic>(functionedRddProxy ?? _mockRddProxy.Object, new SparkContext("", "")));
                functionedRddProxy = rdd.RddProxy;
                return mockDStreamProxy.Object;
            });

            // Act
            var lines = _streamingContext.TextFileStream(Path.GetTempPath());
            var words = lines.FlatMap(l => l.Split(' '));
            var pairs = words.Map(w => new Tuple<string, int>(w, 1));
            var wordCounts = pairs.ReduceByKey((x, y) => x + y);

            // Assert
            wordCounts.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 9);

                foreach (object record in taken)
                {
                    Tuple<string, int> countByWord = (Tuple<string, int>)record;
                    Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 23 : 22);
                }
            });
            // use Verify to check that the mocked method was actually invoked
            mockDStreamProxy.Verify(m => m.CallForeachRDD(It.IsAny<byte[]>(), It.IsAny<string>()));
        }
Example 8
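StreamingContext test covering the direct Kafka stream driven by SparkConf settings (spark.mobius.streaming.kafka.numPartitions.&lt;topic&gt; and numReceivers) and a user-supplied per-partition read function.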
        public void TestStreamingContext()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000L);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000L);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            // Mobius reads this setting to repartition the direct Kafka stream for testTopic3 into 10 partitions
            ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numPartitions.testTopic3", "10");

            var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy);

            var directKafkaStreamWithRepartitionAndReadFunc = KafkaUtils.CreateDirectStream(
                ssc,
                new List<string> { "testTopic3" },
                new Dictionary<string, string>(), new Dictionary<string, long>(),
                (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; });
            Assert.IsNotNull(directKafkaStreamWithRepartitionAndReadFunc);

            ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numReceivers", "10");

            var directKafkaReceiver = KafkaUtils.CreateDirectStream(
                ssc,
                new List<string> { "testTopic3" },
                new Dictionary<string, string>(), new Dictionary<string, long>(),
                (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; });
            Assert.IsNotNull(directKafkaReceiver.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Example 9
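Sample without checkpoint recovery: the driver polls the stopFileServer flag once per second, sleeps briefly so the last ForeachRDD can finish, and then stops the context.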
        internal static void DStreamTextFileSamples()
        {
            SparkContext sc = SparkCLRSamples.SparkContext;
            string directory = SparkCLRSamples.Configuration.SampleDataLocation;
            sc.SetCheckpointDir(directory);
            StreamingContext ssc = new StreamingContext(sc, 2000);

            var lines = ssc.TextFileStream(Path.Combine(directory, "test"));
            var words = lines.FlatMap(l => l.Split(' '));
            var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));
            var wordCounts = pairs.ReduceByKey((x, y) => x + y);
            var join = wordCounts.Join(wordCounts, 2);
            var state = join.UpdateStateByKey<string, Tuple<int, int>, int>((vs, s) => vs.Sum(x => x.Item1 + x.Item2) + s);

            state.ForeachRDD((time, rdd) =>
            {
                // there's a chance rdd.Take conflicts with ssc.Stop
                if (stopFileServer)
                    return;

                object[] taken = rdd.Take(10);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                foreach (object record in taken)
                {
                    Console.WriteLine(record);
                }
                Console.WriteLine();

                stopFileServer = count++ > 3;
            });

            ssc.Start();

            StartFileServer(directory, "words.txt", 100);
            while (!stopFileServer)
            {
                System.Threading.Thread.Sleep(1000);
            }

            // wait for the last ForeachRDD to complete so that ssc.Stop() can shut down gracefully
            System.Threading.Thread.Sleep(2000);

            ssc.Stop();
        }
Example 10
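Standalone HdfsWordCount driver: it takes the checkpoint and input directories as command-line arguments, rebuilds the context via GetOrCreate, and prints up to ten word counts per 30-second batch.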
        static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.WriteLine("Usage: HdfsWordCount <checkpointDirectory> <inputDirectory>");
                return;
            }

            string checkpointPath = args[0];
            string inputDir = args[1];

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
                () =>
                {
                    var sparkConf = new SparkConf();
                    sparkConf.SetAppName("HdfsWordCount");
                    var sc = new SparkContext(sparkConf);
                    StreamingContext context = new StreamingContext(sc, 30000);
                    context.Checkpoint(checkpointPath);

                    var lines = context.TextFileStream(inputDir);
                    var words = lines.FlatMap(l => l.Split(' '));
                    var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));
                    var wordCounts = pairs.ReduceByKey((x, y) => x + y);

                    wordCounts.ForeachRDD((time, rdd) =>
                    {
                        Console.WriteLine("-------------------------------------------");
                        Console.WriteLine("Time: {0}", time);
                        Console.WriteLine("-------------------------------------------");
                        object[] taken = rdd.Take(10);
                        foreach (object record in taken)
                        {
                            Console.WriteLine(record);
                        }
                        Console.WriteLine();
                    });

                    return context;
                });

            ssc.Start();
            ssc.AwaitTermination();
            ssc.Stop();
        }
Example 11
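Unit test for AwaitTerminationOrTimeout: after wiring up text file, socket, and union streams, it waits at most 3 seconds before stopping the context.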
        public void TestStreamingAwaitTimeout()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000L);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000L);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTerminationOrTimeout(3000);
            ssc.Stop();
        }
Example 12
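Sample for MapWithState: a StateSpec with an initial state RDD, a single partition, and a 30-second timeout accumulates word counts, while StateSnapshots dumps the full state on every batch.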
        internal static void DStreamMapWithStateSample()
        {
            string directory = SparkCLRSamples.Configuration.SampleDataLocation;
            string checkpointPath = Path.Combine(directory, "checkpoint");

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
                () =>
                {
                    SparkContext sc = SparkCLRSamples.SparkContext;
                    StreamingContext context = new StreamingContext(sc, 10000);
                    context.Checkpoint(checkpointPath);

                    var lines = context.TextFileStream(Path.Combine(directory, "test1"));
                    lines = context.Union(lines, lines);
                    var words = lines.FlatMap(l => l.Split(' '));
                    var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

                    var wordCounts = pairs.ReduceByKey((x, y) => x + y);
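                    // seed MapWithState with an initial RDD of (word, count) pairs via StateSpec.InitialState below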
                    var initialState = sc.Parallelize(new[] { new KeyValuePair<string, int>("NOT_A_WORD", 1024), new KeyValuePair<string, int>("dog", 10000), }, 1);
                    var stateSpec = new StateSpec<string, int, int, KeyValuePair<string, int>>((word, count, state) =>
                    {
                        if (state.IsTimingOut())
                        {
                            Console.WriteLine("Found timing out word: {0}", word);
                            return new KeyValuePair<string, int>(word, state.Get());
                        }

                        var sum = 0;
                        if (state.Exists())
                        {
                            sum = state.Get();
                        }
                        state.Update(sum + count);
                        Console.WriteLine("word: {0}, count: {1}", word, sum + count);
                        return new KeyValuePair<string, int>(word, sum + count);
                    }).NumPartitions(1).InitialState(initialState).Timeout(TimeSpan.FromSeconds(30));

                    var snapshots = wordCounts.MapWithState(stateSpec).StateSnapshots();
                    snapshots.ForeachRDD((double time, RDD<dynamic> rdd) =>
                    {
                        Console.WriteLine("-------------------------------------------");
                        Console.WriteLine("Snapshots @ Time: {0}", time);
                        Console.WriteLine("-------------------------------------------");

                        foreach (KeyValuePair<string, int> record in rdd.Collect())
                        {
                            Console.WriteLine("[{0}, {1}]", record.Key, record.Value);
                        }
                        Console.WriteLine();
                    });

                    return context;
                });

            ssc.Start();

            StartFileServer(directory, "words.txt", 100);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Example 13
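Unit test for a GroupByKey/FlatMapValues/MapValues pipeline and for UpdateStateByKey; pairs.Cache() breaks pipelining because the mock proxy does not support the checkpointing UpdateStateByKey relies on.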
        public void TestDStreamUpdateStateByKey()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            var lines = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(lines.DStreamProxy);

            var words = lines.FlatMap(l => l.Split(' '));

            var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

            var doubleCounts = pairs.GroupByKey().FlatMapValues(vs => vs).MapValues(v => 2 * v).ReduceByKey((x, y) => x + y);
            doubleCounts.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 9);

                foreach (object record in taken)
                {
                    KeyValuePair<string, int> countByWord = (KeyValuePair<string, int>)record;
                    Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 2 * 23 : 2 * 22);
                }
            });

            // disable pipelining into UpdateStateByKey, which relies on checkpointing that the mock proxy doesn't support
            pairs.Cache();

            var state = pairs.UpdateStateByKey<string, int, int>((v, s) => s + (v as List<int>).Count);
            state.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 9);

                foreach (object record in taken)
                {
                    KeyValuePair<string, int> countByWord = (KeyValuePair<string, int>)record;
                    Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 24 : 23);
                }
            });
        }
Example 14
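Unit test covering the join family on pair DStreams: GroupWith, Join, LeftOuterJoin, RightOuterJoin, and FullOuterJoin, with Option&lt;int&gt; representing the possibly missing side of each outer join.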
        public void TestDStreamJoin()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            var lines = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(lines.DStreamProxy);

            var words = lines.FlatMap(l => l.Split(' '));

            var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

            var wordCounts = pairs.ReduceByKey((x, y) => x + y);

            var left = wordCounts.Filter(x => x.Key != "quick" && x.Key != "lazy");
            var right = wordCounts.Filter(x => x.Key != "brown");

            var groupWith = left.GroupWith(right);
            groupWith.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 9);

                foreach (object record in taken)
                {
                    KeyValuePair<string, Tuple<List<int>, List<int>>> countByWord = (KeyValuePair<string, Tuple<List<int>, List<int>>>)record;
                    if (countByWord.Key == "quick" || countByWord.Key == "lazy")
                        Assert.AreEqual(countByWord.Value.Item1.Count, 0);
                    else if (countByWord.Key == "brown")
                        Assert.AreEqual(countByWord.Value.Item2.Count, 0);
                    else
                    {
                        Assert.AreEqual(countByWord.Value.Item1[0], countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22);
                        Assert.AreEqual(countByWord.Value.Item2[0], countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22);
                    }
                }
            });

            var innerJoin = left.Join(right);
            innerJoin.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 6);

                foreach (object record in taken)
                {
                    KeyValuePair<string, Tuple<int, int>> countByWord = (KeyValuePair<string, Tuple<int, int>>)record;
                    Assert.AreEqual(countByWord.Value.Item1, countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22);
                    Assert.AreEqual(countByWord.Value.Item2, countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22);
                }
            });

            var leftOuterJoin = left.LeftOuterJoin(right);
            leftOuterJoin.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 7);

                foreach (object record in taken)
                {
                    KeyValuePair<string, Tuple<int, Option<int>>> countByWord = (KeyValuePair<string, Tuple<int, Option<int>>>)record;
                    Assert.AreEqual(countByWord.Value.Item1, countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22);
                    Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog" ?
                        countByWord.Value.Item2.IsDefined && countByWord.Value.Item2.GetValue() == 23 : (countByWord.Key == "brown" ?
                        !countByWord.Value.Item2.IsDefined : countByWord.Value.Item2.IsDefined && countByWord.Value.Item2.GetValue() == 22));
                }
            });

            var rightOuterJoin = left.RightOuterJoin(right);
            rightOuterJoin.ForeachRDD(rdd =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 8);

                foreach (object record in taken)
                {
                    KeyValuePair<string, Tuple<Option<int>, int>> countByWord = (KeyValuePair<string, Tuple<Option<int>, int>>)record;
                    Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog" ?
                        countByWord.Value.Item1.IsDefined && countByWord.Value.Item1.GetValue() == 23 :
                        (countByWord.Key == "quick" || countByWord.Key == "lazy" ? !countByWord.Value.Item1.IsDefined :
                        countByWord.Value.Item1.IsDefined && countByWord.Value.Item1.GetValue() == 22));
                    Assert.AreEqual(countByWord.Value.Item2, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22);
                }
            });
            
            var fullOuterJoin = left.FullOuterJoin(right);
            fullOuterJoin.ForeachRDD(rdd =>
            {
                var taken = rdd.Collect();
                Assert.AreEqual(taken.Length, 9);

                foreach (object record in taken)
                {
                    KeyValuePair<string, Tuple<Option<int>, Option<int>>> countByWord = (KeyValuePair<string, Tuple<Option<int>, Option<int>>>)record;
                    Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog" ?
                        countByWord.Value.Item1.IsDefined && countByWord.Value.Item1.GetValue() == 23 :
                        (countByWord.Key == "quick" || countByWord.Key == "lazy" ? !countByWord.Value.Item1.IsDefined :
                        countByWord.Value.Item1.IsDefined && countByWord.Value.Item1.GetValue() == 22));

                    Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ?
                        countByWord.Value.Item2.IsDefined && countByWord.Value.Item2.GetValue() == 23 :
                        (countByWord.Key == "brown" ? !countByWord.Value.Item2.IsDefined : countByWord.Value.Item2.IsDefined && countByWord.Value.Item2.GetValue() == 22));
                }
            });
        }