Example #1
0
        /// <summary>
        /// Streaming sample exercising driver recovery through checkpointing:
        /// builds (or restores via GetOrCreate) a StreamingContext over a text-file
        /// stream, unions the stream with itself, counts words, joins the counts
        /// with themselves, and carries state across batches with UpdateStateByKey,
        /// printing up to 10 records per batch until roughly 100 batches have run.
        /// </summary>
        internal static void DStreamTextFileSamples()
        {
            count = 0;

            string dataDir       = SparkCLRSamples.Configuration.SampleDataLocation;
            string checkpointDir = Path.Combine(dataDir, "checkpoint");

            SparkContext sparkContext = SparkCLRSamples.SparkContext;
            var          broadcastVar = sparkContext.Broadcast<int>(0);

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointDir, () =>
            {
                // 2000 ms batch interval; the context checkpoints itself so the
                // outer GetOrCreate can restore it after a driver restart.
                var context = new StreamingContext(sparkContext, 2000);
                context.Checkpoint(checkpointDir);

                var fileLines = context.TextFileStream(Path.Combine(dataDir, "test"));
                fileLines     = context.Union(fileLines, fileLines);

                var tokens    = fileLines.FlatMap(line => line.Split(' '));
                var wordPairs = tokens.Map(word => new KeyValuePair<string, int>(word, 1));

                // ReduceByKey, Join and UpdateStateByKey are separate dstream
                // transformations defined in CSharpDStream.scala, so an extra
                // CSharpRDD is introduced in between these operations.
                var counts      = wordPairs.ReduceByKey((x, y) => x + y);
                var joined      = counts.Join(counts, 2);
                var stateStream = joined.UpdateStateByKey<string, Tuple<int, int>, int>(new UpdateStateHelper(broadcastVar).Execute);

                stateStream.ForeachRDD((time, rdd) =>
                {
                    // rdd.Take may conflict with ssc.Stop, so skip the batch once
                    // the file server has been flagged to stop.
                    if (stopFileServer)
                    {
                        return;
                    }

                    object[] records = rdd.Take(10);
                    Console.WriteLine("-------------------------------------------");
                    Console.WriteLine("Time: {0}", time);
                    Console.WriteLine("-------------------------------------------");
                    foreach (object record in records)
                    {
                        Console.WriteLine(record);
                    }
                    Console.WriteLine();

                    // Ask the file server to stop after a little over 100 batches.
                    stopFileServer = count++ > 100;
                });

                return context;
            });

            ssc.Start();

            StartFileServer(dataDir, "words.txt", 100);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Example #2
0
        /// <summary>
        /// Verifies that SparkContext.Broadcast wraps the given value, surfaces the
        /// broadcast id produced by the proxy, and reads the broadcast from file
        /// exactly once.
        /// </summary>
        public void TestBroadcast()
        {
            // Arrange
            const string expectedValue = "broadcastvar1";
            long broadcastId = 100L;

            var proxyMock = new Mock<ISparkContextProxy>();
            proxyMock.Setup(m => m.ReadBroadcastFromFile(It.IsAny<string>(), out broadcastId));

            var sc = new SparkContext(proxyMock.Object, null);

            // Act
            var broadcastVar = sc.Broadcast(expectedValue);

            // Assert
            Assert.IsNotNull(broadcastVar);
            Assert.AreEqual(expectedValue, broadcastVar.Value);
            Assert.AreEqual(broadcastId, broadcastVar.broadcastId);
            proxyMock.Verify(m => m.ReadBroadcastFromFile(It.IsAny<string>(), out broadcastId), Times.Once);
        }
Example #3
0
        /// <summary>
        /// Streaming sample exercising checkpoint recovery, windowed join and
        /// UpdateStateByKey seeded from an initial-state RDD: words from a
        /// text-file stream are counted, the windowed counts are joined with the
        /// plain counts, and the per-word state is asserted against the values
        /// seeded by the initial-state RDD.
        /// </summary>
        internal static void DStreamTextFileSample()
        {
            count = 0;

            string directory      = SparkCLRSamples.Configuration.SampleDataLocation;
            string checkpointPath = Path.Combine(directory, "checkpoint");

            SparkContext sc = SparkCLRSamples.SparkContext;
            var          b  = sc.Broadcast <int>(0);

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
                                                                () =>
            {
                StreamingContext context = new StreamingContext(sc, 2000L);     // batch interval is in milliseconds
                context.Checkpoint(checkpointPath);

                var lines = context.TextFileStream(Path.Combine(directory, "test"));
                lines     = context.Union(lines, lines);
                var words = lines.FlatMap(l => l.Split(' '));
                var pairs = words.Map(w => new Tuple <string, int>(w, 1));

                // since operations like ReduceByKey, Join and UpdateStateByKey are
                // separate dstream transformations defined in CSharpDStream.scala
                // an extra CSharpRDD is introduced in between these operations
                var wordCounts      = pairs.ReduceByKey((x, y) => x + y);
                var join            = wordCounts.Window(2, 2).Join(wordCounts, 2);
                var initialStateRdd = sc.Parallelize(new[] { new Tuple <string, int>("AAA", 88), new Tuple <string, int>("BBB", 88) });
                var state           = join.UpdateStateByKey(new UpdateStateHelper(b).Execute, initialStateRdd);

                state.ForeachRDD((time, rdd) =>
                {
                    // there's chance rdd.Take conflicts with ssc.Stop
                    if (stopFileServer)
                    {
                        return;
                    }

                    object[] taken = rdd.Take(10);
                    Console.WriteLine("-------------------------------------------");
                    Console.WriteLine("Time: {0}", time);
                    Console.WriteLine("-------------------------------------------");
                    foreach (object record in taken)
                    {
                        Console.WriteLine(record);

                        var countByWord = (Tuple <string, int>)record;
                        // Assert.AreEqual takes (expected, actual) — original call had the
                        // arguments swapped, which would produce a misleading failure
                        // message. Presumably "The"/"lazy"/"dog" come from the streamed
                        // words.txt and the other keys only from the 88-seeded
                        // initial-state RDD — verify against the sample data.
                        Assert.AreEqual(countByWord.Item1 == "The" || countByWord.Item1 == "lazy" || countByWord.Item1 == "dog" ? 92 : 88, countByWord.Item2);
                    }
                    Console.WriteLine();

                    // stop after the first successfully processed batch
                    stopFileServer = true;
                });

                return(context);
            });

            StartFileServer(ssc, directory, "words.txt");

            ssc.Start();

            ssc.AwaitTermination();
        }