Exemplo n.º 1
0
        public void TestStreamingContext()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStreamWithRepartition(ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>(), 10);
            Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Exemplo n.º 2
0
        public void TestStreamingContext()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream("127.0.0.1", 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = ssc.KafkaStream("127.0.0.1:2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = ssc.DirectKafkaStream(new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Exemplo n.º 3
0
        public void TestStreamingContext()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000L);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000L);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numPartitions.testTopic3", "10");

            var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy);

            var directKafkaStreamWithRepartitionAndReadFunc = KafkaUtils.CreateDirectStream(
                ssc,
                new List<string> { "testTopic3" },
                new Dictionary<string, string>(), new Dictionary<string, long>(),
                (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; });
            Assert.IsNotNull(directKafkaStreamWithRepartitionAndReadFunc);

            ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numReceivers", "10");

            var directKafkaReceiver = KafkaUtils.CreateDirectStream(
                ssc,
                new List<string> { "testTopic3" },
                new Dictionary<string, string>(), new Dictionary<string, long>(),
                (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; });
            Assert.IsNotNull(directKafkaReceiver.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Exemplo n.º 4
0
        internal static void DStreamTextFileSamples()
        {
            SparkContext sc = SparkCLRSamples.SparkContext;
            string directory = SparkCLRSamples.Configuration.SampleDataLocation;
            sc.SetCheckpointDir(directory);
            StreamingContext ssc = new StreamingContext(sc, 2000);

            var lines = ssc.TextFileStream(Path.Combine(directory, "test"));
            var words = lines.FlatMap(l => l.Split(' '));
            var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));
            var wordCounts = pairs.ReduceByKey((x, y) => x + y);
            var join = wordCounts.Join(wordCounts, 2);
            var state = join.UpdateStateByKey<string, Tuple<int, int>, int>((vs, s) => vs.Sum(x => x.Item1 + x.Item2) + s);

            state.ForeachRDD((time, rdd) =>
            {
                // there's chance rdd.Take conflicts with ssc.Stop
                if (stopFileServer)
                    return;

                object[] taken = rdd.Take(10);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                foreach (object record in taken)
                {
                    Console.WriteLine(record);
                }
                Console.WriteLine();

                stopFileServer = count++ > 3;
            });

            ssc.Start();

            StartFileServer(directory, "words.txt", 100);
            while (!stopFileServer)
            {
                System.Threading.Thread.Sleep(1000);
            }

            // wait ForeachRDD to complete to let ssc.Stop() gracefully
            System.Threading.Thread.Sleep(2000);

            ssc.Stop();
        }
Exemplo n.º 5
0
        public void TestStreamingAwaitTimeout()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000L);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000L);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTerminationOrTimeout(3000);
            ssc.Stop();
        }
Exemplo n.º 6
0
        internal static void DStreamConstantDStreamSample()
        {
            var sc = SparkCLRSamples.SparkContext;
            var ssc = new StreamingContext(sc, 2000L);

            const int count = 100;
            const int partitions = 2;

            // create the RDD
            var seedRDD = sc.Parallelize(Enumerable.Range(0, 100), 2);
            var dstream = new ConstantInputDStream<int>(seedRDD, ssc);

            dstream.ForeachRDD((time, rdd) =>
            {
                long batchCount = rdd.Count();
                int numPartitions = rdd.GetNumPartitions();

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Count: " + batchCount);
                Console.WriteLine("Partitions: " + numPartitions);
                Assert.AreEqual(count, batchCount);
                Assert.AreEqual(partitions, numPartitions);
            });

            ssc.Start();
            ssc.AwaitTermination();
        }
Exemplo n.º 7
0
        private static void DStreamReduceByKeyAndWindowSample()
        {
            count = 0;

            const long bacthIntervalMs = 2000; // batch interval is in milliseconds
            const int windowDuration = 26;     // window duration in seconds
            const int numPartitions = 2;

            var sc = SparkCLRSamples.SparkContext;
            var ssc = new StreamingContext(sc, bacthIntervalMs);

            // create the RDD
            var seedRDD = sc.Parallelize(Enumerable.Range(0, 100), numPartitions);
            var numbers = new ConstantInputDStream<int>(seedRDD, ssc);
            var pairs = numbers.Map(n => new KeyValuePair<int, int>(n % numPartitions, n));
            var reduced = pairs.ReduceByKeyAndWindow(
                    (int x, int y) => (x + y),
                    (int x, int y) => (x - y),
                    windowDuration,
                    slideDuration,
                    numPartitions
                );

            reduced.ForeachRDD((time, rdd) =>
            {
                count++;
                var taken = rdd.Collect();
                int partitions = rdd.GetNumPartitions();

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Batch: " + count);
                Console.WriteLine("Count: " + taken.Length);
                Console.WriteLine("Partitions: " + partitions);

                Assert.AreEqual(taken.Length, 2);
                Assert.AreEqual(partitions, numPartitions);

                foreach (object record in taken)
                {
                    KeyValuePair<int, int> sum = (KeyValuePair<int, int>)record;
                    Console.WriteLine("Key: {0}, Value: {1}", sum.Key, sum.Value);
                    // when batch count reaches window size, sum of even/odd number stay at windowDuration / slideDuration * (2450, 2500) respectively
                    Assert.AreEqual(sum.Value, (count > windowDuration / slideDuration ? windowDuration : count * slideDuration) / (bacthIntervalMs / 1000) * (sum.Key == 0 ? 2450 : 2500));
                }
            });

            ssc.Start();
            ssc.AwaitTermination();
        }
Exemplo n.º 8
0
        internal static void DStreamCSharpInputSample()
        {
            const int numPartitions = 5;

            var sc = SparkCLRSamples.SparkContext;
            var ssc = new StreamingContext(sc, 2000L); // batch interval is in milliseconds

            var inputDStream = CSharpInputDStreamUtils.CreateStream<string>(
                ssc,
                numPartitions,
                (double time, int pid) =>
                {
                    var list = new List<string>() { string.Format("PluggableInputDStream-{0}-{1}", pid, time) };
                    return list.AsEnumerable();
                });

            inputDStream.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                int partitions = rdd.GetNumPartitions();

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Count: " + taken.Length);
                Console.WriteLine("Partitions: " + partitions);

                foreach (object record in taken)
                {
                    Console.WriteLine(record);
                }
            });

            ssc.Start();
            ssc.AwaitTermination();
        }