Code Example #1
        public void TestStreamingContext()
        {
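            // Exercises the streaming API surface against the mock proxy: every stream created below
            // (text file, socket, receiver-based Kafka, direct Kafka) should expose a non-null DStreamProxy.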
            var ssc = new StreamingContext(new SparkContext("", ""), 1);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStreamWithRepartition(ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>(), 10);
            Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Code Example #2
        public void TestStreamingContext()
        {
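            // Same coverage as the previous example, but the Kafka streams are created through the
            // StreamingContext instance methods (KafkaStream/DirectKafkaStream) instead of KafkaUtils.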
            var ssc = new StreamingContext(new SparkContext("", ""), 1000);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream("127.0.0.1", 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = ssc.KafkaStream("127.0.0.1:2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = ssc.DirectKafkaStream(new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Code Example #3
File: DStreamSamples.cs  Project: jthelin/SparkCLR
        internal static void DStreamConstantDStreamSample()
        {
            var sc = SparkCLRSamples.SparkContext;
            var ssc = new StreamingContext(sc, 2000L);

            const int count = 100;
            const int partitions = 2;

            // create the seed RDD using the count and partition constants defined above
            var seedRDD = sc.Parallelize(Enumerable.Range(0, count), partitions);
            var dstream = new ConstantInputDStream<int>(seedRDD, ssc);
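            // ConstantInputDStream replays the same seed RDD on every batch interval, so each batch
            // should report the same element count and partition count asserted below.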

            dstream.ForeachRDD((time, rdd) =>
            {
                long batchCount = rdd.Count();
                int numPartitions = rdd.GetNumPartitions();

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Count: " + batchCount);
                Console.WriteLine("Partitions: " + numPartitions);
                Assert.AreEqual(count, batchCount);
                Assert.AreEqual(partitions, numPartitions);
            });

            ssc.Start();
            ssc.AwaitTermination();
        }
Code Example #4
File: DStreamSamples.cs  Project: jthelin/SparkCLR
        private static void DStreamReduceByKeyAndWindowSample()
        {
            count = 0; // 'count' is a batch counter declared outside this excerpt

            const long batchIntervalMs = 2000; // batch interval is in milliseconds
            const int windowDuration = 26;     // window duration in seconds
            const int numPartitions = 2;
            // note: 'slideDuration' (in seconds) is declared outside this excerpt

            var sc = SparkCLRSamples.SparkContext;
            var ssc = new StreamingContext(sc, batchIntervalMs);

            // create the RDD
            var seedRDD = sc.Parallelize(Enumerable.Range(0, 100), numPartitions);
            var numbers = new ConstantInputDStream<int>(seedRDD, ssc);
            var pairs = numbers.Map(n => new KeyValuePair<int, int>(n % numPartitions, n));
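            // ReduceByKeyAndWindow is given both a reduce function and its inverse ((x + y) and (x - y)),
            // which lets the windowed sums be updated incrementally as old batches slide out of the window.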
            var reduced = pairs.ReduceByKeyAndWindow(
                    (int x, int y) => (x + y),
                    (int x, int y) => (x - y),
                    windowDuration,
                    slideDuration,
                    numPartitions
                );

            reduced.ForeachRDD((time, rdd) =>
            {
                count++;
                var taken = rdd.Collect();
                int partitions = rdd.GetNumPartitions();

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Batch: " + count);
                Console.WriteLine("Count: " + taken.Length);
                Console.WriteLine("Partitions: " + partitions);

                Assert.AreEqual(taken.Length, 2);
                Assert.AreEqual(partitions, numPartitions);

                foreach (object record in taken)
                {
                    KeyValuePair<int, int> sum = (KeyValuePair<int, int>)record;
                    Console.WriteLine("Key: {0}, Value: {1}", sum.Key, sum.Value);
                    // once the batch count reaches the window size, the sums for the even and odd keys
                    // settle at windowDuration / slideDuration * 2450 and * 2500 respectively
                    Assert.AreEqual(sum.Value, (count > windowDuration / slideDuration ? windowDuration : count * slideDuration) / (batchIntervalMs / 1000) * (sum.Key == 0 ? 2450 : 2500));
                }
            });

            ssc.Start();
            ssc.AwaitTermination();
        }
Code Example #5
File: DStreamSamples.cs  Project: jthelin/SparkCLR
        internal static void DStreamCSharpInputSample()
        {
            const int numPartitions = 5;

            var sc = SparkCLRSamples.SparkContext;
            var ssc = new StreamingContext(sc, 2000L); // batch interval is in milliseconds

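            // CreateStream plugs a user-supplied C# function into the input DStream: for each batch time
            // and partition id it returns the records that partition should contain.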
            var inputDStream = CSharpInputDStreamUtils.CreateStream<string>(
                ssc,
                numPartitions,
                (double time, int pid) =>
                {
                    var list = new List<string>() { string.Format("PluggableInputDStream-{0}-{1}", pid, time) };
                    return list.AsEnumerable();
                });

            inputDStream.ForeachRDD((time, rdd) =>
            {
                var taken = rdd.Collect();
                int partitions = rdd.GetNumPartitions();

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Count: " + taken.Length);
                Console.WriteLine("Partitions: " + partitions);

                foreach (object record in taken)
                {
                    Console.WriteLine(record);
                }
            });

            ssc.Start();
            ssc.AwaitTermination();
        }
Code Example #6
        public void TestStreamingContext()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000L);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000L);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

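            // The direct stream below picks up its partition count from configuration rather than an
            // explicit API parameter: this setting asks Mobius to repartition testTopic3 into 10 partitions.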
            ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numPartitions.testTopic3", "10");

            var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy);

            var directKafkaStreamWithRepartitionAndReadFunc = KafkaUtils.CreateDirectStream(
                ssc,
                new List<string> { "testTopic3" },
                new Dictionary<string, string>(), new Dictionary<string, long>(),
                (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; });
            Assert.IsNotNull(directKafkaStreamWithRepartitionAndReadFunc);

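            // Likewise, spark.mobius.streaming.kafka.numReceivers is read from configuration; here it
            // requests 10 receivers for the direct Kafka stream created below.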
            ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numReceivers", "10");

            var directKafkaReceiver = KafkaUtils.CreateDirectStream(
                ssc,
                new List<string> { "testTopic3" },
                new Dictionary<string, string>(), new Dictionary<string, long>(),
                (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; });
            Assert.IsNotNull(directKafkaReceiver.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }