Пример #1
0
        // Smoke test for StreamingContext: checks that the context is backed by a
        // MockStreamingContextProxy, then verifies that every stream factory exercised
        // here (text file, socket, Kafka receiver, direct Kafka variants, union)
        // returns a DStream whose DStreamProxy is not null.
        public void TestStreamingContext()
        {
            var sparkContext = new SparkContext("", "");
            var streamingContext = new StreamingContext(sparkContext, 1);

            // The 'as' cast yields null unless the proxy really is the mock implementation.
            var mockProxy = streamingContext.streamingContextProxy as MockStreamingContextProxy;
            Assert.IsNotNull(mockProxy);

            streamingContext.Start();
            streamingContext.Remember(1);

            string tempDir = Path.GetTempPath();
            streamingContext.Checkpoint(tempDir);

            // File-based and socket-based text streams.
            var fileStream = streamingContext.TextFileStream(tempDir);
            Assert.IsNotNull(fileStream.DStreamProxy);

            string loopbackHost = IPAddress.Loopback.ToString();
            var socketStream = streamingContext.SocketTextStream(loopbackHost, 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            // Kafka stream created with a quorum address, group id and per-topic count map.
            var receiverTopics = new Dictionary<string, int>
            {
                { "testTopic1", 1 }
            };
            var receiverStream = KafkaUtils.CreateStream(
                streamingContext,
                loopbackHost + ":2181",
                "testGroupId",
                receiverTopics,
                new Dictionary<string, string>());
            Assert.IsNotNull(receiverStream.DStreamProxy);

            // Direct Kafka stream.
            var directTopics = new List<string> { "testTopic2" };
            var directStream = KafkaUtils.CreateDirectStream(
                streamingContext,
                directTopics,
                new Dictionary<string, string>(),
                new Dictionary<string, long>());
            Assert.IsNotNull(directStream.DStreamProxy);

            // Direct Kafka stream with an explicit repartition count.
            var repartitionTopics = new List<string> { "testTopic3" };
            var repartitionedStream = KafkaUtils.CreateDirectStreamWithRepartition(
                streamingContext,
                repartitionTopics,
                new Dictionary<string, string>(),
                new Dictionary<string, long>(),
                10);
            Assert.IsNotNull(repartitionedStream.DStreamProxy);

            // Same as above, plus a pass-through read function that returns its input unchanged.
            var readFuncTopics = new List<string> { "testTopic3" };
            var readFuncStream = KafkaUtils.CreateDirectStreamWithRepartitionAndReadFunc(
                streamingContext,
                readFuncTopics,
                new Dictionary<string, string>(),
                new Dictionary<string, long>(),
                10,
                (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => input);
            Assert.IsNotNull(readFuncStream.DStreamProxy);

            // Union of the two text streams created earlier.
            var unionStream = streamingContext.Union(fileStream, socketStream);
            Assert.IsNotNull(unionStream.DStreamProxy);

            streamingContext.AwaitTermination();
            streamingContext.Stop();
        }
Пример #2
0
        // Sample: obtains a StreamingContext via StreamingContext.GetOrCreate using a
        // "checkpoint" directory under the sample data location, wires up a direct Kafka
        // stream with repartitioning, and asserts on the record and partition counts of
        // each batch. Resets the 'count' batch counter, then starts the context and
        // blocks in AwaitTermination().
        internal static void DStreamDirectKafkaWithRepartitionSample()
        {
            count = 0;

            var checkpointDir = Path.Combine(SparkCLRSamples.Configuration.SampleDataLocation, "checkpoint");

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointDir, () =>
            {
                // Factory passed to GetOrCreate: builds a new context with a batch
                // duration argument of 2 and checkpointing enabled.
                var streamingContext = new StreamingContext(SparkCLRSamples.SparkContext, 2);
                streamingContext.Checkpoint(checkpointDir);

                var kafkaSettings = new Dictionary<string, string>
                {
                    { "metadata.broker.list", brokers },
                    { "auto.offset.reset", "smallest" }
                };
                var topics = new List<string> { topic };
                var fromOffsets = new Dictionary<string, long>();

                var stream = KafkaUtils.CreateDirectStreamWithRepartition(
                    streamingContext,
                    topics,
                    kafkaSettings,
                    fromOffsets,
                    partitions);

                stream.ForeachRDD((time, rdd) =>
                {
                    const string Separator = "-------------------------------------------";

                    long recordCount = rdd.Count();
                    int partitionCount = rdd.GetNumPartitions();

                    Console.WriteLine(Separator);
                    Console.WriteLine("Time: {0}", time);
                    Console.WriteLine(Separator);
                    Console.WriteLine("Count: " + recordCount);
                    Console.WriteLine("Partitions: " + partitionCount);

                    // Post-increment: the comparison sees the counter value from before this batch.
                    bool isFirstBatch = count++ == 0;

                    if (!isFirstBatch)
                    {
                        // Every batch after the first is expected to be empty with zero partitions.
                        Assert.AreEqual(0, recordCount);
                        Assert.IsTrue(partitionCount == 0);
                        return;
                    }

                    // The first batch is expected to carry all 'messages' records, spread
                    // over at least 'partitions' partitions.
                    Assert.AreEqual(messages, recordCount);
                    Assert.IsTrue(partitionCount >= partitions);
                });

                return streamingContext;
            });

            ssc.Start();
            ssc.AwaitTermination();
        }