Esempio n. 1
0
        public void TestStreamingContext()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStreamWithRepartition(ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>(), 10);
            Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Esempio n. 2
0
        private static void StartFileServer(StreamingContext ssc, string directory, string pattern, int loops = 1)
        {
            string testDir = Path.Combine(directory, "test");
            if (!Directory.Exists(testDir))
                Directory.CreateDirectory(testDir);

            stopFileServer = false;

            string[] files = Directory.GetFiles(directory, pattern);

            Task.Run(() =>
            {
                int loop = 0;
                while (!stopFileServer)
                {
                    if (loop++ < loops)
                    {
                        DateTime now = DateTime.Now;
                        foreach (string path in files)
                        {
                            string text = File.ReadAllText(path);
                            File.WriteAllText(testDir + "\\" + now.ToBinary() + "_" + Path.GetFileName(path), text);
                        }
                    }
                    System.Threading.Thread.Sleep(200);
                }

                ssc.Stop();
            });
            
            System.Threading.Thread.Sleep(1);
        }
        public void TestStreamingContext()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream("127.0.0.1", 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = ssc.KafkaStream("127.0.0.1:2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = ssc.DirectKafkaStream(new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Esempio n. 4
0
        internal static void DStreamTextFileSamples()
        {
            SparkContext sc = SparkCLRSamples.SparkContext;
            string directory = SparkCLRSamples.Configuration.SampleDataLocation;
            sc.SetCheckpointDir(directory);
            StreamingContext ssc = new StreamingContext(sc, 2000);

            var lines = ssc.TextFileStream(Path.Combine(directory, "test"));
            var words = lines.FlatMap(l => l.Split(' '));
            var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));
            var wordCounts = pairs.ReduceByKey((x, y) => x + y);
            var join = wordCounts.Join(wordCounts, 2);
            var state = join.UpdateStateByKey<string, Tuple<int, int>, int>((vs, s) => vs.Sum(x => x.Item1 + x.Item2) + s);

            state.ForeachRDD((time, rdd) =>
            {
                // there's chance rdd.Take conflicts with ssc.Stop
                if (stopFileServer)
                    return;

                object[] taken = rdd.Take(10);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine("Time: {0}", time);
                Console.WriteLine("-------------------------------------------");
                foreach (object record in taken)
                {
                    Console.WriteLine(record);
                }
                Console.WriteLine();

                stopFileServer = count++ > 3;
            });

            ssc.Start();

            StartFileServer(directory, "words.txt", 100);
            while (!stopFileServer)
            {
                System.Threading.Thread.Sleep(1000);
            }

            // wait ForeachRDD to complete to let ssc.Stop() gracefully
            System.Threading.Thread.Sleep(2000);

            ssc.Stop();
        }
Esempio n. 5
0
        public void TestStreamingAwaitTimeout()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000L);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000L);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTerminationOrTimeout(3000);
            ssc.Stop();
        }
Esempio n. 6
0
        public void TestStreamingContext()
        {
            var ssc = new StreamingContext(new SparkContext("", ""), 1000L);
            Assert.IsNotNull((ssc.streamingContextProxy as MockStreamingContextProxy));

            ssc.Start();
            ssc.Remember(1000L);
            ssc.Checkpoint(Path.GetTempPath());

            var textFile = ssc.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(textFile.DStreamProxy);

            var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketStream.DStreamProxy);

            var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary<string, int> { { "testTopic1", 1 } }, new Dictionary<string, string>());
            Assert.IsNotNull(kafkaStream.DStreamProxy);

            var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic2" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStream.DStreamProxy);

            ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numPartitions.testTopic3", "10");

            var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStream(ssc, new List<string> { "testTopic3" }, new Dictionary<string, string>(), new Dictionary<string, long>());
            Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy);

            var directKafkaStreamWithRepartitionAndReadFunc = KafkaUtils.CreateDirectStream(
                ssc,
                new List<string> { "testTopic3" },
                new Dictionary<string, string>(), new Dictionary<string, long>(),
                (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; });
            Assert.IsNotNull(directKafkaStreamWithRepartitionAndReadFunc);

            ssc.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numReceivers", "10");

            var directKafkaReceiver = KafkaUtils.CreateDirectStream(
                ssc,
                new List<string> { "testTopic3" },
                new Dictionary<string, string>(), new Dictionary<string, long>(),
                (int pid, IEnumerable<KeyValuePair<byte[], byte[]>> input) => { return input; });
            Assert.IsNotNull(directKafkaReceiver.DStreamProxy);

            var union = ssc.Union(textFile, socketStream);
            Assert.IsNotNull(union.DStreamProxy);

            ssc.AwaitTermination();
            ssc.Stop();
        }