Beispiel #1
0
        /// <summary>
        /// Starts a background task that repeatedly re-writes the files matching
        /// <paramref name="pattern"/> from <paramref name="directory"/> into its "test"
        /// sub-directory, and stops <paramref name="ssc"/> once <c>stopFileServer</c> becomes true.
        /// </summary>
        /// <param name="ssc">Streaming context that is stopped when the background loop exits.</param>
        /// <param name="directory">Directory containing the source files; output goes to its "test" sub-directory.</param>
        /// <param name="pattern">Search pattern handed to <see cref="Directory.GetFiles(string, string)"/>.</param>
        /// <param name="loops">Number of copy rounds; after that the task only polls the stop flag.</param>
        private static void StartFileServer(StreamingContext ssc, string directory, string pattern, int loops = 1)
        {
            string testDir = Path.Combine(directory, "test");

            // CreateDirectory does nothing when the directory already exists, so no Exists() probe is needed.
            Directory.CreateDirectory(testDir);

            stopFileServer = false;

            // The file list is captured once, before the background task starts.
            string[] files = Directory.GetFiles(directory, pattern);

            Task.Run(() =>
            {
                int loop = 0;
                while (!stopFileServer)
                {
                    if (loop++ < loops)
                    {
                        // One timestamp per round: every file written in this round shares the same prefix.
                        DateTime now = DateTime.Now;
                        foreach (string path in files)
                        {
                            string text = File.ReadAllText(path);

                            // Path.Combine instead of a hard-coded "\\" keeps the target valid on any platform.
                            string target = Path.Combine(testDir, now.ToBinary() + "_" + Path.GetFileName(path));
                            File.WriteAllText(target, text);
                        }
                    }
                    System.Threading.Thread.Sleep(200);
                }

                ssc.Stop();
            });

            System.Threading.Thread.Sleep(1);
        }
        /// <summary>
        /// Creates text-file, socket and union streams on a streaming context whose proxy is
        /// expected to be a <c>MockStreamingContextProxy</c>, then waits with a timeout and stops.
        /// </summary>
        public void TestStreamingAwaitTimeout()
        {
            var sparkContext = new SparkContext("", "");
            var streamingContext = new StreamingContext(sparkContext, 1000);

            // The test relies on the mock proxy being wired in.
            var mockProxy = streamingContext.streamingContextProxy as MockStreamingContextProxy;
            Assert.IsNotNull(mockProxy);

            streamingContext.Start();
            streamingContext.Remember(1000);
            streamingContext.Checkpoint(Path.GetTempPath());

            // File-based input stream.
            var fileStream = streamingContext.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(fileStream.DStreamProxy);

            // Socket-based input stream on the loopback address.
            string loopbackHost = IPAddress.Loopback.ToString();
            var socketTextStream = streamingContext.SocketTextStream(loopbackHost, 12345);
            Assert.IsNotNull(socketTextStream.DStreamProxy);

            // Union of both inputs.
            var merged = streamingContext.Union(fileStream, socketTextStream);
            Assert.IsNotNull(merged.DStreamProxy);

            streamingContext.AwaitTerminationOrTimeout(3000);
            streamingContext.Stop();
        }
Beispiel #3
0
        /// <summary>
        /// Streaming word-count sample: reads text files from the "test" sub-directory of the sample
        /// data location, counts words, self-joins the counts, folds them into per-key state and
        /// prints up to 10 records per batch until the stop flag is raised.
        /// </summary>
        /// <remarks>
        /// The context is obtained through <c>StreamingContext.GetOrCreate</c> with a "checkpoint"
        /// sub-directory. NOTE(review): presumably the factory lambda only runs when no usable
        /// checkpoint exists — confirm against the StreamingContext implementation.
        /// </remarks>
        internal static void DStreamTextFileSamples()
        {
            // Reset the batch counter used by the ForeachRDD callback below.
            count = 0;

            string directory      = SparkCLRSamples.Configuration.SampleDataLocation;
            string checkpointPath = Path.Combine(directory, "checkpoint");

            SparkContext sc = SparkCLRSamples.SparkContext;
            // Broadcast variable handed to UpdateStateHelper inside the factory lambda.
            var          b  = sc.Broadcast <int>(0);

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
                                                                () =>
            {
                // 2000 is the second constructor argument of the streaming context
                // (assumed to be the batch interval in milliseconds — TODO confirm).
                StreamingContext context = new StreamingContext(sc, 2000);
                context.Checkpoint(checkpointPath);

                var lines = context.TextFileStream(Path.Combine(directory, "test"));
                // The stream is unioned with itself, so each input line appears twice downstream.
                lines     = context.Union(lines, lines);
                var words = lines.FlatMap(l => l.Split(' '));
                var pairs = words.Map(w => new KeyValuePair <string, int>(w, 1));

                // since operations like ReduceByKey, Join and UpdateStateByKey are
                // separate dstream transformations defined in CSharpDStream.scala
                // an extra CSharpRDD is introduced in between these operations
                var wordCounts = pairs.ReduceByKey((x, y) => x + y);
                var join       = wordCounts.Join(wordCounts, 2);
                var state      = join.UpdateStateByKey <string, Tuple <int, int>, int>(new UpdateStateHelper(b).Execute);

                state.ForeachRDD((time, rdd) =>
                {
                    // there's chance rdd.Take conflicts with ssc.Stop
                    if (stopFileServer)
                    {
                        return;
                    }

                    object[] taken = rdd.Take(10);
                    Console.WriteLine("-------------------------------------------");
                    Console.WriteLine("Time: {0}", time);
                    Console.WriteLine("-------------------------------------------");
                    foreach (object record in taken)
                    {
                        Console.WriteLine(record);
                    }
                    Console.WriteLine();

                    // Post-increment: the flag flips only once the counter's previous value exceeds 100.
                    stopFileServer = count++ > 100;
                });

                return(context);
            });

            ssc.Start();

            // Begin feeding copies of words.txt into the watched directory (up to 100 rounds).
            StartFileServer(directory, "words.txt", 100);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Beispiel #4
0
        /// <summary>
        /// Exercises stream creation on a streaming context whose proxy is expected to be a
        /// <c>MockStreamingContextProxy</c>: text file, socket, Kafka (receiver, direct, direct with
        /// repartition, direct with repartition and read function) and union.
        /// </summary>
        public void TestStreamingContext()
        {
            var sparkContext = new SparkContext("", "");
            var streamingContext = new StreamingContext(sparkContext, 1);

            var mockProxy = streamingContext.streamingContextProxy as MockStreamingContextProxy;
            Assert.IsNotNull(mockProxy);

            streamingContext.Start();
            streamingContext.Remember(1);
            streamingContext.Checkpoint(Path.GetTempPath());

            // Text file stream.
            var fileStream = streamingContext.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(fileStream.DStreamProxy);

            // Socket stream.
            var socketTextStream = streamingContext.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketTextStream.DStreamProxy);

            // Receiver-based Kafka stream.
            string zookeeperQuorum = IPAddress.Loopback + ":2181";
            var receiverTopics = new Dictionary<string, int> { { "testTopic1", 1 } };
            var receiverStream = KafkaUtils.CreateStream(
                streamingContext,
                zookeeperQuorum,
                "testGroupId",
                receiverTopics,
                new Dictionary<string, string>());
            Assert.IsNotNull(receiverStream.DStreamProxy);

            // Direct Kafka stream.
            var directStream = KafkaUtils.CreateDirectStream(
                streamingContext,
                new List<string> { "testTopic2" },
                new Dictionary<string, string>(),
                new Dictionary<string, long>());
            Assert.IsNotNull(directStream.DStreamProxy);

            // Direct Kafka stream with an explicit partition count.
            var repartitionedStream = KafkaUtils.CreateDirectStreamWithRepartition(
                streamingContext,
                new List<string> { "testTopic3" },
                new Dictionary<string, string>(),
                new Dictionary<string, long>(),
                10);
            Assert.IsNotNull(repartitionedStream.DStreamProxy);

            // Direct Kafka stream with a partition count and a pass-through read function.
            var repartitionedWithReadFunc = KafkaUtils.CreateDirectStreamWithRepartitionAndReadFunc(
                streamingContext,
                new List<string> { "testTopic3" },
                new Dictionary<string, string>(),
                new Dictionary<string, long>(),
                10,
                (int partitionId, IEnumerable<KeyValuePair<byte[], byte[]>> records) => records);
            Assert.IsNotNull(repartitionedWithReadFunc.DStreamProxy);

            // Union of the file and socket streams.
            var merged = streamingContext.Union(fileStream, socketTextStream);
            Assert.IsNotNull(merged.DStreamProxy);

            streamingContext.AwaitTermination();
            streamingContext.Stop();
        }
Beispiel #5
0
        /// <summary>
        /// Entry point of the HdfsWordCount sample: counts words in text files arriving in an input
        /// directory and prints up to 10 (word, count) records per batch.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the checkpoint directory, <c>args[1]</c> is the input directory.
        /// With fewer than two arguments the usage line is printed and the method returns.
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.WriteLine("Usage: HdfsWordCount <checkpointDirectory> <inputDirectory>");
                return;
            }

            string checkpointPath = args[0];
            string inputDir       = args[1];

            // NOTE(review): presumably the factory lambda only runs when no usable checkpoint
            // exists under checkpointPath — confirm against StreamingContext.GetOrCreate.
            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
                                                                () =>
            {
                var sparkConf = new SparkConf();
                sparkConf.SetAppName("HdfsWordCount");
                var sc = new SparkContext(sparkConf);
                // 30000 is the second constructor argument
                // (assumed to be the batch interval in milliseconds — TODO confirm).
                StreamingContext context = new StreamingContext(sc, 30000);
                context.Checkpoint(checkpointPath);

                // Classic word count: split on single spaces, emit (word, 1), sum per word.
                var lines      = context.TextFileStream(inputDir);
                var words      = lines.FlatMap(l => l.Split(' '));
                var pairs      = words.Map(w => new KeyValuePair <string, int>(w, 1));
                var wordCounts = pairs.ReduceByKey((x, y) => x + y);

                wordCounts.ForeachRDD((time, rdd) =>
                {
                    Console.WriteLine("-------------------------------------------");
                    Console.WriteLine("Time: {0}", time);
                    Console.WriteLine("-------------------------------------------");
                    // Only the first 10 records of each batch are printed.
                    object[] taken = rdd.Take(10);
                    foreach (object record in taken)
                    {
                        Console.WriteLine(record);
                    }
                    Console.WriteLine();
                });

                return(context);
            });

            ssc.Start();
            ssc.AwaitTermination();
            ssc.Stop();
        }
Beispiel #6
0
        /// <summary>
        /// Streaming word-count sample without checkpoint recovery: reads text files from the "test"
        /// sub-directory of the sample data location, counts words, self-joins the counts, folds
        /// them into per-key state and prints up to 10 records per batch until the stop flag is raised.
        /// </summary>
        internal static void DStreamTextFileSamples()
        {
            SparkContext sparkContext = SparkCLRSamples.SparkContext;
            string sampleDir = SparkCLRSamples.Configuration.SampleDataLocation;

            sparkContext.SetCheckpointDir(sampleDir);
            StreamingContext streamingContext = new StreamingContext(sparkContext, 2000);

            // Build the pipeline: lines -> words -> (word, 1) -> counts -> self-join -> running state.
            string watchedDir = Path.Combine(sampleDir, "test");
            var textLines = streamingContext.TextFileStream(watchedDir);
            var tokens = textLines.FlatMap(line => line.Split(' '));
            var ones = tokens.Map(token => new KeyValuePair<string, int>(token, 1));
            var counts = ones.ReduceByKey((left, right) => left + right);
            var selfJoined = counts.Join(counts, 2);
            var running = selfJoined.UpdateStateByKey<string, Tuple<int, int>, int>(
                (values, previous) => values.Sum(pair => pair.Item1 + pair.Item2) + previous);

            running.ForeachRDD((time, rdd) =>
            {
                // there's chance rdd.Take conflicts with ssc.Stop
                if (!stopFileServer)
                {
                    object[] firstRecords = rdd.Take(10);
                    Console.WriteLine("-------------------------------------------");
                    Console.WriteLine("Time: {0}", time);
                    Console.WriteLine("-------------------------------------------");
                    for (int i = 0; i < firstRecords.Length; i++)
                    {
                        Console.WriteLine(firstRecords[i]);
                    }
                    Console.WriteLine();

                    // Post-increment: the flag flips only once the counter's previous value exceeds 3.
                    stopFileServer = count++ > 3;
                }
            });

            streamingContext.Start();

            StartFileServer(sampleDir, "words.txt", 100);

            streamingContext.AwaitTermination();
            streamingContext.Stop();
        }
        /// <summary>
        /// Exercises stream creation on a streaming context whose proxy is expected to be a
        /// <c>MockStreamingContextProxy</c>: text file, socket, Kafka, direct Kafka and union.
        /// </summary>
        public void TestStreamingContext()
        {
            var sparkContext = new SparkContext(Env.SPARK_MASTER_URL, "xxxx");
            var streamingContext = new StreamingContext(sparkContext, 1000);

            var mockProxy = streamingContext.streamingContextProxy as MockStreamingContextProxy;
            Assert.IsNotNull(mockProxy);

            streamingContext.Start();
            streamingContext.Remember(1000);
            streamingContext.Checkpoint(Path.GetTempPath());

            // Text file stream.
            var fileStream = streamingContext.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(fileStream.DStreamProxy);

            // Socket stream.
            var socketTextStream = streamingContext.SocketTextStream("127.0.0.1", 12345);
            Assert.IsNotNull(socketTextStream.DStreamProxy);

            // Receiver-based Kafka stream.
            var receiverTopics = new Dictionary<string, int> { { "testTopic1", 1 } };
            var receiverStream = streamingContext.KafkaStream(
                "127.0.0.1:2181",
                "testGroupId",
                receiverTopics,
                new Dictionary<string, string>());
            Assert.IsNotNull(receiverStream.DStreamProxy);

            // Direct Kafka stream.
            var directStream = streamingContext.DirectKafkaStream(
                new List<string> { "testTopic2" },
                new Dictionary<string, string>(),
                new Dictionary<string, long>());
            Assert.IsNotNull(directStream.DStreamProxy);

            // Union of the file and socket streams.
            var merged = streamingContext.Union(fileStream, socketTextStream);
            Assert.IsNotNull(merged.DStreamProxy);

            streamingContext.AwaitTermination();
            streamingContext.Stop();
        }
Beispiel #8
0
        /// <summary>
        /// Exercises stream creation on a streaming context whose proxy is expected to be a
        /// <c>MockStreamingContextProxy</c>: text file, socket, Kafka (receiver and direct) and union.
        /// </summary>
        public void TestStreamingContext()
        {
            var sparkContext = new SparkContext("", "");
            var streamingContext = new StreamingContext(sparkContext, 1000);

            var mockProxy = streamingContext.streamingContextProxy as MockStreamingContextProxy;
            Assert.IsNotNull(mockProxy);

            streamingContext.Start();
            streamingContext.Remember(1000);
            streamingContext.Checkpoint(Path.GetTempPath());

            // Text file stream.
            var fileStream = streamingContext.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(fileStream.DStreamProxy);

            // Socket stream.
            var socketTextStream = streamingContext.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketTextStream.DStreamProxy);

            // Receiver-based Kafka stream.
            string zookeeperQuorum = IPAddress.Loopback + ":2181";
            var receiverTopics = new Dictionary<string, int> { { "testTopic1", 1 } };
            var receiverStream = KafkaUtils.CreateStream(
                streamingContext,
                zookeeperQuorum,
                "testGroupId",
                receiverTopics,
                new Dictionary<string, string>());
            Assert.IsNotNull(receiverStream.DStreamProxy);

            // Direct Kafka stream.
            var directStream = KafkaUtils.CreateDirectStream(
                streamingContext,
                new List<string> { "testTopic2" },
                new Dictionary<string, string>(),
                new Dictionary<string, long>());
            Assert.IsNotNull(directStream.DStreamProxy);

            // Union of the file and socket streams.
            var merged = streamingContext.Union(fileStream, socketTextStream);
            Assert.IsNotNull(merged.DStreamProxy);

            streamingContext.AwaitTermination();
            streamingContext.Stop();
        }
Beispiel #9
0
        /// <summary>
        /// Streaming sample for <c>MapWithState</c>: counts words from text files in the "test1"
        /// sub-directory of the sample data location, keeps a running per-word total in state,
        /// and prints the state snapshots for every batch.
        /// </summary>
        /// <remarks>
        /// The context is obtained through <c>StreamingContext.GetOrCreate</c> with a "checkpoint"
        /// sub-directory. NOTE(review): presumably the factory lambda only runs when no usable
        /// checkpoint exists — confirm against the StreamingContext implementation.
        /// </remarks>
        internal static void DStreamMapWithStateSample()
        {
            string directory      = SparkCLRSamples.Configuration.SampleDataLocation;
            string checkpointPath = Path.Combine(directory, "checkpoint");

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
                                                                () =>
            {
                SparkContext sc          = SparkCLRSamples.SparkContext;
                StreamingContext context = new StreamingContext(sc, 10000L);     // batch interval is in milliseconds
                context.Checkpoint(checkpointPath);

                var lines = context.TextFileStream(Path.Combine(directory, "test1"));
                // The stream is unioned with itself, so each input line appears twice downstream.
                lines     = context.Union(lines, lines);
                var words = lines.FlatMap(l => l.Split(' '));
                var pairs = words.Map(w => new KeyValuePair <string, int>(w, 1));

                var wordCounts   = pairs.ReduceByKey((x, y) => x + y);
                // Seed state: two pre-populated keys in a single-partition RDD.
                var initialState = sc.Parallelize(new[] { new KeyValuePair <string, int>("NOT_A_WORD", 1024), new KeyValuePair <string, int>("dog", 10000), }, 1);
                var stateSpec    = new StateSpec <string, int, int, KeyValuePair <string, int> >((word, count, state) =>
                {
                    // On timeout the stored value is emitted as-is and the state is not updated.
                    if (state.IsTimingOut())
                    {
                        Console.WriteLine("Found timing out word: {0}", word);
                        return(new KeyValuePair <string, int>(word, state.Get()));
                    }

                    // Start from 0 for unseen words, otherwise from the stored total.
                    var sum = 0;
                    if (state.Exists())
                    {
                        sum = state.Get();
                    }
                    state.Update(sum + count);
                    Console.WriteLine("word: {0}, count: {1}", word, sum + count);
                    return(new KeyValuePair <string, int>(word, sum + count));
                }).NumPartitions(1).InitialState(initialState).Timeout(TimeSpan.FromSeconds(30));

                var snapshots = wordCounts.MapWithState(stateSpec).StateSnapshots();
                snapshots.ForeachRDD((double time, RDD <dynamic> rdd) =>
                {
                    Console.WriteLine("-------------------------------------------");
                    Console.WriteLine("Snapshots @ Time: {0}", time);
                    Console.WriteLine("-------------------------------------------");

                    // Collect() brings the whole snapshot back; each element is cast to a (word, total) pair.
                    foreach (KeyValuePair <string, int> record in rdd.Collect())
                    {
                        Console.WriteLine("[{0}, {1}]", record.Key, record.Value);
                    }
                    Console.WriteLine();
                });

                return(context);
            });

            ssc.Start();

            // Begin feeding copies of words.txt (up to 100 rounds).
            // NOTE(review): the stream above watches "test1"; confirm that StartFileServer writes to the same directory.
            StartFileServer(directory, "words.txt", 100);

            ssc.AwaitTermination();
            ssc.Stop();
        }
Beispiel #10
0
        /// <summary>
        /// Exercises stream creation on a streaming context whose proxy is expected to be a
        /// <c>MockStreamingContextProxy</c>: text file, socket, Kafka (receiver, direct, direct with
        /// configured partition count, direct with read function, direct with configured receivers)
        /// and union.
        /// </summary>
        public void TestStreamingContext()
        {
            var sparkContext = new SparkContext("", "");
            var streamingContext = new StreamingContext(sparkContext, 1000L);

            var mockProxy = streamingContext.streamingContextProxy as MockStreamingContextProxy;
            Assert.IsNotNull(mockProxy);

            streamingContext.Start();
            streamingContext.Remember(1000L);
            streamingContext.Checkpoint(Path.GetTempPath());

            // Text file stream.
            var fileStream = streamingContext.TextFileStream(Path.GetTempPath());
            Assert.IsNotNull(fileStream.DStreamProxy);

            // Socket stream.
            var socketTextStream = streamingContext.SocketTextStream(IPAddress.Loopback.ToString(), 12345);
            Assert.IsNotNull(socketTextStream.DStreamProxy);

            // Receiver-based Kafka stream.
            string zookeeperQuorum = IPAddress.Loopback + ":2181";
            var receiverTopics = new[] { Tuple.Create("testTopic1", 1) };
            var receiverStream = KafkaUtils.CreateStream(streamingContext, zookeeperQuorum, "testGroupId", receiverTopics, null);
            Assert.IsNotNull(receiverStream.DStreamProxy);

            // Direct Kafka stream.
            var directStream = KafkaUtils.CreateDirectStream(
                streamingContext,
                new List<string> { "testTopic2" },
                new List<Tuple<string, string>>(),
                new List<Tuple<string, long>>());
            Assert.IsNotNull(directStream.DStreamProxy);

            // Direct Kafka stream with a per-topic partition count supplied through configuration.
            streamingContext.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numPartitions.testTopic3", "10");

            var repartitionedStream = KafkaUtils.CreateDirectStream(
                streamingContext,
                new List<string> { "testTopic3" },
                new List<Tuple<string, string>>(),
                new List<Tuple<string, long>>());
            Assert.IsNotNull(repartitionedStream.DStreamProxy);

            // Direct Kafka stream with a pass-through read function; only the stream object is asserted here.
            var repartitionedWithReadFunc = KafkaUtils.CreateDirectStream(
                streamingContext,
                new List<string> { "testTopic3" },
                new List<Tuple<string, string>>(),
                new List<Tuple<string, long>>(),
                (int partitionId, IEnumerable<Tuple<byte[], byte[]>> records) => records);
            Assert.IsNotNull(repartitionedWithReadFunc);

            // Same call again after configuring a receiver count.
            streamingContext.SparkContext.SparkConf.Set("spark.mobius.streaming.kafka.numReceivers", "10");

            var receiverBackedDirectStream = KafkaUtils.CreateDirectStream(
                streamingContext,
                new List<string> { "testTopic3" },
                new List<Tuple<string, string>>(),
                new List<Tuple<string, long>>(),
                (int partitionId, IEnumerable<Tuple<byte[], byte[]>> records) => records);
            Assert.IsNotNull(receiverBackedDirectStream.DStreamProxy);

            // Union of the file and socket streams.
            var merged = streamingContext.Union(fileStream, socketTextStream);
            Assert.IsNotNull(merged.DStreamProxy);

            streamingContext.AwaitTermination();
            streamingContext.Stop();
        }
Beispiel #11
0
        /// <summary>
        /// Entry point of the socket-streaming test driver: parses options, optionally pauses so a
        /// debugger can be attached, then runs <c>Options.TestTimes</c> streaming tests one after
        /// another on a single <c>SparkContext</c>, logging a summary after each run and at the end.
        /// </summary>
        /// <param name="args">Command-line arguments, parsed into <c>ArgOptions</c>; the method returns early when parsing fails.</param>
        public static void Main(string[] args)
        {
            Logger.LogInfo(EnvironmentInfo);
            var config = AppDomain.CurrentDomain.SetupInformation.ConfigurationFile;

            var isParseOK = false;

            //Options = ParserByCommandLine.Parse(args, out isParseOK);
            Options = ArgParser.Parse <ArgOptions>(args, out isParseOK, "-Help");

            if (!isParseOK)
            {
                return;
            }

            Logger.LogDebug("{0} configuration {1}", File.Exists(config) ? "Exist" : "Not Exist", config);

            // Optional pause so a debugger can be attached to this process before the tests start.
            if (Options.WaitSecondsForAttachDebug > 0)
            {
                var waitBegin  = DateTime.Now;
                var waitEnd    = waitBegin + TimeSpan.FromSeconds(Options.WaitSecondsForAttachDebug);
                var currentPID = Process.GetCurrentProcess().Id;
                Logger.LogWarn($"Will wait {Options.WaitSecondsForAttachDebug} seconds for you to debug this process : please attach PID {currentPID} before {waitEnd}");
                Thread.Sleep(Options.WaitSecondsForAttachDebug * 1000);
            }

            Logger.LogInfo("will connect " + Options.Host + ":" + Options.Port + " batchSeconds = " + Options.BatchSeconds + " s , windowSeconds = " + Options.WindowSeconds + " s, slideSeconds = " + Options.SlideSeconds + " s."
                           + " checkpointDirectory = " + Options.CheckPointDirectory + ", is-array-test = " + Options.IsArrayValue);

            // Name prefix handed to StartOneTest; encodes the value mode (array even/uneven, or single).
            var prefix = ExeName + (Options.IsArrayValue ? "-array" + (Options.IsUnevenArray ? "-uneven" : "-even") : "-single");

            // Start of the whole run; used for the "total cost" figures in the logs.
            var beginTime = DateTime.Now;

            // One SparkContext is shared by all test iterations.
            var sc = new SparkContext(new SparkConf());

            // Runs a single streaming test; testTime is the 1-based iteration number.
            Action <long> testOneStreaming = (testTime) =>
            {
                var timesInfo = "[" + testTime + "]-" + Options.TestTimes + " ";
                Logger.LogInfo($"Begin test{timesInfo} : {GetCurrentProcessInfo()}");
                // The checkpoint directory is deleted only for the first DeleteCheckPointDirectoryTimes iterations.
                if (Options.DeleteCheckPointDirectoryTimes >= testTime)
                {
                    TestUtils.DeleteDirectory(Options.CheckPointDirectory);
                }

                // Batch interval is passed in milliseconds (seconds * 1000L).
                var ssc = new StreamingContext(sc, Options.BatchSeconds * 1000L);
                ssc.Checkpoint(Options.CheckPointDirectory);
                var lines = ssc.SocketTextStream(Options.Host, Options.Port, StorageLevelType.MEMORY_AND_DISK_SER);


                // oldSum is a copy taken before the test is wired up.
                // NOTE(review): newSum is read before ssc.Start(); the subtraction after Stop() is only
                // meaningful if GetStaticSumCount() returns a live shared instance — confirm.
                var oldSum = new SumCount(SumCountStatic.GetStaticSumCount());
                StartOneTest(sc, lines, Options.ElementCount, prefix);
                var newSum = SumCountStatic.GetStaticSumCount();
                // var sum = newSum - oldSum; // newSum maybe same as oldSum

                ssc.Start();
                var startTime = DateTime.Now;
                ssc.AwaitTerminationOrTimeout(Options.RunningSeconds * 1000);
                ssc.Stop();

                // Difference accumulated by this iteration; a non-positive expected LineCount disables the check.
                var sum             = newSum - oldSum;
                var isSameLineCount = Options.LineCount <= 0 || Options.LineCount == sum.LineCount;
                var message         = Options.LineCount <= 0 ? string.Empty :
                                      (isSameLineCount ? ". LineCount same" : string.Format(". LineCount different : expected = {0}, but line count = {1}", Options.LineCount, sum.LineCount));

                Logger.LogInfo("oldSum = {0}, newSum = {1}, sum = {2}", oldSum, newSum, sum);
                Logger.LogInfo($"End test{timesInfo}, used time = {(DateTime.Now - startTime).TotalSeconds} s, total cost = {(DateTime.Now - beginTime).TotalSeconds} s, started at {startTime.ToString(TestUtils.MilliTimeFormat)} . Reduced final sumCount : {sum.ToString()} {message}. {GetCurrentProcessInfo()}");
            };

            // Run the iterations sequentially, pausing between them but not after the last one.
            for (var times = 1; times <= Options.TestTimes; times++)
            {
                testOneStreaming(times);
                if (times < Options.TestTimes)
                {
                    Thread.Sleep(TimeSpan.FromSeconds(Options.TestIntervalSeconds));
                }
            }

            Logger.LogInfo($"Finished all tests, test times = {Options.TestTimes}, used time = {(DateTime.Now - beginTime).TotalSeconds} s = {DateTime.Now - beginTime} . {GetCurrentProcessInfo(true, "Final info: ")}");
        }