public void TestSignaturesV2_3_X()
        {
            var intMemoryStream = new MemoryStream<int>(_spark);
            StreamingQuery sq1 = intMemoryStream
                .ToDF().WriteStream().QueryName("intQuery").Format("console").Start();
            string id1 = sq1.Id;

            var stringMemoryStream = new MemoryStream<string>(_spark);
            StreamingQuery sq2 = stringMemoryStream
                .ToDF().WriteStream().QueryName("stringQuery").Format("console").Start();
            string id2 = sq2.Id;

            StreamingQueryManager sqm = _spark.Streams();

            StreamingQuery[] streamingQueries = sqm.Active().ToArray();
            Assert.Equal(2, streamingQueries.Length);

            Assert.IsType<StreamingQuery>(sqm.Get(id1));
            Assert.IsType<StreamingQuery>(sqm.Get(id2));

            // Clear the list of terminated queries, then wait up to 10 ms for
            // any active query to terminate.
            sqm.ResetTerminated();

            sqm.AwaitAnyTermination(10);

            sq1.Stop();
            sq2.Stop();
        }
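A MemoryStream<T> source starts empty, so the queries above have nothing to process before they are stopped. A minimal sketch of feeding rows into the source and draining them, assuming the AddData helper that Microsoft.Spark's MemoryStream<T> exposes:

            // Sketch (hedged): push a batch into the in-memory source, then
            // block until it has been processed and committed to the sink.
            var memoryStream = new MemoryStream<int>(_spark);
            StreamingQuery query = memoryStream
                .ToDF().WriteStream().QueryName("feedQuery").Format("console").Start();

            memoryStream.AddData(Enumerable.Range(1, 5).ToArray());
            query.ProcessAllAvailable();
            query.Stop();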
Example #2
        public void TestSignaturesV2_4_X()
        {
            var intMemoryStream = new MemoryStream<int>(_spark);
            StreamingQuery sq = intMemoryStream
                .ToDF()
                .WriteStream()
                .QueryName("testQuery")
                .Format("console")
                .Trigger(Trigger.Once())
                .Start();

            sq.AwaitTermination();
            Assert.IsType<bool>(sq.AwaitTermination(10));

            Assert.IsType<string>(sq.Name);

            Assert.IsType<string>(sq.Id);

            Assert.IsType<string>(sq.RunId);

            Assert.IsType<bool>(sq.IsActive());

            sq.Explain();

            Assert.Null(sq.Exception());

            sq.Stop();
        }
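Trigger.Once() runs a single micro-batch and then stops, which is why the unconditional AwaitTermination() above returns. The Trigger factory also covers the other modes; a hedged sketch of the variants:

            // Sketch: alternative trigger modes on a DataStreamWriter
            // (see Microsoft.Spark.Sql.Streaming.Trigger for the factories).
            DataStreamWriter dsw = intMemoryStream.ToDF().WriteStream().Format("console");

            dsw.Trigger(Trigger.ProcessingTime(1000)); // micro-batch every 1000 ms
            dsw.Trigger(Trigger.Continuous(1000));     // continuous mode, 1 s checkpoints
            dsw.Trigger(Trigger.Once());               // one batch, then stop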
Example #3
        private static void SetupExitHandlers()
        {
            Console.CancelKeyPress += (s, e) =>
            {
                Console.WriteLine($"{Environment.NewLine}Ctrl+C pressed");
                Environment.Exit(0);
            };
            AppDomain.CurrentDomain.ProcessExit += (s, e) =>
            {
                Console.WriteLine($"{Environment.NewLine}Exiting");

                // Stop the Spark streaming query before the process exits.
                s_query.Stop();
                // Allow the main thread to continue and exit...
                s_waitHandle.Set();
            };
        }
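SetupExitHandlers relies on two static fields defined elsewhere in the program. A sketch of the surrounding scaffolding (the field names mirror the snippet; everything else is an assumption):

        // Hypothetical scaffolding around SetupExitHandlers, not shown in the
        // original example.
        private static StreamingQuery s_query;
        private static readonly ManualResetEventSlim s_waitHandle =
            new ManualResetEventSlim(false);

        public static void Main(string[] args)
        {
            SetupExitHandlers();
            // ... build the stream and assign s_query = dsw.Start() ...
            s_waitHandle.Wait(); // block until an exit handler signals shutdown
        }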
Example #4
        public void TestSignaturesV3_1_X()
        {
            string tableName = "output_table";

            WithTable(
                _spark,
                new string[] { tableName },
                () =>
                {
                    using var tempDirectory = new TemporaryDirectory();
                    var intMemoryStream = new MemoryStream<int>(_spark);
                    DataStreamWriter dsw = intMemoryStream
                        .ToDF()
                        .WriteStream()
                        .Format("parquet")
                        .Option("checkpointLocation", tempDirectory.Path);

                    StreamingQuery sq = dsw.ToTable(tableName);
                    sq.Stop();
                });
        }
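ToTable both starts the query and registers its output as a catalog table, so once the stream has committed data the result can be read back with the batch API. A short sketch under that assumption:

            // Sketch: drain the stream, then query the managed table
            // (assumes rows were first pushed into intMemoryStream).
            StreamingQuery sq = dsw.ToTable(tableName);
            sq.ProcessAllAvailable();
            sq.Stop();

            DataFrame result = _spark.Sql($"SELECT * FROM {tableName}");
            result.Show();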
Example #5
        public void TestForeachBatch()
        {
            // Temporary folder to put our test stream input.
            using var srcTempDirectory = new TemporaryDirectory();
            // Temporary folder to write ForeachBatch output.
            using var dstTempDirectory = new TemporaryDirectory();

            Func<Column, Column> outerUdf = Udf<int, int>(i => i + 100);

            // id column: [0, 1, ..., 9]
            WriteCsv(0, 10, Path.Combine(srcTempDirectory.Path, "input1.csv"));

            DataStreamWriter dsw = _spark
                .ReadStream()
                .Schema("id INT")
                .Csv(srcTempDirectory.Path)
                .WriteStream()
                .ForeachBatch((df, id) =>
                {
                    Func<Column, Column> innerUdf = Udf<int, int>(i => i + 200);
                    df.Select(outerUdf(innerUdf(Col("id"))))
                        .Write()
                        .Csv(Path.Combine(dstTempDirectory.Path, id.ToString()));
                });

            StreamingQuery sq = dsw.Start();

            // Process until all available data in the source has been processed and committed
            // to the ForeachBatch sink.
            sq.ProcessAllAvailable();

            // Add a new file to the source path; the stream picks up any new
            // files that appear there.
            // id column: [10, 11, ..., 19]
            WriteCsv(10, 10, Path.Combine(srcTempDirectory.Path, "input2.csv"));

            // Process until all available data in the source has been processed and committed
            // to the ForeachBatch sink.
            sq.ProcessAllAvailable();
            sq.Stop();

            // Verify folders in the destination path.
            string[] csvPaths =
                Directory.GetDirectories(dstTempDirectory.Path).OrderBy(s => s).ToArray();
            var expectedPaths = new string[]
            {
                Path.Combine(dstTempDirectory.Path, "0"),
                Path.Combine(dstTempDirectory.Path, "1"),
            };

            Assert.True(expectedPaths.SequenceEqual(csvPaths));

            // Read the generated csv paths and verify contents.
            DataFrame df = _spark
                           .Read()
                           .Schema("id INT")
                           .Csv(csvPaths[0], csvPaths[1])
                           .Sort("id");

            IEnumerable<int> actualIds = df.Collect().Select(r => r.GetAs<int>("id"));

            // Each id passed through innerUdf (+200) and outerUdf (+100), so
            // inputs 0..19 come back as 300..319.
            Assert.True(Enumerable.Range(300, 20).SequenceEqual(actualIds));
        }
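WriteCsv is a helper that is not shown here; from its call sites it writes a single-column CSV of count sequential ids starting at start. A plausible, hypothetical sketch:

        // Hypothetical WriteCsv helper inferred from the call sites above;
        // the real implementation may differ.
        private static void WriteCsv(int start, int count, string path)
        {
            using var streamWriter = new StreamWriter(path);
            foreach (int id in Enumerable.Range(start, count))
            {
                streamWriter.WriteLine(id);
            }
        }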