public void TestSignaturesV2_3_X()
{
    // Start two streaming queries backed by in-memory sources, both writing to the console sink.
    var intMemoryStream = new MemoryStream<int>(_spark);
    StreamingQuery sq1 = intMemoryStream
        .ToDF()
        .WriteStream()
        .QueryName("intQuery")
        .Format("console")
        .Start();
    string id1 = sq1.Id;

    var stringMemoryStream = new MemoryStream<string>(_spark);
    StreamingQuery sq2 = stringMemoryStream
        .ToDF()
        .WriteStream()
        .QueryName("stringQuery")
        .Format("console")
        .Start();
    string id2 = sq2.Id;

    // The StreamingQueryManager tracks all queries active on this SparkSession.
    StreamingQueryManager sqm = _spark.Streams();

    StreamingQuery[] streamingQueries = sqm.Active().ToArray();
    Assert.Equal(2, streamingQueries.Length);

    Assert.IsType<StreamingQuery>(sqm.Get(id1));
    Assert.IsType<StreamingQuery>(sqm.Get(id2));

    sqm.ResetTerminated();

    sqm.AwaitAnyTermination(10);

    sq1.Stop();
    sq2.Stop();
}
public void TestSignaturesV2_4_X()
{
    var intMemoryStream = new MemoryStream<int>(_spark);
    // Trigger.Once() processes all available data in a single batch and then stops the query.
    StreamingQuery sq = intMemoryStream
        .ToDF()
        .WriteStream()
        .QueryName("testQuery")
        .Format("console")
        .Trigger(Trigger.Once())
        .Start();

    sq.AwaitTermination();
    // The timeout overload reports whether the query terminated within the given milliseconds.
    Assert.IsType<bool>(sq.AwaitTermination(10));

    Assert.IsType<string>(sq.Name);
    Assert.IsType<string>(sq.Id);
    Assert.IsType<string>(sq.RunId);

    Assert.IsType<bool>(sq.IsActive());

    sq.Explain();

    Assert.Null(sq.Exception());

    sq.Stop();
}
private static void SetupExitHandlers()
{
    Console.CancelKeyPress += (s, e) =>
    {
        Console.WriteLine($"{Environment.NewLine}Ctrl+C pressed");
        Environment.Exit(0);
    };

    AppDomain.CurrentDomain.ProcessExit += (s, e) =>
    {
        Console.WriteLine($"{Environment.NewLine}Exiting");

        // Stop the Spark streaming query.
        s_query.Stop();

        // Allow the main thread to continue and exit.
        s_waitHandle.Set();
    };
}
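// The exit handlers above reference s_query and s_waitHandle, which are declared elsewhere
// in the program. The declarations below are a minimal sketch of how such members might
// look; the types and initialization are assumptions based on how the fields are used,
// not the original declarations.
private static StreamingQuery s_query;
private static readonly ManualResetEventSlim s_waitHandle = new ManualResetEventSlim(false);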
public void TestSignaturesV3_1_X()
{
    string tableName = "output_table";
    WithTable(
        _spark,
        new string[] { tableName },
        () =>
        {
            using var tempDirectory = new TemporaryDirectory();
            var intMemoryStream = new MemoryStream<int>(_spark);
            DataStreamWriter dsw = intMemoryStream
                .ToDF()
                .WriteStream()
                .Format("parquet")
                .Option("checkpointLocation", tempDirectory.Path);

            // ToTable() starts the streaming query and writes its results to the given table.
            StreamingQuery sq = dsw.ToTable(tableName);

            sq.Stop();
        });
}
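// WithTable and TemporaryDirectory are shared test utilities that are not part of this
// snippet. The hypothetical helper below only sketches the drop-table-on-exit behavior
// the test relies on; it is inferred from the usage above and is not the project's
// actual implementation.
private static void WithTableSketch(SparkSession spark, string[] tableNames, Action action)
{
    try
    {
        action();
    }
    finally
    {
        // Best-effort cleanup so a failed test does not leave tables behind.
        foreach (string name in tableNames)
        {
            spark.Sql($"DROP TABLE IF EXISTS {name}");
        }
    }
}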
public void TestForeachBatch()
{
    // Temporary folder to put our test stream input.
    using var srcTempDirectory = new TemporaryDirectory();
    // Temporary folder to write ForeachBatch output.
    using var dstTempDirectory = new TemporaryDirectory();

    Func<Column, Column> outerUdf = Udf<int, int>(i => i + 100);

    // id column: [0, 1, ..., 9]
    WriteCsv(0, 10, Path.Combine(srcTempDirectory.Path, "input1.csv"));

    DataStreamWriter dsw = _spark
        .ReadStream()
        .Schema("id INT")
        .Csv(srcTempDirectory.Path)
        .WriteStream()
        .ForeachBatch((df, id) =>
        {
            Func<Column, Column> innerUdf = Udf<int, int>(i => i + 200);
            df.Select(outerUdf(innerUdf(Col("id"))))
                .Write()
                .Csv(Path.Combine(dstTempDirectory.Path, id.ToString()));
        });

    StreamingQuery sq = dsw.Start();

    // Process until all available data in the source has been processed and committed
    // to the ForeachBatch sink.
    sq.ProcessAllAvailable();

    // Add a new file to the source path. The Spark stream will read any new files
    // added to the source path.
    // id column: [10, 11, ..., 19]
    WriteCsv(10, 10, Path.Combine(srcTempDirectory.Path, "input2.csv"));

    // Process until all available data in the source has been processed and committed
    // to the ForeachBatch sink.
    sq.ProcessAllAvailable();

    sq.Stop();

    // Verify folders in the destination path.
    string[] csvPaths =
        Directory.GetDirectories(dstTempDirectory.Path).OrderBy(s => s).ToArray();
    var expectedPaths = new string[]
    {
        Path.Combine(dstTempDirectory.Path, "0"),
        Path.Combine(dstTempDirectory.Path, "1"),
    };
    Assert.True(expectedPaths.SequenceEqual(csvPaths));

    // Read the generated csv paths and verify contents.
    DataFrame df = _spark
        .Read()
        .Schema("id INT")
        .Csv(csvPaths[0], csvPaths[1])
        .Sort("id");

    IEnumerable<int> actualIds = df.Collect().Select(r => r.GetAs<int>("id"));
    Assert.True(Enumerable.Range(300, 20).SequenceEqual(actualIds));
}
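// WriteCsv is a small test helper that is not shown in this snippet. The hypothetical
// sketch below is based purely on how it is called above (WriteCsv(start, count, path)
// producing one "id" value per line to match the "id INT" schema); the real helper may
// differ.
private static void WriteCsvSketch(int start, int count, string path)
{
    // Write one integer per line so the file parses under the "id INT" schema.
    File.WriteAllLines(path, Enumerable.Range(start, count).Select(i => i.ToString()));
}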