/// <summary>
/// Starts a structured-streaming job that reads CSV files from
/// <paramref name="streamInputPath"/> and feeds each row to a
/// <see cref="TestForeachWriter"/>. Blocks until the query terminates.
/// </summary>
/// <param name="streamInputPath">Directory monitored for incoming CSV files.</param>
public static void RunSparkStream(string streamInputPath)
{
    var writer = new TestForeachWriter();

    SparkSession spark = SparkSession
        .Builder()
        .AppName("itur")
        .GetOrCreate();

    // All columns are read as strings; parsing happens downstream in the writer.
    var schema = new Microsoft.Spark.Sql.Types.StructType(new[]
    {
        new StructField("IturCode", new Microsoft.Spark.Sql.Types.StringType()),
        new StructField("IturERP", new Microsoft.Spark.Sql.Types.StringType()),
        new StructField("QuantityEdit", new Microsoft.Spark.Sql.Types.StringType()),
        new StructField("PartialQuantity", new Microsoft.Spark.Sql.Types.StringType())
    });

    DataFrame streamDf = spark
        .ReadStream()
        .Schema(schema)
        .Csv(streamInputPath);

    // Micro-batch every 5 seconds; the query handle is kept in s_query so
    // other members of this class can stop/inspect it.
    s_query = streamDf
        .WriteStream()
        .Foreach(writer)
        .Trigger(Trigger.ProcessingTime(5000))
        .Start();

    // Blocks the calling thread for the lifetime of the streaming query.
    s_query.AwaitTermination();
}
/// <summary>
/// Smoke-test endpoint: reads the bundled <c>data/itur.csv</c> with Spark and
/// writes it back out as JSON next to the executing assembly.
/// </summary>
/// <param name="awsSettings">Injected AWS credentials (currently only used by the
/// commented-out S3 write below).</param>
/// <returns>"ok" on success, otherwise the exception message.</returns>
public string SparkTest([FromServices] IAWSSettings awsSettings)
{
    string result = "ok";
    try
    {
        SparkSession spark = SparkSession
            .Builder()
            .AppName("itur")
            .GetOrCreate();
        try
        {
            // All columns are read as strings; no typed parsing is needed here.
            var mySchema = new Microsoft.Spark.Sql.Types.StructType(new[]
            {
                new StructField("IturCode", new Microsoft.Spark.Sql.Types.StringType()),
                new StructField("IturERP", new Microsoft.Spark.Sql.Types.StringType()),
                new StructField("QuantityEdit", new Microsoft.Spark.Sql.Types.StringType()),
                new StructField("PartialQuantity", new Microsoft.Spark.Sql.Types.StringType())
            });

            string assemblyLocation = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
            string iturInputPath = Path.Combine(assemblyLocation, "data", "itur.csv");

            DataFrame df = spark.Read()
                .Format("csv")
                .Schema(mySchema)
                .Option("delimiter", ",")
                .Option("header", true)
                //.Option("dateFormat", "dd/MM/yyyy")
                .Load(iturInputPath);

            // FIX: use "HH" (24-hour clock) instead of "hh" (12-hour, no AM/PM
            // designator) so morning and evening runs cannot produce the same
            // output file name.
            string dt = DateTime.Now.ToString("MMddHHmmss");
            string outputfile = Path.Combine(assemblyLocation, "outputData", $"itur_out{dt}.json");
            df.Write().Json(outputfile);

            //string toPath = $"s3n://{awsSettings.AccessKey}:{awsSettings.SecretKey}@{_bucketName}/{path}";
            //spark.Range(100).Repartition(5).Write().Mode("overwrite").Text(toPath) ;
        }
        finally
        {
            // FIX: previously Stop() was only reached on the success path; now the
            // session is shut down even when the read/write above throws.
            spark.Stop();
        }
    }
    catch (Exception ex)
    {
        // Deliberate best-effort: surface the failure reason to the caller
        // instead of propagating.
        result = ex.Message;
    }
    return result;
}
/// <summary>
/// Entry point for the Vector (grouped-map) UDF example: loads a people JSON
/// file, prints its schema and contents, then applies a grouped-map UDF that
/// counts name characters per age group.
/// </summary>
/// <param name="args">Single element: path to SPARK_HOME/examples/src/main/resources/people.json.</param>
public void Run(string[] args)
{
    // Guard: exactly one argument (the JSON path) is required.
    if (args.Length != 1)
    {
        Console.Error.WriteLine(
            "Usage: Sql.VectorDataFrameUdfs <path to SPARK_HOME/examples/src/main/resources/people.json>");
        Environment.Exit(1);
    }

    SparkSession spark = SparkSession
        .Builder()
        // Lower the shuffle partitions to speed up groupBy() operations.
        .Config("spark.sql.shuffle.partitions", "3")
        .AppName("SQL VectorUdfs example using .NET for Apache Spark")
        .GetOrCreate();

    string peoplePath = args[0];
    DataFrame people = spark.Read().Schema("age INT, name STRING").Json(peoplePath);

    StructType peopleSchema = people.Schema();
    Console.WriteLine(peopleSchema.SimpleString);

    people.Show();
    people.PrintSchema();

    // Grouped Map Vector UDF
    // able to return different shapes and record lengths
    StructType resultSchema = new StructType(new[]
    {
        new StructField("age", new IntegerType()),
        new StructField("nameCharCount", new IntegerType())
    });

    people.GroupBy("age")
        .Apply(resultSchema, batch => CountCharacters(batch))
        .Show();

    spark.Stop();
}