Example 1
0
        /// <summary>
        /// Starts a structured-streaming query that watches <paramref name="streamInputPath"/>
        /// for CSV files, feeds every row to a <see cref="TestForeachWriter"/> sink, and
        /// blocks the calling thread until the query terminates.
        /// </summary>
        /// <param name="streamInputPath">Directory monitored for incoming CSV files.</param>
        public static void RunSparkStream(string streamInputPath)
        {
            SparkSession session = SparkSession.Builder().AppName("itur").GetOrCreate();

            // All four CSV columns are read as plain strings; parsing happens downstream.
            var csvSchema = new Microsoft.Spark.Sql.Types.StructType(new[]
            {
                new StructField("IturCode", new Microsoft.Spark.Sql.Types.StringType()),
                new StructField("IturERP", new Microsoft.Spark.Sql.Types.StringType()),
                new StructField("QuantityEdit", new Microsoft.Spark.Sql.Types.StringType()),
                new StructField("PartialQuantity", new Microsoft.Spark.Sql.Types.StringType())
            });

            DataFrame inputStream = session.ReadStream().Schema(csvSchema).Csv(streamInputPath);

            var sink = new TestForeachWriter();

            // Trigger the sink every 5000 ms; the handle is kept in s_query so other
            // members of the type can observe or stop the running query.
            s_query = inputStream
                .WriteStream()
                .Foreach(sink)
                .Trigger(Trigger.ProcessingTime(5000))
                .Start();

            s_query.AwaitTermination();
        }
Example 2
0
        /// <summary>
        /// Smoke-tests the local Spark installation: reads the bundled <c>data/itur.csv</c>
        /// with an all-string schema and writes it back out as JSON next to the assembly.
        /// </summary>
        /// <param name="awsSettings">Injected AWS settings; currently only used by the
        /// commented-out S3 upload snippet below.</param>
        /// <returns><c>"ok"</c> on success, otherwise the exception message.</returns>
        public string SparkTest([FromServices] IAWSSettings awsSettings)
        {
            string result = "ok";
            SparkSession spark = null;

            try
            {
                spark = SparkSession
                        .Builder()
                        .AppName("itur")
                        .GetOrCreate();

                // All columns are ingested as strings; no type coercion is attempted here.
                var mySchema = new Microsoft.Spark.Sql.Types.StructType(new[]
                {
                    new StructField("IturCode", new Microsoft.Spark.Sql.Types.StringType()),
                    new StructField("IturERP", new Microsoft.Spark.Sql.Types.StringType()),
                    new StructField("QuantityEdit", new Microsoft.Spark.Sql.Types.StringType()),
                    new StructField("PartialQuantity", new Microsoft.Spark.Sql.Types.StringType())
                });

                // Input CSV is shipped alongside the assembly under data/.
                string assemblyLocation = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
                string iturInputPath    = Path.Combine(assemblyLocation, "data", "itur.csv");

                DataFrame df = spark.Read()
                               .Format("csv")
                               .Schema(mySchema)
                               .Option("delimiter", ",")
                               .Option("header", true)
                               //.Option("dateFormat", "dd/MM/yyyy")
                               .Load(iturInputPath);

                // FIX: was DateTime.Now with 12-hour "hh", which produced colliding
                // file names across AM/PM and depended on local time and culture.
                // UTC + 24-hour "HH" + invariant culture makes the name unambiguous.
                string dt         = DateTime.UtcNow.ToString(
                    "MMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);
                string outputfile = Path.Combine(assemblyLocation, "outputData", $"itur_out{dt}.json");
                df.Write().Json(outputfile);

                //string toPath = $"s3n://{awsSettings.AccessKey}:{awsSettings.SecretKey}@{_bucketName}/{path}";
                //spark.Range(100).Repartition(5).Write().Mode("overwrite").Text(toPath) ;
            }
            catch (Exception ex)
            {
                // Deliberate best-effort: report the failure text instead of throwing.
                result = ex.Message;
            }
            finally
            {
                // FIX: Stop() previously ran only on the success path, leaking the
                // session whenever any step above threw.
                spark?.Stop();
            }
            return result;
        }
Example 3
0
        /// <summary>
        /// Runs the vector UDF example: loads <c>people.json</c> (path given as the single
        /// CLI argument), prints its schema and contents, then applies a grouped-map
        /// vector UDF that counts name characters per age group.
        /// </summary>
        /// <param name="args">Expected to hold exactly one element: the people.json path.
        /// Any other arity prints usage and exits the process with code 1.</param>
        public void Run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.Error.WriteLine(
                    "Usage: Sql.VectorDataFrameUdfs <path to SPARK_HOME/examples/src/main/resources/people.json>");
                Environment.Exit(1);
            }

            SparkSession spark = SparkSession
                                 .Builder()
                                 // Lower the shuffle partitions to speed up groupBy() operations.
                                 .Config("spark.sql.shuffle.partitions", "3")
                                 .AppName("SQL VectorUdfs example using .NET for Apache Spark")
                                 .GetOrCreate();

            DataFrame people = spark.Read().Schema("age INT, name STRING").Json(args[0]);

            Console.WriteLine(people.Schema().SimpleString);
            people.Show();
            people.PrintSchema();

            // Output shape of the grouped-map UDF: one (age, nameCharCount) row set per group.
            var udfResultSchema = new StructType(new[]
            {
                new StructField("age", new IntegerType()),
                new StructField("nameCharCount", new IntegerType())
            });

            // Grouped Map Vector UDF — each group may return a different number of rows.
            people.GroupBy("age")
                  .Apply(udfResultSchema, r => CountCharacters(r))
                  .Show();

            spark.Stop();
        }