Esempio n. 1
0
        /// <summary>
        /// Runs a streaming word count over time windows on text read from a socket
        /// and prints the complete result table to the console until the query terminates.
        /// </summary>
        /// <remarks>
        /// Each incoming line is split on a single space, every word is paired with the
        /// line's <c>timestamp</c> column, and counts are grouped by (window, word).
        /// When no slide duration is supplied the slide equals the window size.
        /// The process exits with code 1 on a wrong argument count or when the slide
        /// duration is larger than the window duration.
        /// </remarks>
        /// <param name="args">
        /// <c>&lt;hostname&gt; &lt;port&gt; &lt;window duration in seconds&gt;
        /// [&lt;slide duration in seconds&gt;]</c>; the numeric values are parsed with
        /// <see cref="int.Parse(string)"/>.
        /// </param>
        public void Run(string[] args)
        {
            if (args.Length != 3 && args.Length != 4)
            {
                Console.Error.WriteLine(
                    "Usage: StructuredNetworkWordCountWindowed " +
                    "<hostname> <port> <window duration in seconds> " +
                    "[<slide duration in seconds>]");
                Environment.Exit(1);
            }

            string hostname   = args[0];
            var    port       = int.Parse(args[1]);
            var    windowSize = int.Parse(args[2]);

            // The slide duration is optional; it defaults to the window size.
            var    slideSize  = (args.Length == 3) ? windowSize : int.Parse(args[3]);

            if (slideSize > windowSize)
            {
                Console.Error.WriteLine(
                    "<slide duration> must be less than or equal " +
                    "to <window duration>");

                // Stop here, as the usage check above does, instead of building a
                // query with a window specification already reported as invalid.
                Environment.Exit(1);
            }

            var windowDuration = $"{windowSize} seconds";
            var slideDuration  = $"{slideSize} seconds";

            SparkSession spark = SparkSession
                                 .Builder()
                                 .AppName("StructuredNetworkWordCountWindowed")
                                 .GetOrCreate();

            // "includeTimestamp" is enabled because the "timestamp" column is
            // selected below and used as the windowing column.
            DataFrame lines = spark
                              .ReadStream()
                              .Format("socket")
                              .Option("host", hostname)
                              .Option("port", port)
                              .Option("includeTimestamp", true)
                              .Load();

            // One output row per word, carrying the timestamp of its source line.
            DataFrame words = lines
                              .Select(Explode(Split(lines["value"], " "))
                                      .Alias("word"), lines["timestamp"]);

            // Count per (window, word), ordered by window.
            DataFrame windowedCounts = words
                                       .GroupBy(Window(words["timestamp"], windowDuration, slideDuration),
                                                words["word"])
                                       .Count()
                                       .OrderBy("window");

            Spark.Sql.Streaming.StreamingQuery query = windowedCounts
                                                       .WriteStream()
                                                       .OutputMode("complete")
                                                       .Format("console")
                                                       .Option("truncate", false)
                                                       .Start();

            query.AwaitTermination();
        }
        /// <summary>
        /// Counts the words carried in Kafka record values and prints the complete
        /// result table to the console until the streaming query terminates.
        /// </summary>
        /// <param name="args">
        /// Exactly three values: the Kafka bootstrap servers, the name of the
        /// subscription option to set on the source, and the topics value for that option.
        /// Any other argument count prints the usage text and exits with code 1.
        /// </param>
        public void Run(string[] args)
        {
            if (args.Length != 3)
            {
                Console.Error.WriteLine(
                    "Usage: StructuredKafkaWordCount <bootstrap-servers> <subscribe-type> <topics>");
                Environment.Exit(1);
            }

            string servers          = args[0];
            string subscribeOption  = args[1];
            string subscribeTargets = args[2];

            SparkSession session = SparkSession.Builder()
                .AppName("StructuredKafkaWordCount")
                .GetOrCreate();

            // Configure the Kafka source; the subscription option name comes from the caller.
            var kafkaSource = session.ReadStream()
                .Format("kafka")
                .Option("kafka.bootstrap.servers", servers)
                .Option(subscribeOption, subscribeTargets);

            // Keep only the record value, cast to a string column.
            DataFrame messages = kafkaSource.Load().SelectExpr("CAST(value AS STRING)");

            // Split each value on a single space and emit one row per word.
            var wordColumn = Explode(Split(messages["value"], " ")).Alias("word");
            DataFrame tokens = messages.Select(wordColumn);

            DataFrame tally = tokens.GroupBy("word").Count();

            var running = tally.WriteStream()
                .OutputMode("complete")
                .Format("console")
                .Start();

            running.AwaitTermination();
        }
        /// <summary>
        /// Counts the words in text read from a socket and prints the complete
        /// result table to the console until the streaming query terminates.
        /// </summary>
        /// <param name="args">
        /// Exactly two values: the host name and the port (parsed with
        /// <see cref="int.Parse(string)"/>). Any other argument count prints the
        /// usage text and exits with code 1.
        /// </param>
        public void Run(string[] args)
        {
            if (args.Length != 2)
            {
                Console.Error.WriteLine("Usage: StructuredNetworkWordCount <hostname> <port>");
                Environment.Exit(1);
            }

            string host       = args[0];
            int    portNumber = int.Parse(args[1]);

            SparkSession session = SparkSession.Builder()
                .AppName("StructuredNetworkWordCount")
                .GetOrCreate();

            // Configure the socket source with the requested endpoint.
            var socketSource = session.ReadStream()
                .Format("socket")
                .Option("host", host)
                .Option("port", portNumber);

            DataFrame incoming = socketSource.Load();

            // Split each line on a single space and emit one row per word.
            var wordColumn = Explode(Split(incoming["value"], " ")).Alias("word");
            DataFrame tokens = incoming.Select(wordColumn);

            DataFrame tally = tokens.GroupBy("word").Count();

            var running = tally.WriteStream()
                .OutputMode("complete")
                .Format("console")
                .Start();

            running.AwaitTermination();
        }