/// <summary>
/// Runs the windowed network word count example: reads text lines (with
/// their timestamps) from a socket source, splits each line on single
/// spaces, counts words per time window and prints the complete result
/// table, ordered by window, to the console.
/// </summary>
/// <param name="args">
/// Three or four command-line values: host name, port, window duration in
/// seconds and, optionally, slide duration in seconds. When the slide
/// duration is omitted it defaults to the window duration.
/// </param>
/// <exception cref="FormatException">
/// Thrown by <c>int.Parse</c> when the port or a duration is not a valid
/// integer.
/// </exception>
public void Run(string[] args)
{
    if (args.Length != 3 && args.Length != 4)
    {
        Console.Error.WriteLine(
            "Usage: StructuredNetworkWordCountWindowed " +
            "<hostname> <port> <window duration in seconds> " +
            "[<slide duration in seconds>]");
        Environment.Exit(1);
    }

    string hostname = args[0];
    var port = int.Parse(args[1]);
    var windowSize = int.Parse(args[2]);

    // With only three arguments the slide equals the window size.
    var slideSize = (args.Length == 3) ? windowSize : int.Parse(args[3]);
    if (slideSize > windowSize)
    {
        Console.Error.WriteLine(
            "<slide duration> must be less than or equal " +
            "to <window duration>");

        // Stop here, exactly like the argument-count check above, instead
        // of going on to build a query from arguments that were just
        // reported as invalid.
        Environment.Exit(1);
    }

    var windowDuration = $"{windowSize} seconds";
    var slideDuration = $"{slideSize} seconds";

    SparkSession spark = SparkSession
        .Builder()
        .AppName("StructuredNetworkWordCountWindowed")
        .GetOrCreate();

    // "includeTimestamp" is enabled so that the "timestamp" column used
    // for windowing below is available next to "value".
    DataFrame lines = spark
        .ReadStream()
        .Format("socket")
        .Option("host", hostname)
        .Option("port", port)
        .Option("includeTimestamp", true)
        .Load();

    // One output row per word, keeping the timestamp of the source line.
    DataFrame words = lines
        .Select(
            Explode(Split(lines["value"], " ")).Alias("word"),
            lines["timestamp"]);

    // Count rows per (window, word) pair and order the result by window.
    DataFrame windowedCounts = words
        .GroupBy(
            Window(words["timestamp"], windowDuration, slideDuration),
            words["word"])
        .Count()
        .OrderBy("window");

    Spark.Sql.Streaming.StreamingQuery query = windowedCounts
        .WriteStream()
        .OutputMode("complete")
        .Format("console")
        .Option("truncate", false)
        .Start();

    query.AwaitTermination();
}
/// <summary>
/// Runs the Kafka word count example: reads records from a Kafka source,
/// casts each record value to a string, splits it on single spaces, counts
/// the occurrences of every word and prints the complete result table to
/// the console.
/// </summary>
/// <param name="args">
/// Exactly three command-line values: the Kafka bootstrap servers, the
/// subscribe type (passed through as the option name) and the topics
/// (passed through as that option's value).
/// </param>
public void Run(string[] args)
{
    const int expectedArgCount = 3;

    if (args.Length != expectedArgCount)
    {
        Console.Error.WriteLine(
            "Usage: StructuredKafkaWordCount <bootstrap-servers> <subscribe-type> <topics>");
        Environment.Exit(1);
    }

    string servers = args[0];
    string subscriptionKind = args[1];
    string topicList = args[2];

    SparkSession session =
        SparkSession.Builder().AppName("StructuredKafkaWordCount").GetOrCreate();

    // Configure the Kafka source; the subscribe type chosen on the command
    // line is used directly as the option key.
    var kafkaReader = session.ReadStream().Format("kafka");
    kafkaReader = kafkaReader.Option("kafka.bootstrap.servers", servers);
    kafkaReader = kafkaReader.Option(subscriptionKind, topicList);

    // Keep only the record value, cast to a string.
    DataFrame textLines = kafkaReader.Load().SelectExpr("CAST(value AS STRING)");

    // One output row per space-separated token, in a column named "word".
    var tokens = Split(textLines["value"], " ");
    var wordColumn = Explode(tokens).Alias("word");
    DataFrame wordRows = textLines.Select(wordColumn);

    DataFrame countsPerWord = wordRows.GroupBy("word").Count();

    var consoleWriter = countsPerWord.WriteStream().OutputMode("complete").Format("console");
    Spark.Sql.Streaming.StreamingQuery runningQuery = consoleWriter.Start();

    runningQuery.AwaitTermination();
}
/// <summary>
/// Runs the network word count example: reads text lines from a socket
/// source, splits each line on single spaces, counts the occurrences of
/// every word and prints the complete result table to the console.
/// </summary>
/// <param name="args">
/// Exactly two command-line values: the host name and the port of the
/// socket source.
/// </param>
/// <exception cref="FormatException">
/// Thrown by <c>int.Parse</c> when the port is not a valid integer.
/// </exception>
public void Run(string[] args)
{
    bool hasExpectedArgs = args.Length == 2;
    if (!hasExpectedArgs)
    {
        Console.Error.WriteLine("Usage: StructuredNetworkWordCount <hostname> <port>");
        Environment.Exit(1);
    }

    string host = args[0];
    int portNumber = int.Parse(args[1]);

    SparkSession session =
        SparkSession.Builder().AppName("StructuredNetworkWordCount").GetOrCreate();

    // Configure the socket source with the endpoint from the command line.
    var socketReader = session.ReadStream().Format("socket");
    socketReader = socketReader.Option("host", host);
    socketReader = socketReader.Option("port", portNumber);
    DataFrame textLines = socketReader.Load();

    // One output row per space-separated token, in a column named "word".
    var tokens = Split(textLines["value"], " ");
    var wordColumn = Explode(tokens).Alias("word");
    DataFrame wordRows = textLines.Select(wordColumn);

    DataFrame countsPerWord = wordRows.GroupBy("word").Count();

    var consoleWriter = countsPerWord.WriteStream().OutputMode("complete").Format("console");
    Spark.Sql.Streaming.StreamingQuery runningQuery = consoleWriter.Start();

    runningQuery.AwaitTermination();
}