/// <summary>
/// Entry point of the EventHubs streaming sample. Creates a SparkContext named
/// "SparkCLREventHub Example", obtains a StreamingContext through
/// StreamingContext.GetOrCreate, starts it and waits for termination.
/// The pipeline built by the factory decodes each event as UTF-8 text, keeps
/// lines that contain a comma, keys every line by its first two comma-separated
/// columns, reduces the per-key counts over a window and prints the collected
/// "key,count" lines to the console.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    var sparkConf = new SparkConf().SetAppName("SparkCLREventHub Example");
    var sparkContext = new SparkContext(sparkConf);

    // Every value in angle brackets is a placeholder left as-is from the sample.
    var eventhubsParams = new Dictionary<string, string>
    {
        { "eventhubs.policyname", "<policyname>" },
        { "eventhubs.policykey", "<policykey>" },
        { "eventhubs.namespace", "<namespace>" },
        { "eventhubs.name", "<name>" },
        { "eventhubs.partition.count", "<partitioncount>" },
        { "eventhubs.consumergroup", "$default" },
        { "eventhubs.checkpoint.dir", "<hdfs path to eventhub checkpoint dir>" },
        { "eventhubs.checkpoint.interval", "<interval>" },
    };

    const string checkpointPath = "<hdfs path to spark checkpoint dir>";
    const long batchIntervalInMillis = 5000;
    const int windowSeconds = 5;
    const int slideSeconds = 5;

    // Factory handed to GetOrCreate; presumably only invoked when no usable
    // checkpoint exists under checkpointPath - confirm against the StreamingContext API.
    Func<StreamingContext> buildStreamingContext = () =>
    {
        var context = new StreamingContext(sparkContext, batchIntervalInMillis);
        context.Checkpoint(checkpointPath);

        var connectionSettings = eventhubsParams.Select(entry => new Tuple<string, string>(entry.Key, entry.Value));
        var rawEvents = EventHubsUtils.CreateUnionStream(context, connectionSettings);

        // Stage 1: bytes -> text -> columns -> ("col0,col1", 1) pairs.
        // The comma filter guarantees Split(',') produces at least two columns.
        var keyedOnes = rawEvents
            .Map(payload => Encoding.UTF8.GetString(payload))
            .Filter(text => text.Contains(","))
            .Map(text => text.Split(','))
            .Map(fields => new Tuple<string, int>(string.Format("{0},{1}", fields[0], fields[1]), 1));

        // Stage 2: windowed sum per key. The second lambda subtracts, i.e. it looks like the
        // inverse of the reduce function; the trailing 3 is presumably the partition count
        // (TODO confirm against ReduceByKeyAndWindow's signature).
        var windowedCounts = keyedOnes.ReduceByKeyAndWindow(
            (left, right) => left + right,
            (left, right) => left - right,
            windowSeconds,
            slideSeconds,
            3);

        // Stage 3: render each (key, count) pair as a "key,count" line.
        var outputLines = windowedCounts.Map(pair => string.Format("{0},{1}", pair.Item1, pair.Item2));

        // Sink: pull each RDD back to the driver and print it. The original sample also carried a
        // disabled alternative that saved each RDD as text under an HDFS output directory.
        outputLines.ForeachRDD(rdd =>
        {
            foreach (var outputLine in rdd.Collect())
            {
                Console.WriteLine(outputLine);
            }
        });

        return context;
    };

    var streamingContext = StreamingContext.GetOrCreate(checkpointPath, buildStreamingContext);

    streamingContext.Start();
    streamingContext.AwaitTermination();
}
/// <summary>
/// Verifies that EventHubsUtils.CreateUnionStream returns a DStream whose
/// DStreamProxy is the exact proxy object that the (mocked) streaming context
/// proxy hands back from EventHubsUnionStream.
/// </summary>
public void TestCreateUnionStream()
{
    // Arrange: the DStream proxy we expect to come back out of CreateUnionStream.
    var expectedDStreamProxy = new Mock<IDStreamProxy>().Object;

    // Streaming-context proxy that returns the expected proxy for any parameters/storage level.
    var streamingProxyMock = new Mock<IStreamingContextProxy>();
    streamingProxyMock
        .Setup(proxy => proxy.EventHubsUnionStream(
            It.IsAny<Dictionary<string, string>>(),
            It.IsAny<StorageLevelType>()))
        .Returns(expectedDStreamProxy);

    // Top-level CLR proxy that yields the streaming-context proxy above.
    var clrProxyMock = new Mock<ISparkCLRProxy>();
    clrProxyMock
        .Setup(proxy => proxy.CreateStreamingContext(It.IsAny<SparkContext>(), It.IsAny<long>()))
        .Returns(streamingProxyMock.Object);

    // Must be installed before the contexts below are constructed, because the
    // SparkContext is built from SparkCLREnvironment.SparkCLRProxy.
    SparkCLREnvironment.SparkCLRProxy = clrProxyMock.Object;

    var sparkContextProxy = SparkCLREnvironment.SparkCLRProxy.SparkContextProxy;
    var sparkConf = new SparkConf(new Mock<ISparkConfProxy>().Object);
    var sparkContext = new SparkContext(sparkContextProxy, sparkConf);
    var streamingContext = new StreamingContext(sparkContext, 123);

    // Act: an empty parameter dictionary is enough, since the mock accepts any dictionary.
    var eventHubsParams = new Dictionary<string, string>();
    var unionStream = EventHubsUtils.CreateUnionStream(streamingContext, eventHubsParams);

    // Assert
    Assert.AreEqual(expectedDStreamProxy, unionStream.DStreamProxy);
}