/// <summary>
/// Processes one tuple: reads the aggregation timestamp (field 0), the primary and secondary key
/// values (fields 1 and 2) and the numeric value (field 3), then forwards them to Aggregate.
/// The receive counters are incremented on every call; the error counters are incremented and the
/// exception is logged (not rethrown) when anything in the body throws.
/// </summary>
/// <param name="tuple">The incoming tuple; fields 0-3 are read as DateTime, string, string and double.</param>
public virtual void ProcessTuple(SCPTuple tuple)
{
    last_receive_count++;
    global_receive_count++;

    try
    {
        // Unbox to a nullable DateTime so that a missing timestamp is actually observable.
        // A direct (DateTime) cast made the null check below always true and turned a null
        // field into a NullReferenceException instead of the intended warning.
        var aggregationTimestamp = (DateTime?)tuple.GetValue(0);
        var primarykeyvalue = ((string)tuple.GetValue(1)) ?? Utilities.DEFAULT_VALUE;
        var secondarykeyvalue = ((string)tuple.GetValue(2)) ?? Utilities.DEFAULT_VALUE;
        var value = (double)tuple.GetValue(3);

        if (aggregationTimestamp.HasValue)
        {
            Aggregate(aggregationTimestamp.Value, primarykeyvalue, secondarykeyvalue, value);
        }
        else
        {
            Context.Logger.Warn("Cannot Aggregate: aggregationTimestamp is null. PrimaryKeyValue = {0}, SecondaryKeyValue = {1}, AggregationValue = {2}", primarykeyvalue, secondarykeyvalue, value);
        }
    }
    catch (Exception ex)
    {
        global_error_count++;
        last_error_count++;
        Context.Logger.Error(ex.ToString());
    }
}
/// <summary>
/// Invoked for every tuple delivered to this component. Logs the payload according to the
/// stream the tuple arrived on; tuples from any other stream are reported as unknown.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is read as a string for the sentence stream and cast to Person for the person stream.</param>
public void Execute(SCPTuple tuple)
{
    Context.Logger.Info("Execute enter");

    string sourceStream = tuple.GetSourceStreamId();
    if (sourceStream == SentenceGenerator.STREAM_ID)
    {
        string text = tuple.GetString(0);
        Context.Logger.Info("sentence: {0}", text);
    }
    else if (sourceStream == PersonGenerator.STREAM_ID)
    {
        Person who = (Person)tuple.GetValue(0);
        Context.Logger.Info("person: {0}", who.ToString());
    }
    else
    {
        Context.Logger.Info("Get unknown tuple from unknown stream.");
    }

    Context.Logger.Info("Execute exit");
}
/// <summary>
/// Logs the incoming tweet and inserts it as one row: id, text, retweet count, favorite count,
/// score, sentiment type, created date and the current UTC time. Exceptions are logged and not
/// rethrown.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is expected to hold a TwitterFeed.</param>
public void Execute(SCPTuple tuple)
{
    try
    {
        TwitterFeed tweet = tuple.GetValue(0) as TwitterFeed;
        if (tweet == null)
        {
            // Only the logging used to be guarded; the row was still built from the null
            // tweet, which failed with a NullReferenceException. Skip the tuple explicitly.
            Context.Logger.Warn("Skipping Tuple Id: {0} because field 0 is not a TwitterFeed.", tuple.GetTupleId());
            return;
        }

        Context.Logger.Info("SQL AZURE: Id:" + tweet.Id.ToString());
        Context.Logger.Info("SQL AZURE: Text:" + tweet.Text.ToString());
        Context.Logger.Info("SQL AZURE: RetweetCount:" + tweet.RetweetCount.ToString());
        Context.Logger.Info("SQL AZURE: FavoriteCount:" + tweet.FavoriteCount.ToString());
        Context.Logger.Info("SQL AZURE: Score:" + tweet.Score.ToString());
        Context.Logger.Info("SQL AZURE: Created Date:" + tweet.Createddate.ToString());
        Context.Logger.Info("SQL AZURE: DateTime.UtcNow:" + DateTime.UtcNow.ToString());

        // Values are listed in the order they are handed to Insert.
        List<object> rowValue = new List<object>
        {
            tweet.Id,
            tweet.Text,
            tweet.RetweetCount,
            tweet.FavoriteCount,
            tweet.Score,
            GetSentimentType(tweet.Text),
            tweet.Createddate,
            DateTime.UtcNow
        };

        Insert(rowValue);
    }
    catch (Exception ex)
    {
        Context.Logger.Error("An error occured while executing Tuple Id: {0}. Exception Details:\r\n{1}", tuple.GetTupleId(), ex.ToString());
    }
}
/// <summary>
/// Invoked for every tuple delivered to this component. Casts field 0 to a Person and logs it
/// between an "enter" and an "exit" log line.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is cast to Person.</param>
public void Execute(SCPTuple tuple)
{
    Context.Logger.Info("Execute enter");

    object payload = tuple.GetValue(0);
    Person incoming = (Person)payload;
    string description = incoming.ToString();
    Context.Logger.Info("person: {0}", description);

    Context.Logger.Info("Execute exit");
}
/// <summary>
/// Caches incoming tweets and, on every system tick tuple, emits up to ten of the highest-scoring
/// cached tweets on "TOPNTWEETS_STREAM" and then clears the cache.
/// </summary>
/// <param name="tuple">Either a tick tuple, or a tuple whose field 0 holds a SerializableTweet.</param>
public void Execute(SCPTuple tuple)
{
    var isTickTuple = tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID);
    if (isTickTuple)
    {
        Context.Logger.Debug($"Total tweets in window: {tweetCache.Count}");

        // Take stops at the end of the sequence by itself, so no Math.Min against the count is needed.
        var topNTweets = tweetCache.OrderByDescending(o => o.Score).Take(10).ToList();

        // Emit each of them to the top-N stream
        foreach (var tweet in topNTweets)
        {
            this.context.Emit("TOPNTWEETS_STREAM", new Values(tweet));
        }

        // Remove all existing data and wait for the next window
        tweetCache.Clear();
        return;
    }

    try
    {
        SerializableTweet tweet = tuple.GetValue(0) as SerializableTweet;
        if (tweet == null)
        {
            // Surface a meaningful error instead of the NullReferenceException the
            // unchecked "as" cast used to cause; the catch below logs it and fails the tuple.
            throw new InvalidCastException("Field 0 of the tuple is not a SerializableTweet.");
        }

        // Cache each tweet id only once per window
        if (!tweetCache.Any(o => o.Id.Equals(tweet.Id)))
        {
            tweetCache.Add(tweet);
        }
        Context.Logger.Info(tweet.ToString());

        if (enableAck)
        {
            this.context.Ack(tuple);
            Context.Logger.Info("Total Ack: " + ++totalAck);
        }
    }
    catch (Exception ex)
    {
        Context.Logger.Error("An error occured while executing Tuple Id: {0}. Exception Details:\r\n{1}", tuple.GetTupleId(), ex.ToString());

        //Fail the tuple if enableAck is set so that it is not left pending.
        if (enableAck)
        {
            this.context.Fail(tuple);
        }
    }
}
/// <summary>
/// Invoked for every tuple delivered to this component. Logs the string in field 0 (when it is
/// not null) and acks the tuple.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is cast to string.</param>
public void Execute(SCPTuple tuple)
{
    //Get the string data from the tuple
    string eventValue = (string)tuple.GetValue(0);
    if (eventValue != null)
    {
        //Log the data
        Context.Logger.Info("Received data: " + eventValue);
    }

    //ACK every tuple, including ones with a null payload. The spout expects an ACK per tuple,
    //and previously a null payload left the tuple un-acked.
    this.ctx.Ack(tuple);
}
/// <summary>
/// Parses the JSON event in field 0, floors its timestamp to the configured aggregation window
/// and adds a count of 1 to the aggregate for its primary/secondary key pair. The tuple is acked
/// when processing completes and failed when an exception is thrown.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is cast to string and parsed as a JSON object.</param>
public virtual void Execute(SCPTuple tuple)
{
    last_receive_count++;
    global_receive_count++;

    var inputeventdata = (string)tuple.GetValue(0);
    try
    {
        if (inputeventdata != null)
        {
            JToken token = JObject.Parse(inputeventdata);

            //This respects the timestamp field of the event, so aggregation is based on the
            //original event time rather than the receive time.
            var timestampvalue = (string)token.SelectToken(this.appConfig.TimestampField);
            DateTime timestamp;
            if (!DateTime.TryParse(timestampvalue, out timestamp))
            {
                //The parse result used to be ignored, which silently aggregated unparseable
                //events into the default(DateTime) window. Fall back to the receive time.
                Context.Logger.Warn("Could not parse timestamp value '{0}'; using the receive time instead.", timestampvalue);
                timestamp = DateTime.UtcNow;
            }

            //Floor the timestamp to the aggregation window. This is an alternative to tick tuples:
            //several windows can be in flight at once and slightly out-of-order events still land
            //in the right window.
            var aggregationTimestamp = timestamp.Floor(this.appConfig.AggregationWindow);

            var primarykeyvalue = ((string)token.SelectToken(this.PrimaryKey)) ?? Utilities.DEFAULT_VALUE;
            var secondarykeyvalue = ((string)token.SelectToken(this.SecondaryKey)) ?? Utilities.DEFAULT_VALUE;

            //Aggregate the current input as a count of 1. The final argument could instead be the
            //value of a field of the input, turning the aggregation into a sum.
            Aggregate(aggregationTimestamp, primarykeyvalue, secondarykeyvalue, 1);
        }
        else
        {
            Context.Logger.Warn("Received a tuple with a null event payload; nothing to aggregate.");
        }

        //Ack the tuple so that the spout can move forward. A null payload is acked as well:
        //there is nothing to process and it was previously left un-acked.
        this.context.Ack(tuple);
    }
    catch (Exception ex)
    {
        global_error_count++;
        Context.Logger.Error(ex.ToString());

        //Fail the tuple in the spout if it could not be deserialized or aggregated.
        this.context.Fail(tuple);
    }
}
//Handle one tuple from the stream: field 0 is the tweet count, field 1 the tweet text
public void Execute(SCPTuple tuple)
{
    Context.Logger.Info("Execute enter");

    long runningCount = (long)tuple.GetValue(0);
    string latestTweet = tuple.GetValue(1) as string;

    try
    {
        //Throttle the updates: send only when at least 100 milliseconds have elapsed on the timer
        //and drop the tuples in between, so that not every tuple results in an update.
        bool intervalElapsed = timer.ElapsedMilliseconds >= 100;
        if (intervalElapsed)
        {
            SendSingnalRUpdate(runningCount, latestTweet);
            timer.Restart();
        }
    }
    catch (Exception ex)
    {
        Context.Logger.Error("SignalRBroadcastBolt Exception: " + ex.Message + "\nStackTrace: \n" + ex.StackTrace);
    }

    Context.Logger.Info("Execute exit");
}
/// <summary>
/// Caches incoming tweets and, on every system tick tuple, emits up to ten of the highest-scoring
/// cached tweets on "TWEETRANK_STREAM" and then clears the cache.
/// </summary>
/// <param name="tuple">Either a tick tuple, or a tuple whose field 0 holds a TwitterFeed.</param>
public void Execute(SCPTuple tuple)
{
    var isTickTuple = tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID);
    if (isTickTuple)
    {
        Context.Logger.Debug($"Total tweets in window: {tweetCache.Count}");

        // Take stops at the end of the sequence by itself, so no Math.Min against the count is needed.
        var topNTweets = tweetCache.OrderByDescending(o => o.Score).Take(10).ToList();
        foreach (var tweet in topNTweets)
        {
            this.context.Emit("TWEETRANK_STREAM", new Values(tweet));
        }

        // Start the next window with an empty cache
        tweetCache.Clear();
        return;
    }

    try
    {
        TwitterFeed tweet = tuple.GetValue(0) as TwitterFeed;
        if (tweet == null)
        {
            // Surface a meaningful error instead of the NullReferenceException the
            // unchecked "as" cast used to cause; the catch below logs it and fails the tuple.
            throw new InvalidCastException("Field 0 of the tuple is not a TwitterFeed.");
        }

        // Cache each tweet id only once per window
        if (!tweetCache.Any(o => o.Id.Equals(tweet.Id)))
        {
            tweetCache.Add(tweet);
        }
        Context.Logger.Info(tweet.ToString());

        if (enableAck)
        {
            this.context.Ack(tuple);
            Context.Logger.Info("Total Ack: " + ++totalAck);
        }
    }
    catch (Exception ex)
    {
        Context.Logger.Error("An error occured while executing Tuple Id: {0}. Exception Details:\r\n{1}", tuple.GetTupleId(), ex.ToString());

        // Fail the tuple if enableAck is set so that it is not left pending.
        if (enableAck)
        {
            this.context.Fail(tuple);
        }
    }
}
/// <summary>
/// Parses the JSON event in field 0, builds a Device from its "deviceId" and "deviceValue"
/// members and inserts it into the table. The tuple is acked when processing completes and
/// failed when an exception is thrown.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is cast to string and parsed as a JSON object.</param>
public void Execute(SCPTuple tuple)
{
    Context.Logger.Info("Processing events");

    try
    {
        string eventValue = (string)tuple.GetValue(0);
        if (eventValue != null)
        {
            JObject eventData = JObject.Parse(eventValue);
            Device device = new Device((int)eventData["deviceId"]);
            device.value = (int)eventData["deviceValue"];

            TableOperation insertOperation = TableOperation.Insert(device);
            table.Execute(insertOperation);
        }

        // Ack every tuple, including ones with a null payload, which were previously
        // left un-acked.
        this.ctx.Ack(tuple);
    }
    catch (System.Exception ex)
    {
        // Parsing or the insert failed: log the error and fail the tuple explicitly
        // instead of letting the exception escape from Execute.
        Context.Logger.Error("An error occured while executing Tuple Id: {0}. Exception Details:\r\n{1}", tuple.GetTupleId(), ex.ToString());
        this.ctx.Fail(tuple);
    }
}
/// <summary>
/// Invoked for every tuple delivered to this component. Casts field 0 to a Person, logs it and,
/// on the task with index 0, also appends it to a per-process output file.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is cast to Person.</param>
public void Execute(SCPTuple tuple)
{
    Context.Logger.Info("Execute enter");

    Person incoming = (Person)tuple.GetValue(0);
    Context.Logger.Info("person: {0}", incoming.ToString());

    // Output file used for BVT test validation. With several parallel instances of the
    // component, only the one with task index 0 writes it.
    bool isReportingTask = taskIndex == 0;
    if (isReportingTask)
    {
        string outputPath = @"..\..\..\..\..\HybridTopologyOutput" + Process.GetCurrentProcess().Id + ".txt";
        FileStream appendStream = new FileStream(outputPath, FileMode.Append);
        using (StreamWriter output = new StreamWriter(appendStream))
        {
            output.WriteLine("person: {0}", incoming.ToString());
        }
    }

    Context.Logger.Info("Execute exit");
}
/// <summary>
/// The execute method for incoming tuples. Looks up DocumentDb documents for the value in field 0
/// and emits every match as a string on the default stream. A string value is matched against the
/// document id; for any other object the property named by DocumentDbLookupField is read from it
/// and used in a query on that field.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 carries the lookup value.</param>
public void Execute(SCPTuple tuple)
{
    try
    {
        //Assuming that the first field of the incoming tuple has the lookup value you are interested in
        var value = tuple.GetValue(0);
        var lookupValue = value.ToString();
        IEnumerable<object> documents = null;

        if (value is string)
        {
            documents = this.documentClient.CreateDocumentQuery(documentCollection.DocumentsLink).
                Where(d => d.Id.Equals(value)).AsEnumerable();
        }
        else
        {
            Context.Logger.Info("Lookup value is not a string, getting the value for the lookup field in the object.");
            lookupValue = value.GetType().GetProperty(this.DocumentDbLookupField).GetValue(value).ToString();

            //NOTE(review): the query text is built by concatenation, so a lookup value that contains
            //a double quote changes the query. Consider a parameterized query if values are untrusted.
            string query = "SELECT * FROM ROOT R WHERE R[\"" + this.DocumentDbLookupField + "\"] = \"" + lookupValue + "\"";
            Context.Logger.Info("DocumentDb Query: {0}", query);
            documents = this.documentClient.CreateDocumentQuery(documentCollection.DocumentsLink, query).AsEnumerable();
        }

        //Materialize the results once. The query is deferred, so calling Count() and then
        //iterating it again enumerated it twice.
        List<object> matches = documents.ToList();

        if (matches.Count == 0)
        {
            Context.Logger.Info("No documents found for lookup field: {0}, lookup value: {1}", this.DocumentDbLookupField, lookupValue);
        }
        else
        {
            foreach (var document in matches)
            {
                //Emit the string form of the document
                var emitValue = document.ToString();
                Context.Logger.Info("Found document for lookup field: {0}, lookup value: {1}, document: {2}", this.DocumentDbLookupField, lookupValue, emitValue);

                if (enableAck)
                {
                    //With enableAck the emit is anchored to the incoming tuple
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, new Values(emitValue));
                }
                else
                {
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, new Values(emitValue));
                }
            }
        }

        //Ack the tuple if enableAck is set to true.
        if (enableAck)
        {
            this.context.Ack(tuple);
        }
    }
    catch (Exception ex)
    {
        Context.Logger.Error("An error occured while executing Tuple Id: {0}. Exception Details:\r\n{1}", tuple.GetTupleId(), ex.ToString());

        //Fail the tuple if enableAck is set to true.
        if (enableAck)
        {
            this.context.Fail(tuple);
        }
    }
}
/// <summary>
/// Executes incoming tuples: scans the HBase table using field 0 of the tuple as both start and
/// end row, and emits every row found (row key followed by its cell values, decoded as UTF-8
/// strings) on the default stream.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is used as the row key for the scan.</param>
public void Execute(SCPTuple tuple)
{
    try
    {
        //TODO: Adjust the scan criteria to your needs, e.g. a prefix filter on field 0,
        //or a different field as the end row.
        var scanRange = new Scanner()
        {
            startRow = ToBytes(tuple.GetValue(0)),
            endRow = ToBytes(tuple.GetValue(0)),
        };
        var scanner = HBaseClusterClient.CreateScanner(this.HBaseTableName, scanRange);

        //Keep pulling batches until the scanner returns null
        for (CellSet batch = HBaseClusterClient.ScannerGetNext(scanner);
             batch != null;
             batch = HBaseClusterClient.ScannerGetNext(scanner))
        {
            Context.Logger.Info("Rows found: {0}", batch.rows.Count);

            foreach (var hbaseRow in batch.rows)
            {
                //The row key goes first, followed by the cell values.
                //TODO: The byte[] values can hold any type; only strings are handled here.
                string rowKey = Encoding.UTF8.GetString(hbaseRow.key);
                var outgoing = new List<object>();
                outgoing.Add(rowKey);
                outgoing.AddRange(hbaseRow.values.Select(cell => Encoding.UTF8.GetString(cell.data)));

                string joinedValues = String.Join(", ", hbaseRow.values.Select(cell => Encoding.UTF8.GetString(cell.data)));
                Context.Logger.Info("Rowkey: {0}, Values: {1}", rowKey, joinedValues);

                if (enableAck)
                {
                    //Anchor the emit to the incoming tuple
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, outgoing);
                }
                else
                {
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, outgoing);
                }
            }
        }

        //Ack the tuple once the scan is complete, if enableAck is set.
        if (enableAck)
        {
            this.context.Ack(tuple);
        }
    }
    catch (Exception ex)
    {
        Context.Logger.Error("An error occured while executing Tuple Id: {0}. Exception Details:\r\n{1}", tuple.GetTupleId(), ex.ToString());

        //Fail the tuple if enableAck is set.
        if (enableAck)
        {
            this.context.Fail(tuple);
        }
    }
}
/// <summary>
/// The execute method for incoming tuples. Looks up DocumentDb documents for the value in field 0
/// and emits every match as a string on the default stream. A string value is matched against the
/// document id; for any other object the property named by DocumentDbLookupField is read from it
/// and used in a query on that field.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 carries the lookup value.</param>
public void Execute(SCPTuple tuple)
{
    try
    {
        //Assuming that the first field of the incoming tuple has the lookup value you are interested in
        var value = tuple.GetValue(0);
        var lookupValue = value.ToString();
        IEnumerable<object> documents = null;

        if (value is string)
        {
            documents = this.documentClient.CreateDocumentQuery(documentCollection.DocumentsLink).
                Where(d => d.Id.Equals(value)).AsEnumerable();
        }
        else
        {
            Context.Logger.Info("Lookup value is not a string, getting the value for the lookup field in the object.");
            lookupValue = value.GetType().GetProperty(this.DocumentDbLookupField).GetValue(value).ToString();

            //NOTE(review): the query text is built by concatenation, so a lookup value that contains
            //a double quote changes the query. Consider a parameterized query if values are untrusted.
            string query = "SELECT * FROM ROOT R WHERE R[\"" + this.DocumentDbLookupField + "\"] = \"" + lookupValue + "\"";
            Context.Logger.Info("DocumentDb Query: {0}", query);
            documents = this.documentClient.CreateDocumentQuery(documentCollection.DocumentsLink, query).AsEnumerable();
        }

        //Materialize the results once. The query is deferred, so calling Count() and then
        //iterating it again enumerated it twice.
        List<object> matches = documents.ToList();

        if (matches.Count == 0)
        {
            Context.Logger.Info("No documents found for lookup field: {0}, lookup value: {1}", this.DocumentDbLookupField, lookupValue);
        }
        else
        {
            foreach (var document in matches)
            {
                //Emit the string form of the document
                var emitValue = document.ToString();
                Context.Logger.Info("Found document for lookup field: {0}, lookup value: {1}, document: {2}", this.DocumentDbLookupField, lookupValue, emitValue);

                if (enableAck)
                {
                    //With enableAck the emit is anchored to the incoming tuple
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, new Values(emitValue));
                }
                else
                {
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, new Values(emitValue));
                }
            }
        }

        //Ack the tuple if enableAck is set to true.
        if (enableAck)
        {
            this.context.Ack(tuple);
        }
    }
    catch (Exception ex)
    {
        Context.Logger.Error("An error occured while executing Tuple Id: {0}. Exception Details:\r\n{1}", tuple.GetTupleId(), ex.ToString());

        //Fail the tuple if enableAck is set to true.
        if (enableAck)
        {
            this.context.Fail(tuple);
        }
    }
}
/// <summary>
/// Hands field 0 of the tuple to ExecuteTweet as a SerializableTweet. The "as" conversion yields
/// null when the field holds anything else, and that null is passed through unchanged.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is expected to hold a SerializableTweet.</param>
public void Execute(SCPTuple tuple)
{
    ExecuteTweet(tuple.GetValue(0) as SerializableTweet);
}
/// <summary>
/// Executes incoming session tuples. A START event is re-emitted with a duration of 0. For an
/// END event the HBase table is scanned for the session's START row and the tuple is emitted
/// with the elapsed time, in ticks, between the stored start time and this event's time.
/// </summary>
/// <param name="tuple">Field 0 is read as the session id (string), field 1 as the event name (string) and field 2 as the event time (long).</param>
public void Execute(SCPTuple tuple)
{
    //get the tuple info
    string sessionId = tuple.GetString(0);
    string sessionEvent = tuple.GetString(1);
    long sessionEventTime = tuple.GetLong(2);

    if (sessionEvent == "START")
    {
        //Re-emit the incoming data plus a duration of 0, since the END event is not known yet.
        //NOTE: If messages may arrive out of order, this would need to query HBase to see
        //whether the END event has already arrived.
        Values emitValues = new Values(tuple.GetValue(0), tuple.GetValue(1), tuple.GetValue(2), 0L);

        if (enableAck)
        {
            //Emit the values, anchored to the incoming tuple, then ack it
            this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, emitValues);
            this.context.Ack(tuple);
        }
        else
        {
            //No ack enabled? Fire and forget.
            this.context.Emit(Constants.DEFAULT_STREAM_ID, emitValues);
        }
    }
    else if (sessionEvent == "END")
    {
        //All filters must pass
        FilterList filters = new FilterList(FilterList.Operator.MustPassAll);

        //Filter on the row by sessionID
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.Equal, new BinaryComparator(TypeHelper.ToBytes(sessionId)));
        filters.AddFilter(rowFilter);

        //Filter on the event column for the START event
        SingleColumnValueFilter valueFilter = new SingleColumnValueFilter(
            Encoding.UTF8.GetBytes("cf"),
            Encoding.UTF8.GetBytes("event"),
            CompareFilter.CompareOp.Equal,
            Encoding.UTF8.GetBytes("START"));
        filters.AddFilter(valueFilter);

        //Create the scanner from the filters
        var scannerSettings = new Scanner() { filter = filters.ToEncodedString() };
        var scanner = HBaseClusterClient.CreateScanner(HBaseTableName, scannerSettings);

        //Set when at least one duration tuple was emitted, so the ack happens once at the end
        bool emittedDuration = false;

        CellSet readSet = null;
        while ((readSet = HBaseClusterClient.ScannerGetNext(scanner)) != null)
        {
            //In theory we should only find one row
            foreach (var row in readSet.rows)
            {
                //Pull back just the event column. FirstOrDefault yields null when the column is
                //absent, where indexing [0] used to throw.
                string rowState = row.values
                    .Where(v => Encoding.UTF8.GetString(v.column) == "cf:event")
                    .Select(v => Encoding.UTF8.GetString(v.data))
                    .FirstOrDefault();

                if (rowState != "START")
                {
                    //Since this is a simple example, do nothing.
                    //In a real solution, you'd have to figure out what to do
                    //when receiving an END before a START.
                    continue;
                }

                //Get the start time; skip the row when it has no time column
                long? startUnixTime = row.values
                    .Where(v => Encoding.UTF8.GetString(v.column) == "cf:time")
                    .Select(v => (long?)BitConverter.ToInt64(v.data, 0))
                    .FirstOrDefault();

                if (!startUnixTime.HasValue)
                {
                    Context.Logger.Warn("START row for session {0} has no cf:time column; skipping it.", sessionId);
                    continue;
                }

                //Get the difference between start and end
                var startTime = TypeHelper.FromUnixTime(startUnixTime.Value);
                DateTime endTime = TypeHelper.FromUnixTime(sessionEventTime);
                TimeSpan duration = endTime.Subtract(startTime);

                //Emit the tuple, with the duration between start/end.
                Values emitValues = new Values(sessionId, sessionEvent, sessionEventTime, duration.Ticks);
                if (enableAck)
                {
                    //Emit the values, anchored to the incoming tuple
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, emitValues);
                }
                else
                {
                    //No ack enabled? Fire and forget.
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, emitValues);
                }
                emittedDuration = true;
            }
        }

        //Ack once after the scan rather than once per matching row, so that the tuple is never
        //acked more than once.
        //NOTE(review): as before, an END with no matching START row (and any event other than
        //START/END) is neither acked nor failed - confirm that this is intended.
        if (enableAck && emittedDuration)
        {
            this.context.Ack(tuple);
        }
    }
}
/// <summary>
/// Adds the integer in field 0 of the tuple to the running count and logs the new total.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is unboxed as an int.</param>
public void Execute(SCPTuple tuple)
{
    count += (int)tuple.GetValue(0);

    // The format string used to have no placeholder, so the count argument never
    // appeared in the log output.
    Context.Logger.Info("counter: {0}", count);
}
/// <summary>
/// Executes incoming tuples: scans the HBase table using field 0 of the tuple as both start and
/// end row, and emits every row found (row key followed by its cell values, decoded as UTF-8
/// strings) on the default stream.
/// </summary>
/// <param name="tuple">The incoming tuple; field 0 is used as the row key for the scan.</param>
public void Execute(SCPTuple tuple)
{
    try
    {
        //TODO: Adjust the scan criteria to your needs, e.g. a prefix filter on field 0,
        //or a different field as the end row.
        var scanRange = new Scanner()
        {
            startRow = ToBytes(tuple.GetValue(0)),
            endRow = ToBytes(tuple.GetValue(0)),
        };
        var scanner = HBaseClusterClient.CreateScanner(this.HBaseTableName, scanRange);

        //Keep pulling batches until the scanner returns null
        for (CellSet batch = HBaseClusterClient.ScannerGetNext(scanner);
             batch != null;
             batch = HBaseClusterClient.ScannerGetNext(scanner))
        {
            Context.Logger.Info("Rows found: {0}", batch.rows.Count);

            foreach (var hbaseRow in batch.rows)
            {
                //The row key goes first, followed by the cell values.
                //TODO: The byte[] values can hold any type; only strings are handled here.
                string rowKey = Encoding.UTF8.GetString(hbaseRow.key);
                var outgoing = new List<object>();
                outgoing.Add(rowKey);
                outgoing.AddRange(hbaseRow.values.Select(cell => Encoding.UTF8.GetString(cell.data)));

                string joinedValues = String.Join(", ", hbaseRow.values.Select(cell => Encoding.UTF8.GetString(cell.data)));
                Context.Logger.Info("Rowkey: {0}, Values: {1}", rowKey, joinedValues);

                if (enableAck)
                {
                    //Anchor the emit to the incoming tuple
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, outgoing);
                }
                else
                {
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, outgoing);
                }
            }
        }

        //Ack the tuple once the scan is complete, if enableAck is set.
        if (enableAck)
        {
            this.context.Ack(tuple);
        }
    }
    catch (Exception ex)
    {
        Context.Logger.Error("An error occured while executing Tuple Id: {0}. Exception Details:\r\n{1}", tuple.GetTupleId(), ex.ToString());

        //Fail the tuple if enableAck is set.
        if (enableAck)
        {
            this.context.Fail(tuple);
        }
    }
}
/// <summary>
/// Executes incoming session tuples. A START event is re-emitted with a duration of 0. For an
/// END event the HBase table is scanned for the session's START row and the tuple is emitted
/// with the elapsed time, in ticks, between the stored start time and this event's time.
/// </summary>
/// <param name="tuple">Field 0 is read as the session id (string), field 1 as the event name (string) and field 2 as the event time (long).</param>
public void Execute(SCPTuple tuple)
{
    //get the tuple info
    string sessionId = tuple.GetString(0);
    string sessionEvent = tuple.GetString(1);
    long sessionEventTime = tuple.GetLong(2);

    if (sessionEvent == "START")
    {
        //Re-emit the incoming data plus a duration of 0, since the END event is not known yet.
        //NOTE: If messages may arrive out of order, this would need to query HBase to see
        //whether the END event has already arrived.
        Values emitValues = new Values(tuple.GetValue(0), tuple.GetValue(1), tuple.GetValue(2), 0L);

        if (enableAck)
        {
            //Emit the values, anchored to the incoming tuple, then ack it
            this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, emitValues);
            this.context.Ack(tuple);
        }
        else
        {
            //No ack enabled? Fire and forget.
            this.context.Emit(Constants.DEFAULT_STREAM_ID, emitValues);
        }
    }
    else if (sessionEvent == "END")
    {
        //All filters must pass
        FilterList filters = new FilterList(FilterList.Operator.MustPassAll);

        //Filter on the row by sessionID
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.Equal, new BinaryComparator(TypeHelper.ToBytes(sessionId)));
        filters.AddFilter(rowFilter);

        //Filter on the event column for the START event
        SingleColumnValueFilter valueFilter = new SingleColumnValueFilter(
            Encoding.UTF8.GetBytes("cf"),
            Encoding.UTF8.GetBytes("event"),
            CompareFilter.CompareOp.Equal,
            Encoding.UTF8.GetBytes("START"));
        filters.AddFilter(valueFilter);

        //Create the scanner from the filters
        var scannerSettings = new Scanner() { filter = filters.ToEncodedString() };
        var scanner = HBaseClusterClient.CreateScanner(HBaseTableName, scannerSettings);

        //Set when at least one duration tuple was emitted, so the ack happens once at the end
        bool emittedDuration = false;

        CellSet readSet = null;
        while ((readSet = HBaseClusterClient.ScannerGetNext(scanner)) != null)
        {
            //In theory we should only find one row
            foreach (var row in readSet.rows)
            {
                //Pull back just the event column. FirstOrDefault yields null when the column is
                //absent, where indexing [0] used to throw.
                string rowState = row.values
                    .Where(v => Encoding.UTF8.GetString(v.column) == "cf:event")
                    .Select(v => Encoding.UTF8.GetString(v.data))
                    .FirstOrDefault();

                if (rowState != "START")
                {
                    //Since this is a simple example, do nothing.
                    //In a real solution, you'd have to figure out what to do
                    //when receiving an END before a START.
                    continue;
                }

                //Get the start time; skip the row when it has no time column
                long? startUnixTime = row.values
                    .Where(v => Encoding.UTF8.GetString(v.column) == "cf:time")
                    .Select(v => (long?)BitConverter.ToInt64(v.data, 0))
                    .FirstOrDefault();

                if (!startUnixTime.HasValue)
                {
                    Context.Logger.Warn("START row for session {0} has no cf:time column; skipping it.", sessionId);
                    continue;
                }

                //Get the difference between start and end
                var startTime = TypeHelper.FromUnixTime(startUnixTime.Value);
                DateTime endTime = TypeHelper.FromUnixTime(sessionEventTime);
                TimeSpan duration = endTime.Subtract(startTime);

                //Emit the tuple, with the duration between start/end.
                Values emitValues = new Values(sessionId, sessionEvent, sessionEventTime, duration.Ticks);
                if (enableAck)
                {
                    //Emit the values, anchored to the incoming tuple
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, emitValues);
                }
                else
                {
                    //No ack enabled? Fire and forget.
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, emitValues);
                }
                emittedDuration = true;
            }
        }

        //Ack once after the scan rather than once per matching row, so that the tuple is never
        //acked more than once.
        //NOTE(review): as before, an END with no matching START row (and any event other than
        //START/END) is neither acked nor failed - confirm that this is intended.
        if (enableAck && emittedDuration)
        {
            this.context.Ack(tuple);
        }
    }
}