/// <summary>
/// The Execute() function is called when a new tuple is available.
/// </summary>
/// <param name="tuple"></param>
public void Execute(SCPTuple tuple)
{
    if (tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID))
    {
        if (partialCount > 0)
        {
            Context.Logger.Info("updating database" +
                ", partialCount: " + partialCount +
                ", totalCount: " + totalCount);
            db.insertValue(CurrentTimeMillis(), partialCount);
            partialCount = 0L;

            if (enableAck)
            {
                Context.Logger.Info("tuplesToAck: " + tuplesToAck);
                foreach (var tupleToAck in tuplesToAck)
                {
                    this.ctx.Ack(tupleToAck);
                }
                tuplesToAck.Clear();
            }
        }
    }
    else
    {
        //Merge partialCount from all PartialCountBolt tasks
        var incomingPartialCount = tuple.GetLong(0);
        partialCount += incomingPartialCount;
        totalCount += incomingPartialCount;

        //Do not ack here but add to the acking queue
        if (enableAck)
        {
            tuplesToAck.Enqueue(tuple);
        }
    }
}
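Several of the counter bolts in these snippets call a CurrentTimeMillis() helper that isn't included here. A minimal sketch, assuming all it needs to do is return the current Unix epoch time in milliseconds:

private static readonly DateTime UnixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

//Return the current time as milliseconds since the Unix epoch
private static long CurrentTimeMillis()
{
    return (long)(DateTime.UtcNow - UnixEpoch).TotalMilliseconds;
}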
/// <summary>
/// The Execute() function is called when a new tuple is available.
/// </summary>
/// <param name="tuple"></param>
public void Execute(SCPTuple tuple)
{
    if (tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID))
    {
        if (partialCount > 0)
        {
            Context.Logger.Info("emitting totalCount" +
                ", partialCount: " + partialCount +
                ", totalCount: " + totalCount);

            //Emit with the tuples in this batch as anchors
            this.ctx.Emit(Constants.DEFAULT_STREAM_ID, tuplesToAck, new Values(CurrentTimeMillis(), totalCount));

            Context.Logger.Info("acking the batch: " + tuplesToAck.Count);
            foreach (var t in tuplesToAck)
            {
                this.ctx.Ack(t);
            }

            //Once all the tuples are acked, clear the batch
            tuplesToAck.Clear();
            partialCount = 0L;
        }
    }
    else
    {
        //Merge partialCount from all PartialCountBolt tasks
        var incomingPartialCount = tuple.GetLong(0);
        partialCount += incomingPartialCount;
        totalCount += incomingPartialCount;

        //Do not ack here but add to the acking queue
        tuplesToAck.Enqueue(tuple);
    }
}
public void Execute(SCPTuple tuple)
{
    Context.Logger.Info("Execute enter");

    if (Constants.SYSTEM_TICK_STREAM_ID.Equals(tuple.GetSourceStreamId()))
    {
        long data = tuple.GetLong(0);
        Context.Logger.Info("tick tuple, value: {0}", data);
    }
    else
    {
        byte[] data = tuple.GetBinary(0);
        int bytesNum = data.Count();

        if (enableAck)
        {
            this.ctx.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple> { tuple }, new Values(bytesNum));
            this.ctx.Ack(tuple);
            Context.Logger.Info("emit bytesNum: {0}", bytesNum);
            Context.Logger.Info("Ack tuple: tupleId: {0}", tuple.GetTupleId());
        }
        else
        {
            this.ctx.Emit(Constants.DEFAULT_STREAM_ID, new Values(bytesNum));
            Context.Logger.Info("emit bytesNum: {0}", bytesNum);
        }
    }

    Context.Logger.Info("Execute exit");
}
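The enableAck field used by these bolts isn't set anywhere in the Execute() snippets. A hedged sketch of how it is commonly wired up in an SCP.NET bolt constructor, reading the ack setting from the plugin configuration; the class name PartialBoltSketch and the declared schemas are illustrative assumptions, not part of the original sample:

using System;
using System.Collections.Generic;
using Microsoft.SCP;

public class PartialBoltSketch : ISCPBolt
{
    private Context ctx;
    private bool enableAck = false;

    public PartialBoltSketch(Context ctx)
    {
        this.ctx = ctx;

        //Illustrative schemas: a byte[] payload in, an int byte count out, plus tick tuples
        Dictionary<string, List<Type>> inputSchema = new Dictionary<string, List<Type>>();
        inputSchema.Add(Constants.DEFAULT_STREAM_ID, new List<Type>() { typeof(byte[]) });
        inputSchema.Add(Constants.SYSTEM_TICK_STREAM_ID, new List<Type>() { typeof(long) });

        Dictionary<string, List<Type>> outputSchema = new Dictionary<string, List<Type>>();
        outputSchema.Add(Constants.DEFAULT_STREAM_ID, new List<Type>() { typeof(int) });

        this.ctx.DeclareComponentSchema(new ComponentStreamSchema(inputSchema, outputSchema));

        //Ack behavior follows the topology's plugin configuration, if the setting is present
        if (Context.Config.pluginConf.ContainsKey(Constants.NONTRANSACTIONAL_ENABLE_ACK))
        {
            enableAck = (bool)Context.Config.pluginConf[Constants.NONTRANSACTIONAL_ENABLE_ACK];
        }
    }

    public void Execute(SCPTuple tuple)
    {
        //See the byte-count Execute() snippet above
    }
}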
    ':', ';', '<', '=', '>', '?', '@',
    '[', ']', '^', '_', '`', '{', '|', '}', '~'
}; //ascii 58--64 + misc.

//Process a tuple from the stream
public void Execute(SCPTuple tuple)
{
    Context.Logger.Info("Execute enter");

    try
    {
        var words = tuple.GetString(0).ToLower().Split(_punctuationChars);
        int sentimentScore = CalcSentimentScore(words);

        //Build adjacent word pairs so phrases are indexed as well as single words
        var word_pairs = words.Take(words.Length - 1)
            .Select((word, idx) => string.Format("{0} {1}", word, words[idx + 1]));
        var all_words = words.Concat(word_pairs).ToList();

        //Emit all index entries for counting and writing downstream
        foreach (var word in all_words)
        {
            this.ctx.Emit(new Values(word, tuple.GetLong(1), tuple.GetString(2),
                tuple.GetString(3), tuple.GetString(4), sentimentScore));
        }
    }
    catch (Exception ex)
    {
        Context.Logger.Error("SentimentIndexerBolt Exception: " + ex.Message +
            "\nStackTrace: \n" + ex.StackTrace);
    }

    Context.Logger.Info("Execute exit");
}
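CalcSentimentScore() isn't shown above. A minimal sketch, assuming the bolt holds a word-to-score dictionary (_sentimentScores, a hypothetical field loaded elsewhere, for example from a sentiment word list):

//Hypothetical word-to-score dictionary, populated during initialization
private Dictionary<string, int> _sentimentScores = new Dictionary<string, int>();

//Sum the scores of any words found in the dictionary; unknown words contribute 0
private int CalcSentimentScore(string[] words)
{
    int score = 0;
    foreach (var word in words)
    {
        int wordScore;
        if (_sentimentScores.TryGetValue(word, out wordScore))
        {
            score += wordScore;
        }
    }
    return score;
}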
/// <summary>
/// The Execute() function is called when a new tuple is available.
/// </summary>
/// <param name="tuple"></param>
public void Execute(SCPTuple tuple)
{
    if (tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID))
    {
        if (partialCount > 0)
        {
            Context.Logger.Info("emitting totalCount" +
                ", partialCount: " + partialCount +
                ", totalCount: " + totalCount);
            this.ctx.Emit(new Values(CurrentTimeMillis(), totalCount));
            partialCount = 0L;
        }
    }
    else
    {
        //Merge partialCount from all EventCountPartialCountBolt tasks
        var incomingPartialCount = tuple.GetLong(0);
        partialCount += incomingPartialCount;
        totalCount += incomingPartialCount;
    }
}
/// <summary>
/// The Execute() function is called when a new tuple is available.
/// </summary>
/// <param name="tuple"></param>
public void Execute(SCPTuple tuple)
{
    if (tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID))
    {
        if (partialCount > 0)
        {
            Context.Logger.Info("updating database" +
                ", partialCount: " + partialCount +
                ", totalCount: " + totalCount);
            db.insertValue(CurrentTimeMillis(), partialCount);
            partialCount = 0L;
        }
    }
    else
    {
        //Merge partialCount from all PartialCountBolt tasks
        var incomingPartialCount = tuple.GetLong(0);
        partialCount += incomingPartialCount;
        totalCount += incomingPartialCount;
    }
}
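The db field that insertValue() is called on isn't defined in these snippets. A hedged sketch of one possible implementation, assuming the counts land in a SQL table; the CountDb class, the EventCounts table, and its column names are placeholders:

using System.Data.SqlClient;

//Hypothetical helper behind the 'db' field: writes one (timestamp, count) row per tick
public class CountDb
{
    private readonly string connectionString;

    public CountDb(string connectionString)
    {
        this.connectionString = connectionString;
    }

    public void insertValue(long timestampMillis, long count)
    {
        using (var connection = new SqlConnection(connectionString))
        using (var command = new SqlCommand(
            "INSERT INTO EventCounts (TimestampMillis, PartialCount) VALUES (@ts, @count)", connection))
        {
            command.Parameters.AddWithValue("@ts", timestampMillis);
            command.Parameters.AddWithValue("@count", count);
            connection.Open();
            command.ExecuteNonQuery();
        }
    }
}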
/// <summary>
/// Executes incoming tuples
/// </summary>
/// <param name="tuple">The first field is treated as rowkey and rest as column names</param>
public void Execute(SCPTuple tuple)
{
    //Get the tuple info
    string sessionId = tuple.GetString(0);
    string sessionEvent = tuple.GetString(1);
    long sessionEventTime = tuple.GetLong(2);

    //If it's a start event, assume there's nothing to find, so just re-emit.
    //NOTE: If messages may arrive out of order, you would need to add logic to
    //query HBase to see if the end event has previously arrived,
    //calculate the duration, etc.
    if (sessionEvent == "START")
    {
        //Just re-emit the incoming data, plus a 0 duration, since the END event isn't known yet
        Values emitValues = new Values(tuple.GetValue(0), tuple.GetValue(1), tuple.GetValue(2), 0L);

        //Is ack enabled?
        if (enableAck)
        {
            //Emit the values, anchored to the incoming tuple
            this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, emitValues);
            //Ack the incoming tuple
            this.context.Ack(tuple);
        }
        else
        {
            //No ack enabled? Fire and forget.
            this.context.Emit(Constants.DEFAULT_STREAM_ID, emitValues);
        }
    }
    if (sessionEvent == "END")
    {
        //Use filters
        FilterList filters = new FilterList(FilterList.Operator.MustPassAll);

        //Filter on the row by sessionID
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.Equal,
            new BinaryComparator(TypeHelper.ToBytes(sessionId)));
        filters.AddFilter(rowFilter);

        //Filter on the event column for the START event
        SingleColumnValueFilter valueFilter = new SingleColumnValueFilter(
            Encoding.UTF8.GetBytes("cf"),
            Encoding.UTF8.GetBytes("event"),
            CompareFilter.CompareOp.Equal,
            Encoding.UTF8.GetBytes("START"));
        filters.AddFilter(valueFilter);

        //Create scanner settings using the filters
        var scannerSettings = new Scanner()
        {
            filter = filters.ToEncodedString()
        };

        //Get the scanner
        var scanner = HBaseClusterClient.CreateScanner(HBaseTableName, scannerSettings);

        CellSet readSet = null;
        while ((readSet = HBaseClusterClient.ScannerGetNext(scanner)) != null)
        {
            //In theory we should only find one row
            foreach (var row in readSet.rows)
            {
                //Pull back just the event column
                var rowState = row.values.Where(v => Encoding.UTF8.GetString(v.column) == "cf:event")
                    .Select(v => Encoding.UTF8.GetString(v.data)).ToArray()[0];

                //Is it a START event as expected?
                if (rowState == "START")
                {
                    //Get the start time
                    var startTime = TypeHelper.FromUnixTime(
                        row.values.Where(v => Encoding.UTF8.GetString(v.column) == "cf:time")
                            .Select(v => BitConverter.ToInt64(v.data, 0)).ToArray()[0]);

                    //Get the difference between start and end
                    DateTime endTime = TypeHelper.FromUnixTime(sessionEventTime);
                    TimeSpan duration = endTime.Subtract(startTime);

                    //Emit the tuple, with the duration between start/end
                    Values emitValues = new Values(sessionId, sessionEvent, sessionEventTime, duration.Ticks);

                    //If ack is enabled
                    if (enableAck)
                    {
                        //Emit the values, anchored to the incoming tuple
                        this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, emitValues);
                        //Ack the incoming tuple
                        this.context.Ack(tuple);
                    }
                    else
                    {
                        //No ack enabled? Fire and forget.
                        this.context.Emit(Constants.DEFAULT_STREAM_ID, emitValues);
                    }
                }
                else
                {
                    //Since this is a simple example, do nothing.
                    //In a real solution, you'd have to figure out what to do
                    //when receiving an END before a START.
                }
            }
        }
    }
}
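The HBase bolt above also depends on a TypeHelper utility that isn't part of the snippet. A minimal sketch, assuming the session times are Unix epoch milliseconds and the row keys are UTF-8 encoded strings (both assumptions):

using System;
using System.Text;

internal static class TypeHelper
{
    private static readonly DateTime UnixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

    //Convert a Unix epoch value in milliseconds to a UTC DateTime
    public static DateTime FromUnixTime(long unixTimeMillis)
    {
        return UnixEpoch.AddMilliseconds(unixTimeMillis);
    }

    //Encode a string as UTF-8 bytes for use as an HBase row key or value
    public static byte[] ToBytes(string value)
    {
        return Encoding.UTF8.GetBytes(value);
    }
}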