Exemplo n.º 1
0
 /// <summary>
 /// Called for every incoming tuple. A tuple from the system tick stream flushes the pending
 /// partial count to the database (and acks the queued tuples when acking is enabled);
 /// any other tuple adds its first field to the running counts.
 /// </summary>
 /// <param name="tuple">The incoming tuple; field 0 of a non-tick tuple is read as a long count.</param>
 public void Execute(SCPTuple tuple)
 {
     if (tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID))
     {
         //Nothing is written, and nothing is acked, unless a positive count has accumulated.
         //NOTE(review): queued tuples therefore stay un-acked across ticks while partialCount is 0.
         if (partialCount > 0)
         {
             Context.Logger.Info("updating database" +
                                 ", partialCount: " + partialCount +
                                 ", totalCount: " + totalCount);
             db.insertValue(CurrentTimeMillis(), partialCount);
             partialCount = 0L;
             if (enableAck)
             {
                 //Log the batch size; concatenating the collection itself only prints its type name
                 Context.Logger.Info("tuplesToAck: " + tuplesToAck.Count);
                 foreach (var tupleToAck in tuplesToAck)
                 {
                     this.ctx.Ack(tupleToAck);
                 }
                 //The whole batch has been acked, so start a new one
                 tuplesToAck.Clear();
             }
         }
     }
     else
     {
         //Merge partialCount from all PartialCountBolt tasks
         var incomingPartialCount = tuple.GetLong(0);
         partialCount += incomingPartialCount;
         totalCount   += incomingPartialCount;
         //Do not ack here; queue the tuple so it is acked after the next database write
         if (enableAck)
         {
             tuplesToAck.Enqueue(tuple);
         }
     }
 }
Exemplo n.º 2
0
        };                                                                                      //ascii 58--64 + misc.

        //Process a tuple from the stream: split field 0 into lower-cased words, then emit one
        //index entry per word followed by one per adjacent word pair.
        public void Execute(SCPTuple tuple)
        {
            Context.Logger.Info("Execute enter");

            try
            {
                string[] tokens     = tuple.GetString(0).ToLower().Split(_punctuationChars);
                int sentimentScore  = CalcSentimentScore(tokens);

                // One emit step shared by single words and word pairs
                Action<string> emitEntry = entry =>
                    this.ctx.Emit(new Values(entry,
                                             tuple.GetLong(1),
                                             tuple.GetString(2),
                                             tuple.GetString(3),
                                             tuple.GetString(4),
                                             sentimentScore));

                // Single words first ...
                foreach (string token in tokens)
                {
                    emitEntry(token);
                }

                // ... then every pair of neighbouring words, in order
                for (int i = 0; i + 1 < tokens.Length; i++)
                {
                    emitEntry(string.Format("{0} {1}", tokens[i], tokens[i + 1]));
                }
            }
            catch (Exception ex)
            {
                // Failures are logged and not rethrown
                Context.Logger.Error("SentimentIndexerBolt Exception: " + ex.Message + "\nStackTrace: \n" + ex.StackTrace);
            }

            Context.Logger.Info("Execute exit");
        }
Exemplo n.º 3
0
 /// <summary>
 /// Called for every incoming tuple. Data tuples are added to the running counts and queued;
 /// a tuple from the system tick stream emits the total, anchored to the queued tuples,
 /// and then acks and clears the queue.
 /// </summary>
 /// <param name="tuple">Incoming tuple; field 0 of a non-tick tuple is read as a long count</param>
 public void Execute(SCPTuple tuple)
 {
     bool isTick = tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID);

     if (!isTick)
     {
         //Fold the partial count from a PartialCountBolt task into both counters
         var delta = tuple.GetLong(0);
         partialCount += delta;
         totalCount   += delta;
         //Acking is deferred to the next flush, so remember the tuple
         tuplesToAck.Enqueue(tuple);
         return;
     }

     //Tick with nothing newly counted: nothing to emit or ack
     if (partialCount <= 0)
     {
         return;
     }

     string summary = "emitting totalCount" +
                      ", partialCount: " + partialCount +
                      ", totalCount: " + totalCount;
     Context.Logger.Info(summary);

     //The emitted value is anchored to every tuple of the current batch
     this.ctx.Emit(Constants.DEFAULT_STREAM_ID, tuplesToAck, new Values(CurrentTimeMillis(), totalCount));

     Context.Logger.Info("acking the batch: " + tuplesToAck.Count);
     foreach (var pending in tuplesToAck)
     {
         this.ctx.Ack(pending);
     }

     //Batch fully acked: reset the queue and the partial counter
     tuplesToAck.Clear();
     partialCount = 0L;
 }
        /// <summary>
        /// Handles one incoming tuple. Tuples from the system tick stream are only logged;
        /// for any other tuple the length of binary field 0 is emitted on the default stream.
        /// </summary>
        /// <param name="tuple">The incoming tuple</param>
        public void Execute(SCPTuple tuple)
        {
            Context.Logger.Info("Execute enter");

            bool isTick = Constants.SYSTEM_TICK_STREAM_ID.Equals(tuple.GetSourceStreamId());
            if (isTick)
            {
                long tickValue = tuple.GetLong(0);
                Context.Logger.Info("tick tuple, value: {0}", tickValue);
            }
            else
            {
                byte[] payload = tuple.GetBinary(0);
                int bytesNum   = payload.Count();

                if (!enableAck)
                {
                    // Acking disabled: emit without anchors
                    this.ctx.Emit(Constants.DEFAULT_STREAM_ID, new Values(bytesNum));
                    Context.Logger.Info("emit bytesNum: {0}", bytesNum);
                }
                else
                {
                    // Acking enabled: anchor the emit to the incoming tuple, then ack it
                    var anchors = new List<SCPTuple> { tuple };
                    this.ctx.Emit(Constants.DEFAULT_STREAM_ID, anchors, new Values(bytesNum));
                    this.ctx.Ack(tuple);
                    Context.Logger.Info("emit bytesNum: {0}", bytesNum);
                    Context.Logger.Info("Ack tuple: tupleId: {0}", tuple.GetTupleId());
                }
            }

            Context.Logger.Info("Execute exit");
        }
            ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`', '{', '|', '}', '~' };   //ascii 58--64 + misc.

        //Process a tuple from the stream: field 0 is lower-cased and split into words, and every
        //word plus every adjacent word pair is emitted as an index entry.
        public void Execute(SCPTuple tuple)
        {
            Context.Logger.Info("Execute enter");

            try
            {
                string[] tokens = tuple.GetString(0).ToLower().Split(_punctuationChars);
                int sentimentScore = CalcSentimentScore(tokens);

                // Pair each word with its right-hand neighbour ("w0 w1", "w1 w2", ...)
                var adjacentPairs = Enumerable.Range(0, tokens.Length - 1)
                                              .Select(i => string.Format("{0} {1}", tokens[i], tokens[i + 1]));

                // Single words come first, pairs after them
                var indexEntries = tokens.Concat(adjacentPairs).ToList();

                foreach (var entry in indexEntries)
                {
                    var outgoing = new Values(entry,
                                              tuple.GetLong(1),
                                              tuple.GetString(2),
                                              tuple.GetString(3),
                                              tuple.GetString(4),
                                              sentimentScore);
                    this.ctx.Emit(outgoing);
                }
            }
            catch (Exception ex)
            {
                // Failures are logged and not rethrown
                Context.Logger.Error("SentimentIndexerBolt Exception: " + ex.Message + "\nStackTrace: \n" + ex.StackTrace);
            }

            Context.Logger.Info("Execute exit");
        }
 /// <summary>
 /// Called for every incoming tuple. Non-tick tuples are counted and queued for a later ack;
 /// a tuple from the system tick stream emits the total anchored to the queued batch,
 /// acks every queued tuple and resets the batch.
 /// </summary>
 /// <param name="tuple">Incoming tuple; field 0 of a non-tick tuple is read as a long count</param>
 public void Execute(SCPTuple tuple)
 {
     bool fromTickStream = tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID);

     if (!fromTickStream)
     {
         //Add the partial count from a PartialCountBolt task to both counters
         var increment = tuple.GetLong(0);
         partialCount += increment;
         totalCount += increment;
         //The ack is postponed until the batch is flushed
         tuplesToAck.Enqueue(tuple);
         return;
     }

     //Nothing counted since the last flush: skip this tick
     if (partialCount <= 0)
     {
         return;
     }

     string summary = "emitting totalCount" +
         ", partialCount: " + partialCount +
         ", totalCount: " + totalCount;
     Context.Logger.Info(summary);

     //Anchor the emitted total to all tuples collected in this batch
     this.ctx.Emit(Constants.DEFAULT_STREAM_ID, tuplesToAck, new Values(CurrentTimeMillis(), totalCount));

     Context.Logger.Info("acking the batch: " + tuplesToAck.Count);
     foreach (var queued in tuplesToAck)
     {
         this.ctx.Ack(queued);
     }

     //All tuples are acked; drop them and restart the partial count
     tuplesToAck.Clear();
     partialCount = 0L;
 }
        /// <summary>
        /// Handles one incoming tuple. Tuples from the system tick stream are only logged;
        /// otherwise the byte count of binary field 0 is emitted on the default stream,
        /// anchored and acked when acking is enabled.
        /// </summary>
        /// <param name="tuple">The incoming tuple</param>
        public void Execute(SCPTuple tuple)
        {
            Context.Logger.Info("Execute enter");

            if (Constants.SYSTEM_TICK_STREAM_ID.Equals(tuple.GetSourceStreamId()))
            {
                long tickValue = tuple.GetLong(0);
                Context.Logger.Info("tick tuple, value: {0}", tickValue);
            }
            else
            {
                byte[] payload = tuple.GetBinary(0);
                int bytesNum = payload.Count();
                var outgoing = new Values(bytesNum);

                if (!enableAck)
                {
                    // No acking: plain emit
                    this.ctx.Emit(Constants.DEFAULT_STREAM_ID, outgoing);
                    Context.Logger.Info("emit bytesNum: {0}", bytesNum);
                }
                else
                {
                    // Tie the emitted value to the incoming tuple before acking it
                    this.ctx.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple> { tuple }, outgoing);
                    this.ctx.Ack(tuple);
                    Context.Logger.Info("emit bytesNum: {0}", bytesNum);
                    Context.Logger.Info("Ack tuple: tupleId: {0}", tuple.GetTupleId());
                }
            }

            Context.Logger.Info("Execute exit");
        }
 /// <summary>
 /// Called for every incoming tuple. Non-tick tuples add field 0 to the running counts;
 /// a tuple from the system tick stream emits the current time and total count,
 /// provided something was counted since the previous emit.
 /// </summary>
 /// <param name="tuple">Incoming tuple; field 0 of a non-tick tuple is read as a long count</param>
 public void Execute(SCPTuple tuple)
 {
     bool isTick = tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID);

     if (!isTick)
     {
         //Accumulate the partial count coming from an EventCountPartialCountBolt
         var delta = tuple.GetLong(0);
         partialCount += delta;
         totalCount += delta;
         return;
     }

     //Tick without new counts: stay silent
     if (partialCount <= 0)
     {
         return;
     }

     string summary = "emitting totalCount" +
         ", partialCount: " + partialCount +
         ", totalCount: " + totalCount;
     Context.Logger.Info(summary);

     this.ctx.Emit(new Values(CurrentTimeMillis(), totalCount));
     partialCount = 0L;
 }
Exemplo n.º 9
0
 /// <summary>
 /// Called for every incoming tuple. Non-tick tuples add field 0 to the running counts;
 /// a tuple from the system tick stream writes the pending partial count to the database
 /// and resets it.
 /// </summary>
 /// <param name="tuple">Incoming tuple; field 0 of a non-tick tuple is read as a long count</param>
 public void Execute(SCPTuple tuple)
 {
     bool isTick = tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID);

     if (!isTick)
     {
         //Fold the partial count from a PartialCountBolt task into both counters
         var delta = tuple.GetLong(0);
         partialCount += delta;
         totalCount   += delta;
         return;
     }

     //Tick with nothing pending: skip the database write
     if (partialCount <= 0)
     {
         return;
     }

     string summary = "updating database" +
                      ", partialCount: " + partialCount +
                      ", totalCount: " + totalCount;
     Context.Logger.Info(summary);

     db.insertValue(CurrentTimeMillis(), partialCount);
     partialCount = 0L;
 }
Exemplo n.º 10
0
 /// <summary>
 /// Called for every incoming tuple. A tuple from the system tick stream emits the current
 /// time together with the total count when a positive partial count is pending;
 /// every other tuple contributes field 0 to the counters.
 /// </summary>
 /// <param name="tuple">Incoming tuple; field 0 of a non-tick tuple is read as a long count</param>
 public void Execute(SCPTuple tuple)
 {
     bool fromTickStream = tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID);

     if (!fromTickStream)
     {
         //Merge the partial count sent by an EventCountPartialCountBolt
         var increment = tuple.GetLong(0);
         partialCount += increment;
         totalCount   += increment;
         return;
     }

     //Nothing counted since the last emit
     if (partialCount <= 0)
     {
         return;
     }

     string summary = "emitting totalCount" +
                      ", partialCount: " + partialCount +
                      ", totalCount: " + totalCount;
     Context.Logger.Info(summary);

     this.ctx.Emit(new Values(CurrentTimeMillis(), totalCount));
     partialCount = 0L;
 }
        /// <summary>
        /// Processes a session event tuple. START events are re-emitted with a duration of 0;
        /// END events scan HBase for the session's START row and are emitted with the
        /// elapsed time between start and end, expressed in ticks.
        /// </summary>
        /// <param name="tuple">Field 0 is read as the session id, field 1 as the event name and field 2 as the event time</param>
        public void Execute(SCPTuple tuple)
        {
            //Unpack the incoming fields
            string sessionId = tuple.GetString(0);
            string sessionEvent = tuple.GetString(1);
            long sessionEventTime = tuple.GetLong(2);

            //Shared emit step: with acking on, emit anchored to the incoming tuple and ack it;
            //with acking off, fire and forget.
            Action<Values> emitDownstream = outgoing =>
            {
                if (!enableAck)
                {
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, outgoing);
                    return;
                }
                this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple> { tuple }, outgoing);
                this.context.Ack(tuple);
            };

            //START: nothing to look up, so pass the data through with a 0 duration.
            //NOTE: If messages may arrive out of order, you would need to add logic to
            //query HBase to see if the end event has previously arrived,
            //calculate the duration, etc.
            if (sessionEvent == "START")
            {
                emitDownstream(new Values(tuple.GetValue(0), tuple.GetValue(1), tuple.GetValue(2), 0L));
                return;
            }

            //Anything other than END is ignored
            if (sessionEvent != "END")
            {
                return;
            }

            //Both conditions must hold: row key equals the session id, and cf:event equals START
            var startRowFilters = new FilterList(FilterList.Operator.MustPassAll);
            startRowFilters.AddFilter(
                new RowFilter(CompareFilter.CompareOp.Equal, new BinaryComparator(TypeHelper.ToBytes(sessionId))));
            startRowFilters.AddFilter(
                new SingleColumnValueFilter(
                    Encoding.UTF8.GetBytes("cf"),
                    Encoding.UTF8.GetBytes("event"),
                    CompareFilter.CompareOp.Equal,
                    Encoding.UTF8.GetBytes("START")));

            //Open a scanner configured with the encoded filter list
            var scanner = HBaseClusterClient.CreateScanner(
                HBaseTableName,
                new Scanner() { filter = startRowFilters.ToEncodedString() });

            //Read result sets until the scanner reports no more data
            while (true)
            {
                CellSet batch = HBaseClusterClient.ScannerGetNext(scanner);
                if (batch == null)
                {
                    break;
                }

                //In theory we should only find one row
                foreach (var row in batch.rows)
                {
                    //Value of the cf:event column for this row
                    string storedEvent = row.values
                        .Where(cell => Encoding.UTF8.GetString(cell.column) == "cf:event")
                        .Select(cell => Encoding.UTF8.GetString(cell.data))
                        .ToArray()[0];

                    if (storedEvent != "START")
                    {
                        //Since this is a simple example, do nothing.
                        //In a real solution, you'd have to figure out what to do
                        //when receiving an END before a START.
                        continue;
                    }

                    //Stored start time, read as a 64-bit integer from the cf:time column
                    long startUnixTime = row.values
                        .Where(cell => Encoding.UTF8.GetString(cell.column) == "cf:time")
                        .Select(cell => BitConverter.ToInt64(cell.data, 0))
                        .ToArray()[0];
                    var startTime = TypeHelper.FromUnixTime(startUnixTime);

                    //Elapsed time between the stored start and this end event
                    DateTime endTime = TypeHelper.FromUnixTime(sessionEventTime);
                    TimeSpan duration = endTime.Subtract(startTime);

                    emitDownstream(new Values(sessionId, sessionEvent, sessionEventTime, duration.Ticks));
                }
            }
        }
 /// <summary>
 /// Called for every incoming tuple. A tuple from the system tick stream flushes the pending
 /// partial count to the database (and acks the queued tuples when acking is enabled);
 /// any other tuple adds its first field to the running counts.
 /// </summary>
 /// <param name="tuple">The incoming tuple; field 0 of a non-tick tuple is read as a long count.</param>
 public void Execute(SCPTuple tuple)
 {
     if (tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID))
     {
         //Nothing is written, and nothing is acked, unless a positive count has accumulated.
         //NOTE(review): queued tuples therefore stay un-acked across ticks while partialCount is 0.
         if (partialCount > 0)
         {
             Context.Logger.Info("updating database" +
                 ", partialCount: " + partialCount +
                 ", totalCount: " + totalCount);
             db.insertValue(CurrentTimeMillis(), partialCount);
             partialCount = 0L;
             if (enableAck)
             {
                 //Log the batch size; concatenating the collection itself only prints its type name
                 Context.Logger.Info("tuplesToAck: " + tuplesToAck.Count);
                 foreach (var tupleToAck in tuplesToAck)
                 {
                     this.ctx.Ack(tupleToAck);
                 }
                 //The whole batch has been acked, so start a new one
                 tuplesToAck.Clear();
             }
         }
     }
     else
     {
         //Merge partialCount from all PartialCountBolt tasks
         var incomingPartialCount = tuple.GetLong(0);
         partialCount += incomingPartialCount;
         totalCount += incomingPartialCount;
         //Do not ack here; queue the tuple so it is acked after the next database write
         if (enableAck)
         {
             tuplesToAck.Enqueue(tuple);
         }
     }
 }
 /// <summary>
 /// Called for every incoming tuple. A tuple from the system tick stream stores the pending
 /// partial count in the database when it is positive; every other tuple contributes
 /// field 0 to the counters.
 /// </summary>
 /// <param name="tuple">Incoming tuple; field 0 of a non-tick tuple is read as a long count</param>
 public void Execute(SCPTuple tuple)
 {
     bool fromTickStream = tuple.GetSourceStreamId().Equals(Constants.SYSTEM_TICK_STREAM_ID);

     if (!fromTickStream)
     {
         //Merge the partial count sent by a PartialCountBolt task
         var increment = tuple.GetLong(0);
         partialCount += increment;
         totalCount += increment;
         return;
     }

     //No new counts since the last write
     if (partialCount <= 0)
     {
         return;
     }

     string summary = "updating database" +
         ", partialCount: " + partialCount +
         ", totalCount: " + totalCount;
     Context.Logger.Info(summary);

     db.insertValue(CurrentTimeMillis(), partialCount);
     partialCount = 0L;
 }
Exemplo n.º 14
0
        /// <summary>
        /// Processes a session event tuple. A START event is passed through with a duration of 0;
        /// an END event triggers an HBase scan for the session's START row and is emitted with
        /// the time between start and end, in ticks.
        /// </summary>
        /// <param name="tuple">Field 0 is read as the session id, field 1 as the event name and field 2 as the event time</param>
        public void Execute(SCPTuple tuple)
        {
            //Unpack the incoming fields
            string sessionId        = tuple.GetString(0);
            string sessionEvent     = tuple.GetString(1);
            long   sessionEventTime = tuple.GetLong(2);

            //Single place that sends a result downstream: anchored emit plus ack when acking
            //is enabled, otherwise an unanchored emit.
            Action<Values> send = payload =>
            {
                if (enableAck)
                {
                    var anchors = new List<SCPTuple>() { tuple };
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, anchors, payload);
                    this.context.Ack(tuple);
                }
                else
                {
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, payload);
                }
            };

            //START: assume there is nothing to find and re-emit with a 0 duration.
            //NOTE: If messages may arrive out of order, you would need to add logic to
            //query HBase to see if the end event has previously arrived,
            //calculate the duration, etc.
            if (sessionEvent == "START")
            {
                send(new Values(tuple.GetValue(0), tuple.GetValue(1), tuple.GetValue(2), 0L));
                return;
            }

            //Only END events need the HBase lookup below
            if (sessionEvent != "END")
            {
                return;
            }

            //Filter 1: row key equals the session id
            var byRowKey = new RowFilter(
                CompareFilter.CompareOp.Equal,
                new BinaryComparator(TypeHelper.ToBytes(sessionId)));
            //Filter 2: column cf:event equals START
            var byStartEvent = new SingleColumnValueFilter(
                Encoding.UTF8.GetBytes("cf"),
                Encoding.UTF8.GetBytes("event"),
                CompareFilter.CompareOp.Equal,
                Encoding.UTF8.GetBytes("START"));

            //A row has to pass both filters
            var filters = new FilterList(FilterList.Operator.MustPassAll);
            filters.AddFilter(byRowKey);
            filters.AddFilter(byStartEvent);

            //Create the scanner from settings carrying the encoded filters
            var settings = new Scanner()
            {
                filter = filters.ToEncodedString()
            };
            var scanner = HBaseClusterClient.CreateScanner(HBaseTableName, settings);

            //Keep pulling result sets until none is returned
            for (CellSet cells = HBaseClusterClient.ScannerGetNext(scanner);
                 cells != null;
                 cells = HBaseClusterClient.ScannerGetNext(scanner))
            {
                //In theory we should only find one row
                foreach (var row in cells.rows)
                {
                    //Read the cf:event column of the row
                    string storedEvent = row.values
                                         .Where(c => Encoding.UTF8.GetString(c.column) == "cf:event")
                                         .Select(c => Encoding.UTF8.GetString(c.data))
                                         .ToArray()[0];

                    if (storedEvent != "START")
                    {
                        //Since this is a simple example, do nothing.
                        //In a real solution, you'd have to figure out what to do
                        //when receiving an END before a START.
                        continue;
                    }

                    //Read the cf:time column as a 64-bit integer and convert it
                    long startUnixTime = row.values
                                         .Where(c => Encoding.UTF8.GetString(c.column) == "cf:time")
                                         .Select(c => BitConverter.ToInt64(c.data, 0))
                                         .ToArray()[0];
                    var startTime = TypeHelper.FromUnixTime(startUnixTime);

                    //Difference between the stored start and the incoming end
                    DateTime endTime  = TypeHelper.FromUnixTime(sessionEventTime);
                    TimeSpan duration = endTime.Subtract(startTime);

                    send(new Values(sessionId, sessionEvent, sessionEventTime, duration.Ticks));
                }
            }
        }