/// <summary>
/// Retrieve sessions within the given time range and write one line per
/// matching row to the console
/// </summary>
/// <param name="hbaseClient">The hbase client</param>
/// <param name="eventType">The type of event to look for. "END" rows are
/// reported with their duration; any other value is reported as a start</param>
/// <param name="start">Lower bound of the time range</param>
/// <param name="end">Upper bound of the time range</param>
/// <exception cref="InvalidOperationException">A returned row has no cf:time
/// cell (or, for END rows, no cf:duration cell)</exception>
static void GetSessionsByTime(HBaseClient hbaseClient, string eventType, DateTime start, DateTime end)
{
    //Create filters list
    FilterList filters = new FilterList(FilterList.Operator.MustPassAll);

    //Filter to search for the event type value in cf:event
    SingleColumnValueFilter valueFilter = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes("cf"),
        Encoding.UTF8.GetBytes("event"),
        CompareFilter.CompareOp.Equal,
        Encoding.UTF8.GetBytes(eventType));
    filters.AddFilter(valueFilter);

    //Create scanner, set maxVersions so we can get previous versions of rows
    //Since START events may not be the currently returned value
    var scannerSettings = new Scanner()
    {
        filter = filters.ToEncodedString(),
        maxVersions = 5,
        startTime = ToUnixTime(start),
        endTime = ToUnixTime(end)
    };
    var scanner = hbaseClient.CreateScanner(Properties.Settings.Default.HBaseTableName, scannerSettings);

    //Keep reading cell sets until the scanner reports no more data
    CellSet readSet = null;
    while ((readSet = hbaseClient.ScannerGetNext(scanner)) != null)
    {
        //Iterate over the rows returned
        foreach (var row in readSet.rows)
        {
            //Time stored in cf:time for this event (a START or an END,
            //depending on the eventType being searched for)
            long eventTime = row.values
                .Where(v => Encoding.UTF8.GetString(v.column) == "cf:time")
                .Select(v => BitConverter.ToInt64(v.data, 0))
                .First();

            if (eventType == "END")
            {
                //Duration stored in cf:duration; TimeSpan(long) interprets the value as ticks
                var duration = new TimeSpan(
                    row.values
                        .Where(v => Encoding.UTF8.GetString(v.column) == "cf:duration")
                        .Select(v => BitConverter.ToInt64(v.data, 0))
                        .First());

                //Write out the session info, including duration.
                //TotalMinutes (truncated) is used instead of Minutes: Minutes is only the
                //0-59 component, which under-reports any session of an hour or longer.
                Console.WriteLine("Session {0} lasted {1} minutes, and ended at {2}",
                    Encoding.UTF8.GetString(row.key),
                    (long)duration.TotalMinutes,
                    FromUnixTime(eventTime));
            }
            else
            {
                //The hbase timestamp of the first cell returned for the row
                var timestamp = row.values.First().timestamp;

                //If start event type, just write out when it started and the hbase timestamp for the row
                Console.WriteLine("Session {0} started at {1}. Timestamp = {2}",
                    Encoding.UTF8.GetString(row.key),
                    FromUnixTime(eventTime),
                    timestamp);
            }
        }
    }
}
/// <summary>
/// Executes incoming tuples. START events are re-emitted with a duration of 0;
/// END events are matched against the stored START row for the same session and
/// emitted with the elapsed duration in ticks.
/// </summary>
/// <param name="tuple">Field 0 is the session id (used as the HBase row key),
/// field 1 is the event name ("START" or "END"), field 2 is the event time</param>
public void Execute(SCPTuple tuple)
{
    //Get the tuple info
    string sessionId = tuple.GetString(0);
    string sessionEvent = tuple.GetString(1);
    long sessionEventTime = tuple.GetLong(2);

    if (sessionEvent == "START")
    {
        //If it's a start event, assume there's nothing to find so just re-emit
        //NOTE: If messages may arrive out of order, you would need to add logic to
        //query HBase to see if the end event has previously arrived,
        //calculate the duration, etc.
        //The duration field is 0 because the END event is not known yet.
        Values startValues = new Values(tuple.GetValue(0), tuple.GetValue(1), tuple.GetValue(2), 0L);
        EmitSessionValues(tuple, startValues);
    }
    else if (sessionEvent == "END")
    {
        //Use filters; a row must pass both of them
        FilterList filters = new FilterList(FilterList.Operator.MustPassAll);

        //Filter on the row by sessionID
        RowFilter rowFilter = new RowFilter(
            CompareFilter.CompareOp.Equal,
            new BinaryComparator(TypeHelper.ToBytes(sessionId)));
        filters.AddFilter(rowFilter);

        //Filter on the event column for the START event
        SingleColumnValueFilter valueFilter = new SingleColumnValueFilter(
            Encoding.UTF8.GetBytes("cf"),
            Encoding.UTF8.GetBytes("event"),
            CompareFilter.CompareOp.Equal,
            Encoding.UTF8.GetBytes("START"));
        filters.AddFilter(valueFilter);

        //Create scanner settings using the filters, then get the scanner
        var scannerSettings = new Scanner()
        {
            filter = filters.ToEncodedString()
        };
        var scanner = HBaseClusterClient.CreateScanner(HBaseTableName, scannerSettings);

        CellSet readSet = null;
        while ((readSet = HBaseClusterClient.ScannerGetNext(scanner)) != null)
        {
            //In theory we should only find one row
            foreach (var row in readSet.rows)
            {
                //Pull back just the event column. FirstOrDefault yields null when the
                //row has no cf:event cell, so such a row is treated as "not a START"
                //instead of throwing.
                string rowState = row.values
                    .Where(v => Encoding.UTF8.GetString(v.column) == "cf:event")
                    .Select(v => Encoding.UTF8.GetString(v.data))
                    .FirstOrDefault();

                if (rowState != "START")
                {
                    //Since this is a simple example, do nothing.
                    //In a real solution, you'd have to figure out what to do
                    //when receiving an END before a START.
                    continue;
                }

                //Get the start time stored in cf:time
                var startTime = TypeHelper.FromUnixTime(
                    row.values
                        .Where(v => Encoding.UTF8.GetString(v.column) == "cf:time")
                        .Select(v => BitConverter.ToInt64(v.data, 0))
                        .First());

                //Get the difference between start and end
                DateTime endTime = TypeHelper.FromUnixTime(sessionEventTime);
                TimeSpan duration = endTime.Subtract(startTime);

                //Emit the tuple, with the duration (in ticks) between start/end
                Values endValues = new Values(sessionId, sessionEvent, sessionEventTime, duration.Ticks);
                EmitSessionValues(tuple, endValues);
            }
        }

        //NOTE(review): when no START row is found nothing is emitted and the incoming
        //tuple is not acked here, even if ack is enabled - confirm this is intended.
    }
}

/// <summary>
/// Emits the values on the default stream. When ack is enabled the emit is
/// anchored to the incoming tuple and the incoming tuple is then acked;
/// otherwise the values are emitted unanchored.
/// </summary>
/// <param name="tuple">The incoming tuple being processed</param>
/// <param name="emitValues">The values to emit</param>
private void EmitSessionValues(SCPTuple tuple, Values emitValues)
{
    if (enableAck)
    {
        //Emit the values, anchored to the incoming tuple
        this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, emitValues);
        //Ack the incoming tuple
        this.context.Ack(tuple);
    }
    else
    {
        //No ack enabled? Fire and forget.
        this.context.Emit(Constants.DEFAULT_STREAM_ID, emitValues);
    }
}