/// <summary>
        /// Builds a single-row <see cref="CellSet"/> for <paramref name="item"/>, keyed by the UTF-8 bytes of its code.
        /// </summary>
        /// <param name="item">Item whose status, hierarchy, descriptions, groups, identifiers and attributes become cells.</param>
        /// <param name="culture">Suffix used in the names of the description and attribute columns.</param>
        /// <returns>A cell set containing exactly one row.</returns>
        public CellSet GetCells(ItemDto item, string culture)
        {
            var row = new CellSet.Row { key = Encoding.UTF8.GetBytes(item.Code) };

            // Single-valued columns. Both date columns are stamped with the current local time on every call.
            var columns = new List<Cell>
            {
                new Cell { column = Encoding.UTF8.GetBytes("CF1:Status"), data = Encoding.UTF8.GetBytes(item.Status) },
                new Cell { column = Encoding.UTF8.GetBytes("CF1:MH" + item.MerchandiseHierarchy.Code), data = _serializer.SerializeToBson(item.MerchandiseHierarchy) },
                new Cell { column = Encoding.UTF8.GetBytes("CF1:CreatedDate"), data = Encoding.UTF8.GetBytes(DateTime.Now.ToString()) },
                new Cell { column = Encoding.UTF8.GetBytes("CF1:UpdatedDate"), data = Encoding.UTF8.GetBytes(DateTime.Now.ToString()) },
                new Cell { column = Encoding.UTF8.GetBytes("CF1:Long_" + culture), data = Encoding.UTF8.GetBytes(item.Description) },
                new Cell { column = Encoding.UTF8.GetBytes("CF1:Short_" + culture), data = Encoding.UTF8.GetBytes(item.ShortDescription) }
            };

            // One BSON-serialized column per group, named after the group code.
            foreach (var itemGroup in item.Groups)
            {
                columns.Add(new Cell { column = Encoding.UTF8.GetBytes("CF1:G" + itemGroup.Code), data = _serializer.SerializeToBson(itemGroup) });
            }

            // One plain-text column per identifier, named after the identifier type.
            foreach (var itemIdentifier in item.Identifiers)
            {
                columns.Add(new Cell { column = Encoding.UTF8.GetBytes("CF1:I" + itemIdentifier.Type), data = Encoding.UTF8.GetBytes(itemIdentifier.Value) });
            }

            // One BSON-serialized column per attribute, named after the culture and attribute id.
            foreach (var itemAttribute in item.Attributes)
            {
                columns.Add(new Cell { column = Encoding.UTF8.GetBytes("CF1:A" + culture + "_" + itemAttribute.Id), data = _serializer.SerializeToBson(itemAttribute.Value) });
            }

            row.values.AddRange(columns);

            var result = new CellSet();
            result.rows.Add(row);
            return result;
        }
Esempio n. 2
0
 /// <summary>
 /// Maps <paramref name="tweet"/> to a row and stores it, as a one-row cell set,
 /// in the table named by <c>HadoopContext.TweetTableName</c>.
 /// </summary>
 /// <param name="tweet">Tweet to persist.</param>
 public void WriteTweet(FSTweet tweet)
 {
     var singleRowSet = new CellSet();
     singleRowSet.rows.Add(TableDomainMappers.TweetToRow(tweet));

     HadoopContext.HBaseClient.StoreCells(HadoopContext.TweetTableName, singleRowSet);
 }
Esempio n. 3
0
 /// <summary>
 /// Maps <paramref name="relation"/> to a row and stores it, as a one-row cell set,
 /// in the table named by <c>HadoopContext.WordRelationTableName</c>.
 /// </summary>
 /// <param name="relation">Word relationship to persist.</param>
 public void WriteWordRelation(FSWordRelationship relation)
 {
     var singleRowSet = new CellSet();
     singleRowSet.rows.Add(TableDomainMappers.WordRelationToRow(relation));

     HadoopContext.HBaseClient.StoreCells(HadoopContext.WordRelationTableName, singleRowSet);
 }
Esempio n. 4
0
        /// <summary>
        /// Stores <paramref name="dto"/> as one row keyed by <c>_sampleRowKey</c> in the table
        /// named by <c>_sampleTableName</c>. Field1..Field5 are written as UTF-8 JSON text;
        /// NestedData is written as a BSON blob.
        /// </summary>
        /// <param name="dto">Object whose fields are persisted.</param>
        public void SaveMyDto(Dto dto)
        {
            CellSet cellSet = new CellSet();
            CellSet.Row cellSetRow = new CellSet.Row { key = Encoding.UTF8.GetBytes(_sampleRowKey) };
            cellSet.rows.Add(cellSetRow);

            Cell value1 = new Cell { column = Encoding.UTF8.GetBytes("CF1:field1"), data = Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(dto.Field1)) };
            Cell value2 = new Cell { column = Encoding.UTF8.GetBytes("CF1:field2"), data = Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(dto.Field2)) };
            Cell value3 = new Cell { column = Encoding.UTF8.GetBytes("CF1:field3"), data = Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(dto.Field3)) };
            Cell value4 = new Cell { column = Encoding.UTF8.GetBytes("CF1:field4"), data = Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(dto.Field4)) };
            Cell value5 = new Cell { column = Encoding.UTF8.GetBytes("CF1:field5"), data = Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(dto.Field5)) };

            byte[] nestedDataBlob;

            using (var memoryStream = new MemoryStream())
            {
                using (var writer = new BsonWriter(memoryStream))
                {
                    _serializer.Serialize(writer, dto.NestedData);
                }

                // Snapshot the buffer only after the writer has been disposed, so everything it
                // may still hold has reached the stream. MemoryStream.ToArray is documented to
                // work on a closed stream, so this is safe even if the writer closed it.
                nestedDataBlob = memoryStream.ToArray();
            }

            Cell value6 = new Cell { column = Encoding.UTF8.GetBytes("CF1:NestedData"), data = nestedDataBlob };
            cellSetRow.values.AddRange(new[] { value1, value2, value3, value4, value5, value6 });
            _client.StoreCells(_sampleTableName, cellSet);
        }
Esempio n. 5
0
        /// <summary>
        /// Builds a cell set for <paramref name="rowCount"/> via <c>CreateRowCountCell</c> and stores it
        /// in the table named by <c>TABLE_BY_WORDS_NAME</c>.
        /// </summary>
        /// <param name="rowCount">Row count value to persist.</param>
        public void UpdateRowCount(long rowCount)
        {
            var countCells = new CellSet();
            CreateRowCountCell(countCells, rowCount);
            client.StoreCells(TABLE_BY_WORDS_NAME, countCells);
        }
Esempio n. 6
0
 // Populate a CellSet object to be written into HBase.
 // Appends one row, keyed by the tweet id, whose "d:" columns hold the tweet's text,
 // creation time, reply-to id, sentiment and (only when present) coordinates.
 private void CreateTweetByWordsCells(CellSet set, TweetSentimentData tweet)
 {
     var tweetRow = new CellSet.Row { key = Encoding.UTF8.GetBytes(tweet.Id) };

     var textCell = new Cell
     {
         column = Encoding.UTF8.GetBytes("d:Text"),
         data = Encoding.UTF8.GetBytes(tweet.Text)
     };
     tweetRow.values.Add(textCell);

     var createdOnCell = new Cell
     {
         column = Encoding.UTF8.GetBytes("d:CreatedOn"),
         data = Encoding.UTF8.GetBytes(tweet.CreatedOn.ToString())
     };
     tweetRow.values.Add(createdOnCell);

     var replyToCell = new Cell
     {
         column = Encoding.UTF8.GetBytes("d:ReplyToId"),
         data = Encoding.UTF8.GetBytes(tweet.ReplyToId)
     };
     tweetRow.values.Add(replyToCell);

     var sentimentCell = new Cell
     {
         column = Encoding.UTF8.GetBytes("d:Sentiment"),
         data = Encoding.UTF8.GetBytes(tweet.Sentiment.ToString())
     };
     tweetRow.values.Add(sentimentCell);

     // Coordinates are optional; the column is simply left out when they are missing.
     if (tweet.Coordinates != null)
     {
         var coordinatesCell = new Cell
         {
             column = Encoding.UTF8.GetBytes("d:Coordinates"),
             data = Encoding.UTF8.GetBytes(tweet.Coordinates)
         };
         tweetRow.values.Add(coordinatesCell);
     }

     set.rows.Add(tweetRow);
 }
Esempio n. 7
0
 /// <summary>
 /// Maps <paramref name="convo"/> to a row, stores it as a one-row cell set in the table named by
 /// <c>HadoopContext.ConversationTableName</c>, then blocks the calling thread for 100 ms.
 /// </summary>
 /// <param name="convo">Conversation to persist.</param>
 public void WriteConversation(FSConversation convo)
 {
     var singleRowSet = new CellSet();
     singleRowSet.rows.Add(TableDomainMappers.ConversationToRow(convo));

     HadoopContext.HBaseClient.StoreCells(HadoopContext.ConversationTableName, singleRowSet);

     // NOTE(review): presumably throttles successive writes; the reason is not visible here.
     Thread.Sleep(100);
 }
Esempio n. 8
0
 /// <summary>
 /// Maps every tweet to a row and stores all rows with a single call against the table
 /// named by <c>HadoopContext.TweetTableName</c>. The store call is made even when
 /// <paramref name="tweets"/> yields nothing.
 /// </summary>
 /// <param name="tweets">Tweets to persist.</param>
 public void WriteTweets(IEnumerable<FSTweet> tweets)
 {
     var batch = new CellSet();

     foreach (var tweet in tweets)
     {
         var mappedRow = TableDomainMappers.TweetToRow(tweet);
         batch.rows.Add(mappedRow);
     }

     HadoopContext.HBaseClient.StoreCells(HadoopContext.TweetTableName, batch);
 }
Esempio n. 9
0
 /// <summary>
 /// Maps every relationship to a row and stores all rows with a single call against the table
 /// named by <c>HadoopContext.WordRelationTableName</c>. The store call is made even when
 /// <paramref name="relations"/> yields nothing.
 /// </summary>
 /// <param name="relations">Word relationships to persist.</param>
 public void WriteWordRelations(IEnumerable<FSWordRelationship> relations)
 {
     var batch = new CellSet();

     foreach (var wordRelation in relations)
     {
         var mappedRow = TableDomainMappers.WordRelationToRow(wordRelation);
         batch.rows.Add(mappedRow);
     }

     HadoopContext.HBaseClient.StoreCells(HadoopContext.WordRelationTableName, batch);
 }
Esempio n. 10
0
        /// <summary>
        /// Adds the cells for every index item to one cell set and stores it in the table
        /// named by <c>TABLE_BY_WORDS_NAME</c>.
        /// </summary>
        /// <param name="items">Index items to persist.</param>
        public void WriteIndexItems(List<TweetIndexItem> items)
        {
            var batch = new CellSet();
            items.ForEach(indexItem => CreateTweetByWordsCells(batch, indexItem));
            client.StoreCells(TABLE_BY_WORDS_NAME, batch);
        }
Esempio n. 11
0
        /// <summary>
        /// Appends to <paramref name="set"/> a row keyed by <c>COUNT_ROW_KEY</c> that holds a single
        /// cell: column <c>COUNT_COLUMN_NAME</c>, value <paramref name="count"/> as UTF-8 text.
        /// </summary>
        /// <param name="set">Cell set that receives the new row.</param>
        /// <param name="count">Count value to store.</param>
        private void CreateRowCountCell(CellSet set, long count)
        {
            var countRow = new CellSet.Row { key = Encoding.UTF8.GetBytes(COUNT_ROW_KEY) };

            countRow.values.Add(new Cell
            {
                column = Encoding.UTF8.GetBytes(COUNT_COLUMN_NAME),
                data = Encoding.UTF8.GetBytes(count.ToString())
            });

            set.rows.Add(countRow);
        }
        /// <summary>
        /// Inserts 100 rows (keys 0 through 99, each the <c>BitConverter</c> bytes of the index)
        /// that all carry the same "d:starwars" value, and waits for the store to complete.
        /// The rows serve as fixture data for range queries.
        /// </summary>
        /// <param name="hbaseClient">Client used to store the rows.</param>
        public void StoreTestData(IHBaseClient hbaseClient)
        {
            const string payload = "the force is strong in this column";
            var batch = new CellSet();

            for (var index = 0; index != 100; index++)
            {
                var testRow = new CellSet.Row { key = BitConverter.GetBytes(index) };
                testRow.values.Add(new Cell
                {
                    column = Encoding.UTF8.GetBytes("d:starwars"),
                    data = Encoding.UTF8.GetBytes(payload)
                });
                batch.rows.Add(testRow);
            }

            hbaseClient.StoreCellsAsync(testTableName, batch).Wait();
        }
        /// <summary>
        /// Stores the same cell twice and asserts that a get with a final argument of "3"
        /// (presumably the maximum number of versions — confirm against the client API)
        /// returns two values for the row.
        /// </summary>
        public void TestCellsMultiVersionGet()
        {
            const string rowKey = "content";
            const string payload = "the force is strong in this column";

            var hbase = CreateClient();

            var batch = new CellSet();
            var contentRow = new CellSet.Row { key = Encoding.UTF8.GetBytes(rowKey) };
            batch.rows.Add(contentRow);
            contentRow.values.Add(new Cell
            {
                column = Encoding.UTF8.GetBytes("d:starwars"),
                data = Encoding.UTF8.GetBytes(payload)
            });

            // Two identical writes of the same cell.
            for (var write = 0; write < 2; write++)
            {
                hbase.StoreCellsAsync(testTableName, batch).Wait();
            }

            CellSet fetched = hbase.GetCellsAsync(testTableName, rowKey, "d:starwars", "3").Result;
            Assert.AreEqual(2, fetched.rows[0].values.Count);
        }
Esempio n. 14
0
        /// <summary>
        /// Scans the whole table named by <c>HadoopContext.WordRelationTableName</c> in batches of 10.
        /// </summary>
        /// <returns>
        /// Always <c>null</c>: the scanned rows are only collected locally and are never converted
        /// to <see cref="FSWordRelationship"/> instances.
        /// </returns>
        public IEnumerable<FSWordRelationship> ReadAllWordRelationships()
        {
            var scannerSettings = new Scanner { batch = 10 };
            ScannerInformation scannerInfo = client.CreateScanner(HadoopContext.WordRelationTableName, scannerSettings);

            var collected = new CellSet();
            CellSet page = client.ScannerGetNext(scannerInfo);
            while (page != null)
            {
                foreach (var scannedRow in page.rows)
                {
                    // Conversion into the domain type is not implemented yet; rows are just accumulated.
                    collected.rows.Add(scannedRow);
                }

                page = client.ScannerGetNext(scannerInfo);
            }

            // NOTE(review): unfinished — the collected rows are discarded and callers receive null.
            return null;
        }
        /// <summary>
        /// Stores a cell, verifies it can be read back, deletes it, and then reads it again.
        /// The final read is expected to fail (404), so this method is expected to end with
        /// an exception — presumably declared through an attribute outside this block.
        /// </summary>
        public void TestCellsDeletion()
        {
            const string testKey = "content";
            const string testValue = "the force is strong in this column";
            var client = CreateClient();
            var set = new CellSet();
            var row = new CellSet.Row { key = Encoding.UTF8.GetBytes(testKey) };
            set.rows.Add(row);

            var value = new Cell { column = Encoding.UTF8.GetBytes("d:starwars"), data = Encoding.UTF8.GetBytes(testValue) };
            row.values.Add(value);

            client.StoreCellsAsync(testTableName, set).Wait();
            CellSet cell = client.GetCellsAsync(testTableName, testKey).Result;
            // make sure the cell is in the table (expected value first, so failure messages read correctly)
            Assert.AreEqual(testKey, Encoding.UTF8.GetString(cell.rows[0].key));
            // delete cell
            client.DeleteCellsAsync(testTableName, testKey).Wait();
            // get cell again, 404 exception expected
            client.GetCellsAsync(testTableName, testKey).Wait();
        }
        /// <summary>
        /// Rebuilds an <see cref="ItemDto"/> from the first row of <paramref name="cellSet"/>.
        /// </summary>
        /// <param name="cellSet">Cell set whose first row holds the item's columns.</param>
        /// <param name="culture">Culture suffix selecting the description and attribute columns to read.</param>
        /// <returns>The item described by the row.</returns>
        /// <remarks>
        /// Throws when the cell set has no rows, when no "CF1:MH…" column exists, or when any of the
        /// fixed columns (status, dates, descriptions for <paramref name="culture"/>) is missing.
        /// Dates are parsed with <c>DateTime.Parse</c> using the current culture.
        /// </remarks>
        public ItemDto GetDto(CellSet cellSet, string culture)
        {
            const string hierarchyPrefix = "CF1:MH";
            const string groupPrefix = "CF1:G";
            const string identifierPrefix = "CF1:I";
            // Includes the "_" separator so that culture "en" does not also match "en-US" columns.
            var attributePrefix = "CF1:A" + culture + "_";

            var itemRow = cellSet.rows.First();
            // Index the row's cells by column name so they can be retrieved by key.
            var columnNameMapping = itemRow.values.ToDictionary(o => Encoding.UTF8.GetString(o.column), o => o.data);

            var item = new ItemDto();
            item.Code = Encoding.UTF8.GetString(itemRow.key);
            item.Status = Encoding.UTF8.GetString(columnNameMapping["CF1:Status"]);

            var hierarchyColumnKey = columnNameMapping.Keys.First(o => o.StartsWith(hierarchyPrefix, StringComparison.Ordinal));
            item.MerchandiseHierarchy = _serializer.DeseralizeFromBson<HierarchyDto>(columnNameMapping[hierarchyColumnKey]);

            item.CreatedDate = DateTime.Parse(Encoding.UTF8.GetString(columnNameMapping["CF1:CreatedDate"]));
            item.LastUpdateDate = DateTime.Parse(Encoding.UTF8.GetString(columnNameMapping["CF1:UpdatedDate"]));
            item.Description = Encoding.UTF8.GetString(columnNameMapping["CF1:Long_" + culture]);
            item.ShortDescription = Encoding.UTF8.GetString(columnNameMapping["CF1:Short_" + culture]);

            item.Groups = new List<HierarchyDto>();
            foreach (var groupKey in columnNameMapping.Keys.Where(o => o.StartsWith(groupPrefix, StringComparison.Ordinal)))
            {
                item.Groups.Add(_serializer.DeseralizeFromBson<HierarchyDto>(columnNameMapping[groupKey]));
            }

            item.Identifiers = new List<Identifier>();
            foreach (var identifierKey in columnNameMapping.Keys.Where(o => o.StartsWith(identifierPrefix, StringComparison.Ordinal)))
            {
                item.Identifiers.Add(new Identifier
                {
                    Value = Encoding.UTF8.GetString(columnNameMapping[identifierKey]),
                    // The identifier type is whatever follows the column prefix.
                    Type = identifierKey.Substring(identifierPrefix.Length)
                });
            }

            item.Attributes = new List<AttributeDto>();
            foreach (var attributeKey in columnNameMapping.Keys.Where(o => o.StartsWith(attributePrefix, StringComparison.Ordinal)))
            {
                item.Attributes.Add(new AttributeDto
                {
                    // Strip the actual prefix length rather than a fixed 11 characters, which was
                    // only correct for five-character culture names such as "en-US".
                    Id = attributeKey.Substring(attributePrefix.Length),
                    Value = _serializer.DeseralizeFromBson<AttributeValue>(columnNameMapping[attributeKey])
                });
            }

            return item;
        }
Esempio n. 17
0
            ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`', '{', '|', '}', '~' };   //ascii 58--64 + misc.

        /// <summary>
        /// Adds to <paramref name="set"/> one row per word and per adjacent word pair of the tweet's
        /// lower-cased text. Each row carries the tweet id, language, sentiment score and, when the
        /// tweet has coordinates, a "longitude,latitude" string.
        /// </summary>
        /// <param name="set">Cell set that receives the new rows.</param>
        /// <param name="tweet">Tweet to index.</param>
        private void CreateTweetByWordsCells(CellSet set, ITweet tweet)
        {
            var words = tweet.Text.ToLower().Split(_punctuationChars);
            int sentimentScore = CalcSentimentScore(words);
            var word_pairs = words.Take(words.Length - 1)
                                  .Select((word, idx) => string.Format("{0} {1}", word, words[idx + 1]));
            var all_words = words.Concat(word_pairs).ToList();

            // Identical for every row of this tweet, so computed once instead of once per word.
            // Key suffix: (ulong.MaxValue - CreatedAt.ToBinary()) left-padded to 20 characters plus
            // the tweet id — presumably so newer tweets sort first; confirm against the readers.
            var time_index = (ulong.MaxValue -
                (ulong)tweet.CreatedAt.ToBinary()).ToString().PadLeft(20) + tweet.IdStr;

            // Formatted with the invariant culture: under a culture with a decimal comma the
            // "longitude,latitude" pair would otherwise be ambiguous to parse.
            string coordinates = null;
            if (tweet.Coordinates != null)
            {
                coordinates = string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0},{1}",
                    tweet.Coordinates.Longitude,
                    tweet.Coordinates.Latitude);
            }

            foreach (var word in all_words)
            {
                var key = word + "_" + time_index;
                var row = new CellSet.Row { key = Encoding.UTF8.GetBytes(key) };

                var value = new Cell {
                    column = Encoding.UTF8.GetBytes("d:id_str"),
                    data = Encoding.UTF8.GetBytes(tweet.IdStr) };
                row.values.Add(value);
                value = new Cell {
                    column = Encoding.UTF8.GetBytes("d:lang"),
                    data = Encoding.UTF8.GetBytes(tweet.Language.ToString()) };
                row.values.Add(value);
                if (coordinates != null)
                {
                    value = new Cell {
                        column = Encoding.UTF8.GetBytes("d:coor"),
                        data = Encoding.UTF8.GetBytes(coordinates) };
                    row.values.Add(value);
                }

                value = new Cell {
                    column = Encoding.UTF8.GetBytes("d:sentiment"),
                    data = Encoding.UTF8.GetBytes(sentimentScore.ToString()) };
                row.values.Add(value);

                set.rows.Add(row);
            }
        }
Esempio n. 18
0
        /// <summary>
        /// Adds to <paramref name="set"/> one row for <paramref name="indexItem"/>, keyed by its word,
        /// an inverted creation-time value and the tweet id. The row carries the tweet id, language,
        /// sentiment score and, when available, the coordinates.
        /// </summary>
        /// <param name="set">Cell set that receives the new row.</param>
        /// <param name="indexItem">Pre-computed index entry to write.</param>
        private void CreateTweetByWordsCells(CellSet set, TweetIndexItem indexItem)
        {
            var word = indexItem.Word;
            // Key suffix: (ulong.MaxValue - CreatedAt) left-padded to 20 characters plus the tweet id.
            var time_index = (ulong.MaxValue -
                              (ulong)indexItem.CreatedAt).ToString().PadLeft(20) + indexItem.IdStr;
            var key = word + "_" + time_index;
            var row = new CellSet.Row { key = Encoding.UTF8.GetBytes(key) };

            var value = new Cell
            {
                column = Encoding.UTF8.GetBytes("d:id_str"),
                data = Encoding.UTF8.GetBytes(indexItem.IdStr)
            };
            row.values.Add(value);
            value = new Cell
            {
                column = Encoding.UTF8.GetBytes("d:lang"),
                data = Encoding.UTF8.GetBytes(indexItem.Language)
            };
            row.values.Add(value);

            // Coordinates are optional on a tweet. Encoding.GetBytes rejects null, which would
            // abort the whole batch, so the column is omitted when there is nothing to store.
            if (indexItem.Coordinates != null)
            {
                value = new Cell
                {
                    column = Encoding.UTF8.GetBytes("d:coor"),
                    data = Encoding.UTF8.GetBytes(indexItem.Coordinates)
                };
                row.values.Add(value);
            }

            value = new Cell
            {
                column = Encoding.UTF8.GetBytes("d:sentiment"),
                data = Encoding.UTF8.GetBytes(indexItem.SentimentScore.ToString())
            };
            row.values.Add(value);

            set.rows.Add(row);
        }
        /// <summary>
        /// Create a write set for HBase based on cached tuples.
        /// All writes are delayed so that the cached tuples are stored with a single batched call.
        /// </summary>
        /// <remarks>
        /// The first value of every cached tuple is used as the row key; each remaining value is
        /// written to the column <c>HBaseTableColumnFamily:HBaseTableColumns[i-1]</c>.
        /// Throws <see cref="InvalidOperationException"/> when a tuple has more non-key values than
        /// there are configured columns. A failed store is logged (including the failing rows) and rethrown.
        /// </remarks>
        public void WriteToHBase()
        {
            Context.Logger.Info("WriteToHBase - Start - Writing cached rows into HBase. Rows to write: {0}", cachedTuples.Count);
            if (cachedTuples.Count > 0)
            {
                var writeSet = new CellSet();
                foreach (var cachedTuple in cachedTuples)
                {
                    var values = cachedTuple.Value.GetValues();
                    if (this.HBaseTableColumns.Count < (values.Count - 1))
                    {
                        // A specific exception type; it still derives from Exception, so existing handlers keep working.
                        throw new InvalidOperationException(String.Format(
                            "Count of HBaseTableColumns is less than fields received. HBaseTableColumns.Count: {0}, Values.Count (without rowkey): {1}",
                            this.HBaseTableColumns.Count, values.Count - 1)
                            );
                    }

                    //Use the first value as rowkey and add the remaining as a list
                    var tablerow = new CellSet.Row { key = ToBytes(values[0]) };

                    //Skip the first value and read the remaining
                    for (int i = 1; i < values.Count; i++)
                    {
                        var rowcell = new Cell
                        {
                            //Based on our assumption that ColumnNames do NOT contain rowkey field
                            column = ToBytes(this.HBaseTableColumnFamily + ":" + this.HBaseTableColumns[i - 1]),
                            data = ToBytes(values[i])
                        };
                        tablerow.values.Add(rowcell);
                    }

                    writeSet.rows.Add(tablerow);
                }

                try
                {
                    //Use the StoreCells API to write the cellset into HBase Table
                    HBaseClusterClient.StoreCells(this.HBaseTableName, writeSet);
                }
                catch (Exception ex)
                {
                    // Record why the store failed, not just that it failed.
                    Context.Logger.Error("HBase StoreCells Failed: {0}", ex);
                    foreach (var row in writeSet.rows)
                    {
                        // Guard against null payloads so that diagnostics can never mask the original failure.
                        Context.Logger.Info("Failed RowKey: {0}, Values (bytes): {1}", Encoding.UTF8.GetString(row.key),
                            String.Join(", ", row.values.Select(v => Encoding.UTF8.GetString(v.column) + " = " + (v.data == null ? 0L : v.data.LongLength))));
                    }
                    throw;
                }
                Context.Logger.Info("WriteToHBase - End - Stored cells into HBase. Rows written: {0}", writeSet.rows.Count);
            }
            else
            {
                Context.Logger.Info("WriteToHBase - End - No cells to write.");
            }
        }
Esempio n. 20
0
 /// <summary>
 /// Scans the table named by <c>HadoopContext.HBaseWordRelationTableName</c> (batches of 1000)
 /// and returns every relationship in which <paramref name="word"/> is one of the two words.
 /// Relationships where either stored word is null or blank are never returned.
 /// </summary>
 /// <param name="word">Word to look for; compared with <c>CompareTo</c> against both stored words.</param>
 /// <returns>The matching relationships; an empty list when nothing matches.</returns>
 public IEnumerable<FSWordRelationship> FindRelationsWithWord(string word)
 {
     Scanner s = new Scanner()
     {
         batch = 1000
     };
     ScannerInformation si = client.CreateScanner(HadoopContext.HBaseWordRelationTableName, s);
     List<FSWordRelationship> relations = new List<FSWordRelationship>();
     CellSet next;
     while ((next = client.ScannerGetNext(si)) != null)
     {
         foreach (CellSet.Row row in next.rows)
         {
             // Reads one column of the current row as UTF-8 text. A row missing the column
             // fails here with a NullReferenceException, as it did before.
             Func<string, string> readColumn = columnName =>
                 Encoding.UTF8.GetString(row.values.Find(c => Encoding.UTF8.GetString(c.column) == columnName).data);

             string wordOne = readColumn("d:WordOne");
             int wordOneId = Convert.ToInt32(readColumn("d:WordOneId"));
             string wordTwo = readColumn("d:WordTwo");
             int wordTwoId = Convert.ToInt32(readColumn("d:WordTwoId"));
             double rScore = Convert.ToDouble(readColumn("d:RScore"));
             string id = Encoding.UTF8.GetString(row.key);

             bool matchesWord = word.CompareTo(wordOne) == 0 || word.CompareTo(wordTwo) == 0;
             bool bothWordsPresent = !String.IsNullOrWhiteSpace(wordOne) && !String.IsNullOrWhiteSpace(wordTwo);

             // Explicit grouping: without it "&&" bound tighter than "||", so the blank-word
             // check only guarded the second comparison.
             if (matchesWord && bothWordsPresent)
             {
                 relations.Add(new FSWordRelationship(id, wordOne, wordOneId, wordTwo, wordTwoId, rScore));
             }
         }
     }
     return relations;
 }
Esempio n. 21
0
        /// <summary>
        /// Stores ten rows of test data and records each one in <c>_allExpectedRecords</c>.
        /// Row keys are "{id}-{lineNumber}", with one fresh GUID shared by all ten rows.
        /// </summary>
        private void PopulateTable()
        {
            var hbase = new HBaseClient(_credentials);
            var batch = new CellSet();
            string batchId = Guid.NewGuid().ToString("N");

            for (int line = 0; line < 10; line++)
            {
                // Remember what is about to be written so it can be compared against later.
                var expected = new FilterTestRecord(
                    string.Format(CultureInfo.InvariantCulture, "{0}-{1}", batchId, line),
                    line,
                    Guid.NewGuid().ToString("N"),
                    Guid.NewGuid().ToString("D"));
                _allExpectedRecords.Add(expected);

                var testRow = new CellSet.Row { key = _encoding.GetBytes(expected.RowKey) };

                // Line number, stored as its BitConverter bytes.
                testRow.values.Add(new Cell
                {
                    column = BuildCellColumn(ColumnFamilyName1, LineNumberColumnName),
                    data = BitConverter.GetBytes(expected.LineNumber)
                });

                // Column A, encoded with the fixture's encoding.
                testRow.values.Add(new Cell
                {
                    column = BuildCellColumn(ColumnFamilyName1, ColumnNameA),
                    data = _encoding.GetBytes(expected.A)
                });

                // Column B lives in the second family and is encoded with Encoding.UTF8.
                testRow.values.Add(new Cell
                {
                    column = BuildCellColumn(ColumnFamilyName2, ColumnNameB),
                    data = Encoding.UTF8.GetBytes(expected.B)
                });

                batch.rows.Add(testRow);
            }

            hbase.StoreCellsAsync(_tableName, batch).Wait();
        }
        // Populate a CellSet object to be written into HBase.
        // Adds one row per word and per adjacent word pair of the tweet's lower-cased text.
        private void CreateTweetByWordsCells(CellSet set, ITweet tweet)
        {
            // Split the Tweet into words
            string[] words = tweet.Text.ToLower().Split(_punctuationChars);

            // Calculate sentiment score based on the words
            int sentimentScore = CalcSentimentScore(words);
            var word_pairs = words.Take(words.Length - 1)
                                  .Select((word, idx) => string.Format("{0} {1}", word, words[idx + 1]));
            var all_words = words.Concat(word_pairs).ToList();

            // Identical for every row of this Tweet, so computed once instead of once per word.
            // Key suffix: (ulong.MaxValue - CreatedAt.ToBinary()) left-padded to 20 characters plus
            // the Tweet id - presumably so newer Tweets sort first; confirm against the readers.
            string time_index = (ulong.MaxValue - (ulong)tweet.CreatedAt.ToBinary()).ToString().PadLeft(20) + tweet.IdStr;

            // Formatted with the invariant culture: under a culture with a decimal comma the
            // "longitude,latitude" pair would otherwise be ambiguous to parse.
            string coordinates = null;
            if (tweet.Coordinates != null)
            {
                coordinates = string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0},{1}",
                    tweet.Coordinates.Longitude,
                    tweet.Coordinates.Latitude);
            }

            // For each word in the Tweet add a row to the HBase table
            foreach (string word in all_words)
            {
                string key = word + "_" + time_index;

                // Create a row
                var row = new CellSet.Row { key = Encoding.UTF8.GetBytes(key) };

                // Add columns to the row, including Tweet identifier, language, coordinates (if available), and sentiment
                var value = new Cell { column = Encoding.UTF8.GetBytes("d:id_str"), data = Encoding.UTF8.GetBytes(tweet.IdStr) };
                row.values.Add(value);

                value = new Cell { column = Encoding.UTF8.GetBytes("d:lang"), data = Encoding.UTF8.GetBytes(tweet.Language.ToString()) };
                row.values.Add(value);

                if (coordinates != null)
                {
                    value = new Cell { column = Encoding.UTF8.GetBytes("d:coor"), data = Encoding.UTF8.GetBytes(coordinates) };
                    row.values.Add(value);
                }

                value = new Cell { column = Encoding.UTF8.GetBytes("d:sentiment"), data = Encoding.UTF8.GetBytes(sentimentScore.ToString()) };
                row.values.Add(value);

                set.rows.Add(row);
            }
        }
        /// <summary>
        /// Stores one row holding a single "d:starwars" cell and asserts that reading the row back
        /// yields exactly one row with exactly one cell carrying the stored value.
        /// </summary>
        public void TestStoreSingleCell()
        {
            const string rowKey = "content";
            const string expectedValue = "the force is strong in this column";

            var hbase = new HBaseClient(_credentials);

            var batch = new CellSet();
            var contentRow = new CellSet.Row { key = Encoding.UTF8.GetBytes(rowKey) };
            batch.rows.Add(contentRow);
            contentRow.values.Add(new Cell
            {
                column = Encoding.UTF8.GetBytes("d:starwars"),
                data = Encoding.UTF8.GetBytes(expectedValue)
            });

            hbase.StoreCells(_testTableName, batch);

            CellSet stored = hbase.GetCells(_testTableName, rowKey);
            Assert.AreEqual(1, stored.rows.Count);

            var storedRow = stored.rows[0];
            Assert.AreEqual(1, storedRow.values.Count);
            Assert.AreEqual(expectedValue, Encoding.UTF8.GetString(storedRow.values[0].data));
        }
        // Writer loop: while threadRunning is set, drains the queue of pending Tweets roughly every
        // 100 ms and writes them to HBase as one cell set.
        // Any exception ends the loop; its message is printed to the console.
        public void WriterThreadFunction()
        {
            try
            {
                while (threadRunning)
                {
                    CellSet set = new CellSet();
                    bool dequeuedAny = false;

                    // The emptiness check happens inside the lock, so the queue can never be found
                    // empty between the check and the Dequeue call.
                    lock (queue)
                    {
                        while (queue.Count > 0)
                        {
                            ITweet tweet = queue.Dequeue();
                            CreateTweetByWordsCells(set, tweet);
                            dequeuedAny = true;
                        }
                    }

                    if (dequeuedAny)
                    {
                        // Write the Tweet by words cell set to the HBase table
                        client.StoreCells(HBASETABLENAME, set);
                        Console.WriteLine("\tRows written: {0}", set.rows.Count);
                    }

                    Thread.Sleep(100);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception: " + ex.Message);
            }
        }
        /// <summary>
        /// Create a write set for HBase when the emit period completes.
        /// All writes are delayed so that the aggregations, and the writes for them, are batched.
        /// </summary>
        /// <remarks>
        /// Work is only done once <c>emitstopwatch</c> has run longer than <c>appConfig.AggregationWindow</c>.
        /// Every aggregation bucket older than the emit cut-off becomes one row per primary key, with one
        /// "v:" column per secondary key. Unless <c>appConfig.HBaseOverwrite</c> is set, the value already
        /// stored in HBase is added to the new aggregate. After a successful store the written buckets
        /// are removed from <c>aggregatedCounts</c> and the stopwatch is restarted.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when <c>last_emit_count</c> is greater than zero; <c>false</c> otherwise,
        /// including when an exception was caught (it is logged and counted, not rethrown).
        /// </returns>
        public override bool EmitAggregations()
        {
            try
            {
                if (emitstopwatch.Elapsed > this.appConfig.AggregationWindow)
                {
                    local_hbasescan_count = 0;
                    var writeset = new CellSet();
                    // Cut-off: current UTC time floored to the aggregation window, minus the emit window.
                    var emitime = DateTime.UtcNow.Floor(this.appConfig.AggregationWindow).Subtract(this.appConfig.EmitWindow);
                    var keystoremove = new List<DateTime>();

                    // Buckets older than the cut-off, oldest first.
                    var dtkeys = aggregatedCounts.Keys.Where(dt => dt < emitime).OrderBy(dt => dt).ToList();

                    // Values previously stored in HBase, indexed as [primary key][row key][column key].
                    var hbaseresultsets = new Dictionary<string, Dictionary<string, Dictionary<string, double>>>();

                    if (dtkeys.Count > 0)
                    {
                        var startdt = dtkeys[0];
                        var enddt = dtkeys[dtkeys.Count-1];

                        foreach (var dtkey in dtkeys)
                        {
                            foreach (var pkey in aggregatedCounts[dtkey].Keys)
                            {
                                // When not overwriting, scan the existing values once per primary key,
                                // covering the whole range from the oldest to the newest bucket being emitted.
                                if (!this.appConfig.HBaseOverwrite && !hbaseresultsets.ContainsKey(pkey))
                                {
                                    hbaseresultsets.Add(pkey, 
                                        ScanHBase(
                                        pkey + Utilities.KEY_DELIMITER + startdt.ToString(Utilities.DATE_TIME_FORMAT),
                                        pkey + Utilities.KEY_DELIMITER + enddt.ToString(Utilities.DATE_TIME_FORMAT)));
                                }

                                // Row key: primary key, delimiter, formatted bucket time.
                                var rowkey = pkey + Utilities.KEY_DELIMITER + dtkey.ToString(Utilities.DATE_TIME_FORMAT);
                                var tablerow = new CellSet.Row { key = Encoding.UTF8.GetBytes(rowkey) };
                                foreach (var skey in aggregatedCounts[dtkey][pkey].Keys)
                                {
                                    var columnkey = "v:" + skey;
                                    double previousdata = 0;

                                    // Pick up the previously stored value for this row/column, if one was scanned.
                                    if (hbaseresultsets.ContainsKey(pkey) && 
                                        hbaseresultsets[pkey].ContainsKey(rowkey) && 
                                        hbaseresultsets[pkey][rowkey].ContainsKey(columnkey))
                                    {
                                        previousdata = hbaseresultsets[pkey][rowkey][columnkey];
                                    }

                                    // The cell value is the BitConverter byte form of (previous value + new aggregate).
                                    var rowcell = new Cell
                                    {
                                        column = Encoding.UTF8.GetBytes(columnkey),
                                        data = BitConverter.GetBytes(previousdata + aggregatedCounts[dtkey][pkey][skey])
                                    };

                                    tablerow.values.Add(rowcell);
                                    // NOTE(review): these counters are updated before the store call below,
                                    // so they stay changed even if that call throws.
                                    global_emit_count++;
                                    last_emit_count++;
                                    current_cache_size--;
                                }
                                writeset.rows.Add(tablerow);
                            }
                            keystoremove.Add(dtkey);
                        }
                    }

                    if (writeset != null && writeset.rows.Count > 0)
                    {
                        Context.Logger.Info("HBaseTableName: {0} - Rows to write: {1}, First Rowkey = {2}", 
                            this.HBaseTableName, writeset.rows.Count, Encoding.UTF8.GetString(writeset.rows[0].key));
                        
                        var localstopwatch = new Stopwatch();
                        localstopwatch.Start();
                        //Use the StoreCells API to write the cellset into HBase Table
                        HBaseClusterClient.StoreCells(this.HBaseTableName, writeset);
                        Context.Logger.Info("HBase: Table = {0}, Rows Written = {1}, Write Time = {2} secs, Time since last write = {3} secs",
                            this.HBaseTableName, writeset.rows.Count, localstopwatch.Elapsed.TotalSeconds, emitstopwatch.Elapsed.TotalSeconds);

                        // Buckets are dropped only after the store call returned without throwing.
                        foreach (var key in keystoremove)
                        {
                            aggregatedCounts.Remove(key);
                        }
                        last_emit_in_secs = emitstopwatch.Elapsed.TotalSeconds;
                        // The stopwatch restarts only when rows were actually written.
                        emitstopwatch.Restart();
                    }

                    if (!this.appConfig.HBaseOverwrite)
                    {
                        Context.Logger.Info("ScanHBase: Last Window Scan Count = {0}, Table = {1}", local_hbasescan_count, this.HBaseTableName);
                    }
                }

                if (last_emit_count > 0)
                {
                    return true;
                }
                else
                {
                    return false;
                }
            }
            catch (Exception ex)
            {
                // Failures are counted and logged; the caller only sees a false return value.
                last_error_count++;
                global_error_count++;
                Context.Logger.Error(ex.ToString());
                return false;
            }
        }
Esempio n. 26
0
        /// <summary>
        /// Stores an already-built cell set in the given HBase table with a single
        /// StoreCellsAsync call, creating the shared HBase client first when it has
        /// not been created yet.
        /// </summary>
        /// <param name="tableName">Name of the HBase table to write to.</param>
        /// <param name="cellSet">Cell set holding the rows to store.</param>
        /// <returns>A task that completes once the store call has finished.</returns>
        public static async Task HbaseBatchInsertAsync(string tableName, CellSet cellSet)
        {
            // Build the shared client on first use only.
            if (hbaseClient == null)
            {
                hbaseClient = CreateHBaseClient(clusterURL, httpName, httpUserPassword);
            }

            var pendingStore = hbaseClient.StoreCellsAsync(tableName, cellSet);
            await pendingStore;
        }
Esempio n. 27
0
 // Background writer loop: drains every queued tweet into one CellSet and stores
 // that CellSet in the HBase table. Runs until ThreadRunning becomes false.
 public void WriterThreadFunction()
 {
     // Pause between polls of an empty queue so the loop does not spin a CPU core.
     const int idlePollMilliseconds = 100;

     while (ThreadRunning)
     {
         CellSet set = new CellSet();
         int dequeued = 0;

         // The emptiness check and the dequeue happen under the same lock, so the
         // queue cannot change between "Count > 0" and "Dequeue()".
         // NOTE(review): presumably producers also lock WriteQueue - confirm.
         lock (WriteQueue)
         {
             while (WriteQueue.Count > 0)
             {
                 TweetSentimentData tweet = WriteQueue.Dequeue();
                 CreateTweetByWordsCells(set, tweet);
                 dequeued++;
             }
         }

         if (dequeued == 0)
         {
             // Nothing to write: wait a little before polling again.
             System.Threading.Thread.Sleep(idlePollMilliseconds);
             continue;
         }

         // Write the Tweet by words cell set to the HBase table
         client.StoreCells(this.HBaseTableName, set);
         Console.WriteLine("\tRows written: {0}", set.rows.Count);
     }
 }
Esempio n. 28
0
        /// <summary>
        /// Background writer loop: every 100 ms drains all queued tweets into one
        /// CellSet, adds the updated running row count to the same batch, and stores
        /// the batch in TABLE_BY_WORDS_NAME. Runs until threadRunning becomes false.
        /// Exceptions are written to the console and the loop keeps running.
        /// </summary>
        public void WriterThreadFunction()
        {
            while (threadRunning)
            {
                try
                {
                    var set = new CellSet();
                    var dequeued = 0;

                    // Check and drain under the same lock so the queue cannot change
                    // between the emptiness test and the Dequeue call.
                    lock (queue)
                    {
                        while (queue.Count > 0)
                        {
                            var tweet = queue.Dequeue();

                            CreateTweetByWordsCells(set, tweet);
                            dequeued++;
                        }
                    }

                    if (dequeued > 0)
                    {
                        // Snapshot the number of tweet rows BEFORE the count cell is added.
                        // NOTE(review): if CreateRowCountCell appends its own row to the set,
                        // re-reading set.rows.Count afterwards would make rowCount drift from
                        // the total that was just persisted - confirm against that helper.
                        var tweetRowCount = set.rows.Count;

                        // Update count of rows as part of the same batch
                        CreateRowCountCell(set, rowCount + tweetRowCount);

                        client.StoreCells(TABLE_BY_WORDS_NAME, set);
                        rowCount += tweetRowCount;

                        // Reports every row sent in this batch.
                        Console.WriteLine("===== {0} rows written =====", set.rows.Count);
                    }

                    Thread.Sleep(100);
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Exception: " + ex.Message + "\nStackTrace: \n" + ex.StackTrace);
                }
            }
        }
Esempio n. 29
0
 // Builds one HBase row for a tweet (row key = tweet.IdStr as UTF-8) and appends it to
 // the given cell set. Always writes d:created_at, d:text and d:lang; writes d:coor and
 // d:place_fullname only when the tweet carries coordinates / a place.
 private static void CreateTweetCells(CellSet set, ITweet tweet)
 {
     var row = new CellSet.Row { key = Encoding.UTF8.GetBytes(tweet.IdStr) };

     // NOTE(review): assuming CreatedAt is a DateTime, ToLongTimeString() is
     // culture-dependent and keeps only the time of day. Left unchanged because
     // readers of this column may rely on the current format - confirm.
     AddUtf8Cell(row, "d:created_at", tweet.CreatedAt.ToLongTimeString());
     AddUtf8Cell(row, "d:text", tweet.Text);
     AddUtf8Cell(row, "d:lang", tweet.Language.ToString());

     if (tweet.Coordinates != null)
     {
         // Format with the invariant culture: the two values are joined with ",",
         // so a decimal-comma culture would make "longitude,latitude" ambiguous.
         var coordinates = string.Format(
             System.Globalization.CultureInfo.InvariantCulture,
             "{0},{1}",
             tweet.Coordinates.Longitude,
             tweet.Coordinates.Latitude);
         AddUtf8Cell(row, "d:coor", coordinates);
     }

     if (tweet.Place != null)
     {
         AddUtf8Cell(row, "d:place_fullname", tweet.Place.FullName);
     }

     set.rows.Add(row);
 }

 // Appends one cell to the row, encoding both the column name and the text as UTF-8.
 private static void AddUtf8Cell(CellSet.Row row, string column, string text)
 {
     row.values.Add(new Cell { column = Encoding.UTF8.GetBytes(column), data = Encoding.UTF8.GetBytes(text) });
 }
Esempio n. 30
0
        /// <summary>
        /// Copies the given rows into a new cell set and passes it to the
        /// CellSet-based HbaseBatchInsertAsync overload.
        /// </summary>
        /// <param name="tableName">Name of the HBase table to write to.</param>
        /// <param name="cellSetRows">Rows to insert, added to the cell set in list order.</param>
        /// <returns>A task that completes once the forwarded insert has finished.</returns>
        public static async Task HbaseBatchInsertAsync(string tableName, List<CellSet.Row> cellSetRows)
        {
            var batch = new CellSet();
            cellSetRows.ForEach(row => batch.rows.Add(row));

            await HbaseBatchInsertAsync(tableName, batch);
        }