Пример #1
0
        public void TestStoreSingleCell()
        {
            // Arrange: a single row keyed "content" that carries one d:starwars cell.
            const string rowKey       = "content";
            const string expectedText = "the force is strong in this column";
            var client = CreateClient();

            var starWarsCell = new Cell
            {
                column = Encoding.UTF8.GetBytes("d:starwars"),
                data   = Encoding.UTF8.GetBytes(expectedText)
            };
            var contentRow = new CellSet.Row
            {
                key    = Encoding.UTF8.GetBytes(rowKey),
                values = { starWarsCell }
            };
            var payload = new CellSet
            {
                rows = { contentRow }
            };

            // Act: write the row, then read the same key back.
            client.StoreCellsAsync(testTableName, payload).Wait();
            CellSet fetched = client.GetCellsAsync(testTableName, rowKey).Result;

            // Assert: exactly one row with one cell, and the cell data round-trips unchanged.
            Assert.AreEqual(1, fetched.rows.Count);
            Assert.AreEqual(1, fetched.rows[0].values.Count);
            Assert.AreEqual(expectedText, Encoding.UTF8.GetString(fetched.rows[0].values[0].data));
        }
Пример #2
0
        public void TestCellsMultiVersionGet()
        {
            // Arrange: one row ("content") with one d:starwars cell.
            const string rowKey     = "content";
            const string cellText   = "the force is strong in this column";
            const string columnName = "d:starwars";
            var client = CreateClient();

            var row = new CellSet.Row { key = Encoding.UTF8.GetBytes(rowKey) };
            row.values.Add(new Cell
            {
                column = Encoding.UTF8.GetBytes(columnName),
                data   = Encoding.UTF8.GetBytes(cellText)
            });

            var payload = new CellSet();
            payload.rows.Add(row);

            // Act: store the same cell set twice, then request the column with "3" as the last argument
            // (presumably the maximum number of versions to return - confirm against GetCellsAsync).
            client.StoreCellsAsync(testTableName, payload).Wait();
            client.StoreCellsAsync(testTableName, payload).Wait();
            var fetched = client.GetCellsAsync(testTableName, rowKey, columnName, "3").Result;

            // Assert: both stored versions are returned for the row.
            Assert.Equal(2, fetched.rows[0].values.Count);
        }
Пример #3
0
        /// <summary>
        /// Atomically checks if a row/family/qualifier value matches the expected value and deletes
        /// </summary>
        /// <param name="table">the table</param>
        /// <param name="cellToCheck">cell to check for deleting the row; it is appended to the values of the row that is sent</param>
        /// <param name="rowToDelete">
        /// optional row to send; when null, a new row keyed by <c>cellToCheck.row</c> is used instead.
        /// Note that a supplied row is modified: <paramref name="cellToCheck"/> is added to its values.
        /// </param>
        /// <param name="options">optional request options; the global request options are used when null</param>
        /// <returns>true if the record was deleted; false if condition failed at check</returns>
        public async Task <bool> CheckAndDeleteAsync(string table, Cell cellToCheck, CellSet.Row rowToDelete = null, RequestOptions options = null)
        {
            table.ArgumentNotNullNorEmpty("table");
            cellToCheck.ArgumentNotNull("cellToCheck");

            // Use the caller's row when given, otherwise build one from the check cell's row key.
            var targetRow = rowToDelete ?? new CellSet.Row { key = cellToCheck.row };
            targetRow.values.Add(cellToCheck);

            var payload = new CellSet();
            payload.rows.Add(targetRow);

            var effectiveOptions = options ?? _globalRequestOptions;

            // The row key is decoded inside the delegate, so it is evaluated on every attempt the retry policy makes.
            bool deleted = await effectiveOptions.RetryPolicy.ExecuteAsync <bool>(
                () => StoreCellsAsyncInternal(table, payload, effectiveOptions, Encoding.UTF8.GetString(targetRow.key), CheckAndDeleteQuery));

            return deleted;
        }
Пример #4
0
        public void TestCellsDeletion()
        {
            // Arrange: one row ("content") with a single d:starwars cell.
            const string rowKey   = "content";
            const string cellText = "the force is strong in this column";
            var client = CreateClient();

            var row = new CellSet.Row
            {
                key    = Encoding.UTF8.GetBytes(rowKey),
                values =
                {
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:starwars"),
                        data   = Encoding.UTF8.GetBytes(cellText)
                    }
                }
            };
            var payload = new CellSet { rows = { row } };

            client.StoreCellsAsync(testTableName, payload).Wait();

            // Sanity check: the row can be read back before it is deleted.
            CellSet stored = client.GetCellsAsync(testTableName, rowKey).Result;
            Assert.AreEqual(Encoding.UTF8.GetString(stored.rows[0].key), rowKey);

            // Delete the row's cells.
            client.DeleteCellsAsync(testTableName, rowKey).Wait();

            // Reading the deleted row again is expected to fail (404); the exception is left to propagate.
            client.GetCellsAsync(testTableName, rowKey).Wait();
        }
        /// <summary>
        /// Create a write set for HBase based on cached tuples and store it with a single StoreCells call.
        /// We delay all the writes to batch the aggregations and writes for them.
        /// </summary>
        /// <remarks>
        /// The first value of each cached tuple becomes the row key. Every following value is written to the
        /// column family <c>HBaseTableColumnFamily</c> under the column name found at the matching position of
        /// <c>HBaseTableColumns</c> (which therefore must not contain the row key field).
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// A cached tuple carries more non-key values than there are entries in <c>HBaseTableColumns</c>.
        /// </exception>
        public void WriteToHBase()
        {
            Context.Logger.Info("WriteToHBase - Start - Writing cached rows into HBase. Rows to write: {0}", cachedTuples.Count);
            if (cachedTuples.Count == 0)
            {
                Context.Logger.Info("WriteToHBase - End - No cells to write.");
                return;
            }

            var writeSet = new CellSet();
            foreach (var cachedTuple in cachedTuples)
            {
                var values = cachedTuple.Value.GetValues();
                if (this.HBaseTableColumns.Count < (values.Count - 1))
                {
                    // A specific exception type instead of the base Exception; callers catching Exception still see it.
                    throw new InvalidOperationException(String.Format(
                                                            "Count of HBaseTableColumns is less than fields received. HBaseTableColumns.Count: {0}, Values.Count (without rowkey): {1}",
                                                            this.HBaseTableColumns.Count, values.Count - 1)
                                                        );
                }

                //Use the first value as rowkey and add the remaining as a list
                var tablerow = new CellSet.Row {
                    key = TypeHelper.ToBytes(values[0])
                };

                //Skip the first value and read the remaining
                for (int i = 1; i < values.Count; i++)
                {
                    var rowcell = new Cell
                    {
                        //Based on our assumption that ColumnNames do NOT contain rowkey field
                        column = TypeHelper.ToBytes(this.HBaseTableColumnFamily + ":" + this.HBaseTableColumns[i - 1]),
                        data   = TypeHelper.ToBytes(values[i])
                    };
                    tablerow.values.Add(rowcell);
                }

                writeSet.rows.Add(tablerow);
            }

            try
            {
                //Use the StoreCells API to write the cellset into HBase Table
                HBaseClusterClient.StoreCells(this.HBaseTableName, writeSet);
            }
            catch
            {
                // Log every row of the failed batch (row key plus column name and payload length), then
                // rethrow with the original stack trace so the caller decides how to handle the failure.
                Context.Logger.Error("HBase StoreCells Failed");
                foreach (var row in writeSet.rows)
                {
                    Context.Logger.Info("Failed RowKey: {0}, Values (bytes): {1}", Encoding.UTF8.GetString(row.key),
                                        String.Join(", ", row.values.Select(v => Encoding.UTF8.GetString(v.column) + " = " + v.data.LongLength)));
                }
                throw;
            }
            Context.Logger.Info("WriteToHBase - End - Stored cells into HBase. Rows written: {0}", writeSet.rows.Count);
        }
Пример #6
0
        /// <summary>
        /// Maps the word relationship to an HBase row and stores it, as a single-row cell set,
        /// in the table named by <c>HadoopContext.WordRelationTableName</c>.
        /// </summary>
        /// <param name="relation">the word relationship to persist</param>
        public void WriteWordRelation(FSWordRelationship relation)
        {
            var mappedRow = TableDomainMappers.WordRelationToRow(relation);
            var payload   = new CellSet { rows = { mappedRow } };

            HadoopContext.HBaseClient.StoreCells(HadoopContext.WordRelationTableName, payload);
        }
Пример #7
0
        /// <summary>
        /// Maps the tweet to an HBase row and stores it, as a single-row cell set,
        /// in the table named by <c>HadoopContext.TweetTableName</c>.
        /// </summary>
        /// <param name="tweet">the tweet to persist</param>
        public void WriteTweet(FSTweet tweet)
        {
            var mappedRow = TableDomainMappers.TweetToRow(tweet);
            var payload   = new CellSet { rows = { mappedRow } };

            HadoopContext.HBaseClient.StoreCells(HadoopContext.TweetTableName, payload);
        }
Пример #8
0
        /// <summary>
        /// Maps the conversation to an HBase row, stores it as a single-row cell set in the table named by
        /// <c>HadoopContext.ConversationTableName</c>, and then blocks the calling thread for 100 ms.
        /// </summary>
        /// <param name="convo">the conversation to persist</param>
        public void WriteConversation(FSConversation convo)
        {
            var mappedRow = TableDomainMappers.ConversationToRow(convo);
            var payload   = new CellSet { rows = { mappedRow } };

            HadoopContext.HBaseClient.StoreCells(HadoopContext.ConversationTableName, payload);

            // NOTE(review): fixed 100 ms pause after every write; presumably throttling - the reason is not visible here.
            Thread.Sleep(100);
        }
Пример #9
0
        When_I_CheckAndDeleteCells_With_TimeStamp_And_Cells_To_Delete_I_Can_add_with_higher_timestamp()
        {
            var client = GetClient();

            // Seed row "3" with two cells (c1 and c2), both at timestamp 10.
            await client.StoreCellsAsync(_tableName, CreateCellSet(GetCellSet("3", "c1", "1A", 10)));
            await client.StoreCellsAsync(_tableName, CreateCellSet(GetCellSet("3", "c2", "1A", 10)));

            // Row listing the cells to delete: c1 and c2 of row "3" at timestamp 10.
            var rowToDelete = new CellSet.Row {
                key = Encoding.UTF8.GetBytes("3")
            };

            rowToDelete.values.Add(GetCell("3", "c1", "1A", 10));
            rowToDelete.values.Add(GetCell("3", "c2", "1A", 10));
            var deleted = await client.CheckAndDeleteAsync(_tableName, GetCell("3", "c1", "1A", 10), rowToDelete);

            deleted.ShouldEqual(true);

            CellSet retrievedCells;

            try
            {
                // All cells are deleted so this should fail.
                // .Result (not await) is deliberate: a failed request then surfaces as the AggregateException handled below.
                retrievedCells = client.GetCellsAsync(_tableName, "3").Result;
                throw new AssertFailedException("expecting Get '3' to fail as all cells are removed");
            }
            catch (AggregateException ex)
            {
                // Only the request failure is handled here; anything else (including the AssertFailedException
                // above) propagates untouched with its original stack trace.
                ((HttpWebResponse)((WebException)ex.InnerException).Response).StatusCode.ShouldEqual(
                    HttpStatusCode.NotFound);
            }

            // A cell written with a higher timestamp (11) must be accepted after the delete.
            await client.StoreCellsAsync(_tableName, CreateCellSet(GetCellSet("3", "c1", "1B", 11)));

            try
            {
                retrievedCells = client.GetCellsAsync(_tableName, "3").Result;
                retrievedCells.rows[0].values.Count.ShouldEqual(1);
                Encoding.UTF8.GetString(retrievedCells.rows[0].values[0].column).ShouldBeEqualOrdinalIgnoreCase("c1");
            }
            catch (AggregateException ex)
            {
                // As before, a failed Get is tolerated only when it is a 404. Assertion failures raised inside
                // the try block are no longer swallowed.
                ((HttpWebResponse)((WebException)ex.InnerException).Response).StatusCode.ShouldEqual(
                    HttpStatusCode.NotFound);
            }
        }
Пример #10
0
        public async Task When_I_CheckAndDeleteCells_With_ColumnFamily_Deletes_All_cells()
        {
            var client = GetClient();

            // Seed row "3" with two cells (c1 and c2), both at timestamp 10.
            await client.StoreCellsAsync(_tableName, CreateCellSet(GetCellSet("3", "c1", "1A", 10)));
            await client.StoreCellsAsync(_tableName, CreateCellSet(GetCellSet("3", "c2", "1A", 10)));

            // The delete request names only the column family (no qualifier) at timestamp 10; per the test
            // name this is expected to remove every cell of that family in row "3".
            var rowToDelete = new CellSet.Row {
                key = Encoding.UTF8.GetBytes("3")
            };

            rowToDelete.values.Add(new Cell
            {
                row       = rowToDelete.key,
                column    = Encoding.UTF8.GetBytes(ColumnFamilyName1),
                timestamp = 10
            });
            var deleted = await client.CheckAndDeleteAsync(_tableName, GetCell("3", "c1", "1A", 10), rowToDelete);

            deleted.ShouldEqual(true);

            CellSet retrievedCells;

            try
            {
                // All cells are deleted so this should fail.
                // .Result (not await) is deliberate: a failed request then surfaces as the AggregateException handled below.
                retrievedCells = client.GetCellsAsync(_tableName, "3").Result;
                throw new AssertFailedException("expecting Get '3' to fail as all cells are removed");
            }
            catch (AggregateException ex)
            {
                // Catching only AggregateException lets the AssertFailedException above fail the test;
                // previously a catch-all block silently swallowed it.
                ((HttpWebResponse)((WebException)ex.InnerException).Response).StatusCode.ShouldEqual(
                    HttpStatusCode.NotFound);
            }

            // A cell written with a higher timestamp (11) must be accepted after the delete.
            await client.StoreCellsAsync(_tableName, CreateCellSet(GetCellSet("3", "c1", "1B", 11)));

            try
            {
                retrievedCells = client.GetCellsAsync(_tableName, "3").Result;
                retrievedCells.rows[0].values.Count.ShouldEqual(1);
            }
            catch (AggregateException ex)
            {
                // A failed Get is tolerated only when it is a 404. Assertion failures raised inside the try
                // block are no longer swallowed.
                ((HttpWebResponse)((WebException)ex.InnerException).Response).StatusCode.ShouldEqual(
                    HttpStatusCode.NotFound);
            }
        }
Пример #11
0
        /// <summary>
        /// Atomically checks if a row/family/qualifier value matches the expected value and updates
        /// </summary>
        /// <param name="table">the table</param>
        /// <param name="row">row to update; note that <paramref name="cellToCheck"/> is added to its values before the request is sent</param>
        /// <param name="cellToCheck">cell to check</param>
        /// <param name="options">optional request options; the global request options are used when null</param>
        /// <returns>true if the record was updated; false if condition failed at check</returns>
        public async Task <bool> CheckAndPutAsync(string table, CellSet.Row row, Cell cellToCheck, RequestOptions options = null)
        {
            table.ArgumentNotNullNorEmpty("table");
            row.ArgumentNotNull("row");
            // Reject a missing check cell up front instead of adding a null entry to the row's values.
            cellToCheck.ArgumentNotNull("cellToCheck");

            row.values.Add(cellToCheck);
            var cellSet = new CellSet();

            cellSet.rows.Add(row);
            var optionToUse = options ?? _globalRequestOptions;

            // The row key is decoded inside the delegate, so it is evaluated on every attempt the retry policy makes.
            return(await optionToUse.RetryPolicy.ExecuteAsync <bool>(() => StoreCellsAsyncInternal(table, cellSet, optionToUse, Encoding.UTF8.GetString(row.key), CheckAndPutQuery)));
        }
Пример #12
0
        // Populate a CellSet object to be written into HBase:
        // one row per word and per pair of adjacent words found in the tweet text.
        private void CreateTweetByWordsCells(CellSet set, ITweet tweet)
        {
            // Lower-case the tweet text and split it on the punctuation characters
            string[] tokens = tweet.Text.ToLower().Split(_punctuationChars);

            // A single sentiment score is computed from all tokens and stored on every row
            int score = CalcSentimentScore(tokens);

            // Index terms: every token, followed by every "token next-token" pair
            var adjacentPairs = tokens.Take(tokens.Length - 1)
                                .Select((token, position) => string.Format("{0} {1}", token, tokens[position + 1]));
            var indexTerms = tokens.Concat(adjacentPairs).ToList();

            foreach (string term in indexTerms)
            {
                // Row key layout: <term>_<inverted creation time, left-padded to 20 chars><tweet id>
                ulong  invertedTime = ulong.MaxValue - (ulong)tweet.CreatedAt.ToBinary();
                string rowKey       = term + "_" + invertedTime.ToString().PadLeft(20) + tweet.IdStr;

                var row = new CellSet.Row {
                    key = Encoding.UTF8.GetBytes(rowKey)
                };

                // Columns: tweet identifier, language, coordinates (if available), and sentiment
                row.values.Add(new Cell {
                    column = Encoding.UTF8.GetBytes("d:id_str"),
                    data   = Encoding.UTF8.GetBytes(tweet.IdStr)
                });
                row.values.Add(new Cell {
                    column = Encoding.UTF8.GetBytes("d:lang"),
                    data   = Encoding.UTF8.GetBytes(tweet.Language.ToString())
                });

                if (tweet.Coordinates != null)
                {
                    // Stored as "longitude,latitude"
                    var lonLat = tweet.Coordinates.Longitude.ToString() + "," + tweet.Coordinates.Latitude.ToString();
                    row.values.Add(new Cell {
                        column = Encoding.UTF8.GetBytes("d:coor"),
                        data   = Encoding.UTF8.GetBytes(lonLat)
                    });
                }

                row.values.Add(new Cell {
                    column = Encoding.UTF8.GetBytes("d:sentiment"),
                    data   = Encoding.UTF8.GetBytes(score.ToString())
                });

                set.rows.Add(row);
            }
        }
Пример #13
0
        // Adds the counter row to the set: row key COUNT_ROW_KEY, one cell in column
        // COUNT_COLUMN_NAME whose data is the count rendered as text.
        private void CreateRowCountCell(CellSet set, long count)
        {
            var counterRow = new CellSet.Row
            {
                key    = Encoding.UTF8.GetBytes(COUNT_ROW_KEY),
                values =
                {
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes(COUNT_COLUMN_NAME),
                        data   = Encoding.UTF8.GetBytes(count.ToString())
                    }
                }
            };

            set.rows.Add(counterRow);
        }
Пример #14
0
        };                                                                                      //ascii 58--64 + misc.

        // Adds one row per index term of the tweet to the set. Index terms are the words of the
        // lower-cased tweet text followed by every pair of adjacent words.
        private void CreateTweetByWordsCells(CellSet set, ITweet tweet)
        {
            string[] tokens = tweet.Text.ToLower().Split(_punctuationChars);
            int      score  = CalcSentimentScore(tokens);

            var adjacentPairs = tokens
                                .Take(tokens.Length - 1)
                                .Select((token, position) => string.Format("{0} {1}", token, tokens[position + 1]));

            foreach (string term in tokens.Concat(adjacentPairs).ToList())
            {
                // Row key layout: <term>_<inverted creation time, left-padded to 20 chars><tweet id>
                string timeIndex = (ulong.MaxValue - (ulong)tweet.CreatedAt.ToBinary()).ToString().PadLeft(20) + tweet.IdStr;

                // Tweet identifier and language are always written.
                var row = new CellSet.Row
                {
                    key    = Encoding.UTF8.GetBytes(term + "_" + timeIndex),
                    values =
                    {
                        new Cell
                        {
                            column = Encoding.UTF8.GetBytes("d:id_str"),
                            data   = Encoding.UTF8.GetBytes(tweet.IdStr)
                        },
                        new Cell
                        {
                            column = Encoding.UTF8.GetBytes("d:lang"),
                            data   = Encoding.UTF8.GetBytes(tweet.Language.ToString())
                        }
                    }
                };

                // Coordinates are optional and stored as "longitude,latitude".
                if (tweet.Coordinates != null)
                {
                    string lonLat = tweet.Coordinates.Longitude.ToString() + "," +
                                    tweet.Coordinates.Latitude.ToString();
                    row.values.Add(new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:coor"),
                        data   = Encoding.UTF8.GetBytes(lonLat)
                    });
                }

                // The tweet-wide sentiment score is repeated on every row.
                row.values.Add(new Cell
                {
                    column = Encoding.UTF8.GetBytes("d:sentiment"),
                    data   = Encoding.UTF8.GetBytes(score.ToString())
                });

                set.rows.Add(row);
            }
        }
Пример #15
0
        /// <summary>
        /// Maps a conversation to an HBase row: the row key is the conversation id and each tweet
        /// contributes one cell "d:TweetId_&lt;position&gt;" holding that tweet's id.
        /// </summary>
        /// <param name="convo">the conversation to map</param>
        /// <returns>the populated row</returns>
        public static CellSet.Row ConversationToRow(FSConversation convo)
        {
            var row = new CellSet.Row {
                key = Encoding.UTF8.GetBytes(convo.Id)
            };

            // Walk the tweets once. The previous Count()/ElementAt(i) loop re-enumerated the sequence on
            // every iteration, which is quadratic for sources that are not indexable lists.
            int position = 0;
            foreach (var tweet in convo.Tweets)
            {
                row.values.Add(
                    new Cell
                {
                    column = Encoding.UTF8.GetBytes("d:TweetId_" + position.ToString()),
                    data   = Encoding.UTF8.GetBytes(tweet.Id)
                });
                position++;
            }
            return(row);
        }
Пример #16
0
        public void TestGetCellsWithMultiGetRequest()
        {
            // Arrange: two rows with fresh GUID keys, each holding one d:starwars cell whose text embeds its key.
            string firstKey   = Guid.NewGuid().ToString();
            string secondKey  = Guid.NewGuid().ToString();
            string firstText  = "the force is strong in this column " + firstKey;
            string secondText = "the force is strong in this column " + secondKey;

            var client = CreateClient();

            var firstRow = new CellSet.Row
            {
                key    = Encoding.UTF8.GetBytes(firstKey),
                values =
                {
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:starwars"),
                        data   = Encoding.UTF8.GetBytes(firstText)
                    }
                }
            };
            var secondRow = new CellSet.Row
            {
                key    = Encoding.UTF8.GetBytes(secondKey),
                values =
                {
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:starwars"),
                        data   = Encoding.UTF8.GetBytes(secondText)
                    }
                }
            };
            var payload = new CellSet { rows = { firstRow, secondRow } };

            // Act: store both rows, then fetch them with a single multi-key request.
            client.StoreCellsAsync(testTableName, payload).Wait();
            var fetched = client.GetCellsAsync(testTableName, new[] { firstKey, secondKey }).Result;

            // Assert: both rows come back, in request order, each with its single cell intact.
            Assert.Equal(2, fetched.rows.Count);
            Assert.Single(fetched.rows[0].values);
            Assert.Equal(firstText, Encoding.UTF8.GetString(fetched.rows[0].values[0].data));
            Assert.Single(fetched.rows[1].values);
            Assert.Equal(secondText, Encoding.UTF8.GetString(fetched.rows[1].values[0].data));
        }
Пример #17
0
        // Populate a CellSet object to be written into HBase: adds one row for the tweet, keyed by
        // the tweet id, with columns d:Text, d:CreatedOn, d:ReplyToId (when set), d:Sentiment and
        // d:Coordinates (when set).
        private void CreateTweetByWordsCells(CellSet set, TweetSentimentData tweet)
        {
            // Create a row with a key
            var row = new CellSet.Row {
                key = Encoding.UTF8.GetBytes(tweet.Id)
            };

            // Add columns to the row
            row.values.Add(
                new Cell
            {
                column = Encoding.UTF8.GetBytes("d:Text"),
                data   = Encoding.UTF8.GetBytes(tweet.Text)
            });
            row.values.Add(
                new Cell
            {
                column = Encoding.UTF8.GetBytes("d:CreatedOn"),
                data   = Encoding.UTF8.GetBytes(tweet.CreatedOn.ToString())
            });
            // A tweet that is not a reply has no ReplyToId; Encoding.GetBytes throws on null, so the
            // column is only written when a value is present (same treatment as Coordinates below).
            if (tweet.ReplyToId != null)
            {
                row.values.Add(
                    new Cell
                {
                    column = Encoding.UTF8.GetBytes("d:ReplyToId"),
                    data   = Encoding.UTF8.GetBytes(tweet.ReplyToId)
                });
            }
            row.values.Add(
                new Cell
            {
                column = Encoding.UTF8.GetBytes("d:Sentiment"),
                data   = Encoding.UTF8.GetBytes(tweet.Sentiment.ToString())
            });
            if (tweet.Coordinates != null)
            {
                row.values.Add(
                    new Cell
                {
                    column = Encoding.UTF8.GetBytes("d:Coordinates"),
                    data   = Encoding.UTF8.GetBytes(tweet.Coordinates)
                });
            }
            set.rows.Add(row);
        }
Пример #18
0
        // Writes ten test rows to the table in one StoreCells call and records the matching
        // FilterTestRecord for each of them in _allExpectedRecords.
        private void PopulateTable()
        {
            var client  = new HBaseClient(_credentials);
            var cellSet = new CellSet();

            // All row keys of one run share this random prefix: "<id>-<lineNumber>".
            string id = Guid.NewGuid().ToString("N");

            for (int lineNumber = 0; lineNumber < 10; lineNumber++)
            {
                string rowKey = string.Format(CultureInfo.InvariantCulture, "{0}-{1}", id, lineNumber);

                // Remember what is about to be written so it can be compared later.
                var expected = new FilterTestRecord(rowKey, lineNumber, Guid.NewGuid().ToString("N"), Guid.NewGuid().ToString("D"));
                _allExpectedRecords.Add(expected);

                // One row per record: line number and A in the first column family, B in the second.
                var row = new CellSet.Row
                {
                    key    = _encoding.GetBytes(expected.RowKey),
                    values =
                    {
                        new Cell
                        {
                            column = BuildCellColumn(ColumnFamilyName1, LineNumberColumnName),
                            data   = BitConverter.GetBytes(expected.LineNumber)
                        },
                        new Cell
                        {
                            column = BuildCellColumn(ColumnFamilyName1, ColumnNameA),
                            data   = _encoding.GetBytes(expected.A)
                        },
                        new Cell
                        {
                            column = BuildCellColumn(ColumnFamilyName2, ColumnNameB),
                            data   = Encoding.UTF8.GetBytes(expected.B)
                        }
                    }
                };

                cellSet.rows.Add(row);
            }

            client.StoreCells(_tableName, cellSet);
        }
Пример #19
0
        /// <summary>
        /// Stores 100 rows keyed by the bytes of the integers 0 through 99 (as produced by
        /// BitConverter.GetBytes), each with the same d:starwars cell, in one StoreCellsAsync call
        /// that is waited on synchronously. The rows serve as a fixture for range queries.
        /// </summary>
        /// <param name="hbaseClient">the client used to write the rows</param>
        public void StoreTestData(IHBaseClient hbaseClient)
        {
            const string cellText = "the force is strong in this column";
            var payload = new CellSet();

            for (int keyNumber = 0; keyNumber < 100; keyNumber++)
            {
                payload.rows.Add(new CellSet.Row
                {
                    key    = BitConverter.GetBytes(keyNumber),
                    values =
                    {
                        new Cell
                        {
                            column = Encoding.UTF8.GetBytes("d:starwars"),
                            data   = Encoding.UTF8.GetBytes(cellText)
                        }
                    }
                });
            }

            hbaseClient.StoreCellsAsync(testTableName, payload).Wait();
        }
Пример #20
0
        // Builds a row with the given key that contains a single cell in ColumnFamilyName1 / columnName.
        // The cell's data is set only when value is non-null, and its timestamp only when timestamp > 0.
        private CellSet.Row GetCellSet(string key, string columnName, string value, long timestamp)
        {
            var result = new CellSet.Row {
                key = Encoding.UTF8.GetBytes(key)
            };

            var cell = new Cell();
            cell.column = BuildCellColumn(ColumnFamilyName1, columnName);
            cell.row    = result.key;

            if (value != null)
            {
                cell.data = Encoding.UTF8.GetBytes(value);
            }
            if (timestamp > 0)
            {
                cell.timestamp = timestamp;
            }

            result.values.Add(cell);
            return result;
        }
Пример #21
0
        /// <summary>
        /// Maps a tweet to an HBase row keyed by the tweet id. d:Text and d:CreatedOn are always
        /// written; d:ReplyToId and d:Coordinates only when the tweet has a value for them.
        /// </summary>
        /// <param name="tweet">the tweet to map</param>
        /// <returns>the populated row</returns>
        public static CellSet.Row TweetToRow(FSTweet tweet)
        {
            // Key and mandatory columns
            var result = new CellSet.Row
            {
                key    = Encoding.UTF8.GetBytes(tweet.Id),
                values =
                {
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:Text"),
                        data   = Encoding.UTF8.GetBytes(tweet.Text)
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:CreatedOn"),
                        data   = Encoding.UTF8.GetBytes(tweet.CreatedOn.ToString())
                    }
                }
            };

            // Optional columns
            if (tweet.ReplyToId != null)
            {
                var replyCell = new Cell
                {
                    column = Encoding.UTF8.GetBytes("d:ReplyToId"),
                    data   = Encoding.UTF8.GetBytes(tweet.ReplyToId)
                };
                result.values.Add(replyCell);
            }
            if (tweet.Coordinates != null)
            {
                var coordinatesCell = new Cell
                {
                    column = Encoding.UTF8.GetBytes("d:Coordinates"),
                    data   = Encoding.UTF8.GetBytes(tweet.Coordinates)
                };
                result.values.Add(coordinatesCell);
            }

            return result;
        }
Пример #22
0
        /// <summary>
        /// Maps an annotated tweet to an HBase row keyed by the annotation id, with the columns
        /// d:TweetId, d:AnnotatedBy, d:AnnotatedOn, d:SentenceType and d:Sentiment (in that order).
        /// </summary>
        /// <param name="tweet">the annotated tweet to map</param>
        /// <returns>the populated row</returns>
        public static CellSet.Row AnnotatedTweetToRow(FSAnnotatedTweet tweet)
        {
            var result = new CellSet.Row
            {
                key    = Encoding.UTF8.GetBytes(tweet.Id),
                values =
                {
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:TweetId"),
                        data   = Encoding.UTF8.GetBytes(tweet.TweetId)
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:AnnotatedBy"),
                        data   = Encoding.UTF8.GetBytes(tweet.AnnotatedBy)
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:AnnotatedOn"),
                        data   = Encoding.UTF8.GetBytes(tweet.AnnotatedOn.ToString())
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:SentenceType"),
                        data   = Encoding.UTF8.GetBytes(tweet.SentenceType.ToString())
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:Sentiment"),
                        data   = Encoding.UTF8.GetBytes(tweet.Sentiment.ToString())
                    }
                }
            };

            return result;
        }
Пример #23
0
        // Adds one row for the index item to the set. Row key layout:
        // <word>_<ulong.MaxValue minus CreatedAt, left-padded to 20 chars><tweet id>.
        // Columns written: d:id_str, d:lang, d:coor and d:sentiment (in that order).
        private void CreateTweetByWordsCells(CellSet set, TweetIndexItem indexItem)
        {
            string term         = indexItem.Word;
            ulong  invertedTime = ulong.MaxValue - (ulong)indexItem.CreatedAt;
            string rowKey       = term + "_" + invertedTime.ToString().PadLeft(20) + indexItem.IdStr;

            var row = new CellSet.Row
            {
                key    = Encoding.UTF8.GetBytes(rowKey),
                values =
                {
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:id_str"),
                        data   = Encoding.UTF8.GetBytes(indexItem.IdStr)
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:lang"),
                        data   = Encoding.UTF8.GetBytes(indexItem.Language)
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:coor"),
                        data   = Encoding.UTF8.GetBytes(indexItem.Coordinates)
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:sentiment"),
                        data   = Encoding.UTF8.GetBytes(indexItem.SentimentScore.ToString())
                    }
                }
            };

            set.rows.Add(row);
        }
Пример #24
0
        /// <summary>
        /// Creates a table with a single column family "data" and stores every document as one row,
        /// issuing one StoreCells call per document.
        /// </summary>
        /// <param name="connectionString">connection string passed to CreateClient</param>
        /// <param name="tableName">name of the table to create and fill</param>
        /// <param name="documents">
        /// documents to store; the entry under RowIdPropertyName becomes the row key and every other
        /// entry becomes a cell "data:&lt;property name&gt;" holding the value's string form (null data for null values)
        /// </param>
        public static void CreateSampleTable(string connectionString, string tableName, IEnumerable <IReadOnlyDictionary <string, object> > documents)
        {
            var client = CreateClient(connectionString);

            var schema = new TableSchema()
            {
                name    = tableName,
                columns = { new ColumnSchema()
                            {
                                name = "data"
                            } }
            };
            client.CreateTable(schema);

            foreach (var document in documents)
            {
                var row = new CellSet.Row
                {
                    key = Encoding.UTF8.GetBytes(document[RowIdPropertyName].ToString())
                };

                foreach (var property in document)
                {
                    // The row id is already used as the key, so it is not written as a cell.
                    if (property.Key != RowIdPropertyName)
                    {
                        byte[] payload = null;
                        if (property.Value != null)
                        {
                            payload = Encoding.UTF8.GetBytes(property.Value.ToString());
                        }

                        var cell = new Cell();
                        cell.column = Encoding.UTF8.GetBytes("data:" + property.Key);
                        cell.data   = payload;
                        row.values.Add(cell);
                    }
                }

                var singleRowSet = new CellSet();
                singleRowSet.rows.Add(row);
                client.StoreCells(tableName, singleRowSet);
            }
        }
Пример #25
0
        /// <summary>
        /// Maps a word relationship to an HBase row keyed by the relation id, with the columns
        /// d:WordOne, d:WordOneId, d:WordTwo, d:WordTwoId and d:RScore (in that order).
        /// </summary>
        /// <param name="relation">the word relationship to map</param>
        /// <returns>the populated row</returns>
        public static CellSet.Row WordRelationToRow(FSWordRelationship relation)
        {
            var result = new CellSet.Row
            {
                key    = Encoding.UTF8.GetBytes(relation.Id),
                values =
                {
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:WordOne"),
                        data   = Encoding.UTF8.GetBytes(relation.WordOne)
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:WordOneId"),
                        data   = Encoding.UTF8.GetBytes(relation.WordOneId.ToString())
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:WordTwo"),
                        data   = Encoding.UTF8.GetBytes(relation.WordTwo)
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:WordTwoId"),
                        data   = Encoding.UTF8.GetBytes(relation.WordTwoId.ToString())
                    },
                    new Cell
                    {
                        column = Encoding.UTF8.GetBytes("d:RScore"),
                        data   = Encoding.UTF8.GetBytes(relation.rScore.ToString())
                    }
                }
            };

            return result;
        }
Пример #26
0
        /// <summary>
        /// Appends one row for <paramref name="tweet"/> to <paramref name="set"/>. The row key is the
        /// tweet's IdStr; the cells are d:created_at, d:text and d:lang, plus d:coor and
        /// d:place_fullname when the tweet carries coordinates or a place.
        /// </summary>
        /// <param name="set">Cell set that receives the new row.</param>
        /// <param name="tweet">Tweet to convert.</param>
        private static void CreateTweetCells(CellSet set, ITweet tweet)
        {
            var row = new CellSet.Row {
                key = Encoding.UTF8.GetBytes(tweet.IdStr)
            };

            // NOTE(review): ToLongTimeString() is culture-dependent and appears to drop the date part;
            // left unchanged because readers of d:created_at may rely on the current format - confirm.
            row.values.Add(new Cell {
                column = Encoding.UTF8.GetBytes("d:created_at"), data = Encoding.UTF8.GetBytes(tweet.CreatedAt.ToLongTimeString())
            });
            row.values.Add(new Cell {
                column = Encoding.UTF8.GetBytes("d:text"), data = Encoding.UTF8.GetBytes(tweet.Text)
            });
            row.values.Add(new Cell {
                column = Encoding.UTF8.GetBytes("d:lang"), data = Encoding.UTF8.GetBytes(tweet.Language.ToString())
            });

            if (tweet.Coordinates != null)
            {
                // Format with the invariant culture so the decimal separator is always '.'.
                // With a comma-decimal culture the "longitude,latitude" value could not be split reliably.
                var invariant = System.Globalization.CultureInfo.InvariantCulture;
                var str       = tweet.Coordinates.Longitude.ToString(invariant) + "," + tweet.Coordinates.Latitude.ToString(invariant);

                row.values.Add(new Cell {
                    column = Encoding.UTF8.GetBytes("d:coor"), data = Encoding.UTF8.GetBytes(str)
                });
            }

            if (tweet.Place != null)
            {
                row.values.Add(new Cell {
                    column = Encoding.UTF8.GetBytes("d:place_fullname"), data = Encoding.UTF8.GetBytes(tweet.Place.FullName)
                });
            }

            set.rows.Add(row);
        }
        /// <summary>
        /// Create a write set for HBase when the emit period completes.
        /// We delay all the writes to batch the aggregations and writes for them.
        /// </summary>
        /// <remarks>
        /// Nothing is done until emitstopwatch exceeds the configured AggregationWindow. Every cached
        /// time bucket older than the emit cut-off is then turned into one row per partition key.
        /// Unless HBaseOverwrite is set, the values already stored in HBase for the affected key range
        /// are scanned once per partition key and added to the new counts.
        /// The emit and cache counters are only adjusted after StoreCells has succeeded: when the write
        /// throws, the buckets stay cached for a later attempt, so counting them up front would count
        /// the same cells again on the retry.
        /// </remarks>
        /// <returns>
        /// true when last_emit_count is greater than zero after the call; false otherwise, including
        /// when an exception was caught and logged.
        /// </returns>
        public override bool EmitAggregations()
        {
            try
            {
                if (emitstopwatch.Elapsed > this.appConfig.AggregationWindow)
                {
                    local_hbasescan_count = 0;
                    var writeset     = new CellSet();
                    var emitime      = DateTime.UtcNow.Floor(this.appConfig.AggregationWindow).Subtract(this.appConfig.EmitWindow);
                    var keystoremove = new List <DateTime>();

                    // Number of cells queued in this pass; applied to the counters only after a successful write.
                    var pendingcells = 0;

                    var dtkeys = aggregatedCounts.Keys.Where(dt => dt < emitime).OrderBy(dt => dt).ToList();

                    var hbaseresultsets = new Dictionary <string, Dictionary <string, Dictionary <string, double> > >();

                    if (dtkeys.Count > 0)
                    {
                        // dtkeys is sorted, so the first and last entries bound the scan range.
                        var startdt = dtkeys[0];
                        var enddt   = dtkeys[dtkeys.Count - 1];

                        foreach (var dtkey in dtkeys)
                        {
                            var partitions = aggregatedCounts[dtkey];

                            foreach (var pkey in partitions.Keys)
                            {
                                // Scan the existing values once per partition key for the whole range being emitted.
                                if (!this.appConfig.HBaseOverwrite && !hbaseresultsets.ContainsKey(pkey))
                                {
                                    hbaseresultsets.Add(pkey,
                                                        ScanHBase(
                                                            pkey + Utilities.KEY_DELIMITER + startdt.ToString(Utilities.DATE_TIME_FORMAT),
                                                            pkey + Utilities.KEY_DELIMITER + enddt.ToString(Utilities.DATE_TIME_FORMAT)));
                                }

                                var rowkey   = pkey + Utilities.KEY_DELIMITER + dtkey.ToString(Utilities.DATE_TIME_FORMAT);
                                var tablerow = new CellSet.Row {
                                    key = Encoding.UTF8.GetBytes(rowkey)
                                };

                                // Previously stored values for this row, if a scan was done and returned the row.
                                Dictionary <string, Dictionary <string, double> > scannedrows;
                                Dictionary <string, double> scannedcells = null;
                                if (hbaseresultsets.TryGetValue(pkey, out scannedrows))
                                {
                                    scannedrows.TryGetValue(rowkey, out scannedcells);
                                }

                                var counts = partitions[pkey];
                                foreach (var skey in counts.Keys)
                                {
                                    var    columnkey    = "v:" + skey;
                                    double previousdata = 0;

                                    if (scannedcells != null)
                                    {
                                        // Leaves previousdata at 0 when the column was not stored before.
                                        scannedcells.TryGetValue(columnkey, out previousdata);
                                    }

                                    var rowcell = new Cell
                                    {
                                        column = Encoding.UTF8.GetBytes(columnkey),
                                        data   = BitConverter.GetBytes(previousdata + counts[skey])
                                    };

                                    tablerow.values.Add(rowcell);
                                    pendingcells++;
                                }
                                writeset.rows.Add(tablerow);
                            }
                            keystoremove.Add(dtkey);
                        }
                    }

                    if (writeset.rows.Count > 0)
                    {
                        Context.Logger.Info("HBaseTableName: {0} - Rows to write: {1}, First Rowkey = {2}",
                                            this.HBaseTableName, writeset.rows.Count, Encoding.UTF8.GetString(writeset.rows[0].key));

                        var localstopwatch = new Stopwatch();
                        localstopwatch.Start();
                        //Use the StoreCells API to write the cellset into HBase Table
                        HBaseClusterClient.StoreCells(this.HBaseTableName, writeset);

                        // The write went through: only now account for the emitted cells.
                        global_emit_count  += pendingcells;
                        last_emit_count    += pendingcells;
                        current_cache_size -= pendingcells;

                        Context.Logger.Info("HBase: Table = {0}, Rows Written = {1}, Write Time = {2} secs, Time since last write = {3} secs",
                                            this.HBaseTableName, writeset.rows.Count, localstopwatch.Elapsed.TotalSeconds, emitstopwatch.Elapsed.TotalSeconds);

                        foreach (var key in keystoremove)
                        {
                            aggregatedCounts.Remove(key);
                        }
                        last_emit_in_secs = emitstopwatch.Elapsed.TotalSeconds;
                        emitstopwatch.Restart();
                    }

                    if (!this.appConfig.HBaseOverwrite)
                    {
                        Context.Logger.Info("ScanHBase: Last Window Scan Count = {0}, Table = {1}", local_hbasescan_count, this.HBaseTableName);
                    }
                }

                return last_emit_count > 0;
            }
            catch (Exception ex)
            {
                last_error_count++;
                global_error_count++;
                Context.Logger.Error(ex.ToString());
                return(false);
            }
        }
Пример #28
0
        /// <summary>
        /// Sample walk-through against an HBase cluster: prints the cluster version, creates a table
        /// with column families "d" and "f", stores and reads back one cell, then scans a key range
        /// and prints every returned row. Any exception is written to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static async Task MainAsync(string[] args)
        {
            try
            {
                string clusterURL         = "https://myClusterName.azurehdinsight.net";
                string hadoopUsername     = "******";
                string hadoopUserPassword = "******";

                string hbaseTableName = "sampleHbaseTable";

                // Create a new instance of an HBase client.
                ClusterCredentials creds       = new ClusterCredentials(new Uri(clusterURL), hadoopUsername, hadoopUserPassword);
                HBaseClient        hbaseClient = new HBaseClient(creds);

                // Retrieve the cluster version.
                org.apache.hadoop.hbase.rest.protobuf.generated.Version version = await hbaseClient.GetVersionAsync();

                Console.WriteLine("The HBase cluster version is " + version);

                // Create a new HBase table.
                TableSchema testTableSchema = new TableSchema();
                testTableSchema.name = hbaseTableName;
                testTableSchema.columns.Add(new ColumnSchema()
                {
                    name = "d"
                });
                testTableSchema.columns.Add(new ColumnSchema()
                {
                    name = "f"
                });
                // Await instead of blocking with Wait(): this method is already asynchronous.
                await hbaseClient.CreateTableAsync(testTableSchema);

                // Insert data into the HBase table.
                string      testKey    = "content";
                string      testValue  = "the force is strong in this column";
                CellSet     cellSet    = new CellSet();
                CellSet.Row cellSetRow = new CellSet.Row {
                    key = Encoding.UTF8.GetBytes(testKey)
                };
                cellSet.rows.Add(cellSetRow);

                Cell value = new Cell {
                    column = Encoding.UTF8.GetBytes("d:starwars"), data = Encoding.UTF8.GetBytes(testValue)
                };
                cellSetRow.values.Add(value);
                await hbaseClient.StoreCellsAsync(hbaseTableName, cellSet);

                // Retrieve a cell by its key.
                cellSet = await hbaseClient.GetCellsAsync(hbaseTableName, testKey);

                Console.WriteLine("The data with the key '" + testKey + "' is: " + Encoding.UTF8.GetString(cellSet.rows[0].values[0].data));
                // with the previous insert, it should yield: "the force is strong in this column"

                //Scan over rows in a table. Assume the table has integer keys and you want data between keys 25 and 35.
                Scanner scanSettings = new Scanner()
                {
                    batch    = 10,
                    startRow = BitConverter.GetBytes(25),
                    endRow   = BitConverter.GetBytes(35)
                };

                ScannerInformation scannerInfo = await hbaseClient.CreateScannerAsync(hbaseTableName, scanSettings, null);

                CellSet next = null;
                Console.WriteLine("Scan results");

                while ((next = await hbaseClient.ScannerGetNextAsync(scannerInfo, null)) != null)
                {
                    foreach (CellSet.Row row in next.rows)
                    {
                        // row.key is a byte[]; concatenating it directly would print the array's type name.
                        // Keys of exactly four bytes are shown as the integer the scan assumes, anything else as hex.
                        string keyText = row.key.Length == sizeof(int)
                                         ? BitConverter.ToInt32(row.key, 0).ToString()
                                         : BitConverter.ToString(row.key);

                        Console.WriteLine(keyText + " : " + Encoding.UTF8.GetString(row.values[0].data));
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            Console.WriteLine("Press ENTER to continue ...");
            Console.ReadLine();
        }