/// <summary>
/// Scans with a <c>MustPassOne</c> filter list (logical OR) built from two LineNumber filters
/// and compares the returned records with those whose LineNumber is at most 2.
/// </summary>
public void When_I_Scan_with_a_FilterList_with_OR_logic_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords.Where(r => r.LineNumber <= 2).ToList();

    var hbase = new HBaseClient(_credentials);

    // LineNumber == 1
    Filter equalsOne = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName1),
        Encoding.UTF8.GetBytes(LineNumberColumnName),
        CompareFilter.CompareOp.Equal,
        BitConverter.GetBytes(1));

    // LineNumber <= 2
    Filter atMostTwo = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName1),
        Encoding.UTF8.GetBytes(LineNumberColumnName),
        CompareFilter.CompareOp.LessThanOrEqualTo,
        BitConverter.GetBytes(2));

    var eitherFilter = new FilterList(FilterList.Operator.MustPassOne, equalsOne, atMostTwo);
    var scanSettings = new Scanner { filter = eitherFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>PrefixFilter</c> built from the first four bytes of the first expected
/// record's row key and compares the result with every record whose key starts with those bytes.
/// </summary>
public void When_I_Scan_with_a_PrefixFilter_I_get_the_expected_results()
{
    const int prefixLength = 4;

    // Take the prefix from the first known record.
    byte[] sampleKeyBytes = Encoding.UTF8.GetBytes(_allExpectedRecords.First().RowKey);
    var keyPrefix = new byte[prefixLength];
    Array.Copy(sampleKeyBytes, keyPrefix, prefixLength);

    List<FilterTestRecord> expected = _allExpectedRecords
        .Where(r =>
        {
            byte[] keyBytes = Encoding.UTF8.GetBytes(r.RowKey);
            return keyBytes[0] == keyPrefix[0]
                && keyBytes[1] == keyPrefix[1]
                && keyBytes[2] == keyPrefix[2]
                && keyBytes[3] == keyPrefix[3];
        })
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var prefixFilter = new PrefixFilter(keyPrefix);
    var scanSettings = new Scanner { filter = prefixFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Stores the test data, scans the whole test table in batches of 10, and checks that every
/// batch holds 10 rows and that the integer keys 0..99 were all returned.
/// </summary>
public void TestFullScan()
{
    var hbase = new HBaseClient(_credentials);
    StoreTestData(hbase);

    // Full range scan: no start/end row configured.
    ScannerInformation fullScan = hbase.CreateScanner(_testTableName, new Scanner { batch = 10 });

    // Keys not yet seen; each scanned row removes its key.
    var unseenKeys = new HashSet<int>(Enumerable.Range(0, 100));

    for (CellSet page = hbase.ScannerGetNext(fullScan); page != null; page = hbase.ScannerGetNext(fullScan))
    {
        Assert.AreEqual(10, page.rows.Count);
        foreach (CellSet.Row row in page.rows)
        {
            unseenKeys.Remove(BitConverter.ToInt32(row.key, 0));
        }
    }

    Assert.AreEqual(0, unseenKeys.Count, "The expected set wasn't empty! Items left {0}!", string.Join(",", unseenKeys));
}
/// <summary>
/// Scans with a <c>SingleColumnValueFilter</c> on column A using a <c>SubstringComparator</c>
/// and compares the result with the records whose A value contains the substring.
/// </summary>
public void When_I_Scan_with_a_SingleColumnValueFilter_and_a_SubstringComparator_with_the_operator_equal_I_get_the_expected_results()
{
    // The substring comes from the first record, so at least that record matches.
    string fragment = _allExpectedRecords.First().A.Substring(1, 2);

    List<FilterTestRecord> expected = _allExpectedRecords.Where(r => r.A.Contains(fragment)).ToList();

    var hbase = new HBaseClient(_credentials);
    var substringFilter = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName1),
        Encoding.UTF8.GetBytes(ColumnNameA),
        CompareFilter.CompareOp.Equal,
        new SubstringComparator(fragment));
    var scanSettings = new Scanner { filter = substringFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Stores the test data, scans the test table between two integer-encoded row keys in batches
/// of 10, and checks that every key in [startRow, endRow) was returned.
/// </summary>
public void TestSubsetScan()
{
    const int startRow = 15;
    const int endRow = 15 + 13;

    var hbase = new HBaseClient(_credentials);
    StoreTestData(hbase);

    // Subset range scan bounded by the two encoded keys.
    var rangeSettings = new Scanner
    {
        batch = 10,
        startRow = BitConverter.GetBytes(startRow),
        endRow = BitConverter.GetBytes(endRow)
    };
    ScannerInformation rangeScan = hbase.CreateScanner(_testTableName, rangeSettings);

    // Keys not yet seen; each scanned row removes its key.
    var unseenKeys = new HashSet<int>(Enumerable.Range(startRow, endRow - startRow));

    for (CellSet page = hbase.ScannerGetNext(rangeScan); page != null; page = hbase.ScannerGetNext(rangeScan))
    {
        foreach (CellSet.Row row in page.rows)
        {
            unseenKeys.Remove(BitConverter.ToInt32(row.key, 0));
        }
    }

    Assert.AreEqual(0, unseenKeys.Count, "The expected set wasn't empty! Items left {0}!", string.Join(",", unseenKeys));
}
/// <summary>
/// Runs an unfiltered scan of the table and compares the result with all expected records.
/// </summary>
public void When_I_Scan_all_I_get_the_expected_results()
{
    var hbase = new HBaseClient(_credentials);

    // A default Scanner carries no filter and no row bounds.
    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, new Scanner());
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(_allExpectedRecords);
}
/// <summary>
/// Runs an unfiltered scan of the table and compares the result with all expected records.
/// </summary>
public void When_I_Scan_all_I_get_the_expected_results()
{
    var hbase = new HBaseClient(_credentials);

    // A default Scanner carries no filter and no row bounds.
    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, new Scanner());
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(_allExpectedRecords);
}
/// <summary>
/// Collects the timestamps from an unfiltered scan, then scans again with a
/// <c>TimestampsFilter</c> built from them and compares the result with all expected records.
/// </summary>
public void When_I_Scan_with_a_TimestampsFilter_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords;
    var hbase = new HBaseClient(_credentials);

    // First pass: scan everything and retrieve the timestamps.
    ScannerInformation unfilteredScan = hbase.CreateScanner(_tableName, new Scanner());
    List<long> timestamps = RetrieveTimestamps(unfilteredScan).ToList();

    // Second pass: scan restricted to those timestamps.
    var timestampsFilter = new TimestampsFilter(timestamps);
    var filteredSettings = new Scanner { filter = timestampsFilter.ToEncodedString() };
    ScannerInformation filteredScan = hbase.CreateScanner(_tableName, filteredSettings);
    List<FilterTestRecord> actual = RetrieveResults(filteredScan).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans the word-relation table in batches of 10 rows until the scanner is exhausted.
/// </summary>
/// <remarks>
/// Converting rows to <c>FSWordRelationship</c> is not implemented yet, so no items are produced.
/// </remarks>
/// <returns>An empty sequence; never <c>null</c>, so callers can enumerate the result directly.</returns>
public IEnumerable<FSWordRelationship> ReadAllWordRelationships()
{
    var scanSettings = new Scanner { batch = 10 };
    ScannerInformation scannerInfo = client.CreateScanner(HadoopContext.WordRelationTableName, scanSettings);

    CellSet batch;
    while ((batch = client.ScannerGetNext(scannerInfo)) != null)
    {
        foreach (CellSet.Row row in batch.rows)
        {
            // TODO: convert row into the desired domain type and collect it for the result.
        }
    }

    // Return an empty collection rather than null until the conversion above exists.
    return new FSWordRelationship[0];
}
/// <summary>
/// Scans with a <c>PageFilter</c> of size 2 and asserts that at least two records come back.
/// </summary>
public void When_I_Scan_with_a_PageFilter_I_get_the_expected_results()
{
    var hbase = new HBaseClient(_credentials);
    var pageFilter = new PageFilter(2);
    var scanSettings = new Scanner { filter = pageFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    // Only a lower bound is asserted on the number of records.
    actual.Count.ShouldBeGreaterThanOrEqualTo(2);
}
/// <summary>
/// Scans with a <c>ValueFilter</c> using the regular expression <c>.*</c> and compares the
/// result with all expected records.
/// </summary>
public void When_I_Scan_with_a_ValueFilter_and_a_RegexStringComparator_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords;

    var hbase = new HBaseClient(_credentials);
    var matchAnything = new RegexStringComparator(".*");
    var valueFilter = new ValueFilter(CompareFilter.CompareOp.Equal, matchAnything);
    var scanSettings = new Scanner { filter = valueFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Endlessly re-reads the rows of the "Stocks" table between keys "AAA" and "ZZZ", shifts each
/// price by a random amount in [-1, 1), prints the change and writes each batch back.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const string clusterURL = "https://hb12345.azurehdinsight.net";
    const string userName = "******";
    const string password = "******";

    // Prices live in HBase as text; read and write them culture-invariantly so the stored
    // value does not depend on the locale of the machine running this program.
    // NOTE(review): assumes existing cells use '.' as decimal separator — confirm.
    var storageCulture = System.Globalization.CultureInfo.InvariantCulture;

    // One generator and one client for the whole run instead of new ones on every pass.
    Random rnd = new Random();

    // Connect to HBase cluster
    ClusterCredentials creds = new ClusterCredentials(new Uri(clusterURL), userName, password);
    HBaseClient hbaseClient = new HBaseClient(creds);

    while (true)
    {
        Console.Clear();

        // Get all stocks
        Scanner scanSettings = new Scanner()
        {
            batch = 10,
            startRow = Encoding.UTF8.GetBytes("AAA"),
            endRow = Encoding.UTF8.GetBytes("ZZZ")
        };
        ScannerInformation stockScanner = hbaseClient.CreateScanner("Stocks", scanSettings);

        CellSet stockCells;
        while ((stockCells = hbaseClient.ScannerGetNext(stockScanner)) != null)
        {
            foreach (var row in stockCells.rows)
            {
                string stock = Encoding.UTF8.GetString(row.key);

                // NOTE(review): relies on the price being the second cell of each row — confirm.
                Double currentPrice = Double.Parse(Encoding.UTF8.GetString(row.values[1].data), storageCulture);

                // Random shift in [-1, 1).
                Double newPrice = currentPrice + (rnd.NextDouble() * 2 - 1);

                Cell c = new Cell
                {
                    column = Encoding.UTF8.GetBytes("Current:Price"),
                    data = Encoding.UTF8.GetBytes(newPrice.ToString(storageCulture))
                };
                row.values.Insert(2, c);

                Console.WriteLine(stock + ": " + currentPrice.ToString() + " := " + newPrice.ToString());
            }

            // Write the modified batch back.
            hbaseClient.StoreCells("Stocks", stockCells);
        }
    }
}
/// <summary>
/// Scans with a <c>WhileMatchFilter</c> wrapping a "value not equal to 0" <c>ValueFilter</c> and
/// compares the result with the LineNumber-0 records with their B value set to null.
/// </summary>
public void When_I_Scan_with_a_WhileMatchFilter_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords
        .Where(r => r.LineNumber == 0)
        .Select(r => r.WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var notZero = new ValueFilter(CompareFilter.CompareOp.NotEqual, new BinaryComparator(BitConverter.GetBytes(0)));
    var whileMatch = new WhileMatchFilter(notZero);
    var scanSettings = new Scanner { filter = whileMatch.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>QualifierFilter</c> matching the LineNumber column name and compares the
/// result with all expected records with their A and B values set to null.
/// </summary>
public void When_I_Scan_with_a_QualifierFilter_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(r => r.WithAValue(null).WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var lineNumberQualifier = new BinaryComparator(Encoding.UTF8.GetBytes(LineNumberColumnName));
    var qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.Equal, lineNumberQualifier);
    var scanSettings = new Scanner { filter = qualifierFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Creates a scanner with a batch size of 2 and checks the returned table name and that the
/// scanner location's authority starts with "headnode".
/// </summary>
public void TestScannerCreation()
{
    var hbase = new HBaseClient(_credentials);
    ScannerInformation created = hbase.CreateScanner(_testTableName, new Scanner { batch = 2 });

    Assert.AreEqual(_testTableName, created.TableName);

    bool locatedOnHeadnode = created.Location.Authority.StartsWith("headnode", StringComparison.Ordinal);
    Assert.IsTrue(
        locatedOnHeadnode,
        "returned location didn't start with \"headnode\", it was: {0}",
        created.Location);
}
/// <summary>
/// Scans with a <c>ColumnRangeFilter</c> from column A to column B and compares the result with
/// all expected records with LineNumber set to 0 and B set to null.
/// </summary>
public void When_I_Scan_with_a_ColumnRangeFilter_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(r => r.WithLineNumberValue(0).WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);

    // Boolean flags true/false: presumably lower bound inclusive, upper bound exclusive — TODO confirm.
    var rangeFilter = new ColumnRangeFilter(
        Encoding.UTF8.GetBytes(ColumnNameA),
        true,
        Encoding.UTF8.GetBytes(ColumnNameB),
        false);
    var scanSettings = new Scanner { filter = rangeFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>RandomRowFilter</c> and compares the result with all expected records.
/// </summary>
public void When_I_Scan_with_a_RandomRowFilter_I_get_the_expected_results()
{
    var hbase = new HBaseClient(_credentials);

    // The chance value is deliberately large so that all records are returned.
    var randomRowFilter = new RandomRowFilter(2000.0F);
    var scanSettings = new Scanner { filter = randomRowFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(_allExpectedRecords);
}
/// <summary>
/// Scans with a <c>ColumnPaginationFilter(1, 1)</c> and compares the result with all expected
/// records with their A and B values set to null.
/// </summary>
public void When_I_Scan_with_a_ColumnPaginationFilter_I_get_the_expected_results()
{
    // With (1, 1) only the LineNumber column is grabbed.
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(r => r.WithAValue(null).WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var paginationFilter = new ColumnPaginationFilter(1, 1);
    var scanSettings = new Scanner { filter = paginationFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>ColumnCountGetFilter(2)</c> and compares the result with all expected
/// records with their B value set to null.
/// </summary>
public void When_I_Scan_with_a_ColumnCountGetFilter_I_get_the_expected_results()
{
    // The B column is not expected back, hence the null B value.
    List<FilterTestRecord> expected = _allExpectedRecords.Select(r => r.WithBValue(null)).ToList();

    var hbase = new HBaseClient(_credentials);
    var columnCountFilter = new ColumnCountGetFilter(2);
    var scanSettings = new Scanner { filter = columnCountFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>RowFilter</c> that matches the first expected record's row key and compares
/// the result with the records sharing that key.
/// </summary>
public void When_I_Scan_with_a_RowFilter_I_get_the_expected_results()
{
    FilterTestRecord sample = _allExpectedRecords.First();
    List<FilterTestRecord> expected = _allExpectedRecords.Where(r => r.RowKey == sample.RowKey).ToList();

    var hbase = new HBaseClient(_credentials);
    var sampleKey = new BinaryComparator(Encoding.UTF8.GetBytes(sample.RowKey));
    var rowFilter = new RowFilter(CompareFilter.CompareOp.Equal, sampleKey);
    var scanSettings = new Scanner { filter = rowFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>KeyOnlyFilter</c> and compares the result with records that carry only the
/// row key (LineNumber 0, empty A and B).
/// </summary>
public void When_I_Scan_with_a_KeyOnlyFilter_I_get_the_expected_results()
{
    // A key-only filter does not return column values.
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(r => new FilterTestRecord(r.RowKey, 0, string.Empty, string.Empty))
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var keyOnlyFilter = new KeyOnlyFilter();
    var scanSettings = new Scanner { filter = keyOnlyFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>SingleColumnValueFilter</c> requiring LineNumber not equal to 1 and compares
/// the result with the records whose LineNumber differs from 1.
/// </summary>
public void When_I_Scan_with_a_SingleColumnValueFilter_and_a_BinaryComparator_with_the_operator_not_equal_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords.Where(r => r.LineNumber != 1).ToList();

    var hbase = new HBaseClient(_credentials);
    var notOneFilter = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName1),
        Encoding.UTF8.GetBytes(LineNumberColumnName),
        CompareFilter.CompareOp.NotEqual,
        BitConverter.GetBytes(1));
    var scanSettings = new Scanner { filter = notOneFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with an <c>InclusiveStopFilter</c> on the row key of the single LineNumber-2 record and
/// compares the result with the records whose LineNumber is at most 2.
/// </summary>
public void When_I_Scan_with_a_InclusiveStopFilter_I_get_the_expected_results()
{
    // Exactly one record with LineNumber 2 must exist; Single() throws otherwise.
    FilterTestRecord stopRecord = _allExpectedRecords.Single(r => r.LineNumber == 2);
    byte[] stopKey = Encoding.UTF8.GetBytes(stopRecord.RowKey);

    List<FilterTestRecord> expected = _allExpectedRecords.Where(r => r.LineNumber <= 2).ToList();

    var hbase = new HBaseClient(_credentials);
    var stopFilter = new InclusiveStopFilter(stopKey);
    var scanSettings = new Scanner { filter = stopFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>MultipleColumnPrefixFilter</c> for the A and B column names and compares the
/// result with all expected records with LineNumber set to 0.
/// </summary>
public void When_I_Scan_with_a_MultipleColumnPrefixFilter_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords.Select(r => r.WithLineNumberValue(0)).ToList();

    var hbase = new HBaseClient(_credentials);

    // Column-name prefixes handed to the filter.
    var columnPrefixes = new List<byte[]>
    {
        Encoding.UTF8.GetBytes(ColumnNameA),
        Encoding.UTF8.GetBytes(ColumnNameB)
    };
    var prefixFilter = new MultipleColumnPrefixFilter(columnPrefixes);
    var scanSettings = new Scanner { filter = prefixFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>SingleColumnValueExcludeFilter</c> matching the first record's B value and
/// compares the result with the records sharing that B value, with B itself set to null.
/// </summary>
public void When_I_Scan_with_a_SingleColumnValueExcludeFilter_and_a_BinaryComparator_with_the_operator_equal_I_get_the_expected_results()
{
    string sampleB = _allExpectedRecords.Select(r => r.B).First();

    // The B column is not expected back, hence the null B value.
    List<FilterTestRecord> expected = _allExpectedRecords
        .Where(r => r.B == sampleB)
        .Select(r => r.WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var excludeFilter = new SingleColumnValueExcludeFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName2),
        Encoding.UTF8.GetBytes(ColumnNameB),
        CompareFilter.CompareOp.Equal,
        Encoding.UTF8.GetBytes(sampleB));
    var scanSettings = new Scanner { filter = excludeFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Stores the test data, scans the whole test table in batches of 10, and checks that every
/// batch holds 10 rows and that the integer keys 0..99 were all returned.
/// </summary>
public void TestFullScan()
{
    var hbase = new HBaseClient(_credentials);
    StoreTestData(hbase);

    // Full range scan: no start/end row configured.
    ScannerInformation fullScan = hbase.CreateScanner(_testTableName, new Scanner { batch = 10 });

    // Keys not yet seen; each scanned row removes its key.
    var unseenKeys = new HashSet<int>(Enumerable.Range(0, 100));

    for (CellSet page = hbase.ScannerGetNext(fullScan); page != null; page = hbase.ScannerGetNext(fullScan))
    {
        Assert.AreEqual(10, page.rows.Count);
        foreach (CellSet.Row row in page.rows)
        {
            unseenKeys.Remove(BitConverter.ToInt32(row.key, 0));
        }
    }

    Assert.AreEqual(0, unseenKeys.Count, "The expected set wasn't empty! Items left {0}!", string.Join(",", unseenKeys));
}
/// <summary>
/// Scans with a <c>RowFilter</c> that matches the first expected record's row key and compares
/// the result with the records sharing that key.
/// </summary>
public void When_I_Scan_with_a_RowFilter_I_get_the_expected_results()
{
    FilterTestRecord sample = _allExpectedRecords.First();
    List<FilterTestRecord> expected = _allExpectedRecords.Where(r => r.RowKey == sample.RowKey).ToList();

    var hbase = new HBaseClient(_credentials);
    var sampleKey = new BinaryComparator(Encoding.UTF8.GetBytes(sample.RowKey));
    var rowFilter = new RowFilter(CompareFilter.CompareOp.Equal, sampleKey);
    var scanSettings = new Scanner { filter = rowFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>SingleColumnValueExcludeFilter</c> matching the first record's B value and
/// compares the result with the records sharing that B value, with B itself set to null.
/// </summary>
public void When_I_Scan_with_a_SingleColumnValueExcludeFilter_and_a_BinaryComparator_with_the_operator_equal_I_get_the_expected_results()
{
    string sampleB = _allExpectedRecords.Select(r => r.B).First();

    // The B column is not expected back, hence the null B value.
    List<FilterTestRecord> expected = _allExpectedRecords
        .Where(r => r.B == sampleB)
        .Select(r => r.WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var excludeFilter = new SingleColumnValueExcludeFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName2),
        Encoding.UTF8.GetBytes(ColumnNameB),
        CompareFilter.CompareOp.Equal,
        Encoding.UTF8.GetBytes(sampleB));
    var scanSettings = new Scanner { filter = excludeFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>SingleColumnValueFilter</c> on LineNumber using a <c>BitComparator</c>
/// (value 3, XOR) and compares the result with the records whose LineNumber is not 3.
/// </summary>
public void When_I_Scan_with_a_SingleColumnValueFilter_and_a_BitComparator_with_the_operator_equal_and_the_bitop_XOR_I_get_the_expected_results()
{
    List<FilterTestRecord> expectedRecords = _allExpectedRecords.Where(r => r.LineNumber != 3).ToList();

    var client = new HBaseClient(_credentials);

    // XOR against the bytes of 3.
    var comparer = new BitComparator(BitConverter.GetBytes(3), BitComparator.BitwiseOp.Xor);
    var filter = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName1),
        Encoding.UTF8.GetBytes(LineNumberColumnName),
        CompareFilter.CompareOp.Equal,
        comparer);
    var scanner = new Scanner { filter = filter.ToEncodedString() };

    ScannerInformation scanInfo = client.CreateScanner(_tableName, scanner);
    List<FilterTestRecord> actualRecords = RetrieveResults(scanInfo).ToList();

    actualRecords.ShouldContainOnly(expectedRecords);
}
/// <summary>
/// Scans with an <c>InclusiveStopFilter</c> on the row key of the single LineNumber-2 record and
/// compares the result with the records whose LineNumber is at most 2.
/// </summary>
public void When_I_Scan_with_a_InclusiveStopFilter_I_get_the_expected_results()
{
    // Exactly one record with LineNumber 2 must exist; Single() throws otherwise.
    FilterTestRecord stopRecord = _allExpectedRecords.Single(r => r.LineNumber == 2);
    byte[] stopKey = Encoding.UTF8.GetBytes(stopRecord.RowKey);

    List<FilterTestRecord> expected = _allExpectedRecords.Where(r => r.LineNumber <= 2).ToList();

    var hbase = new HBaseClient(_credentials);
    var stopFilter = new InclusiveStopFilter(stopKey);
    var scanSettings = new Scanner { filter = stopFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>SingleColumnValueFilter</c> on LineNumber using a <c>NullComparator</c> and
/// expects no records to be returned.
/// </summary>
public void When_I_Scan_with_a_SingleColumnValueFilter_and_a_NullComparator_with_the_operator_not_equal_I_get_the_expected_results()
{
    var hbase = new HBaseClient(_credentials);

    // NOTE(review): the method name says "not equal" but the filter is built with
    // CompareOp.Equal — confirm which one is intended.
    var nullFilter = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName1),
        Encoding.UTF8.GetBytes(LineNumberColumnName),
        CompareFilter.CompareOp.Equal,
        new NullComparator());
    var scanSettings = new Scanner { filter = nullFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    // Nothing is expected back.
    var expected = new List<FilterTestRecord>();
    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>WhileMatchFilter</c> wrapping a "value not equal to 0" <c>ValueFilter</c> and
/// compares the result with the LineNumber-0 records with their B value set to null.
/// </summary>
public void When_I_Scan_with_a_WhileMatchFilter_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords
        .Where(r => r.LineNumber == 0)
        .Select(r => r.WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var notZero = new ValueFilter(CompareFilter.CompareOp.NotEqual, new BinaryComparator(BitConverter.GetBytes(0)));
    var whileMatch = new WhileMatchFilter(notZero);
    var scanSettings = new Scanner { filter = whileMatch.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>RandomRowFilter</c> and compares the result with all expected records.
/// </summary>
public void When_I_Scan_with_a_RandomRowFilter_I_get_the_expected_results()
{
    var hbase = new HBaseClient(_credentials);

    // The chance value is deliberately large so that all records are returned.
    var randomRowFilter = new RandomRowFilter(2000.0F);
    var scanSettings = new Scanner { filter = randomRowFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(_allExpectedRecords);
}
/// <summary>
/// Stores the test data, scans the test table between two integer-encoded row keys in batches
/// of 10, and checks that every key in [startRow, endRow) was returned.
/// </summary>
public void TestSubsetScan()
{
    const int startRow = 15;
    const int endRow = 15 + 13;

    var hbase = new HBaseClient(_credentials);
    StoreTestData(hbase);

    // Subset range scan bounded by the two encoded keys.
    var rangeSettings = new Scanner
    {
        batch = 10,
        startRow = BitConverter.GetBytes(startRow),
        endRow = BitConverter.GetBytes(endRow)
    };
    ScannerInformation rangeScan = hbase.CreateScanner(_testTableName, rangeSettings);

    // Keys not yet seen; each scanned row removes its key.
    var unseenKeys = new HashSet<int>(Enumerable.Range(startRow, endRow - startRow));

    for (CellSet page = hbase.ScannerGetNext(rangeScan); page != null; page = hbase.ScannerGetNext(rangeScan))
    {
        foreach (CellSet.Row row in page.rows)
        {
            unseenKeys.Remove(BitConverter.ToInt32(row.key, 0));
        }
    }

    Assert.AreEqual(0, unseenKeys.Count, "The expected set wasn't empty! Items left {0}!", string.Join(",", unseenKeys));
}
/// <summary>
/// Scans with a <c>QualifierFilter</c> matching the LineNumber column name and compares the
/// result with all expected records with their A and B values set to null.
/// </summary>
public void When_I_Scan_with_a_QualifierFilter_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(r => r.WithAValue(null).WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var lineNumberQualifier = new BinaryComparator(Encoding.UTF8.GetBytes(LineNumberColumnName));
    var qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.Equal, lineNumberQualifier);
    var scanSettings = new Scanner { filter = qualifierFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>ColumnPaginationFilter(1, 1)</c> and compares the result with all expected
/// records with their A and B values set to null.
/// </summary>
public void When_I_Scan_with_a_ColumnPaginationFilter_I_get_the_expected_results()
{
    // With (1, 1) only the LineNumber column is grabbed.
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(r => r.WithAValue(null).WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var paginationFilter = new ColumnPaginationFilter(1, 1);
    var scanSettings = new Scanner { filter = paginationFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Collects the timestamps from an unfiltered scan, then scans again with a
/// <c>TimestampsFilter</c> built from them and compares the result with all expected records.
/// </summary>
public void When_I_Scan_with_a_TimestampsFilter_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords;
    var hbase = new HBaseClient(_credentials);

    // First pass: scan everything and retrieve the timestamps.
    ScannerInformation unfilteredScan = hbase.CreateScanner(_tableName, new Scanner());
    List<long> timestamps = RetrieveTimestamps(unfilteredScan).ToList();

    // Second pass: scan restricted to those timestamps.
    var timestampsFilter = new TimestampsFilter(timestamps);
    var filteredSettings = new Scanner { filter = timestampsFilter.ToEncodedString() };
    ScannerInformation filteredScan = hbase.CreateScanner(_tableName, filteredSettings);
    List<FilterTestRecord> actual = RetrieveResults(filteredScan).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>ValueFilter</c> using the regular expression <c>.*</c> and compares the
/// result with all expected records.
/// </summary>
public void When_I_Scan_with_a_ValueFilter_and_a_RegexStringComparator_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords;

    var hbase = new HBaseClient(_credentials);
    var matchAnything = new RegexStringComparator(".*");
    var valueFilter = new ValueFilter(CompareFilter.CompareOp.Equal, matchAnything);
    var scanSettings = new Scanner { filter = valueFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>KeyOnlyFilter</c> and compares the result with records that carry only the
/// row key (LineNumber 0, empty A and B).
/// </summary>
public void When_I_Scan_with_a_KeyOnlyFilter_I_get_the_expected_results()
{
    // A key-only filter does not return column values.
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(r => new FilterTestRecord(r.RowKey, 0, string.Empty, string.Empty))
        .ToList();

    var hbase = new HBaseClient(_credentials);
    var keyOnlyFilter = new KeyOnlyFilter();
    var scanSettings = new Scanner { filter = keyOnlyFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>PageFilter</c> of size 2 and asserts that at least two records come back.
/// </summary>
public void When_I_Scan_with_a_PageFilter_I_get_the_expected_results()
{
    var hbase = new HBaseClient(_credentials);
    var pageFilter = new PageFilter(2);
    var scanSettings = new Scanner { filter = pageFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    // Only a lower bound is asserted on the number of records.
    actual.Count.ShouldBeGreaterThanOrEqualTo(2);
}
/// <summary>
/// Scans with a <c>MustPassOne</c> filter list (logical OR) built from two LineNumber filters
/// and compares the returned records with those whose LineNumber is at most 2.
/// </summary>
public void When_I_Scan_with_a_FilterList_with_OR_logic_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords.Where(r => r.LineNumber <= 2).ToList();

    var hbase = new HBaseClient(_credentials);

    // LineNumber == 1
    Filter equalsOne = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName1),
        Encoding.UTF8.GetBytes(LineNumberColumnName),
        CompareFilter.CompareOp.Equal,
        BitConverter.GetBytes(1));

    // LineNumber <= 2
    Filter atMostTwo = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName1),
        Encoding.UTF8.GetBytes(LineNumberColumnName),
        CompareFilter.CompareOp.LessThanOrEqualTo,
        BitConverter.GetBytes(2));

    var eitherFilter = new FilterList(FilterList.Operator.MustPassOne, equalsOne, atMostTwo);
    var scanSettings = new Scanner { filter = eitherFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>DependentColumnFilter</c> on the LineNumber column compared for equality
/// with 1 and compares the result with the records whose LineNumber is 1.
/// </summary>
public void When_I_Scan_with_a_DependentColumnFilter_and_a_BinaryComparator_with_the_operator_equal_I_get_the_expected_results()
{
    List<FilterTestRecord> expected = _allExpectedRecords.Where(r => r.LineNumber == 1).ToList();

    var hbase = new HBaseClient(_credentials);
    var equalsOne = new BinaryComparator(BitConverter.GetBytes(1));
    var dependentFilter = new DependentColumnFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName1),
        Encoding.UTF8.GetBytes(LineNumberColumnName),
        false,
        CompareFilter.CompareOp.Equal,
        equalsOne);
    var scanSettings = new Scanner { filter = dependentFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scans with a <c>SingleColumnValueFilter</c> on column A using a <c>SubstringComparator</c>
/// and compares the result with the records whose A value contains the substring.
/// </summary>
public void When_I_Scan_with_a_SingleColumnValueFilter_and_a_SubstringComparator_with_the_operator_equal_I_get_the_expected_results()
{
    // The substring comes from the first record, so at least that record matches.
    string fragment = _allExpectedRecords.First().A.Substring(1, 2);

    List<FilterTestRecord> expected = _allExpectedRecords.Where(r => r.A.Contains(fragment)).ToList();

    var hbase = new HBaseClient(_credentials);
    var substringFilter = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes(ColumnFamilyName1),
        Encoding.UTF8.GetBytes(ColumnNameA),
        CompareFilter.CompareOp.Equal,
        new SubstringComparator(fragment));
    var scanSettings = new Scanner { filter = substringFilter.ToEncodedString() };

    ScannerInformation scannerInfo = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(scannerInfo).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Retrieve sessions within the given time range and write one line per session to the console.
/// </summary>
/// <param name="hbaseClient">The hbase client</param>
/// <param name="eventType">The type of event to look for; "END" rows are reported with their duration</param>
/// <param name="start">Lower bound of the time range</param>
/// <param name="end">Upper bound of the time range</param>
static void GetSessionsByTime(HBaseClient hbaseClient, string eventType, DateTime start, DateTime end)
{
    // Filter list in which every filter must pass.
    FilterList filters = new FilterList(FilterList.Operator.MustPassAll);

    // Filter to search for the event type value.
    SingleColumnValueFilter valueFilter = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes("cf"),
        Encoding.UTF8.GetBytes("event"),
        CompareFilter.CompareOp.Equal,
        Encoding.UTF8.GetBytes(eventType));
    filters.AddFilter(valueFilter);

    // Create scanner; maxVersions is set so previous versions of rows are returned too,
    // since START events may not be the currently returned value.
    var scannerSettings = new Scanner()
    {
        filter = filters.ToEncodedString(),
        maxVersions = 5,
        startTime = ToUnixTime(start),
        endTime = ToUnixTime(end)
    };
    var scanner = hbaseClient.CreateScanner(Properties.Settings.Default.HBaseTableName, scannerSettings);

    // Read cell sets until the scanner is exhausted.
    CellSet readSet;
    while ((readSet = hbaseClient.ScannerGetNext(scanner)) != null)
    {
        foreach (var row in readSet.rows)
        {
            // Time stored in the row's first "cf:time" cell.
            // First() throws InvalidOperationException when the row has no such cell.
            long eventTime = BitConverter.ToInt64(
                row.values.First(v => Encoding.UTF8.GetString(v.column) == "cf:time").data, 0);

            if (eventType == "END")
            {
                // Duration stored between END and START events, as TimeSpan ticks.
                var duration = new TimeSpan(BitConverter.ToInt64(
                    row.values.First(v => Encoding.UTF8.GetString(v.column) == "cf:duration").data, 0));

                // TotalMinutes is the whole session length; TimeSpan.Minutes is only the 0-59
                // component and under-reports any session of an hour or more.
                Console.WriteLine("Session {0} lasted {1} minutes, and ended at {2}",
                    Encoding.UTF8.GetString(row.key), (long)duration.TotalMinutes, FromUnixTime(eventTime));
            }
            else
            {
                // Other event types: write out when the event happened and the hbase timestamp
                // of the row's first cell.
                var timestamp = row.values.First().timestamp;
                Console.WriteLine("Session {0} started at {1}. Timestamp = {2}",
                    Encoding.UTF8.GetString(row.key), FromUnixTime(eventTime), timestamp);
            }
        }
    }
}
public void When_I_Scan_with_a_MultipleColumnPrefixFilter_I_get_the_expected_results()
{
    // Expected: every known record, with its line-number value replaced by 0
    // (presumably because the line-number column matches neither prefix - confirm against the fixture).
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(record => record.WithLineNumberValue(0))
        .ToList();

    var hbase = new HBaseClient(_credentials);

    // Restrict the scan to columns whose names start with either of the two prefixes.
    var columnPrefixes = new List<byte[]>
    {
        Encoding.UTF8.GetBytes(ColumnNameA),
        Encoding.UTF8.GetBytes(ColumnNameB)
    };
    var columnFilter = new MultipleColumnPrefixFilter(columnPrefixes);

    var scanSettings = new Scanner();
    scanSettings.filter = columnFilter.ToEncodedString();

    ScannerInformation openedScanner = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(openedScanner).ToList();

    actual.ShouldContainOnly(expected);
}
/// <summary>
/// Scan the HBase Table for a given start row key and end row key
/// You can swap out the scanner for a different type of filters like PrefixFilter
/// Read more about HBase Filters here: http://hbase.apache.org/book/client.filter.html
/// You can also refer to HBase SDK examples here on how each filter is used: https://github.com/hdinsight/hbase-sdk-for-net/tree/master/Microsoft.HBase.Client.Tests
/// </summary>
/// <param name="startrowkey">Row key the scan starts at (UTF-8 encoded before being sent)</param>
/// <param name="endrowkey">Row key the scan ends at (UTF-8 encoded before being sent)</param>
/// <returns>
/// A dictionary keyed by row key; each entry maps a column name to the sum of the
/// cell values seen for that column, each cell being decoded as a double.
/// </returns>
public Dictionary<string, Dictionary<string, double>> ScanHBase(string startrowkey, string endrowkey)
{
    global_hbasescan_count++;
    local_hbasescan_count++;

    //Only log every 1000th scan to keep the log volume down
    if (global_hbasescan_count % 1000 == 0)
    {
        Context.Logger.Info("ScanHBase: Global Scan Count = {0}, Table = {1}, StartRowKey = {2}, EndRowKey = {3}",
            global_hbasescan_count, this.HBaseTableName, startrowkey, endrowkey);
    }

    var scannersettings = new Scanner()
    {
        startRow = Encoding.UTF8.GetBytes(startrowkey),
        endRow = Encoding.UTF8.GetBytes(endrowkey)
    };

    var hbaseresultset = new Dictionary<string, Dictionary<string, double>>();
    try
    {
        var scannerInfo = HBaseClusterClient.CreateScanner(this.HBaseTableName, scannersettings);
        CellSet readset = null;

        //ScannerGetNext returns null once the scanner has no more data
        while ((readset = HBaseClusterClient.ScannerGetNext(scannerInfo)) != null)
        {
            foreach (var row in readset.rows)
            {
                var rowkey = Encoding.UTF8.GetString(row.key);

                //Fetch the totals for this row, creating them the first time the row key is seen.
                //A row without any cells still gets an (empty) entry.
                Dictionary<string, double> rowtotals;
                if (!hbaseresultset.TryGetValue(rowkey, out rowtotals))
                {
                    rowtotals = new Dictionary<string, double>();
                    hbaseresultset.Add(rowkey, rowtotals);
                }

                foreach (var column in row.values)
                {
                    var columnkey = Encoding.UTF8.GetString(column.column);
                    var value = BitConverter.ToDouble(column.data, 0);

                    //Add to the running total when the column was already seen, otherwise start one
                    double runningtotal;
                    if (rowtotals.TryGetValue(columnkey, out runningtotal))
                    {
                        rowtotals[columnkey] = runningtotal + value;
                    }
                    else
                    {
                        rowtotals.Add(columnkey, value);
                    }
                }
            }
        }
    }
    catch (Exception)
    {
        //Log which scan failed, then let the original exception propagate with its stack trace intact
        Context.Logger.Error("ScanHBase Failed: Table = {0}, StartRowKey = {1}, EndRowKey = {2}",
            this.HBaseTableName, startrowkey, endrowkey);
        throw;
    }

    return hbaseresultset;
}
/// <summary>
/// Scans the given HBase table between two row keys and sums the cell values per row and column.
/// The elapsed time of the scan is written to the log.
/// </summary>
/// <param name="hbasetablename">Name of the table to scan</param>
/// <param name="startkey">Row key the scan starts at (UTF-8 encoded before being sent)</param>
/// <param name="endkey">Row key the scan ends at (UTF-8 encoded before being sent)</param>
/// <returns>
/// A dictionary keyed by row key; each entry maps a column name to the sum of the
/// cell values seen for that column, each cell being decoded as a double.
/// </returns>
public Dictionary<string, Dictionary<string, double>> ScanHBase(string hbasetablename, string startkey, string endkey)
{
    var scannersettings = new Scanner()
    {
        startRow = Encoding.UTF8.GetBytes(startkey),
        endRow = Encoding.UTF8.GetBytes(endkey),
    };

    //Time everything from scanner creation to the last row processed
    var localstopwatch = new Stopwatch();
    localstopwatch.Start();

    var hbaseresultset = new Dictionary<string, Dictionary<string, double>>();
    var scannerInfo = HBaseClusterClient.CreateScanner(hbasetablename, scannersettings);
    CellSet readset = null;

    //ScannerGetNext returns null once the scanner has no more data
    while ((readset = HBaseClusterClient.ScannerGetNext(scannerInfo)) != null)
    {
        foreach (var row in readset.rows)
        {
            var rowkey = Encoding.UTF8.GetString(row.key);

            //Fetch the totals for this row, creating them the first time the row key is seen.
            //A row without any cells still gets an (empty) entry.
            Dictionary<string, double> rowtotals;
            if (!hbaseresultset.TryGetValue(rowkey, out rowtotals))
            {
                rowtotals = new Dictionary<string, double>();
                hbaseresultset.Add(rowkey, rowtotals);
            }

            foreach (var column in row.values)
            {
                var columnkey = Encoding.UTF8.GetString(column.column);
                var value = BitConverter.ToDouble(column.data, 0);

                //Add to the running total when the column was already seen, otherwise start one
                double runningtotal;
                if (rowtotals.TryGetValue(columnkey, out runningtotal))
                {
                    rowtotals[columnkey] = runningtotal + value;
                }
                else
                {
                    rowtotals.Add(columnkey, value);
                }
            }
        }
    }

    LOG.InfoFormat("ScanHBase: {0} - Time Taken = {1} ms", hbasetablename, localstopwatch.ElapsedMilliseconds);
    return hbaseresultset;
}
public void When_I_Scan_with_a_ColumnRangeFilter_I_get_the_expected_results()
{
    // Expected: every known record with its line-number value set to 0 and its B value cleared.
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(record => record.WithLineNumberValue(0).WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);

    // Column range from ColumnNameA (flag: true) up to ColumnNameB (flag: false);
    // presumably the flags mean "inclusive", which is why B is not expected back - confirm against the SDK.
    byte[] rangeStart = Encoding.UTF8.GetBytes(ColumnNameA);
    byte[] rangeEnd = Encoding.UTF8.GetBytes(ColumnNameB);
    var rangeFilter = new ColumnRangeFilter(rangeStart, true, rangeEnd, false);

    var scanSettings = new Scanner();
    scanSettings.filter = rangeFilter.ToEncodedString();

    ScannerInformation openedScanner = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(openedScanner).ToList();

    actual.ShouldContainOnly(expected);
}
public void When_I_Scan_with_a_ColumnCountGetFilter_I_get_the_expected_results()
{
    // The filter is built with a count of 2, so the B column is not expected back:
    // the expected records therefore carry a null B value.
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(record => record.WithBValue(null))
        .ToList();

    var hbase = new HBaseClient(_credentials);

    var countFilter = new ColumnCountGetFilter(2);
    var scanSettings = new Scanner();
    scanSettings.filter = countFilter.ToEncodedString();

    ScannerInformation openedScanner = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(openedScanner).ToList();

    actual.ShouldContainOnly(expected);
}
public void When_I_Scan_with_a_PrefixFilter_I_get_the_expected_results()
{
    const int prefixLength = 4;

    // Use the first four bytes of the first known record's row key as the prefix to search for.
    FilterTestRecord sample = _allExpectedRecords.First();
    byte[] sampleKeyBytes = Encoding.UTF8.GetBytes(sample.RowKey);
    var keyPrefix = new byte[prefixLength];
    Array.Copy(sampleKeyBytes, 0, keyPrefix, 0, prefixLength);

    // Expected: every known record whose UTF-8 row key starts with those same four bytes.
    List<FilterTestRecord> expected = _allExpectedRecords
        .Select(record => new { Record = record, KeyBytes = Encoding.UTF8.GetBytes(record.RowKey) })
        .Where(candidate => candidate.KeyBytes[0] == keyPrefix[0]
                            && candidate.KeyBytes[1] == keyPrefix[1]
                            && candidate.KeyBytes[2] == keyPrefix[2]
                            && candidate.KeyBytes[3] == keyPrefix[3])
        .Select(candidate => candidate.Record)
        .ToList();

    var hbase = new HBaseClient(_credentials);

    var rowPrefixFilter = new PrefixFilter(keyPrefix);
    var scanSettings = new Scanner();
    scanSettings.filter = rowPrefixFilter.ToEncodedString();

    ScannerInformation openedScanner = hbase.CreateScanner(_tableName, scanSettings);
    List<FilterTestRecord> actual = RetrieveResults(openedScanner).ToList();

    actual.ShouldContainOnly(expected);
}
public async Task TestScannerDeletion()
{
    var hbase = new HBaseClient(_credentials);

    // Open a scanner with no row bounds (full range) and a batch setting of 10.
    var settings = new Scanner();
    settings.batch = 10;
    ScannerInformation opened = hbase.CreateScanner(_testTableName, settings);

    // The test makes no assertions: it passes as long as deleting the scanner does not throw.
    await hbase.DeleteScannerAsync(opened.TableName, opened.ScannerId);
}
/// <summary>
/// Executes incoming tuples: scans HBase using the tuple's first field as both the start
/// and the end row, and emits one tuple (row key followed by the cell values) per row found.
/// </summary>
/// <param name="tuple">The first field is treated as rowkey and rest as column names</param>
public void Execute(SCPTuple tuple)
{
    try
    {
        //TODO: Change the HBase scanning criteria as per your needs
        //filter = new PrefixFilter(ToBytes(tuple.GetValue(0)))
        //Or, use a different field for end scan like: endRow = ToBytes(tuple.GetValue(1))
        var scanSettings = new Scanner();
        scanSettings.startRow = ToBytes(tuple.GetValue(0));
        scanSettings.endRow = ToBytes(tuple.GetValue(0));

        var openedScanner = HBaseClusterClient.CreateScanner(this.HBaseTableName, scanSettings);

        //Pull cell sets until the scanner returns null
        CellSet batch = HBaseClusterClient.ScannerGetNext(openedScanner);
        while (batch != null)
        {
            Context.Logger.Info("Rows found: {0}", batch.rows.Count);

            foreach (var row in batch.rows)
            {
                //TODO: The byte[] from HBase can be any type, make sure you type cast it correctly before emitting
                //The code below only handles strings
                string rowKeyText = Encoding.UTF8.GetString(row.key);
                List<string> cellTexts = row.values.Select(cell => Encoding.UTF8.GetString(cell.data)).ToList();

                //TODO: You can choose to emit the row key along with the values
                //Outgoing tuple: the row key first, then every cell value of the row
                var outgoing = new List<object>();
                outgoing.Add(rowKeyText);
                outgoing.AddRange(cellTexts);

                Context.Logger.Info("Rowkey: {0}, Values: {1}", rowKeyText, String.Join(", ", cellTexts));

                if (!enableAck)
                {
                    //No ack: emit without anchoring
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, outgoing);
                }
                else
                {
                    //Ack enabled: anchor the emitted values to the incoming tuple
                    this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, outgoing);
                }
            }

            batch = HBaseClusterClient.ScannerGetNext(openedScanner);
        }

        //Ack the tuple if enableAck is set to true in TopologyBuilder.
        //This is mandatory if the downstream bolt or spout expects an ack.
        if (enableAck)
        {
            this.context.Ack(tuple);
        }
    }
    catch (Exception ex)
    {
        Context.Logger.Error("An error occured while executing Tuple Id: {0}. Exception Details:\r\n{1}",
            tuple.GetTupleId(), ex.ToString());

        //Fail the tuple if enableAck is set to true in TopologyBuilder so that the tuple is replayed.
        if (enableAck)
        {
            this.context.Fail(tuple);
        }
    }
}
/// <summary>
/// Executes incoming session tuples. START events are re-emitted with a duration of 0;
/// END events are matched against the stored START row in HBase and re-emitted with the
/// elapsed time between the two (in ticks). Any other event value is ignored.
/// </summary>
/// <param name="tuple">Field 0 is the session id (string), field 1 the session event (string), field 2 the event time (long, converted with TypeHelper.FromUnixTime)</param>
public void Execute(SCPTuple tuple)
{
    //get the tuple info
    string sessionId = tuple.GetString(0);
    string sessionEvent = tuple.GetString(1);
    long sessionEventTime = tuple.GetLong(2);

    //If it's a start event, assume there's nothing to find so just re-emit
    //NOTE: If messages may arrive out of order, you would need to add logic to
    //query HBase to see if the end event has previously arrived,
    //calculate the duration, etc.
    if (sessionEvent == "START")
    {
        //Just re-emit the incoming data, plus 0 for duration,
        //since we don't know the END event yet.
        EmitSessionValues(tuple, new Values(tuple.GetValue(0), tuple.GetValue(1), tuple.GetValue(2), 0L));
        return;
    }

    if (sessionEvent != "END")
    {
        //Neither START nor END: nothing to do
        return;
    }

    //Use filters
    FilterList filters = new FilterList(FilterList.Operator.MustPassAll);

    //Filter on the row by sessionID
    RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.Equal, new BinaryComparator(TypeHelper.ToBytes(sessionId)));
    filters.AddFilter(rowFilter);

    //Filter on the event column for the START event
    SingleColumnValueFilter valueFilter = new SingleColumnValueFilter(
        Encoding.UTF8.GetBytes("cf"),
        Encoding.UTF8.GetBytes("event"),
        CompareFilter.CompareOp.Equal,
        Encoding.UTF8.GetBytes("START"));
    filters.AddFilter(valueFilter);

    //Create scanner settings using the filters
    var scannerSettings = new Scanner()
    {
        filter = filters.ToEncodedString()
    };

    //Get the scanner
    var scanner = HBaseClusterClient.CreateScanner(HBaseTableName, scannerSettings);

    //NOTE(review): as in the original, when no START row is found the tuple is neither
    //acked nor failed - confirm this is the intended policy for the topology.
    CellSet readSet = null;
    while ((readSet = HBaseClusterClient.ScannerGetNext(scanner)) != null)
    {
        //In theory we should only find one row
        foreach (var row in readSet.rows)
        {
            //Pull back just the event column.
            //NOTE(review): HBase's SingleColumnValueFilter lets rows through that lack the tested
            //column unless filterIfMissing is set, so the cell may be absent; skip instead of crashing.
            string[] eventCells = row.values
                .Where(v => Encoding.UTF8.GetString(v.column) == "cf:event")
                .Select(v => Encoding.UTF8.GetString(v.data))
                .ToArray();

            //Is it a START event as expected?
            if (eventCells.Length == 0 || eventCells[0] != "START")
            {
                //Since this is a simple example, do nothing.
                //In a real solution, you'd have to figure out what to do
                //when receiving an END before a START.
                continue;
            }

            //Get the start time; a row without a cf:time cell cannot yield a duration, so skip it
            long[] startTimes = row.values
                .Where(v => Encoding.UTF8.GetString(v.column) == "cf:time")
                .Select(v => BitConverter.ToInt64(v.data, 0))
                .ToArray();
            if (startTimes.Length == 0)
            {
                continue;
            }
            DateTime startTime = TypeHelper.FromUnixTime(startTimes[0]);

            //Get the difference between start and end
            DateTime endTime = TypeHelper.FromUnixTime(sessionEventTime);
            TimeSpan duration = endTime.Subtract(startTime);

            //Emit the tuple, with the duration between start/end.
            EmitSessionValues(tuple, new Values(sessionId, sessionEvent, sessionEventTime, duration.Ticks));
        }
    }
}

/// <summary>
/// Emits the given values on the default stream; when acking is enabled the emit is
/// anchored to the incoming tuple and the incoming tuple is acked afterwards.
/// </summary>
/// <param name="tuple">The incoming tuple being processed</param>
/// <param name="emitValues">The values to emit downstream</param>
private void EmitSessionValues(SCPTuple tuple, Values emitValues)
{
    if (enableAck)
    {
        //Emit the values, anchored to the incoming tuple
        this.context.Emit(Constants.DEFAULT_STREAM_ID, new List<SCPTuple>() { tuple }, emitValues);
        //Ack the incoming tuple
        this.context.Ack(tuple);
    }
    else
    {
        //No ack enabled? Fire and forget.
        this.context.Emit(Constants.DEFAULT_STREAM_ID, emitValues);
    }
}