/// <summary>
/// A sketch can be viewed as either having k tables (one for each hash) of n elements
/// or as a two-dimensional array of k columns * n rows.
/// This constructor creates a Sketch of size specified by k, n, and the number of bits per element.
/// </summary>
/// <param name="numberOfColumns">The number of columns in the sketch, which is equivalent to the number of tables (one table per hash index).</param>
/// <param name="numberOfRows">The number of rows, which is equivalent to the number of elements per table. Must be at least 1.</param>
/// <param name="bitsPerElement">The size of each element, in bits, such that the maximum value that can be stored (MaxValue)
/// in the sketch elements is 2^(bitsPerElement)-1. Must be between 1 and 64 (inclusive).</param>
/// <exception cref="System.ArgumentOutOfRangeException">Thrown when <paramref name="numberOfRows"/> is less than 1
/// or <paramref name="bitsPerElement"/> is outside the range 1..64.</exception>
public Sketch(long numberOfColumns, long numberOfRows, int bitsPerElement)
{
    // Reject sizes for which the index calculations below are meaningless.
    // (Zero rows would otherwise pass the power-of-two test and yield a negative bit count and an all-ones mask.)
    if (numberOfRows < 1)
    {
        throw new System.ArgumentOutOfRangeException("numberOfRows", numberOfRows, "The number of rows must be at least 1.");
    }
    if (bitsPerElement < 1 || bitsPerElement > 64)
    {
        throw new System.ArgumentOutOfRangeException("bitsPerElement", bitsPerElement, "The number of bits per element must be between 1 and 64.");
    }

    // Class members set explicitly by the constructor's parameters
    NumberOfColumns = numberOfColumns;
    NumberOfRows = numberOfRows;
    BitsPerElement = bitsPerElement;

    // The maximum value that can be stored in an element is 2^(BitsPerElement)-1.
    // C# only uses the low six bits of the shift count for a 64-bit operand, so (1UL << 64) is 1, not 0;
    // the 64-bit case must therefore be handled explicitly.
    MaxValue = bitsPerElement == 64
        ? ulong.MaxValue
        : ((((ulong)1) << bitsPerElement) - 1);

    // Each column should have a row of elements (1 element for each row)
    Columns = new ArrayOfUnsignedNumericOfNonstandardSize[numberOfColumns];
    for (long i = 0; i < numberOfColumns; i++)
    {
        Columns[i] = ArrayOfUnsignedNumericOfNonstandardSize.Create(bitsPerElement, numberOfRows);
    }
    ColumnTotals = new ulong[numberOfColumns];

    // Test to see if the NumberOfRows is a power of 2.
    //
    // A positive number is a power of two if ANDing its value with the value one less than it yields zero.
    // For example, for binary 8 (1000), 8-1=7 (0111) and so (1000 & 0111 = 0000).
    // In all other cases, subtracting one will preserve the leftmost bit and so (x & (x-1)) will be greater than zero.
    IsTheNumberOfRowsAPowerOfTwo = ((numberOfRows - 1) & numberOfRows) == 0;

    // Calculate the number of bits that will be required to index within each column (the row index).
    // To do so, count how many times we need to right shift by 1 in order to consume all the bits in the NumberOfRows.
    int hashBitsPerRowIndex = 0;
    for (long shiftedNumberOfRows = numberOfRows; shiftedNumberOfRows > 0; shiftedNumberOfRows = shiftedNumberOfRows >> 1)
    {
        hashBitsPerRowIndex++;
    }

    if (IsTheNumberOfRowsAPowerOfTwo)
    {
        // If the number of rows is a power of two, we overcounted the bits needed by one
        hashBitsPerRowIndex -= 1;
        // The fastest way to create indexes will be to mask using the number of rows - 1.
        // For example, if there are 1024 rows (00010000000000b), the mask is (00001111111111b).
        RowIndexMaskForPowersOfTwo = numberOfRows - 1;
    }
    else
    {
        // Use an extra 10 bits worth of the hash to reduce the potential bias for lower-indexes to 0.1%.
        hashBitsPerRowIndex += 10;
    }

    // The number of hash bytes we need for each index within a column (the row index) is:
    //    ceiling( HashBitsPerRowIndex / 8 )
    // This is equivalent to floor( (HashBitsPerRowIndex + 7) / 8 ).
    HashBytesPerRowIndex = (hashBitsPerRowIndex + 7) / 8; // Ceiling function of / 8
}
/// <summary>
/// A sketch can be viewed as either having k tables (one for each hash) of n elements
/// or as a two-dimensional array of k columns * n rows.
/// This constructor creates a Sketch of size specified by k, n, and the number of bits per element.
/// </summary>
/// <param name="numberOfColumns">The number of columns in the sketch, which is equivalent to the number of tables (one table per hash index).</param>
/// <param name="numberOfRows">The number of rows, which is equivalent to the number of elements per table. Must be at least 1.</param>
/// <param name="bitsPerElement">The size of each element, in bits, such that the maximum value that can be stored (MaxValue)
/// in the sketch elements is 2^(bitsPerElement)-1. Must be between 1 and 64 (inclusive).</param>
/// <exception cref="System.ArgumentOutOfRangeException">Thrown when <paramref name="numberOfRows"/> is less than 1
/// or <paramref name="bitsPerElement"/> is outside the range 1..64.</exception>
public Sketch(long numberOfColumns, long numberOfRows, int bitsPerElement)
{
    // Validate up front: with zero rows the power-of-two test below would succeed,
    // producing a bit count of -1 and a row mask of -1.
    if (numberOfRows < 1)
    {
        throw new System.ArgumentOutOfRangeException("numberOfRows", numberOfRows, "The number of rows must be at least 1.");
    }
    if (bitsPerElement < 1 || bitsPerElement > 64)
    {
        throw new System.ArgumentOutOfRangeException("bitsPerElement", bitsPerElement, "The number of bits per element must be between 1 and 64.");
    }

    // Class members set explicitly by the constructor's parameters
    NumberOfColumns = numberOfColumns;
    NumberOfRows = numberOfRows;
    BitsPerElement = bitsPerElement;

    // The maximum value that can be stored in an element is 2^(BitsPerElement)-1.
    // A 64-bit shift count is masked to its low six bits in C#, so shifting by 64 is a shift by 0;
    // special-case 64 bits so MaxValue is all ones rather than zero.
    if (bitsPerElement == 64)
    {
        MaxValue = ulong.MaxValue;
    }
    else
    {
        MaxValue = ((((ulong)1) << bitsPerElement) - 1);
    }

    // Each column should have a row of elements (1 element for each row)
    Columns = new ArrayOfUnsignedNumericOfNonstandardSize[numberOfColumns];
    for (long i = 0; i < numberOfColumns; i++)
    {
        Columns[i] = ArrayOfUnsignedNumericOfNonstandardSize.Create(bitsPerElement, numberOfRows);
    }
    ColumnTotals = new ulong[numberOfColumns];

    // Test to see if the NumberOfRows is a power of 2.
    //
    // A positive number is a power of two if ANDing its value with the value one less than it yields zero.
    // For example, for binary 8 (1000), 8-1=7 (0111) and so (1000 & 0111 = 0000).
    // In all other cases, subtracting one will preserve the leftmost bit and so (x & (x-1)) will be greater than zero.
    IsTheNumberOfRowsAPowerOfTwo = ((numberOfRows - 1) & numberOfRows) == 0;

    // Calculate the number of bits that will be required to index within each column (the row index).
    // To do so, count how many times we need to right shift by 1 in order to consume all the bits in the NumberOfRows.
    int hashBitsPerRowIndex = 0;
    for (long shiftedNumberOfRows = numberOfRows; shiftedNumberOfRows > 0; shiftedNumberOfRows = shiftedNumberOfRows >> 1)
    {
        hashBitsPerRowIndex++;
    }

    if (IsTheNumberOfRowsAPowerOfTwo)
    {
        // If the number of rows is a power of two, we overcounted the bits needed by one
        hashBitsPerRowIndex -= 1;
        // The fastest way to create indexes will be to mask using the number of rows - 1.
        // For example, if there are 1024 rows (00010000000000b), the mask is (00001111111111b).
        RowIndexMaskForPowersOfTwo = numberOfRows - 1;
    }
    else
    {
        // Use an extra 10 bits worth of the hash to reduce the potential bias for lower-indexes to 0.1%.
        hashBitsPerRowIndex += 10;
    }

    // The number of hash bytes we need for each index within a column (the row index) is:
    //    ceiling( HashBitsPerRowIndex / 8 )
    // This is equivalent to floor( (HashBitsPerRowIndex + 7) / 8 ).
    HashBytesPerRowIndex = (hashBitsPerRowIndex + 7) / 8; // Ceiling function of / 8
}