예제 #1
0
        /// <summary>
        /// Creates a sketch of the size specified by the number of columns, the number of rows,
        /// and the number of bits per element.
        /// A sketch can be viewed as either having k tables (one for each hash) of n elements
        /// or as a two-dimensional array of k columns * n rows.
        /// </summary>
        /// <param name="numberOfColumns">The number of columns in the sketch, which is equivalent to the number of tables (one table per hash index).</param>
        /// <param name="numberOfRows">The number of rows, which is equivalent to the number of elements per table. Must be at least 1.</param>
        /// <param name="bitsPerElement">The size of each element, in bits (1 through 64), such that the maximum value
        /// that can be stored in a sketch element (MaxValue) is 2^(bitsPerElement)-1.</param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="numberOfRows"/> is less than 1, or <paramref name="bitsPerElement"/> is outside the range 1 through 64.
        /// </exception>
        public Sketch(long numberOfColumns, long numberOfRows, int bitsPerElement)
        {
            // Reject sizes for which the index math below is meaningless: zero rows would pass the
            // power-of-two test (0 & -1 == 0) and yield a mask of -1 and a negative bit count.
            if (numberOfRows < 1)
            {
                throw new System.ArgumentOutOfRangeException(nameof(numberOfRows), numberOfRows,
                    "A sketch must have at least one row.");
            }

            // MaxValue is held in a ulong, so an element cannot be wider than 64 bits.
            if (bitsPerElement < 1 || bitsPerElement > 64)
            {
                throw new System.ArgumentOutOfRangeException(nameof(bitsPerElement), bitsPerElement,
                    "The number of bits per element must be between 1 and 64.");
            }

            // Class members set explicitly by the constructor's parameters
            NumberOfColumns = numberOfColumns;
            NumberOfRows    = numberOfRows;
            BitsPerElement  = bitsPerElement;

            // The maximum value that can be stored in an element is 2^(BitsPerElement)-1.
            // The 64-bit case is handled explicitly: C# only uses the low six bits of the shift count
            // for a 64-bit operand, so (1UL << 64) evaluates to 1 and would produce a MaxValue of 0.
            MaxValue = bitsPerElement == 64
                ? ulong.MaxValue
                : (1UL << bitsPerElement) - 1;

            // Each column should have a row of elements (1 element for each row)
            Columns = new ArrayOfUnsignedNumericOfNonstandardSize[numberOfColumns];
            for (long i = 0; i < numberOfColumns; i++)
            {
                Columns[i] = ArrayOfUnsignedNumericOfNonstandardSize.Create(bitsPerElement, numberOfRows);
            }
            ColumnTotals = new ulong[numberOfColumns];

            // Test to see if the NumberOfRows is a power of 2.
            //
            // A positive number is a power of two if ANDing its value with the value one less than it yields zero.
            // For example, for binary 8 (1000), 8-1=7 (0111) and so (1000 & 0111 = 0000).
            // In all other cases, subtracting one will preserve the leftmost bit and so (x & (x-1)) will be greater than zero.
            IsTheNumberOfRowsAPowerOfTwo = ((numberOfRows - 1) & numberOfRows) == 0;

            // Calculate the number of bits that will be required to index within each column (the row index)
            // To do so, count how many times we need to right shift by 1 in order to consume all the bits in the NumberOfRows.
            int hashBitsPerRowIndex = 0;

            for (long shiftedNumberOfRows = numberOfRows; shiftedNumberOfRows > 0; shiftedNumberOfRows >>= 1)
            {
                hashBitsPerRowIndex++;
            }

            if (IsTheNumberOfRowsAPowerOfTwo)
            {
                // If the number of rows is a power of two, we overcounted the bits needed by one
                hashBitsPerRowIndex -= 1;
                // The fastest way to create indexes will be to mask using the number of rows - 1.
                // For example, if there are 1024 rows (10000000000b), the mask is (01111111111b).
                RowIndexMaskForPowersOfTwo = numberOfRows - 1;
            }
            else
            {
                // Use an extra 10 bits worth of the hash to reduce the potential bias for lower-indexes to 0.1%.
                hashBitsPerRowIndex += 10;
            }

            // The number of hash bytes we need for each index within a column (the row index) is:
            //    ceiling( hashBitsPerRowIndex / 8 )
            // This is equivalent to floor( (hashBitsPerRowIndex + 7) / 8 ), which integer division provides.
            HashBytesPerRowIndex = (hashBitsPerRowIndex + 7) / 8;
        }
예제 #2
0
        /// <summary>
        /// Builds a sketch whose dimensions are given by the column count, the row count,
        /// and the width in bits of each element.
        /// A sketch can be viewed as either having k tables (one for each hash) of n elements
        /// or as a two-dimensional array of k columns * n rows.
        /// </summary>
        /// <param name="numberOfColumns">The number of columns in the sketch, which is equivalent to the number of tables (one table per hash index).</param>
        /// <param name="numberOfRows">The number of rows, which is equivalent to the number of elements per table. Must be at least 1.</param>
        /// <param name="bitsPerElement">The size of each element, in bits (1 through 64), such that the maximum value
        /// that can be stored in a sketch element (MaxValue) is 2^(bitsPerElement)-1.</param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="numberOfRows"/> is less than 1, or <paramref name="bitsPerElement"/> is outside the range 1 through 64.
        /// </exception>
        public Sketch(long numberOfColumns, long numberOfRows, int bitsPerElement)
        {
            // A row count of zero or less would slip through the power-of-two test below
            // (0 & -1 == 0) and leave the row-index mask and bit count with nonsensical values.
            if (numberOfRows < 1)
            {
                throw new System.ArgumentOutOfRangeException(nameof(numberOfRows), numberOfRows,
                    "A sketch must have at least one row.");
            }

            // Elements are bounded by a ulong MaxValue, so widths beyond 64 bits cannot be represented.
            if (bitsPerElement < 1 || bitsPerElement > 64)
            {
                throw new System.ArgumentOutOfRangeException(nameof(bitsPerElement), bitsPerElement,
                    "The number of bits per element must be between 1 and 64.");
            }

            // Class members set explicitly by the constructor's parameters
            NumberOfColumns = numberOfColumns;
            NumberOfRows = numberOfRows;
            BitsPerElement = bitsPerElement;

            // The maximum value that can be stored in an element is 2^(BitsPerElement)-1.
            // A full 64-bit element needs its own case because a ulong shift only honors the low
            // six bits of the count: (1UL << 64) is 1, which would make MaxValue 0.
            if (bitsPerElement == 64)
            {
                MaxValue = ulong.MaxValue;
            }
            else
            {
                MaxValue = (1UL << bitsPerElement) - 1;
            }

            // Each column should have a row of elements (1 element for each row)
            Columns = new ArrayOfUnsignedNumericOfNonstandardSize[numberOfColumns];
            for (long i = 0; i < numberOfColumns; i++)
            {
                Columns[i] = ArrayOfUnsignedNumericOfNonstandardSize.Create(bitsPerElement, numberOfRows);
            }
            ColumnTotals = new ulong[numberOfColumns];

            // Test to see if the NumberOfRows is a power of 2.
            //
            // A positive number is a power of two if ANDing its value with the value one less than it yields zero.
            // For example, for binary 8 (1000), 8-1=7 (0111) and so (1000 & 0111 = 0000).
            // In all other cases, subtracting one will preserve the leftmost bit and so (x & (x-1)) will be greater than zero.
            IsTheNumberOfRowsAPowerOfTwo = ((numberOfRows - 1) & numberOfRows) == 0;

            // Calculate the number of bits that will be required to index within each column (the row index)
            // To do so, count how many times we need to right shift by 1 in order to consume all the bits in the NumberOfRows.
            int hashBitsPerRowIndex = 0;
            for (long shiftedNumberOfRows = numberOfRows; shiftedNumberOfRows > 0; shiftedNumberOfRows >>= 1)
            {
                hashBitsPerRowIndex++;
            }

            if (IsTheNumberOfRowsAPowerOfTwo)
            {
                // If the number of rows is a power of two, we overcounted the bits needed by one
                hashBitsPerRowIndex -= 1;
                // The fastest way to create indexes will be to mask using the number of rows - 1.
                // For example, if there are 1024 rows (10000000000b), the mask is (01111111111b).
                RowIndexMaskForPowersOfTwo = numberOfRows - 1;
            }
            else
            {
                // Use an extra 10 bits worth of the hash to reduce the potential bias for lower-indexes to 0.1%.
                hashBitsPerRowIndex += 10;
            }

            // The number of hash bytes we need for each index within a column (the row index) is:
            //    ceiling( hashBitsPerRowIndex / 8 )
            // This is equivalent to floor( (hashBitsPerRowIndex + 7) / 8 ), which integer division provides.
            HashBytesPerRowIndex = (hashBitsPerRowIndex + 7) / 8;
        }