Пример #1
0
        public XArray Remap(XArray source, ref int[] remapArray)
        {
            // See if we have the remapping cached already
            ArraySelector cachedMapping;

            if (_cachedRemappings.TryGetValue(source.Selector, out cachedMapping))
            {
                return(source.Reselect(cachedMapping));
            }

            // Convert the BitVector to indices if we haven't yet (deferred to first column wanting values)
            if (!_indicesFound)
            {
                _indicesFound = true;
                Allocator.AllocateToSize(ref _indices, _count);
                int countFound = _vector.Page(_indices, ref _nextVectorIndex, _count);
                if (countFound != _count)
                {
                    throw new InvalidOperationException($"RowRemapper found {countFound:n0} rows when {_count:n0} expected paging in Vector with {_vector.Count:n0} total matches up to index {_nextVectorIndex:n0}.");
                }
            }

            // Remap the outer selector
            XArray remapped = source.Select(ArraySelector.Map(_indices, _count), ref remapArray);

            // Cache the remapping
            _cachedRemappings[source.Selector] = remapped.Selector;

            return(remapped);
        }
Пример #2
0
        public XArray Values()
        {
            bool[] nulls = null;
            if (_nullItemIndex != -1)
            {
                nulls = new bool[this.Metadata.Length];
                nulls[_nullItemIndex] = true;
            }

            int[] indicesInOrder = new int[this.Count];
            for (int i = 0; i < this.Metadata.Length; ++i)
            {
                if (this.Metadata[i] != 0)
                {
                    indicesInOrder[_values[i]] = i;
                }
            }

            // Build an indexed XArray pointing to the keys in insertion order
            XArray keysInOrder = XArray.All(_keys, this.Count, nulls).Reselect(ArraySelector.Map(indicesInOrder, this.Count));

            // Convert it to a contiguous, 0-based XArray
            T[]    contiguousCopy   = null;
            bool[] contiguousIsNull = null;
            return(keysInOrder.ToContiguous <T>(ref contiguousCopy, ref contiguousIsNull));
        }
Пример #3
0
        public BitVector TryGetValues(XArray keys, out ArraySelector rightSideSelector)
        {
            Allocator.AllocateToSize(ref _returnedVector, keys.Count);
            Allocator.AllocateToSize(ref _returnedIndicesBuffer, keys.Count);

            _returnedVector.None();

            int countFound = 0;

            T[] keyArray = (T[])keys.Array;
            for (int i = 0; i < keys.Count; ++i)
            {
                int index = keys.Index(i);
                int foundAtIndex;
                if ((keys.HasNulls && keys.NullRows[index]) || !_dictionary.TryGetValue(keyArray[index], out foundAtIndex))
                {
                    _returnedVector.Clear(i);
                }
                else
                {
                    _returnedVector.Set(i);
                    _returnedIndicesBuffer[countFound++] = foundAtIndex;
                }
            }

            // Write out the indices of the joined rows for each value found
            rightSideSelector = ArraySelector.Map(_returnedIndicesBuffer, countFound);

            // Return the vector of which input rows matched
            return(_returnedVector);
        }
Пример #4
0
        // Return an XArray with two empty array elements before and after the valid portion and indices pointing to the valid portion
        public static XArray Pad(XArray values)
        {
            Array modifiedArray = null;

            bool[] nulls = null;
            Allocator.AllocateToSize(ref modifiedArray, values.Array.Length + 4, values.Array.GetType().GetElementType());

            if (values.HasNulls)
            {
                nulls = new bool[values.Array.Length + 4];
            }

            int[] indices = new int[modifiedArray.Length];

            // Copy values shifted over two (so, two default values at the beginning and two at the end)
            for (int i = 0; i < values.Array.Length; ++i)
            {
                indices[i] = i + 2;
                modifiedArray.SetValue(values.Array.GetValue(values.Index(i)), indices[i]);

                if (values.HasNulls)
                {
                    nulls.SetValue(values.NullRows.GetValue(values.Index(i)), indices[i]);
                }
            }

            // Return an XArray with the padded array with the indices and shorter real length
            int[] remapArray = null;
            return(XArray.All(modifiedArray, values.Count, nulls).Select(ArraySelector.Map(indices, values.Count), ref remapArray));
        }
Пример #5
0
        protected override void Expand()
        {
            // Build a selector of table values which were non-empty
            int[] indices = new int[_assignedIndices.Length];

            byte[] metadata = this.Metadata;
            int    count    = 0;

            for (int i = 0; i < indices.Length; ++i)
            {
                if (metadata[i] != 0)
                {
                    indices[count++] = i;
                }
            }

            // Save the old keys, ranks, and row indices in arrays
            XArray[] keyArrays = new XArray[_keys.Length];
            for (int i = 0; i < _keys.Length; ++i)
            {
                keyArrays[i] = XArray.All(_keys[i].Values).Reselect(ArraySelector.Map(indices, count));
            }

            XArray indicesArray = XArray.All(_assignedIndices).Reselect(ArraySelector.Map(indices, count));

            // Expand the table
            Reset(HashCore.ResizeToSize(_assignedIndices.Length));

            // Add items to the enlarged table
            FindOrAdd(keyArrays, indicesArray);
        }
Пример #6
0
        public XArray Remap(XArray values, ArraySelector selector)
        {
            // Read row indices and convert to int[]
            XArray indexByteArray = _rowIndexReader.Read(selector);
            XArray indexIntArray  = _rowIndexToIntConverter(indexByteArray);

            // Return the selected values
            return(values.Reselect(ArraySelector.Map((int[])indexIntArray.Array, indexIntArray.Count)));
        }
Пример #7
0
        private void PostSortAndFilter(XArray groups, XArray counts, int totalRowCount, bool wasAllRows)
        {
            int[] finalIndices = new int[groups.Count];
            int[] finalCounts  = new int[groups.Count];

            int groupCount = 0;

            // Filter to counts over the minimum percentage threshold
            int[] countsArray = (int[])counts.Array;
            if (countsArray != null)
            {
                int threshold = (int)(totalRowCount * MinimumPercentageToReport);
                for (int i = 0; i < groups.Count; ++i)
                {
                    int count = countsArray[counts.Index(i)];
                    if (count >= threshold)
                    {
                        finalIndices[groupCount] = i;
                        finalCounts[groupCount]  = count;
                        groupCount++;
                    }
                }
            }

            // Sort the values by count descending
            Array.Sort <int, int>(finalCounts, finalIndices, 0, groupCount, new ReverseComparer());

            // Limit to the top N if needed
            if (groupCount > MaximumCountToReturn)
            {
                groupCount = MaximumCountToReturn;
            }

            // Set the distinct count (now that it's known)
            _distinctCount = groupCount;

            // Set the output values
            int[]  groupsRemap  = null;
            XArray finalCountsX = XArray.All(finalCounts, groupCount);

            _columns[0].SetValues(groups.Select(ArraySelector.Map(finalIndices, groupCount), ref groupsRemap));
            _columns[1].SetValues(finalCountsX);

            if (wasAllRows)
            {
                _columns[2].SetValues(PercentageAggregator.ToPercentageStrings(finalCountsX, totalRowCount, PercentageAggregator.TwoSigFigs));
            }
            else
            {
                _columns[2].SetValues(PercentageAggregator.ToPercentageStrings(finalCountsX, totalRowCount, PercentageAggregator.WholePercentage));
            }
        }
Пример #8
0
        private void Convert()
        {
            // Close the row index writer
            _rowIndexWriter.Dispose();
            _rowIndexWriter = null;

            // If we wrote any rows we need to convert...
            if (_rowCountWritten > 0)
            {
                // Get the set of unique values and get rid of the value dictionary
                XArray values = _dictionary.Values();

                // Convert the indices previously written into raw values
                Func <XArray, XArray> converter = TypeConverterFactory.GetConverter(typeof(byte), typeof(int));
                using (IColumnReader rowIndexReader = new PrimitiveArrayReader <byte>(_streamProvider.OpenRead(Path.Combine(_columnPath, RowIndexFileName))))
                {
                    int           rowCount = rowIndexReader.Count;
                    ArraySelector page     = ArraySelector.All(0).NextPage(rowCount, 10240);
                    while (page.Count > 0)
                    {
                        // Read an XArray of indices and convert to int[]
                        XArray rowIndices = converter(rowIndexReader.Read(page));

                        // Write the corresponding values
                        // Reselect is safe because 'values' are converted to a contiguous array
                        _valueWriter.Append(values.Reselect(ArraySelector.Map((int[])rowIndices.Array, rowIndices.Count)));

                        page = page.NextPage(rowCount, 10240);
                    }
                }
            }

            // Remove the Dictionary (so future rows are streamed out as-is)
            _dictionary = null;

            // Delete the row index file
            _streamProvider.Delete(Path.Combine(_columnPath, RowIndexFileName));
        }
Пример #9
0
        public XArray[] DistinctKeys()
        {
            // Build a map from each assigned index to the hash bucket containing it
            int[] indicesInOrder = new int[Count];

            byte[] metadata = this.Metadata;
            for (int i = 0; i < metadata.Length; ++i)
            {
                if (metadata[i] != 0)
                {
                    indicesInOrder[_assignedIndices[i]] = i;
                }
            }

            // Get the array for each key and reselect into assigned order
            XArray[] keyArrays = new XArray[_keys.Length];
            for (int i = 0; i < _keys.Length; ++i)
            {
                keyArrays[i] = XArray.All(_keys[i].Values).Reselect(ArraySelector.Map(indicesInOrder, Count));
            }

            return(keyArrays);
        }
Пример #10
0
        /// <summary>
        ///  Choose a random sample of approximately 1/8 of the rows in an ArraySelector.
        ///  Used to quickly sample rows when sampling is appropriate.
        /// </summary>
        /// <param name="selector">ArraySelector to sample</param>
        /// <param name="r">Random instance to use</param>
        /// <param name="remapArray">Array to put remapped indices in</param>
        /// <returns>ArraySelector including approximately 1/8 of the input selector rows chosen randomly</returns>
        public static ArraySelector Eighth(ArraySelector selector, Random r, ref int[] remapArray)
        {
            if (selector.Count == 0)
            {
                return(selector);
            }
            if (selector.IsSingleValue)
            {
                return(ArraySelector.Single((selector.Count / 8) + 1));
            }

            // Allocate an indices array for the sampled subset
            Allocator.AllocateToSize(ref remapArray, selector.Count);
            int sampleCount = 0;

            // Choose the rows to sample
            if (selector.Indices == null)
            {
                // If no indices, loop from start index to end index
                int i = selector.StartIndexInclusive;
                while (i < selector.EndIndexExclusive)
                {
                    // Generate one random integer
                    int random = r.Next();

                    // Choose whether the next 10 rows (30 bits) are included
                    int end = Math.Min(i + 10, selector.EndIndexExclusive);
                    for (; i < end; ++i)
                    {
                        if ((random & 7) == 0)
                        {
                            remapArray[sampleCount++] = i;
                        }
                        random = random >> 3;
                    }
                }
            }
            else
            {
                // If indices, look up the index of each row
                int i = selector.StartIndexInclusive;
                while (i < selector.EndIndexExclusive)
                {
                    // Generate one random integer
                    int random = r.Next();

                    // Choose whether the next 10 rows (30 bits) are included
                    int end = Math.Min(i + 10, selector.EndIndexExclusive);
                    for (; i < end; ++i)
                    {
                        if ((random & 7) == 0)
                        {
                            remapArray[sampleCount++] = selector.Indices[i];
                        }
                        random = random >> 3;
                    }
                }
            }

            return(ArraySelector.Map(remapArray, sampleCount));
        }