/// <summary>
///  Remap an XArray through this remapper's row indices, reusing a cached
///  selector when the same source selector has been remapped before.
/// </summary>
/// <param name="source">XArray whose selector should be remapped</param>
/// <param name="remapArray">Buffer passed through to XArray.Select for remapped indices</param>
/// <returns>XArray over the same source, reselected to the remapped rows</returns>
/// <exception cref="InvalidOperationException">
///  Paging the vector produced a different number of rows than _count.
/// </exception>
public XArray Remap(XArray source, ref int[] remapArray)
{
    // Fast path: this source selector was already remapped once
    ArraySelector knownSelector;
    if (_cachedRemappings.TryGetValue(source.Selector, out knownSelector))
    {
        return source.Reselect(knownSelector);
    }

    // Page the vector into indices lazily, the first time a column needs values
    if (_indicesFound == false)
    {
        _indicesFound = true;
        Allocator.AllocateToSize(ref _indices, _count);

        int countFound = _vector.Page(_indices, ref _nextVectorIndex, _count);
        if (countFound != _count)
        {
            throw new InvalidOperationException($"RowRemapper found {countFound:n0} rows when {_count:n0} expected paging in Vector with {_vector.Count:n0} total matches up to index {_nextVectorIndex:n0}.");
        }
    }

    // Apply the row indices on top of the source's existing selector
    ArraySelector rowMapping = ArraySelector.Map(_indices, _count);
    XArray result = source.Select(rowMapping, ref remapArray);

    // Remember the resulting selector so later calls with this source selector can skip the work
    _cachedRemappings[source.Selector] = result.Selector;
    return result;
}
/// <summary>
///  Return the keys as a contiguous, zero-based XArray ordered by the
///  value stored for each key in _values (insertion order, per the
///  original comment).
/// </summary>
/// <returns>Contiguous XArray of the keys, with a null flag for the null item if one exists</returns>
public XArray Values()
{
    int slotCount = this.Metadata.Length;
    int itemCount = this.Count;

    // Only build a null array when a null item slot has been recorded
    bool[] nullFlags = null;
    if (_nullItemIndex != -1)
    {
        nullFlags = new bool[slotCount];
        nullFlags[_nullItemIndex] = true;
    }

    // For each slot with non-zero metadata, record the slot at the position given by its value
    int[] slotForPosition = new int[itemCount];
    for (int slot = 0; slot < slotCount; ++slot)
    {
        if (this.Metadata[slot] == 0)
        {
            continue;
        }

        slotForPosition[_values[slot]] = slot;
    }

    // Indexed view over the key slots, in position order
    ArraySelector inOrder = ArraySelector.Map(slotForPosition, itemCount);
    XArray orderedKeys = XArray.All(_keys, itemCount, nullFlags).Reselect(inOrder);

    // Materialize the indexed view as a contiguous copy
    T[] copiedKeys = null;
    bool[] copiedNulls = null;
    return orderedKeys.ToContiguous<T>(ref copiedKeys, ref copiedNulls);
}
/// <summary>
///  Look up each key in the dictionary, reporting which input rows matched
///  and the dictionary values found for the matching rows.
/// </summary>
/// <param name="keys">Keys to look up; rows flagged null never match</param>
/// <param name="rightSideSelector">Selector over the values found, one per matching row, in input order</param>
/// <returns>Vector with a bit set for each input row which was found (a reused instance field)</returns>
public BitVector TryGetValues(XArray keys, out ArraySelector rightSideSelector)
{
    int keyCount = keys.Count;

    // Size the reusable output buffers for this batch and start with no rows matched
    Allocator.AllocateToSize(ref _returnedVector, keyCount);
    Allocator.AllocateToSize(ref _returnedIndicesBuffer, keyCount);
    _returnedVector.None();

    T[] keyArray = (T[])keys.Array;
    int matchCount = 0;

    for (int row = 0; row < keyCount; ++row)
    {
        int keyIndex = keys.Index(row);
        bool keyIsNull = keys.HasNulls && keys.NullRows[keyIndex];

        int matchedValue;
        if (!keyIsNull && _dictionary.TryGetValue(keyArray[keyIndex], out matchedValue))
        {
            _returnedVector.Set(row);
            _returnedIndicesBuffer[matchCount++] = matchedValue;
        }
        else
        {
            _returnedVector.Clear(row);
        }
    }

    // Expose the values found for the matching rows
    rightSideSelector = ArraySelector.Map(_returnedIndicesBuffer, matchCount);

    // Report which input rows matched
    return _returnedVector;
}
/// <summary>
///  Return an XArray with two empty array elements before and after the valid
///  portion, and indices pointing to the valid portion.
/// </summary>
/// <remarks>
///  Only the rows exposed by the input selector (values.Count of them) are
///  copied. Each row is read through values.Index, so the loop must not run
///  past values.Count even when the backing array is longer.
/// </remarks>
/// <param name="values">XArray to copy into a padded, indexed XArray</param>
/// <returns>Indexed XArray over a padded copy exposing the same values.Count rows</returns>
public static XArray Pad(XArray values)
{
    int rowCount = values.Count;

    // Same size as before whenever Count <= Array.Length; never smaller than Count + 4,
    // so writing row i at position i + 2 always stays in range.
    int paddedLength = Math.Max(values.Array.Length, rowCount) + 4;

    Array modifiedArray = null;
    Allocator.AllocateToSize(ref modifiedArray, paddedLength, values.Array.GetType().GetElementType());

    bool[] nulls = values.HasNulls ? new bool[paddedLength] : null;
    int[] indices = new int[modifiedArray.Length];

    // Copy values shifted over two (so, default values at the beginning and at the end)
    for (int i = 0; i < rowCount; ++i)
    {
        int sourceIndex = values.Index(i);
        int targetIndex = i + 2;

        indices[i] = targetIndex;
        modifiedArray.SetValue(values.Array.GetValue(sourceIndex), targetIndex);

        if (nulls != null)
        {
            nulls[targetIndex] = values.NullRows[sourceIndex];
        }
    }

    // Return an XArray over the padded array, with the indices and the shorter real length
    int[] remapArray = null;
    return XArray.All(modifiedArray, rowCount, nulls).Select(ArraySelector.Map(indices, rowCount), ref remapArray);
}
/// <summary>
///  Grow the table: capture the current entries, reset to the size chosen
///  by HashCore.ResizeToSize, and add the captured entries back.
/// </summary>
protected override void Expand()
{
    byte[] metadata = this.Metadata;

    // Collect the slots whose metadata is non-zero (the non-empty table values)
    int[] occupiedSlots = new int[_assignedIndices.Length];
    int occupiedCount = 0;
    for (int slot = 0; slot < occupiedSlots.Length; ++slot)
    {
        if (metadata[slot] == 0)
        {
            continue;
        }

        occupiedSlots[occupiedCount++] = slot;
    }

    // Capture views over the current key and assigned-index arrays before Reset
    int keyColumnCount = _keys.Length;
    XArray[] previousKeys = new XArray[keyColumnCount];
    for (int column = 0; column < keyColumnCount; ++column)
    {
        XArray allKeyValues = XArray.All(_keys[column].Values);
        previousKeys[column] = allKeyValues.Reselect(ArraySelector.Map(occupiedSlots, occupiedCount));
    }

    XArray allAssignedIndices = XArray.All(_assignedIndices);
    XArray previousIndices = allAssignedIndices.Reselect(ArraySelector.Map(occupiedSlots, occupiedCount));

    // Reset the table to the new size
    int newSize = HashCore.ResizeToSize(_assignedIndices.Length);
    Reset(newSize);

    // Add the captured entries to the enlarged table
    FindOrAdd(previousKeys, previousIndices);
}
/// <summary>
///  Select from 'values' the items referenced by the row indices read for
///  the given selector.
/// </summary>
/// <param name="values">XArray to select from</param>
/// <param name="selector">Selector passed to the row index reader</param>
/// <returns>'values' reselected by the row indices read</returns>
public XArray Remap(XArray values, ArraySelector selector)
{
    // Read the stored row indices and run them through the converter to get int indices
    XArray rowIndices = _rowIndexToIntConverter(_rowIndexReader.Read(selector));

    // Use the converted array directly as the index map
    int[] indexMap = (int[])rowIndices.Array;
    ArraySelector mapping = ArraySelector.Map(indexMap, rowIndices.Count);

    return values.Reselect(mapping);
}
/// <summary>
///  Filter groups to those at or above the minimum count threshold, sort
///  them using ReverseComparer on the counts, cap the result at
///  MaximumCountToReturn, and publish group, count and percentage columns.
/// </summary>
/// <param name="groups">Distinct group values</param>
/// <param name="counts">Count per group; read as int[] through counts.Index</param>
/// <param name="totalRowCount">Row total used for the threshold and for percentages</param>
/// <param name="wasAllRows">Chooses TwoSigFigs (true) or WholePercentage (false) formatting</param>
private void PostSortAndFilter(XArray groups, XArray counts, int totalRowCount, bool wasAllRows)
{
    int candidateCount = groups.Count;
    int[] keptGroupIndices = new int[candidateCount];
    int[] keptCounts = new int[candidateCount];
    int keptTotal = 0;

    // Keep only the groups whose count reaches the minimum percentage threshold
    int[] rawCounts = (int[])counts.Array;
    if (rawCounts != null)
    {
        int minimumCount = (int)(totalRowCount * MinimumPercentageToReport);

        for (int groupIndex = 0; groupIndex < candidateCount; ++groupIndex)
        {
            int countForGroup = rawCounts[counts.Index(groupIndex)];
            if (countForGroup < minimumCount)
            {
                continue;
            }

            keptGroupIndices[keptTotal] = groupIndex;
            keptCounts[keptTotal] = countForGroup;
            keptTotal++;
        }
    }

    // Order by count (descending, per the original comment), carrying the group indices along
    Array.Sort<int, int>(keptCounts, keptGroupIndices, 0, keptTotal, new ReverseComparer());

    // Trim to the top N when more groups than allowed survived
    if (keptTotal > MaximumCountToReturn)
    {
        keptTotal = MaximumCountToReturn;
    }

    // The distinct count is only known at this point
    _distinctCount = keptTotal;

    // Publish the output columns
    int[] groupsRemap = null;
    XArray countsColumn = XArray.All(keptCounts, keptTotal);
    ArraySelector keptSelector = ArraySelector.Map(keptGroupIndices, keptTotal);

    _columns[0].SetValues(groups.Select(keptSelector, ref groupsRemap));
    _columns[1].SetValues(countsColumn);

    if (wasAllRows)
    {
        _columns[2].SetValues(PercentageAggregator.ToPercentageStrings(countsColumn, totalRowCount, PercentageAggregator.TwoSigFigs));
    }
    else
    {
        _columns[2].SetValues(PercentageAggregator.ToPercentageStrings(countsColumn, totalRowCount, PercentageAggregator.WholePercentage));
    }
}
/// <summary>
///  Switch from writing dictionary row indices to writing values: replay
///  the row indices written so far as values, drop the dictionary, and
///  delete the row index file.
/// </summary>
private void Convert()
{
    const int pageSize = 10240;

    // Finish the row index file
    _rowIndexWriter.Dispose();
    _rowIndexWriter = null;

    // Nothing to replay unless rows were written
    if (_rowCountWritten > 0)
    {
        // Get the distinct values held by the dictionary
        XArray distinctValues = _dictionary.Values();

        // Converter from the byte indices on disk to int indices
        Func<XArray, XArray> toIntIndices = TypeConverterFactory.GetConverter(typeof(byte), typeof(int));

        using (IColumnReader indexReader = new PrimitiveArrayReader<byte>(_streamProvider.OpenRead(Path.Combine(_columnPath, RowIndexFileName))))
        {
            int totalRows = indexReader.Count;

            for (ArraySelector page = ArraySelector.All(0).NextPage(totalRows, pageSize);
                 page.Count > 0;
                 page = page.NextPage(totalRows, pageSize))
            {
                // Read one page of row indices and convert them to int[]
                XArray pageIndices = toIntIndices(indexReader.Read(page));

                // Write the value for each index.
                // Reselect is safe because 'values' are converted to a contiguous array
                ArraySelector valueMapping = ArraySelector.Map((int[])pageIndices.Array, pageIndices.Count);
                _valueWriter.Append(distinctValues.Reselect(valueMapping));
            }
        }
    }

    // Drop the dictionary (so future rows are streamed out as-is)
    _dictionary = null;

    // Delete the row index file
    _streamProvider.Delete(Path.Combine(_columnPath, RowIndexFileName));
}
/// <summary>
///  Return the distinct keys, one XArray per key column, ordered by the
///  index assigned to each key.
/// </summary>
/// <returns>One indexed XArray per key column, exposing Count rows in assigned-index order</returns>
public XArray[] DistinctKeys()
{
    // For every assigned index, record the bucket which holds it
    int[] bucketForAssignedIndex = new int[Count];
    byte[] metadata = this.Metadata;

    for (int bucket = 0; bucket < metadata.Length; ++bucket)
    {
        // Buckets with zero metadata are skipped
        if (metadata[bucket] == 0)
        {
            continue;
        }

        bucketForAssignedIndex[_assignedIndices[bucket]] = bucket;
    }

    // View each key column's values through the bucket map
    int keyColumnCount = _keys.Length;
    XArray[] orderedKeys = new XArray[keyColumnCount];

    for (int column = 0; column < keyColumnCount; ++column)
    {
        XArray allColumnValues = XArray.All(_keys[column].Values);
        orderedKeys[column] = allColumnValues.Reselect(ArraySelector.Map(bucketForAssignedIndex, Count));
    }

    return orderedKeys;
}
/// <summary>
///  Pick a random sample of roughly one in eight rows from an ArraySelector.
///  Intended for quick sampling where a sample is acceptable.
/// </summary>
/// <remarks>
///  Each call to r.Next() decides up to ten rows: three bits are consumed per
///  row, and a row is kept when its three bits are all zero.
/// </remarks>
/// <param name="selector">ArraySelector to sample</param>
/// <param name="r">Random instance to use</param>
/// <param name="remapArray">Array to put remapped indices in</param>
/// <returns>
///  The input selector when it is empty; a single-value selector with
///  (Count / 8) + 1 rows when the input is a single value; otherwise an
///  indexed selector over the rows chosen.
/// </returns>
public static ArraySelector Eighth(ArraySelector selector, Random r, ref int[] remapArray)
{
    if (selector.Count == 0)
    {
        return selector;
    }

    if (selector.IsSingleValue)
    {
        return ArraySelector.Single((selector.Count / 8) + 1);
    }

    // Room for every input row, which bounds the sample size
    Allocator.AllocateToSize(ref remapArray, selector.Count);

    // With no indices, the row position is the array index; otherwise look it up
    int[] sourceIndices = selector.Indices;
    int limit = selector.EndIndexExclusive;
    int keptCount = 0;

    for (int blockStart = selector.StartIndexInclusive; blockStart < limit; blockStart += 10)
    {
        // One random integer covers the next 10 rows (30 bits)
        int bits = r.Next();
        int blockEnd = Math.Min(blockStart + 10, limit);

        for (int row = blockStart; row < blockEnd; ++row, bits >>= 3)
        {
            if ((bits & 7) != 0)
            {
                continue;
            }

            remapArray[keptCount++] = (sourceIndices == null ? row : sourceIndices[row]);
        }
    }

    return ArraySelector.Map(remapArray, keptCount);
}