// Moves enumeration forward by one page.
//   desiredCount      - upper bound on the number of rows requested for the page.
//   cancellationToken - accepted for the interface; not consulted here.
// Returns the row count of the new page, which is also stored in CurrentRowCount.
public int Next(int desiredCount, CancellationToken cancellationToken)
{
    // Ask the current selector for the page that follows it, bounded by Count
    ArraySelector nextPage = _currentEnumerateSelector.NextPage(Count, desiredCount);

    // The enumerate selector and the current selector always track the same page here
    _currentEnumerateSelector = nextPage;
    _currentSelector = nextPage;

    CurrentRowCount = nextPage.Count;
    return CurrentRowCount;
}
// Remaps 'source' through the row indices held by this remapper.
//   source     - the array to remap; its Selector is used as the cache key.
//   remapArray - scratch buffer handed through to XArray.Select.
// Returns 'source' re-selected so that it exposes only the mapped rows.
// Throws InvalidOperationException if the vector pages out a different number of rows than _count.
public XArray Remap(XArray source, ref int[] remapArray)
{
    // A mapping already computed for this selector can be reused directly
    ArraySelector knownMapping;
    if (_cachedRemappings.TryGetValue(source.Selector, out knownMapping))
    {
        return source.Reselect(knownMapping);
    }

    // The BitVector is converted to indices lazily, the first time a column asks for values
    if (!_indicesFound)
    {
        // Flag is set before paging, so a failed conversion is not retried
        _indicesFound = true;
        Allocator.AllocateToSize(ref _indices, _count);

        int pagedCount = _vector.Page(_indices, ref _nextVectorIndex, _count);
        if (pagedCount != _count)
        {
            throw new InvalidOperationException($"RowRemapper found {pagedCount:n0} rows when {_count:n0} expected paging in Vector with {_vector.Count:n0} total matches up to index {_nextVectorIndex:n0}.");
        }
    }

    // Apply the row indices on top of the incoming selector
    ArraySelector rowMap = ArraySelector.Map(_indices, _count);
    XArray result = source.Select(rowMap, ref remapArray);

    // Remember the resulting selector for other columns sharing the same source selector
    _cachedRemappings[source.Selector] = result.Selector;

    return result;
}
// Moves enumeration forward by one page and points every column at it.
//   desiredCount      - upper bound on the number of rows requested for the page.
//   cancellationToken - accepted for the interface; not consulted here.
// Returns the number of rows in the new page, or 0 once _rowCount rows have been enumerated.
public int Next(int desiredCount, CancellationToken cancellationToken)
{
    // Never hand out more rows than remain of the _rowCount total
    long remaining = Math.Min(_rowCount - _countEnumerated, desiredCount);
    if (remaining == 0)
    {
        return 0;
    }

    int pageSize = (int)remaining;
    ArraySelector page = _currentEnumerateSelector.NextPage(_arrayRowCount, pageSize);

    // An empty page means the selector has run off the end of the array;
    // restart from an empty slice at index zero and page again.
    // NOTE(review): presumably this lets enumeration wrap when _rowCount exceeds _arrayRowCount - confirm.
    if (page.Count == 0)
    {
        page = ArraySelector.All(_arrayRowCount).Slice(0, 0).NextPage(_arrayRowCount, pageSize);
    }

    _currentEnumerateSelector = page;
    _currentSelector = page;

    for (int columnIndex = 0; columnIndex < _columns.Count; ++columnIndex)
    {
        _columns[columnIndex].SetSelector(page);
    }

    CurrentRowCount = page.Count;
    _countEnumerated += page.Count;

    return CurrentRowCount;
}
// Looks up each key in the dictionary.
//   keys              - the keys to look up; the underlying array is cast to T[].
//   rightSideSelector - receives a Map selector over the dictionary values of the keys that were found,
//                       in the order the matching keys appear.
// Returns a vector with bit i set when keys row i was non-null and present in the dictionary.
// The returned vector and the selector's index buffer are reused across calls.
public BitVector TryGetValues(XArray keys, out ArraySelector rightSideSelector)
{
    int keyCount = keys.Count;

    Allocator.AllocateToSize(ref _returnedVector, keyCount);
    Allocator.AllocateToSize(ref _returnedIndicesBuffer, keyCount);
    _returnedVector.None();

    T[] typedKeys = (T[])keys.Array;
    int matchCount = 0;

    for (int row = 0; row < keyCount; ++row)
    {
        int keyIndex = keys.Index(row);
        bool keyIsNull = keys.HasNulls && keys.NullRows[keyIndex];

        // Null keys never match; the dictionary is only consulted for non-null keys
        int joinedIndex;
        if (!keyIsNull && _dictionary.TryGetValue(typedKeys[keyIndex], out joinedIndex))
        {
            _returnedVector.Set(row);
            _returnedIndicesBuffer[matchCount++] = joinedIndex;
        }
        else
        {
            _returnedVector.Clear(row);
        }
    }

    // Expose the joined row index for each key that was found
    rightSideSelector = ArraySelector.Map(_returnedIndicesBuffer, matchCount);

    // Report which input rows matched
    return _returnedVector;
}
// Grows the table: captures the currently occupied entries, resets the table to the size
// chosen by HashCore.ResizeToSize, and re-inserts the captured entries via FindOrAdd.
protected override void Expand()
{
    // Collect the slots whose metadata byte is non-zero (the non-empty ones)
    byte[] metadata = this.Metadata;
    int[] occupiedSlots = new int[_assignedIndices.Length];
    int occupiedCount = 0;

    for (int slot = 0; slot < occupiedSlots.Length; ++slot)
    {
        if (metadata[slot] == 0)
        {
            continue;
        }

        occupiedSlots[occupiedCount++] = slot;
    }

    // Capture the old keys (one XArray per key column) restricted to the occupied slots
    XArray[] savedKeys = new XArray[_keys.Length];
    for (int keyColumn = 0; keyColumn < _keys.Length; ++keyColumn)
    {
        XArray allKeyValues = XArray.All(_keys[keyColumn].Values);
        savedKeys[keyColumn] = allKeyValues.Reselect(ArraySelector.Map(occupiedSlots, occupiedCount));
    }

    // Capture the old assigned indices the same way
    XArray savedIndices = XArray.All(_assignedIndices).Reselect(ArraySelector.Map(occupiedSlots, occupiedCount));

    // Resize the table, then put every saved entry back
    int newSize = HashCore.ResizeToSize(_assignedIndices.Length);
    Reset(newSize);

    FindOrAdd(savedKeys, savedIndices);
}
// Reads every row of 'inner' once, keeps the result in _column, and disposes 'inner'
// (whether or not the read succeeds).
public CachedColumnReader(IColumnReader inner)
{
    try
    {
        ArraySelector everyRow = ArraySelector.All(inner.Count);
        _column = inner.Read(everyRow);
    }
    finally
    {
        if (inner != null)
        {
            inner.Dispose();
        }
    }
}
// Restarts enumeration: the enumerate selector becomes ArraySelector.All(0)
// and both output columns are pointed at it.
public override void Reset()
{
    ArraySelector empty = ArraySelector.All(0);

    _enumerateSelector = empty;
    _columns[0].SetSelector(empty);
    _columns[1].SetSelector(empty);
}
// Reads the raw end positions and text bytes for the strings covered by 'selector'.
//   selector - the rows to read; the last result is returned again if the same selector is re-requested.
// Returns the cached String8Raw with Selector, Positions and Bytes describing the rows read.
// Throws NotImplementedException if either inner reader returns an indexed (non-contiguous) XArray.
//
// The cache fields are only written once both reads have succeeded, so a failed read
// cannot leave a selector behind that a later call would treat as a valid cache hit.
public String8Raw ReadRaw(ArraySelector selector)
{
    if (selector.Equals(_currentRaw.Selector))
    {
        return _currentRaw;
    }

    bool includesFirstString = (selector.StartIndexInclusive == 0);

    // Read the string positions. Each position is the end of a string, so the position
    // before the first requested row is also needed to know where that row starts
    // (except for row zero, which starts at byte zero).
    int firstPositionIndex = (includesFirstString ? 0 : selector.StartIndexInclusive - 1);
    XArray positions = _positionsReader.Read(ArraySelector.All(Count).Slice(firstPositionIndex, selector.EndIndexExclusive));
    if (positions.Selector.Indices != null)
    {
        throw new NotImplementedException("String8TypeProvider requires positions to be read contiguously.");
    }

    int[] positionArray = (int[])positions.Array;

    // Get the full byte range of all of the strings.
    // An empty selection starting at row zero yields no positions at all; treat it as an empty byte range
    // rather than indexing position -1.
    int firstStringStart = (includesFirstString ? 0 : positionArray[positions.Index(0)]);
    int lastStringEnd = (positions.Count == 0 ? firstStringStart : positionArray[positions.Index(positions.Count - 1)]);

    // Read the raw string bytes
    XArray bytes = _bytesReader.Read(ArraySelector.All(int.MaxValue).Slice(firstStringStart, lastStringEnd));
    if (bytes.Selector.Indices != null)
    {
        throw new NotImplementedException("String8TypeProvider requires bytes to be read contiguously.");
    }

    // Commit the new state only now that everything was read successfully
    _currentRaw.Selector = selector;
    _currentRaw.Positions = positions;
    _currentRaw.Bytes = bytes;

    return _currentRaw;
}
// Returns the keys as a contiguous, zero-based XArray ordered by the value stored for each key
// (the original comments describe this as insertion order).
// If a null item was recorded (_nullItemIndex != -1), that slot is flagged as null.
public XArray Values()
{
    byte[] metadata = this.Metadata;
    int itemCount = this.Count;

    // Flag the null item's slot, if there is one
    bool[] nullFlags = null;
    if (_nullItemIndex != -1)
    {
        nullFlags = new bool[metadata.Length];
        nullFlags[_nullItemIndex] = true;
    }

    // For each occupied slot, _values[slot] gives its position in the output order
    int[] slotForPosition = new int[itemCount];
    for (int slot = 0; slot < metadata.Length; ++slot)
    {
        if (metadata[slot] == 0)
        {
            continue;
        }

        slotForPosition[_values[slot]] = slot;
    }

    // Indexed view over the key array that visits the slots in output order
    XArray allKeys = XArray.All(_keys, itemCount, nullFlags);
    XArray orderedKeys = allKeys.Reselect(ArraySelector.Map(slotForPosition, itemCount));

    // Copy into fresh contiguous arrays so the result no longer depends on the indices
    T[] valuesCopy = null;
    bool[] nullsCopy = null;
    return orderedKeys.ToContiguous<T>(ref valuesCopy, ref nullsCopy);
}
// Reads 'selector' from the inner reader and, when a converter is configured, returns the converted array.
public XArray Read(ArraySelector selector)
{
    XArray result = _reader.Read(selector);

    // Without a converter the raw values are returned unchanged
    if (_converter != null)
    {
        result = _converter(result);
    }

    return result;
}
// Builds String8 values for the rows named by 'selector' by reading all positions and all bytes,
// then caches the result as the current array/selector and returns it.
private XArray ReadIndices(ArraySelector selector)
{
    int rowCount = selector.Count;
    Allocator.AllocateToSize(ref _resultArray, rowCount);

    // Load every string end position
    XArray allPositions = _positionsReader.Read(ArraySelector.All(_positionsReader.Count));
    int[] endPositions = (int[])allPositions.Array;

    // Load every text byte
    XArray allBytes = _bytesReader.Read(ArraySelector.All(_bytesReader.Count));
    byte[] text = (byte[])allBytes.Array;

    // Point each output String8 at its byte range within the shared text buffer
    for (int outputIndex = 0; outputIndex < rowCount; ++outputIndex)
    {
        int row = selector.Index(outputIndex);

        // A string starts where the previous one ended; row zero starts at byte zero
        int start = 0;
        if (row != 0)
        {
            start = endPositions[row - 1];
        }

        int length = endPositions[row] - start;
        _resultArray[outputIndex] = new String8(text, start, length);
    }

    // Remember what was read so it can be returned again
    _currentArray = XArray.All(_resultArray, rowCount);
    _currentSelector = selector;

    return _currentArray;
}
// Returns an indexed XArray over a copy of 'values' in which the valid rows are shifted over by two,
// leaving two default elements before the valid portion and default elements after it.
//   values - the array to copy; only its first values.Count logical rows are read.
// Returns an XArray of values.Count rows whose indices point at the shifted copies.
//
// Only rows [0, values.Count) are copied: values.Index(i) is only meaningful for those rows, and copying
// anything else from the backing array would put non-default data into the trailing padding.
public static XArray Pad(XArray values)
{
    // Room for the backing array (or the logical rows, whichever is larger) plus two on each side
    int paddedLength = Math.Max(values.Array.Length, values.Count) + 4;

    Array modifiedArray = null;
    Allocator.AllocateToSize(ref modifiedArray, paddedLength, values.Array.GetType().GetElementType());

    bool[] nulls = null;
    if (values.HasNulls)
    {
        nulls = new bool[paddedLength];
    }

    int[] indices = new int[modifiedArray.Length];

    // Copy values shifted over two (so, two default values at the beginning and defaults at the end)
    for (int i = 0; i < values.Count; ++i)
    {
        int sourceIndex = values.Index(i);
        indices[i] = i + 2;

        modifiedArray.SetValue(values.Array.GetValue(sourceIndex), indices[i]);

        if (nulls != null)
        {
            nulls[indices[i]] = values.NullRows[sourceIndex];
        }
    }

    // Return an XArray with the padded array with the indices and shorter real length
    int[] remapArray = null;
    return XArray.All(modifiedArray, values.Count, nulls).Select(ArraySelector.Map(indices, values.Count), ref remapArray);
}
// Reads the items covered by a contiguous 'selector' by pulling their bytes from the inner byte reader
// in pages of at most ReadPageSize bytes and block-copying them into the typed result array.
//   selector - the rows to read; must not be indexed.
// Returns the items read; the same XArray is returned again if the same selector is re-requested.
// Throws NotImplementedException for indexed selectors, and OverflowException if the byte range
// of the request does not fit in an int.
public XArray Read(ArraySelector selector)
{
    if (selector.Indices != null)
    {
        throw new NotImplementedException();
    }

    // Return the previous xarray if re-requested
    if (selector.Equals(_currentSelector))
    {
        return _currentArray;
    }

    // Allocate the result array
    Allocator.AllocateToSize(ref _array, selector.Count);

    // Compute the byte range; fail loudly rather than wrapping to a bogus range on overflow
    int byteStart = checked(_bytesPerItem * selector.StartIndexInclusive);
    int byteEnd = checked(_bytesPerItem * selector.EndIndexExclusive);

    // Read items one page at a time
    int bytesRead = 0;
    int currentByteIndex = byteStart;
    while (currentByteIndex < byteEnd)
    {
        // Sized from the remaining length so the page end cannot overflow
        int pageLength = Math.Min(ReadPageSize, byteEnd - currentByteIndex);
        int currentByteEnd = currentByteIndex + pageLength;

        XArray bytexarray = _byteReader.Read(ArraySelector.All(int.MaxValue).Slice(currentByteIndex, currentByteEnd));

        // The returned bytes begin at the XArray's own start offset, which is not necessarily zero
        // (for example when the reader hands back a slice of a larger buffer).
        Buffer.BlockCopy(bytexarray.Array, bytexarray.Selector.StartIndexInclusive, _array, bytesRead, bytexarray.Count);

        bytesRead += pageLength;
        currentByteIndex = currentByteEnd;
    }

    // Cache and return the current xarray
    _currentArray = XArray.All(_array, selector.Count);
    _currentSelector = selector;
    return _currentArray;
}
// Writes 'array' through a VariableIntegerWriter in batches, reads it back with a
// VariableIntegerReader, and asserts that the values read equal the values written.
//   columnName - folder name (under "VariableIntegerReaderWriterTests") used for the column files.
//   array      - the values to round-trip.
//   batchSize  - number of rows appended per writer call.
private static void RoundTrip(string columnName, int[] array, int batchSize = 128)
{
    XDatabaseContext context = new XDatabaseContext();

    string columnPath = Path.Combine("VariableIntegerReaderWriterTests", columnName);
    string columnPrefix = Path.Combine(columnPath, "Vl");

    // Start from a clean folder
    context.StreamProvider.Delete(columnPath);
    Directory.CreateDirectory(columnPath);

    int rowCount = array.Length;
    XArray values = XArray.All(array, rowCount);

    // Write the values one page at a time
    using (IColumnWriter writer = new VariableIntegerWriter(context.StreamProvider, columnPrefix))
    {
        for (ArraySelector page = ArraySelector.All(0).NextPage(rowCount, batchSize);
             page.Count > 0;
             page = page.NextPage(rowCount, batchSize))
        {
            writer.Append(values.Reselect(page));
        }
    }

    // Read everything back in one call
    XArray returned = default(XArray);
    using (IColumnReader reader = new VariableIntegerReader(context.StreamProvider, columnPrefix, CachingOption.AsConfigured))
    {
        returned = reader.Read(ArraySelector.All(rowCount));
    }

    TableTestHarness.AssertAreEqual(values, returned, rowCount);

    // Clean up the files written by this run
    context.StreamProvider.Delete(columnPath);
}
// Records the selector this column should expose.
// Throws InvalidOperationException when a non-empty selector is set while no values array is present.
public void SetSelector(ArraySelector currentSelector)
{
    // An empty selector is always acceptable; anything else needs values to select from
    if (currentSelector.Count != 0)
    {
        if (_allValues.Array == null)
        {
            throw new InvalidOperationException("SetValues must be called before SetSelector on DeferredArrayColumn.");
        }
    }

    _currentSelector = currentSelector;
}
// Builds a two-column table describing the source's schema:
// column "Name" holds each source column's name and column "Type" holds each column type's Name.
// Enumeration starts on ArraySelector.All(0).
public SchemaTransformer(IXTable source) : base(source)
{
    // One entry per source column, in source order
    var columnNames = _source.Columns.Select((col) => col.ColumnDetails.Name).ToArray();
    ArrayColumn nameColumn = new ArrayColumn(XArray.All(columnNames), new ColumnDetails("Name", typeof(string)));

    var columnTypeNames = _source.Columns.Select((col) => col.ColumnDetails.Type.Name.ToString()).ToArray();
    ArrayColumn typeColumn = new ArrayColumn(XArray.All(columnTypeNames), new ColumnDetails("Type", typeof(string)));

    _columns = new ArrayColumn[] { nameColumn, typeColumn };
    _enumerateSelector = ArraySelector.All(0);
}
// Restarts enumeration: the enumerate selector becomes an empty slice at index zero
// of the _distinctCount rows, and every column is pointed at it.
public void Reset()
{
    ArraySelector emptyStart = ArraySelector.All(_distinctCount).Slice(0, 0);
    _currentEnumerateSelector = emptyStart;

    int columnIndex = 0;
    while (columnIndex < _columns.Length)
    {
        _columns[columnIndex].SetSelector(emptyStart);
        ++columnIndex;
    }
}
// Moves enumeration forward by one page and points both output columns at it.
//   desiredCount      - upper bound on the number of rows requested for the page.
//   cancellationToken - accepted for the interface; not consulted here.
// Returns the number of rows in the new page.
public override int Next(int desiredCount, CancellationToken cancellationToken)
{
    // There is one output row per source column, so paging is bounded by the source column count
    ArraySelector page = _enumerateSelector.NextPage(_source.Columns.Count, desiredCount);
    _enumerateSelector = page;

    _columns[0].SetSelector(page);
    _columns[1].SetSelector(page);

    return page.Count;
}
// Re-selects 'values' using the row indices stored for 'selector'.
//   values   - the array to pick rows from.
//   selector - which stored row indices to read.
// Returns 'values' viewed through a Map selector built from the indices read.
public XArray Remap(XArray values, ArraySelector selector)
{
    // Load the stored row indices and widen them to int[]
    XArray storedIndices = _rowIndexReader.Read(selector);
    XArray widenedIndices = _rowIndexToIntConverter(storedIndices);

    // Build the mapping from the converted array and apply it
    int[] rowMap = (int[])widenedIndices.Array;
    ArraySelector mapping = ArraySelector.Map(rowMap, widenedIndices.Count);

    return values.Reselect(mapping);
}
// Restarts enumeration: clears the enumerated-row counter, makes the enumerate selector an empty
// slice at index zero of the _arrayRowCount rows, and points every column at it.
public void Reset()
{
    _countEnumerated = 0;

    ArraySelector emptyStart = ArraySelector.All(_arrayRowCount).Slice(0, 0);
    _currentEnumerateSelector = emptyStart;

    int columnIndex = 0;
    while (columnIndex < _columns.Count)
    {
        _columns[columnIndex].SetSelector(emptyStart);
        ++columnIndex;
    }
}
// Constructs the condition by handing the base class a single composed expression; the body itself is empty.
// The expression is built from 'parameters' in two parts joined with Then().And(...):
//   1. parameters.FirstAssigned() selected through a chain that takes the parameter's type metadata
//      and applies Is.AssignableFrom<T>().
//   2. parameters.Subject selected through RemainingParametersAreOptional.Default.
// NOTE(review): judging by the names, this presumably matches constructors whose first parameter accepts a T
// and whose other parameters are all optional - confirm against the fluent library's semantics.
// NOTE(review): ArraySelector<ConstructorInfo, ParameterInfo> here is a generic type unrelated to the
// non-generic ArraySelector used by the other methods in this listing - confirm it belongs in this file.
public HasSingleParameterConstructor(ArraySelector <ConstructorInfo, ParameterInfo> parameters) : base(parameters.FirstAssigned() .Select(Start.An.Instance(ParameterType.Default) .Then() .Metadata() .Select(Is.AssignableFrom <T>().Get()) .EnsureAssignedOrDefault()) .Then() .And(parameters.Subject.Select(RemainingParametersAreOptional.Default))) { }
// Reads 'selector' from the inner reader and converts the result.
// The last converted array is kept and returned again when the same selector is re-requested.
public XArray Read(ArraySelector selector)
{
    bool alreadyRead = selector.Equals(_currentSelector);
    if (!alreadyRead)
    {
        XArray unconverted = _innerReader.Read(selector);

        _currentArray = _converter(unconverted);
        _currentSelector = selector;
    }

    return _currentArray;
}
// Filters the groups to those meeting the minimum percentage threshold, orders them with ReverseComparer
// on their counts, keeps at most MaximumCountToReturn of them, and publishes the result to the three
// output columns (group value, count, percentage string).
//   groups        - the distinct group values.
//   counts        - the count for each group; when its array is null no groups are kept.
//   totalRowCount - total used for the threshold and for the percentages.
//   wasAllRows    - selects the percentage format: TwoSigFigs when true, WholePercentage otherwise.
private void PostSortAndFilter(XArray groups, XArray counts, int totalRowCount, bool wasAllRows)
{
    int candidateCount = groups.Count;
    int[] keptIndices = new int[candidateCount];
    int[] keptCounts = new int[candidateCount];
    int keptTotal = 0;

    // Keep only groups whose count reaches the minimum percentage of all rows
    int[] rawCounts = (int[])counts.Array;
    if (rawCounts != null)
    {
        int minimumCount = (int)(totalRowCount * MinimumPercentageToReport);

        for (int group = 0; group < candidateCount; ++group)
        {
            int groupRowCount = rawCounts[counts.Index(group)];
            if (groupRowCount < minimumCount)
            {
                continue;
            }

            keptIndices[keptTotal] = group;
            keptCounts[keptTotal] = groupRowCount;
            keptTotal++;
        }
    }

    // Order the kept groups by count (descending, per the ReverseComparer), carrying the indices along
    Array.Sort<int, int>(keptCounts, keptIndices, 0, keptTotal, new ReverseComparer());

    // Cap the result at the top N
    keptTotal = Math.Min(keptTotal, MaximumCountToReturn);

    // The distinct count is only known at this point
    _distinctCount = keptTotal;

    // Publish the output values
    int[] groupsRemap = null;
    XArray countsOut = XArray.All(keptCounts, keptTotal);

    _columns[0].SetValues(groups.Select(ArraySelector.Map(keptIndices, keptTotal), ref groupsRemap));
    _columns[1].SetValues(countsOut);

    if (!wasAllRows)
    {
        _columns[2].SetValues(PercentageAggregator.ToPercentageStrings(countsOut, totalRowCount, PercentageAggregator.WholePercentage));
    }
    else
    {
        _columns[2].SetValues(PercentageAggregator.ToPercentageStrings(countsOut, totalRowCount, PercentageAggregator.TwoSigFigs));
    }
}
// Moves enumeration forward by one page and points every column at it.
//   desiredCount      - upper bound on the number of rows requested for the page.
//   cancellationToken - accepted for the interface; not consulted here.
// Returns the row count of the new page, which is also stored in CurrentRowCount.
public int Next(int desiredCount, CancellationToken cancellationToken)
{
    ArraySelector page = _currentEnumerateSelector.NextPage(_rowCount, desiredCount);

    _currentEnumerateSelector = page;
    _currentSelector = page;

    int columnIndex = 0;
    while (columnIndex < _columns.Count)
    {
        _columns[columnIndex].SetSelector(page);
        ++columnIndex;
    }

    CurrentRowCount = page.Count;
    return CurrentRowCount;
}
// Reads the bytes covered by a contiguous 'selector' from the stream into the reusable buffer.
//   selector - byte range to read; StartIndexInclusive is used as the stream offset. Must not be indexed.
// Returns an XArray over the buffer with selector.Count rows.
// Throws NotImplementedException for indexed selectors.
public XArray Read(ArraySelector selector)
{
    if (selector.Indices != null)
    {
        throw new NotImplementedException();
    }

    int requested = selector.Count;
    Allocator.AllocateToSize(ref _array, requested);

    _stream.Seek(selector.StartIndexInclusive, SeekOrigin.Begin);

    // A single Stream.Read call may legitimately return fewer bytes than requested,
    // so keep reading until the range is filled or the stream reports end-of-stream.
    int filled = 0;
    while (filled < requested)
    {
        int justRead = _stream.Read(_array, filled, requested - filled);
        if (justRead <= 0)
        {
            // End of stream before the range was filled. The reported row count is left at
            // selector.Count, as it always has been; bytes beyond 'filled' are whatever the buffer held.
            break;
        }

        filled += justRead;
    }

    return XArray.All(_array, requested);
}
// Values returns the set of distinct values themselves.
// They are read from the value reader on first use and kept for later calls.
// Throws InvalidOperationException if the reader returns an indexed array or one not starting at index zero.
public XArray Values()
{
    // Already loaded: nothing to do
    if (_allValues.Array != null)
    {
        return _allValues;
    }

    // First request: read every value
    _allValues = _valueReader.Read(ArraySelector.All(_valueReader.Count));

    ArraySelector loaded = _allValues.Selector;
    bool isContiguousFromZero = (loaded.Indices == null && loaded.StartIndexInclusive == 0);
    if (!isContiguousFromZero)
    {
        throw new InvalidOperationException("EnumColumnReader values reader must read values contiguously.");
    }

    return _allValues;
}
// Builds the join dictionary from every value of the right-side join column.
//   cancellationToken - accepted but not consulted here.
// Throws ArgumentException when the right-side table is not an ISeekableXTable.
private void BuildJoinDictionary(CancellationToken cancellationToken)
{
    // The seekable check happens here (at build time) rather than earlier, so that Suggest doesn't fail
    ISeekableXTable seekableRightSide = _joinToSource as ISeekableXTable;
    if (seekableRightSide == null)
    {
        throw new ArgumentException($"Join requires a single built Binary Table as the right side table.");
    }

    // Fetch every right-side join value in one request
    ArraySelector everyRow = ArraySelector.All(seekableRightSide.Count);
    XArray rightSideValues = _joinToSeekGetter(everyRow);

    // Construct a JoinDictionary<T> for the join column type, sized for all values, and fill it
    _joinDictionary = (IJoinDictionary)Allocator.ConstructGenericOf(
        typeof(JoinDictionary<>),
        _joinColumnType,
        rightSideValues.Count);

    _joinDictionary.Add(rightSideValues, 0);
}
// Checks that Sampler.Eighth keeps roughly one eighth of the rows it is given (AssertClose with 0.2f),
// both when sampling a full selector and when sampling the result of a previous sample.
public void Sampler_Basics()
{
    // Fixed seed so the sample is repeatable
    Random random = new Random(8);
    ArraySelector everything = ArraySelector.All(10240);

    // First pass: all rows -> about an eighth
    int[] firstPassBuffer = null;
    ArraySelector firstPass = Sampler.Eighth(everything, random, ref firstPassBuffer);
    AssertClose(everything.Count / 8, firstPass.Count, 0.2f);

    // Second pass: the eighth -> about a sixty-fourth
    int[] secondPassBuffer = null;
    ArraySelector secondPass = Sampler.Eighth(firstPass, random, ref secondPassBuffer);
    AssertClose(firstPass.Count / 8, secondPass.Count, 0.2f);
}
// Groups by a single enum key column: aggregates every source batch directly on the column's row indices,
// then publishes the distinct values that were actually seen along with each aggregator's results for them.
//   cancellationToken - passed to each _source.Next call.
private void BuildSingleEnumColumnDictionary(CancellationToken cancellationToken)
{
    XArray values = _keyColumns[0].ValuesGetter()();
    Func<XArray> indicesGetter = _keyColumns[0].IndicesCurrentGetter();
    int valueCount = values.Count;

    // Use an existing aggregator that tracks found indices when there is one; otherwise create a
    // CountAggregator whose only job here is tracking which values had rows.
    IFoundIndicesTracker tracker = (IFoundIndicesTracker)_aggregators.FirstOrDefault((agg) => agg is IFoundIndicesTracker);
    bool ownsTracker = (tracker == null);
    if (ownsTracker)
    {
        tracker = new CountAggregator();
    }

    while (true)
    {
        int count = _source.Next(XTableExtensions.DefaultBatchSize, cancellationToken);
        if (count == 0)
        {
            break;
        }

        // The row index is already a small zero-based value, so it is used as the bucket directly
        XArray indices = indicesGetter();

        for (int aggregatorIndex = 0; aggregatorIndex < _aggregators.Length; ++aggregatorIndex)
        {
            _aggregators[aggregatorIndex].Add(indices, valueCount);
        }

        // A tracker taken from _aggregators was already fed by the loop above
        if (ownsTracker)
        {
            tracker.Add(indices, valueCount);
        }
    }

    // Which values ended up with any rows
    ArraySelector foundValuesSelector = tracker.FoundIndices;

    // The distinct count is known only now
    _distinctCount = foundValuesSelector.Count;

    // Publish the distinct values, then each aggregator's results, restricted to the found values
    _columns[0].SetValues(values.Reselect(foundValuesSelector));

    for (int aggregatorIndex = 0; aggregatorIndex < _aggregators.Length; ++aggregatorIndex)
    {
        XArray aggregated = _aggregators[aggregatorIndex].Values;
        _columns[aggregatorIndex + 1].SetValues(aggregated.Reselect(foundValuesSelector));
    }
}
// Benchmarks two chained Sampler.Eighth calls (all rows -> an eighth -> a sixty-fourth) over 10,240 rows.
public void Sample()
{
    // Fixed seed so every run samples the same way
    Random random = new Random(8);
    ArraySelector everything = ArraySelector.All(10240);

    // Buffers live outside the measured lambda so they are reused across iterations
    int[] firstPassBuffer = null;
    int[] secondPassBuffer = null;

    using (Benchmarker b = new Benchmarker($"Sampler.Eighth", DefaultMeasureMilliseconds))
    {
        b.Measure("Sampler.Eighth", everything.Count, () =>
        {
            ArraySelector firstPass = Sampler.Eighth(everything, random, ref firstPassBuffer);
            ArraySelector secondPass = Sampler.Eighth(firstPass, random, ref secondPassBuffer);

            return secondPass.Count;
        });
    }
}