/// <summary>
/// Builds a cursor over the Parquet stream owned by <paramref name="parent"/>.
/// The data is consumed in blocks of <c>_columnChunkReadSize</c> rows; the order in which
/// blocks are visited is fixed here, and one getter delegate is created per active column.
/// </summary>
/// <param name="parent">Loader that supplies the host, the Parquet stream, the loaded columns and the block size.</param>
/// <param name="predicate">Column filter handed to <c>Utils.BuildSubsetMaps</c> to compute the active column set; must not be null.</param>
/// <param name="rand">Random source. When null the blocks are visited in identity order; otherwise in a random permutation.</param>
public Cursor(ParquetLoader parent, Func<int, bool> predicate, IRandom rand)
    : base(parent._host)
{
    Ch.AssertValue(predicate);

    _loader = parent;
    _fileStream = parent._parquetStream;
    _parquetConversions = new ParquetConversions(Ch);
    _rand = rand;

    // Work out which columns are active and how schema columns map onto them.
    Utils.BuildSubsetMaps(Schema.ColumnCount, predicate, out _actives, out _colToActivesIndex);

    _readerOptions = new ReaderOptions
    {
        Count = _loader._columnChunkReadSize,
        Columns = _loader._columnsLoaded.Select(column => column.Name).ToArray()
    };

    // Number of blocks = ceil(rowCount / rowsPerBlock), computed in decimal.
    decimal totalRows = (decimal)parent.GetRowCount();
    decimal exactBlocks = totalRows / _readerOptions.Count;
    int blockCount = (int)Math.Ceiling(exactBlocks);

    // Sequential order without a random source, shuffled order with one.
    int[] permutation;
    if (_rand == null)
    {
        permutation = Utils.GetIdentityPermutation(blockCount);
    }
    else
    {
        permutation = Utils.GetRandomPermutation(rand, blockCount);
    }
    _blockEnumerator = permutation.GetEnumerator();

    // Start with an empty row enumerator; no block has been read yet.
    _dataSetEnumerator = new int[0].GetEnumerator();

    // One value buffer slot and one getter per active column.
    _columnValues = new IList[_actives.Length];
    _getters = new Delegate[_actives.Length];
    for (int slot = 0; slot < _actives.Length; ++slot)
    {
        _getters[slot] = CreateGetterDelegate(_actives[slot]);
    }
}
/// <summary>
/// Builds a cursor over the Parquet stream owned by <paramref name="parent"/>.
/// The data is consumed in blocks of <c>_columnChunkReadSize</c> rows; the block visiting
/// order is obtained from <c>CreateOrderSequence</c>, and one getter delegate is created
/// per active column.
/// </summary>
/// <param name="parent">Loader that supplies the host, the Parquet stream (asserted non-null), the loaded columns and the block size.</param>
/// <param name="predicate">Column filter handed to <c>Utils.BuildSubsetMaps</c> to compute the active column set; must not be null.</param>
/// <param name="rand">Random source stored on the cursor; presumably consulted by <c>CreateOrderSequence</c> when shuffling (confirm against that helper).</param>
public Cursor(ParquetLoader parent, Func<int, bool> predicate, IRandom rand)
    : base(parent._host)
{
    Ch.AssertValue(predicate);
    Ch.AssertValue(parent._parquetStream);

    _loader = parent;
    _fileStream = parent._parquetStream;
    _parquetConversions = new ParquetConversions(Ch);
    _rand = rand;

    // Work out which columns are active and how schema columns map onto them.
    Utils.BuildSubsetMaps(Schema.ColumnCount, predicate, out _actives, out _colToActivesIndex);

    var columnNames = _loader._columnsLoaded.Select(column => column.Name).ToArray();
    _readerOptions = new ReaderOptions
    {
        Count = _loader._columnChunkReadSize,
        Columns = columnNames
    };

    // Block count is ceil(rowCount / rowsPerBlock). An order sequence has to be produced
    // over the blocks, so the count must fit in an int; reject anything larger up front.
    var blockCount = MathUtils.DivisionCeiling((long)parent.GetRowCount(), _readerOptions.Count);
    if (blockCount > int.MaxValue)
    {
        throw _loader._host.ExceptParam(nameof(Arguments.ColumnChunkReadSize), "Error due to too many blocks. Try increasing block size.");
    }

    _blockEnumerator = CreateOrderSequence((int)blockCount).GetEnumerator();

    // Start with an empty row enumerator; no block has been read yet.
    _dataSetEnumerator = Enumerable.Empty<int>().GetEnumerator();

    // One value buffer slot and one getter per active column.
    _columnValues = new IList[_actives.Length];
    _getters = new Delegate[_actives.Length];
    for (int slot = 0; slot < _actives.Length; ++slot)
    {
        _getters[slot] = CreateGetterDelegate(_actives[slot]);
    }
}