Exemplo n.º 1
0
            /// <summary>
            /// Returns the value getter for the column at position <paramref name="col"/>.
            /// Columns that the bindings map to the source are served by <c>Input</c>;
            /// all other columns are served from the cached <c>_getters</c> array.
            /// </summary>
            /// <typeparam name="TValue">The type the caller expects the column to produce.</typeparam>
            /// <param name="col">Index of the requested column; it is checked to be active.</param>
            /// <returns>A getter delegate typed as <typeparamref name="TValue"/>.</returns>
            public override ValueGetter<TValue> GetGetter<TValue>(int col)
            {
                Ch.Check(IsColumnActive(col));

                // Translate the column index into either an input index or a slot in _getters.
                int mapped = _bindings.MapColumnIndex(out bool fromInput, col);
                if (fromInput)
                {
                    return Input.GetGetter<TValue>(mapped);
                }

                Ch.AssertValue(_getters);
                var candidate = _getters[mapped];
                Ch.AssertValue(candidate);

                // The stored delegate is only usable when it has the requested value type.
                var typed = candidate as ValueGetter<TValue>;
                if (typed == null)
                {
                    throw Ch.Except("Invalid TValue in GetGetter: '{0}'", typeof(TValue));
                }
                return typed;
            }
Exemplo n.º 2
0
            /// <summary>
            /// Creates a cursor over the parent's sources. One row cursor is opened per source,
            /// each seeded from <paramref name="rand"/>, and a
            /// <c>MultinomialWithoutReplacementSampler</c> is built from <paramref name="counts"/>.
            /// </summary>
            /// <param name="parent">The data view whose sources are read.</param>
            /// <param name="columnsNeeded">Columns requested from every source; a getter is created for each.</param>
            /// <param name="rand">Random source; asserted to be non-null.</param>
            /// <param name="counts">One non-negative entry per source (asserted to match <c>Sources.Length</c>).</param>
            public RandCursor(AppendRowsDataView parent, IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand, int[] counts)
                : base(parent)
            {
                Ch.AssertValue(rand);
                _rand = rand;

                Ch.AssertValue(counts);
                Ch.Assert(Sources.Length == counts.Length);

                // Open one cursor per source, each with its own Random derived from _rand.
                _cursorSet = new DataViewRowCursor[counts.Length];
                for (int src = 0; src < counts.Length; src++)
                {
                    Ch.Assert(counts[src] >= 0);
                    var sourceRand = RandomUtils.Create(_rand);
                    _cursorSet[src] = parent._sources[src].GetRowCursor(columnsNeeded, sourceRand);
                }

                _sampler = new MultinomialWithoutReplacementSampler(Ch, counts, rand);

                // No source is selected yet.
                _currentSourceIndex = -1;

                foreach (var needed in columnsNeeded)
                {
                    int slot = needed.Index;
                    Getters[slot] = CreateGetter(slot);
                }
            }
            /// <summary>
            /// Creates a cursor over the loader's Parquet stream. It computes the active column
            /// subset from <paramref name="predicate"/>, prepares the block order, and builds
            /// one getter delegate per active column.
            /// </summary>
            /// <param name="parent">The loader that owns the stream, schema and read settings.</param>
            /// <param name="predicate">Selects which columns are active; asserted to be non-null.</param>
            /// <param name="rand">Random source stored on the cursor.</param>
            public Cursor(ParquetLoader parent, Func<int, bool> predicate, IRandom rand)
                : base(parent._host)
            {
                Ch.AssertValue(predicate);

                _loader = parent;
                _fileStream = parent._parquetStream;
                _parquetConversions = new ParquetConversions(Ch);
                _rand = rand;

                // Work out which columns are active and how column indices map onto them.
                Utils.BuildSubsetMaps(Schema.ColumnCount, predicate, out _actives, out _colToActivesIndex);

                _readerOptions = new ReaderOptions
                {
                    Count = _loader._columnChunkReadSize,
                    Columns = _loader._columnsLoaded.Select(i => i.Name).ToArray()
                };

                // The block count follows from the configured rows per block (defaults to 1M).
                // Blocks are shuffled in addition to the rows inside each block, so the count
                // must fit in an int for a block order to be produced.
                var numBlocks = MathUtils.DivisionCeiling((long)parent.GetRowCount(), _readerOptions.Count);
                if (numBlocks > int.MaxValue)
                {
                    throw _loader._host.ExceptParam(nameof(Arguments.ColumnChunkReadSize), "Error due to too many blocks. Try increasing block size.");
                }

                _blockEnumerator = CreateOrderSequence((int)numBlocks).GetEnumerator();

                // Start with an empty row enumerator.
                _dataSetEnumerator = Enumerable.Empty<int>().GetEnumerator();

                _columnValues = new IList[_actives.Length];
                _getters = new Delegate[_actives.Length];
                for (int slot = 0; slot < _actives.Length; ++slot)
                {
                    _getters[slot] = CreateGetterDelegate(_actives[slot]);
                }
            }
Exemplo n.º 4
0
            /// <summary>
            /// Returns the value getter for the column at position <paramref name="col"/>.
            /// Source columns are delegated to <c>Input</c>; the single non-source column
            /// is served by the input's id getter.
            /// </summary>
            /// <typeparam name="TValue">The type the caller expects the column to produce.</typeparam>
            /// <param name="col">Index of the requested column; must be in range and active.</param>
            /// <returns>A getter delegate typed as <typeparamref name="TValue"/>.</returns>
            public ValueGetter<TValue> GetGetter<TValue>(int col)
            {
                Ch.CheckParam(col >= 0 && col < _bindings.ColumnCount, nameof(col));
                Ch.CheckParam(IsColumnActive(col), nameof(col));

                int mapped = _bindings.MapColumnIndex(out bool fromInput, col);
                if (fromInput)
                {
                    return Input.GetGetter<TValue>(mapped);
                }

                // Only one added column is expected here, backed by the id getter.
                Ch.Assert(mapped == 0);
                Delegate idGetter = Input.GetIdGetter();
                Ch.AssertValue(idGetter);

                if (idGetter is ValueGetter<TValue> typed)
                {
                    return typed;
                }
                throw Ch.Except("Invalid TValue in GetGetter: '{0}'", typeof(TValue));
            }
Exemplo n.º 5
0
            /// <summary>
            /// Returns a value getter delegate to fetch the value of the given column from the row.
            /// This throws if the column is out of range or not active in this row, or if the type
            /// <typeparamref name="TValue"/> differs from this column's type.
            /// </summary>
            /// <typeparam name="TValue"> is the column's content type.</typeparam>
            /// <param name="column"> is the output column whose getter should be returned.</param>
            /// <returns>
            /// The input's getter when the column maps to a source column; otherwise the input's
            /// id getter, typed as <typeparamref name="TValue"/>.
            /// </returns>
            public override ValueGetter <TValue> GetGetter <TValue>(DataViewSchema.Column column)
            {
                Ch.CheckParam(column.Index < _bindings.ColumnCount, nameof(column));
                // Report the actual parameter name: nameof(column.Index) would evaluate to
                // "Index", which is not a parameter of this method.
                Ch.CheckParam(IsColumnActive(column), nameof(column));
                bool isSrc;
                int  index = _bindings.MapColumnIndex(out isSrc, column.Index);

                if (isSrc)
                {
                    return(Input.GetGetter <TValue>(Input.Schema[index]));
                }

                // The only non-source column is expected at index 0 and is backed by the id getter.
                Ch.Assert(index == 0);
                Delegate idGetter = Input.GetIdGetter();

                Ch.AssertValue(idGetter);
                var fn = idGetter as ValueGetter <TValue>;

                if (fn == null)
                {
                    throw Ch.Except("Invalid TValue in GetGetter: '{0}'", typeof(TValue));
                }
                return(fn);
            }
Exemplo n.º 6
0
            /// <summary>
            /// Produces the getter delegate for <paramref name="column"/>.
            /// An inactive column fails the check; a mismatch between
            /// <typeparamref name="TValue"/> and the stored getter's type raises an exception
            /// that names both the requested and the expected type.
            /// </summary>
            /// <typeparam name="TValue">The type the caller expects the column to produce.</typeparam>
            /// <param name="column">The output column whose getter is requested.</param>
            /// <returns>The input's getter for source columns, otherwise the cached getter.</returns>
            public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column)
            {
                Ch.Check(IsColumnActive(column));

                int mapped = _bindings.MapColumnIndex(out bool fromInput, column.Index);
                if (fromInput)
                {
                    // Source columns are forwarded to the input unchanged.
                    return Input.GetGetter<TValue>(column);
                }

                Ch.AssertValue(_getters);
                var getter = _getters[mapped];
                Ch.AssertValue(getter);

                var typed = getter as ValueGetter<TValue>;
                if (typed != null)
                {
                    return typed;
                }

                throw Ch.Except($"Invalid TValue in GetGetter: '{typeof(TValue)}', " +
                                $"expected type: '{getter.GetType().GetGenericArguments().First()}'.");
            }
Exemplo n.º 7
0
            /// <summary>
            /// Creates a scoring cursor over <paramref name="input"/>. The row mapper's output row
            /// is obtained first; if building the getters then fails, that row is disposed
            /// before the exception propagates.
            /// </summary>
            /// <param name="provider">Channel provider passed to the base cursor.</param>
            /// <param name="parent">The scorer supplying bindings, output schema and getters.</param>
            /// <param name="input">The input row cursor.</param>
            /// <param name="active">Per-column activity flags; length is asserted to equal the binding column count.</param>
            /// <param name="predicateMapper">Predicate handed to the row mapper when creating its row.</param>
            public Cursor(IChannelProvider provider, RowToRowScorerBase parent, RowCursor input, bool[] active, Func<int, bool> predicateMapper)
                : base(provider, input)
            {
                Ch.AssertValue(parent);
                Ch.AssertValue(active);
                Ch.AssertValue(predicateMapper);

                _bindings = parent.GetBindings();
                Schema = parent.OutputSchema;

                Ch.Assert(active.Length == _bindings.ColumnCount);
                _active = active;

                _output = _bindings.RowMapper.GetRow(input, predicateMapper);
                try
                {
                    Ch.Assert(_output.Schema == _bindings.RowMapper.OutputSchema);

                    // A mapper output is needed only when its corresponding column is active.
                    _getters = parent.GetGetters(_output, mapperCol => active[_bindings.MapIinfoToCol(mapperCol)]);
                }
                catch (Exception)
                {
                    // Do not leak the mapper row when construction fails part-way.
                    _output.Dispose();
                    throw;
                }
            }
Exemplo n.º 8
0
            /// <summary>
            /// Produces the getter delegate for <paramref name="column"/>.
            /// An inactive column fails the check, and an exception is raised when
            /// <typeparamref name="TValue"/> does not match the stored getter's type.
            /// </summary>
            /// <typeparam name="TValue">The type the caller expects the column to produce.</typeparam>
            /// <param name="column">The output column whose getter is requested.</param>
            /// <returns>The input's getter for source columns, otherwise the cached getter.</returns>
            public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column)
            {
                Ch.Check(IsColumnActive(column));

                int mapped = _bindings.MapColumnIndex(out bool fromInput, column.Index);
                if (fromInput)
                {
                    // Look the column up in the input schema using the mapped index.
                    return Input.GetGetter<TValue>(Input.Schema[mapped]);
                }

                Ch.AssertValue(_getters);
                var getter = _getters[mapped];
                Ch.Assert(getter != null);

                if (getter is ValueGetter<TValue> typed)
                {
                    return typed;
                }
                throw Ch.Except("Invalid TValue in GetGetter: '{0}'", typeof(TValue));
            }
Exemplo n.º 9
0
            /// <summary>
            /// Creates a cursor over <paramref name="view"/>. It opens a cursor on all columns of
            /// the view's source and records which of the view's columns are active.
            /// </summary>
            /// <param name="provider">Channel provider passed to the base cursor.</param>
            /// <param name="view">The transform being cursored over; asserted to be non-null.</param>
            /// <param name="bindings">Column bindings stored on the cursor.</param>
            /// <param name="predicate">Selects active columns; <c>null</c> marks every column active.</param>
            /// <param name="rand">Optional random source; only asserted here, not otherwise used.</param>
            public Cursor(IChannelProvider provider, IDataTransform view, Bindings bindings, Func<int, bool> predicate, Random rand)
                : base(provider)
            {
                Ch.AssertValue(view);
                Ch.AssertValueOrNull(rand);
                Ch.Assert(view.Schema.Count >= 0);

                _view = view;
                _bindings = bindings;
                _cursor = view.Source.GetRowCursorForAllColumns();
                _active = new BitArray(view.Schema.Count);

                if (predicate != null)
                {
                    // Ask the predicate about each column in order.
                    for (int column = 0; column < view.Schema.Count; ++column)
                    {
                        _active[column] = predicate(column);
                    }
                }
                else
                {
                    // No predicate: every column is active.
                    _active.SetAll(true);
                }
            }
Exemplo n.º 10
0
            /// <summary>
            /// Produces the getter delegate for <paramref name="column"/>.
            /// Source columns are resolved through <c>_input</c>; others come from the cached
            /// <c>_getters</c> array and must match <typeparamref name="TValue"/>.
            /// </summary>
            /// <typeparam name="TValue">The type the caller expects the column to produce.</typeparam>
            /// <param name="column">The output column whose getter is requested; must be active.</param>
            /// <returns>A getter delegate typed as <typeparamref name="TValue"/>.</returns>
            public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column)
            {
                Contracts.CheckParam(IsColumnActive(column), nameof(column), "requested column is not active");

                bool fromInput;
                var mapped = _parent.SchemaBindings.MapColumnIndex(out fromInput, column.Index);

                if (fromInput)
                {
                    Contracts.AssertValue(_input);
                    var inputColumn = _input.Schema[mapped];
                    return _input.GetGetter<TValue>(inputColumn);
                }

                Ch.AssertValue(_getters);
                var getter = _getters[mapped];
                Ch.Assert(getter != null);

                if (getter is ValueGetter<TValue> typed)
                {
                    return typed;
                }
                throw Ch.Except("Invalid TValue in GetGetter: '{0}'", typeof(TValue));
            }
Exemplo n.º 11
0
            // Rows with parsing issues are deliberately not filtered out: subsetting columns must
            // never change which rows are produced. Parsing errors have to be translated to NaN
            // rather than cause the row to be skipped, and diagnostics should alert the user.
            //
            // When cthd > 1 a ParallelState is created (which also provides the row set) and
            // parsing goes through ParseParallel; otherwise a single row set is created and
            // parsing goes through ParseSequential. If construction fails before the parallel
            // enumerator has been created, the ParallelState is disposed.
            private Cursor(TextLoader parent, ParseStats stats, bool[] active, LineReader reader, int srcNeeded, int cthd)
                : base(parent._host)
            {
                Ch.Assert(active == null || active.Length == parent._bindings.Infos.Length);
                Ch.AssertValue(reader);
                Ch.AssertValue(stats);
                Ch.Assert(srcNeeded >= 0);
                Ch.Assert(cthd > 0);

                _total = -1;
                _batch = -1;
                _bindings = parent._bindings;
                _parser = parent._parser;
                _active = active;
                _reader = reader;
                _stats = stats;
                _srcNeeded = srcNeeded;

                // Non-null only while this constructor still owns the parallel state.
                ParallelState pending = null;
                if (cthd > 1)
                {
                    pending = new ParallelState(this, out _rows, cthd);
                }
                else
                {
                    _rows = _parser.CreateRowSet(_stats, 1, _active);
                }

                try
                {
                    _getters = new Delegate[_bindings.Infos.Length];
                    for (int col = 0; col < _getters.Length; col++)
                    {
                        // A null _active array means every column is active.
                        if (_active == null || _active[col])
                        {
                            ColumnPipe pipe = _rows.Pipes[col];
                            Ch.Assert(pipe != null);
                            _getters[col] = pipe.GetGetter();
                            Ch.Assert(_getters[col] != null);
                        }
                    }

                    if (pending == null)
                    {
                        _ator = ParseSequential().GetEnumerator();
                    }
                    else
                    {
                        _ator = ParseParallel(pending).GetEnumerator();
                        // The enumerator exists now, so the finally block must not dispose the state.
                        pending = null;
                    }
                }
                finally
                {
                    pending?.Dispose();
                }
            }