public DataViewRowCursor GetRowCursor(IEnumerable <DataViewSchema.Column> columnsNeeded, Random rand = null)
        {
            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, Schema);

            _host.CheckValueOrNull(rand);

            var srcPredicates = _zipBinding.GetInputPredicates(predicate);

            // REVIEW: if we know the row counts, we could only open cursor if it has needed columns, and have the
            // outer cursor handle the early stopping. If we don't know row counts, we need to open all the cursors because
            // we don't know which one will be the shortest.
            // One reason this is not done currently is because the API has 'somewhat mutable' data views, so potentially this
            // optimization might backfire.
            var srcCursors = _sources
                             .Select((dv, i) => srcPredicates[i] == null ? GetMinimumCursor(dv) : dv.GetRowCursor(dv.Schema.Where(x => srcPredicates[i](x.Index)), null)).ToArray();

            return(new Cursor(this, srcCursors, predicate));
        }
Esempio n. 2
0
        public DataViewRowCursor GetRowCursor(IEnumerable <DataViewSchema.Column> columnsNeeded, Random rand = null)
        {
            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

            Host.CheckValueOrNull(rand);
            // If we aren't selecting any of the output columns, don't construct our cursor.
            // Note that because we cannot support random due to the inherently
            // stratified nature, neither can we allow the base data to be shuffled,
            // even if it supports shuffling.
            var bindings = GetBindings();

            if (!bindings.AnyNewColumnsActive(predicate))
            {
                var activeInput = bindings.GetActiveInput(predicate);
                var activeCols  = Source.Schema.Where(x => activeInput.Length > x.Index && activeInput[x.Index]);
                var inputCursor = Source.GetRowCursor(activeCols, null);
                return(new BindingsWrappedRowCursor(Host, inputCursor, bindings));
            }
            return(GetRowCursorCore(predicate));
        }
            public override DataViewRowCursor[] GetRowCursorSet(IEnumerable <DataViewSchema.Column> columnsNeeded, int n, Random rand = null)
            {
                Host.CheckValueOrNull(rand);
                var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

                bool[]           active;
                Func <int, bool> inputPred = GetActive(predicate, out active);
                var inputCols = Source.Schema.Where(x => inputPred(x.Index));
                var inputs    = Source.GetRowCursorSet(inputCols, n, rand);

                Host.AssertNonEmpty(inputs);

                // No need to split if this is given 1 input cursor.
                var cursors = new DataViewRowCursor[inputs.Length];

                for (int i = 0; i < inputs.Length; i++)
                {
                    cursors[i] = new Cursor(this, inputs[i], active);
                }
                return(cursors);
            }
Esempio n. 4
0
        public sealed override RowCursor[] GetRowCursorSet(IEnumerable<Schema.Column> columnsNeeded, int n, Random rand = null)
        {
            Host.CheckValueOrNull(rand);

            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

            var inputPred = _bindings.GetDependencies(predicate);
            var active = _bindings.GetActive(predicate);

            var inputCols = Source.Schema.Where(x => inputPred(x.Index));
            var inputs = Source.GetRowCursorSet(inputCols, n, rand);
            Host.AssertNonEmpty(inputs);

            if (inputs.Length == 1 && n > 1 && WantParallelCursors(predicate))
                inputs = DataViewUtils.CreateSplitCursors(Host, inputs[0], n);
            Host.AssertNonEmpty(inputs);

            var cursors = new RowCursor[inputs.Length];
            for (int i = 0; i < inputs.Length; i++)
                cursors[i] = new Cursor(Host, this, inputs[i], active);
            return cursors;
        }
Esempio n. 5
0
        public RowCursor GetRowCursor(IEnumerable<Schema.Column> columnsNeeded, Random rand = null)
        {
            Host.CheckValueOrNull(rand);

            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

            var rng = CanShuffle ? rand : null;
            bool? useParallel = ShouldUseParallelCursors(predicate);

            // When useParallel is null, let the input decide, so go ahead and ask for parallel.
            // When the input wants to be split, this puts the consolidation after this transform
            // instead of before. This is likely to produce better performance, for example, when
            // this is RangeFilter.
            RowCursor curs;
            if (useParallel != false &&
                DataViewUtils.TryCreateConsolidatingCursor(out curs, this, columnsNeeded, Host, rng))
            {
                return curs;
            }

            return GetRowCursorCore(columnsNeeded, rng);
        }
Esempio n. 6
0
        public sealed override RowCursor[] GetRowCursorSet(IEnumerable <Schema.Column> columnsNeeded, int n, Random rand = null)
        {
            Host.CheckValueOrNull(rand);

            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

            var inputPred = _bindings.GetDependencies(predicate);
            var active    = _bindings.GetActive(predicate);

            var inputCols = Source.Schema.Where(x => inputPred(x.Index));
            var inputs    = Source.GetRowCursorSet(inputCols, n, rand);

            Host.AssertNonEmpty(inputs);

            // No need to split if this is given 1 input cursor.
            var cursors = new RowCursor[inputs.Length];

            for (int i = 0; i < inputs.Length; i++)
            {
                cursors[i] = new Cursor(Host, _bindings, inputs[i], active);
            }
            return(cursors);
        }