/// <summary>
/// Opens one cursor on every view in <c>_sources</c> and wraps them in a single <c>Cursor</c>.
/// </summary>
/// <param name="columnsNeeded">Requested columns; translated into an index predicate against <c>Schema</c>.</param>
/// <param name="rand">May be null. It is only validated here; every source cursor is opened with a null random.</param>
/// <returns>A new <c>Cursor</c> over the per-source cursors.</returns>
public DataViewRowCursor GetRowCursor(IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand = null)
{
    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, Schema);
    _host.CheckValueOrNull(rand);

    var srcPredicates = _zipBinding.GetInputPredicates(predicate);

    // REVIEW: if we know the row counts, we could only open cursor if it has needed columns, and have the
    // outer cursor handle the early stopping. If we don't know row counts, we need to open all the cursors because
    // we don't know which one will be the shortest.
    // One reason this is not done currently is because the API has 'somewhat mutable' data views, so potentially this
    // optimization might backfire.
    var srcCursors = _sources
        .Select((view, position) =>
        {
            // A null per-source predicate means no column of this source is requested.
            if (srcPredicates[position] == null)
            {
                return GetMinimumCursor(view);
            }

            var wanted = view.Schema.Where(col => srcPredicates[position](col.Index));
            return view.GetRowCursor(wanted, null);
        })
        .ToArray();

    return new Cursor(this, srcCursors, predicate);
}
/// <summary>
/// Returns a cursor over the output of this transform. If the bindings report that no new
/// column is active, the source cursor is wrapped directly instead of going through
/// <c>GetRowCursorCore</c>.
/// </summary>
/// <param name="columnsNeeded">Requested columns; translated into an index predicate against <c>OutputSchema</c>.</param>
/// <param name="rand">May be null. It is only validated here and is never passed to the source.</param>
/// <returns>
/// Either the result of <c>GetRowCursorCore</c>, or a <c>BindingsWrappedRowCursor</c> around
/// the source cursor.
/// </returns>
public DataViewRowCursor GetRowCursor(IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand = null)
{
    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);
    Host.CheckValueOrNull(rand);

    var bindings = GetBindings();
    if (bindings.AnyNewColumnsActive(predicate))
    {
        return GetRowCursorCore(predicate);
    }

    // If we aren't selecting any of the output columns, don't construct our cursor.
    // Note that because we cannot support random due to the inherently
    // stratified nature, neither can we allow the base data to be shuffled,
    // even if it supports shuffling.
    var activeInput = bindings.GetActiveInput(predicate);
    var activeCols = Source.Schema.Where(col => col.Index < activeInput.Length && activeInput[col.Index]);
    var inputCursor = Source.GetRowCursor(activeCols, null);

    return new BindingsWrappedRowCursor(Host, inputCursor, bindings);
}
/// <summary>
/// Requests a cursor set from the source and wraps each returned cursor in a <c>Cursor</c>
/// of this transform.
/// </summary>
/// <param name="columnsNeeded">Requested columns; translated into an index predicate against <c>OutputSchema</c>.</param>
/// <param name="n">Forwarded unchanged to <c>Source.GetRowCursorSet</c>.</param>
/// <param name="rand">May be null; forwarded unchanged to the source.</param>
/// <returns>An array with one wrapping cursor per cursor returned by the source, in the same order.</returns>
public override DataViewRowCursor[] GetRowCursorSet(IEnumerable<DataViewSchema.Column> columnsNeeded, int n, Random rand = null)
{
    Host.CheckValueOrNull(rand);

    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

    bool[] active;
    Func<int, bool> inputPred = GetActive(predicate, out active);

    var inputCols = Source.Schema.Where(col => inputPred(col.Index));
    var inputs = Source.GetRowCursorSet(inputCols, n, rand);
    Host.AssertNonEmpty(inputs);

    // No need to split if this is given 1 input cursor.
    var cursors = new DataViewRowCursor[inputs.Length];
    int slot = 0;
    foreach (var input in inputs)
    {
        cursors[slot] = new Cursor(this, input, active);
        slot++;
    }

    return cursors;
}
/// <summary>
/// Requests a cursor set from the source, optionally splits a lone cursor into several, and
/// wraps each resulting cursor in a <c>Cursor</c> of this transform.
/// </summary>
/// <param name="columnsNeeded">Requested columns; translated into an index predicate against <c>OutputSchema</c>.</param>
/// <param name="n">Forwarded to the source, and to <c>DataViewUtils.CreateSplitCursors</c> when a split happens.</param>
/// <param name="rand">May be null; forwarded unchanged to the source.</param>
/// <returns>An array with one wrapping cursor per input cursor, in the same order.</returns>
public sealed override RowCursor[] GetRowCursorSet(IEnumerable<Schema.Column> columnsNeeded, int n, Random rand = null)
{
    Host.CheckValueOrNull(rand);

    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);
    var inputPred = _bindings.GetDependencies(predicate);
    var active = _bindings.GetActive(predicate);

    var inputCols = Source.Schema.Where(col => inputPred(col.Index));
    var inputs = Source.GetRowCursorSet(inputCols, n, rand);
    Host.AssertNonEmpty(inputs);

    // Split only when the source returned a single cursor, more than one was requested,
    // and WantParallelCursors agrees for this predicate.
    bool shouldSplit = inputs.Length == 1 && n > 1 && WantParallelCursors(predicate);
    if (shouldSplit)
    {
        inputs = DataViewUtils.CreateSplitCursors(Host, inputs[0], n);
    }

    Host.AssertNonEmpty(inputs);

    return inputs
        .Select(input => (RowCursor)new Cursor(Host, this, input, active))
        .ToArray();
}
/// <summary>
/// Returns a cursor over this view. Unless <c>ShouldUseParallelCursors</c> answers false, it
/// first tries <c>DataViewUtils.TryCreateConsolidatingCursor</c>; otherwise, or if that
/// fails, it falls back to <c>GetRowCursorCore</c>.
/// </summary>
/// <param name="columnsNeeded">Requested columns; also translated into an index predicate against <c>OutputSchema</c>.</param>
/// <param name="rand">May be null. It is passed on only when <c>CanShuffle</c> is true; otherwise null is used.</param>
/// <returns>The consolidating cursor when one was created, otherwise the result of <c>GetRowCursorCore</c>.</returns>
public RowCursor GetRowCursor(IEnumerable<Schema.Column> columnsNeeded, Random rand = null)
{
    Host.CheckValueOrNull(rand);

    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

    Random rng = null;
    if (CanShuffle)
    {
        rng = rand;
    }

    bool? useParallel = ShouldUseParallelCursors(predicate);

    // When useParallel is null, let the input decide, so go ahead and ask for parallel.
    // When the input wants to be split, this puts the consolidation after this transform
    // instead of before. This is likely to produce better performance, for example, when
    // this is RangeFilter.
    bool askForParallel = useParallel ?? true;

    RowCursor curs;
    if (askForParallel && DataViewUtils.TryCreateConsolidatingCursor(out curs, this, columnsNeeded, Host, rng))
    {
        return curs;
    }

    return GetRowCursorCore(columnsNeeded, rng);
}
/// <summary>
/// Requests a cursor set from the source and wraps each returned cursor in a <c>Cursor</c>
/// built from <c>_bindings</c>.
/// </summary>
/// <param name="columnsNeeded">Requested columns; translated into an index predicate against <c>OutputSchema</c>.</param>
/// <param name="n">Forwarded unchanged to <c>Source.GetRowCursorSet</c>.</param>
/// <param name="rand">May be null; forwarded unchanged to the source.</param>
/// <returns>An array with one wrapping cursor per cursor returned by the source, in the same order.</returns>
public sealed override RowCursor[] GetRowCursorSet(IEnumerable<Schema.Column> columnsNeeded, int n, Random rand = null)
{
    Host.CheckValueOrNull(rand);

    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);
    var inputPred = _bindings.GetDependencies(predicate);
    var active = _bindings.GetActive(predicate);

    var inputCols = Source.Schema.Where(col => inputPred(col.Index));
    var inputs = Source.GetRowCursorSet(inputCols, n, rand);
    Host.AssertNonEmpty(inputs);

    // No need to split if this is given 1 input cursor.
    int total = inputs.Length;
    var wrapped = new RowCursor[total];
    for (int idx = 0; idx < total; ++idx)
    {
        var inner = inputs[idx];
        wrapped[idx] = new Cursor(Host, _bindings, inner, active);
    }

    return wrapped;
}