/// <summary>
/// Creates a set of row cursors over <c>_input</c>. When <paramref name="predicate"/> accepts the
/// index <c>_input.Schema.Count</c>, every input cursor is wrapped in a <c>PolynomialCursor</c>;
/// otherwise every input cursor is wrapped in a <c>SameCursor</c> exposing this view's schema.
/// </summary>
/// <param name="predicate">Column selector; also evaluated on <c>_input.Schema.Count</c> to decide which wrapper is used.</param>
/// <param name="n">Cursor count argument, forwarded unchanged to the input view.</param>
/// <param name="rand">Optional random generator, forwarded unchanged to the input view.</param>
/// <returns>One wrapping cursor per cursor returned by the input view.</returns>
public RowCursor[] GetRowCursorSet(Func<int, bool> predicate, int n, Random rand = null)
{
    bool newColumnRequested = predicate(_input.Schema.Count);

    if (!newColumnRequested)
    {
        // The new column is not required. We do not need to compute it.
        // But we need to keep the same schema.
        var untouched = _input.GetRowCursorSet(predicate, n, rand);
        return untouched
            .Select(c => new SameCursor(c, this.Schema))
            .ToArray();
    }

    // The new column is requested: the predicate handed to the input is rewritten
    // through PredicatePropagation before the input cursors are created.
    var inputCursors = _input.GetRowCursorSet(i => PredicatePropagation(i, predicate), n, rand);
    var wrapped = inputCursors.Select(
        c => new PolynomialCursor<TInput>(this, c, i => PredicatePropagation(i, predicate), _args, _inputCol, _multiplication));
    return wrapped.ToArray();
}
/// <summary>
/// Validates the arguments through <c>_host</c> and then delegates cursor creation to <c>_view</c>.
/// </summary>
/// <param name="consolidator">Consolidator produced by <c>_view.GetRowCursorSet</c>.</param>
/// <param name="predicate">Column selector; checked with <c>_host.CheckValue</c>.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator; checked with <c>_host.CheckValueOrNull</c>.</param>
/// <returns>The cursor set returned by <c>_view</c>.</returns>
public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Func<int, bool> predicate, int n, IRandom rand = null)
{
    // Argument checks come first so that _view is never called with a rejected predicate.
    _host.CheckValue(predicate, nameof(predicate));
    _host.CheckValueOrNull(rand);

    var cursorSet = _view.GetRowCursorSet(out consolidator, predicate, n, rand);
    return cursorSet;
}
/// <summary>
/// Calls <c>TrainTransform()</c>, asserts that <c>_reversedMapping</c> is set, then wraps every
/// cursor returned by <c>_input</c> in a <c>DBScanCursor</c>.
/// </summary>
/// <param name="predicate">Column selector, forwarded unchanged to the input view.</param>
/// <param name="n">Cursor count argument, forwarded unchanged to the input view.</param>
/// <param name="rand">Optional random generator, forwarded unchanged to the input view.</param>
/// <returns>One <c>DBScanCursor</c> per input cursor.</returns>
public RowCursor[] GetRowCursorSet(Func<int, bool> predicate, int n, Random rand = null)
{
    TrainTransform();
    _host.AssertValue(_reversedMapping, "_reversedMapping");

    var inputCursors = _input.GetRowCursorSet(predicate, n, rand);
    var wrapped = from inner in inputCursors
                  select new DBScanCursor(this, inner);
    return wrapped.ToArray();
}
/// <summary>
/// Calls <c>TrainTransform()</c>, asserts that <c>_Results</c> is set, then wraps every cursor
/// returned by <c>_input</c> in an <c>OpticsCursor</c> built with <c>_args.newColumnsNumber</c>.
/// </summary>
/// <param name="predicate">Column selector, forwarded unchanged to the input view.</param>
/// <param name="n">Cursor count argument, forwarded unchanged to the input view.</param>
/// <param name="rand">Optional random generator, forwarded unchanged to the input view.</param>
/// <returns>One <c>OpticsCursor</c> per input cursor.</returns>
public RowCursor[] GetRowCursorSet(Func<int, bool> predicate, int n, Random rand = null)
{
    TrainTransform();
    _host.AssertValue(_Results, "_Results");

    var inputCursors = _input.GetRowCursorSet(predicate, n, rand);
    var wrapped = from inner in inputCursors
                  select new OpticsCursor(this, inner, _args.newColumnsNumber);
    return wrapped.ToArray();
}
/// <summary>
/// Calls <c>TrainTransform()</c>, asserts that <c>_Results</c> is set, then wraps every cursor
/// returned by <c>_input</c> in an <c>OpticsOrderingCursor</c>.
/// </summary>
/// <param name="consolidator">Consolidator produced by <c>_input.GetRowCursorSet</c>.</param>
/// <param name="predicate">Column selector, forwarded unchanged to the input view.</param>
/// <param name="n">Cursor count argument, forwarded unchanged to the input view.</param>
/// <param name="rand">Optional random generator, forwarded unchanged to the input view.</param>
/// <returns>One <c>OpticsOrderingCursor</c> per input cursor.</returns>
public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Func<int, bool> predicate, int n, IRandom rand = null)
{
    TrainTransform();
    _host.AssertValue(_Results, "_Results");

    var inputCursors = _input.GetRowCursorSet(out consolidator, predicate, n, rand);
    var wrapped = from inner in inputCursors
                  select new OpticsOrderingCursor(this, inner);
    return wrapped.ToArray();
}
/// <summary>
/// Creates a set of typed cursors whose active columns are the columns listed in
/// <c>_columnIndices</c> plus any column accepted by <paramref name="additionalColumnsPredicate"/>.
/// </summary>
/// <param name="additionalColumnsPredicate">Predicate that denotes which additional columns to include in the cursor,
/// in addition to the columns that are needed for populating the <typeparamref name="TRow"/> object.</param>
/// <param name="n">Number of cursors to create</param>
/// <param name="rand">Random generator to use</param>
/// <returns>One typed cursor per underlying row cursor.</returns>
public RowCursor<TRow>[] GetCursorSet(Func<int, bool> additionalColumnsPredicate, int n, Random rand)
{
    _host.CheckValue(additionalColumnsPredicate, nameof(additionalColumnsPredicate));
    _host.CheckValueOrNull(rand);

    var activeColumns = _data.Schema.Where(
        col => _columnIndices.Contains(col.Index) || additionalColumnsPredicate(col.Index));
    var inputs = _data.GetRowCursorSet(activeColumns, n, rand);
    _host.AssertNonEmpty(inputs);

    // Only one cursor came back although several were requested: split it into n cursors.
    bool needsSplit = inputs.Length == 1 && n > 1;
    if (needsSplit)
    {
        inputs = DataViewUtils.CreateSplitCursors(_host, inputs[0], n);
    }
    _host.AssertNonEmpty(inputs);

    var typed = new RowCursor<TRow>[inputs.Length];
    for (int k = 0; k < inputs.Length; ++k)
    {
        typed[k] = (RowCursor<TRow>)(new RowCursorImplementation(new TypedCursor(this, inputs[k])));
    }
    return typed;
}
/// <summary>
/// Creates a set of <c>ShakeInputCursor</c> instances over the input view. The raw kind of the
/// first element of <c>_toShake</c> (item kind when its output type is a vector) selects the
/// implementation; only <c>DataKind.R4</c> is handled, any other kind raises an exception
/// built by <c>_host.Except</c>.
/// </summary>
/// <param name="consolidator">Consolidator produced by <c>_input.GetRowCursorSet</c>.</param>
/// <param name="predicate">Column selector; the input view is additionally asked for column <c>_inputCol</c>.</param>
/// <param name="n">Cursor count argument, forwarded unchanged to the input view.</param>
/// <param name="rand">Optional random generator, forwarded unchanged to the input view.</param>
/// <returns>One <c>ShakeInputCursor</c> per input cursor.</returns>
public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Func<int, bool> predicate, int n, IRandom rand = null)
{
    // For a vector output the kind of its items is what matters.
    DataKind kind = _toShake[0].OutputType.IsVector()
        ? _toShake[0].OutputType.AsVector().ItemType().RawKind()
        : _toShake[0].OutputType.RawKind();

    if (kind != DataKind.R4)
    {
        throw _host.Except("Not supported RawKind {0}", _toShake[0].OutputType.RawKind());
    }

    // The shaken input column is always activated, on top of what the caller asked for.
    var inputCursors = _input.GetRowCursorSet(out consolidator, i => i == _inputCol || predicate(i), n, rand);
    var shaken = inputCursors.Select(
        c => new ShakeInputCursor<TInput, float>(this, c, predicate, _args, _inputCol, _toShake, _shakingValues,
                                                 (float x, float y) => { return x + y; }));
    return shaken.ToArray();
}
/// <summary>
/// Asserts that <c>_sourceCtx</c> and <c>_sourcePipe</c> are set, then returns the cursor set
/// produced by <c>_sourcePipe</c>.
/// </summary>
/// <param name="predicate">Column selector, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_sourcePipe</c>.</returns>
public virtual RowCursor[] GetRowCursorSet(Func<int, bool> predicate, int n, Random rand = null)
{
    _host.AssertValue(_sourceCtx, "_sourceCtx");
    _host.AssertValue(_sourcePipe, "_sourcePipe");

    var cursorSet = _sourcePipe.GetRowCursorSet(predicate, n, rand);
    return cursorSet;
}
/// <summary>
/// Asserts that <c>_source</c> is set, then returns the cursor set produced by <c>_source</c>.
/// </summary>
/// <param name="predicate">Column selector, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_source</c>.</returns>
public RowCursor[] GetRowCursorSet(Func<int, bool> predicate, int n, Random rand = null)
{
    _host.AssertValue(_source, "_source");

    var cursorSet = _source.GetRowCursorSet(predicate, n, rand);
    return cursorSet;
}
/// <summary>
/// Forwards the call, with all arguments unchanged, to <c>_source</c>.
/// </summary>
/// <param name="predicate">Column selector, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_source</c>.</returns>
public RowCursor[] GetRowCursorSet(Func<int, bool> predicate, int n, Random rand = null)
    => _source.GetRowCursorSet(predicate, n, rand);
/// <summary>
/// Asserts that <c>_input</c> and <c>_sourcePipe</c> are set, then returns the cursor set
/// produced by <c>_sourcePipe</c>.
/// </summary>
/// <param name="columnsNeeded">Requested columns, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_sourcePipe</c>.</returns>
public virtual DataViewRowCursor[] GetRowCursorSet(IEnumerable<DataViewSchema.Column> columnsNeeded, int n, Random rand = null)
{
    _host.AssertValue(_input, "_input");
    _host.AssertValue(_sourcePipe, "_sourcePipe");

    var cursorSet = _sourcePipe.GetRowCursorSet(columnsNeeded, n, rand);
    return cursorSet;
}
/// <summary>
/// Asserts that <c>_source</c> is set, then returns the cursor set produced by <c>_mergedView</c>.
/// </summary>
/// <param name="consolidator">Consolidator produced by <c>_mergedView.GetRowCursorSet</c>.</param>
/// <param name="predicate">Column selector, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_mergedView</c>.</returns>
public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Func<int, bool> predicate, int n, IRandom rand = null)
{
    _host.AssertValue(_source, "_source");

    var cursorSet = _mergedView.GetRowCursorSet(out consolidator, predicate, n, rand);
    return cursorSet;
}
/// <summary>
/// Forwards the call, with all arguments unchanged, to <c>_source</c>.
/// </summary>
/// <param name="consolidator">Consolidator produced by <c>_source.GetRowCursorSet</c>.</param>
/// <param name="predicate">Column selector, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_source</c>.</returns>
public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Func<int, bool> predicate, int n, IRandom rand = null)
    => _source.GetRowCursorSet(out consolidator, predicate, n, rand);
/// <summary>
/// Forwards the call, with all arguments unchanged, to <c>_source</c>.
/// </summary>
/// <param name="columnsNeeded">Requested columns, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_source</c>.</returns>
public RowCursor[] GetRowCursorSet(IEnumerable<Schema.Column> columnsNeeded, int n, Random rand = null)
{
    return _source.GetRowCursorSet(columnsNeeded, n, rand);
}
/// <summary>
/// Calls <c>ComputeStatistics()</c>, asserts that <c>_input</c> is set, then returns the cursor
/// set produced by <c>_input</c>.
/// </summary>
/// <param name="consolidator">Consolidator produced by <c>_input.GetRowCursorSet</c>.</param>
/// <param name="predicate">Column selector, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_input</c>.</returns>
public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Func<int, bool> predicate, int n, IRandom rand = null)
{
    // Statistics are computed before any cursor is handed out.
    ComputeStatistics();
    _host.AssertValue(_input, "_input");

    var cursorSet = _input.GetRowCursorSet(out consolidator, predicate, n, rand);
    return cursorSet;
}
/// <summary>
/// Asserts that <c>_input</c> is set, then returns the cursor set produced by <c>_mergedView</c>.
/// </summary>
/// <param name="columnsNeeded">Requested columns, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_mergedView</c>.</returns>
public DataViewRowCursor[] GetRowCursorSet(IEnumerable<DataViewSchema.Column> columnsNeeded, int n, Random rand = null)
{
    _host.AssertValue(_input, "_input");

    var cursorSet = _mergedView.GetRowCursorSet(columnsNeeded, n, rand);
    return cursorSet;
}
/// <summary>
/// Builds one k-d tree per cursor over <paramref name="data"/> (a single cursor when
/// <c>args.numThreads</c> is null or 1, otherwise a cursor set processed with
/// <c>Parallel.Invoke</c>), merges the per-chunk label/weight dictionaries into one dictionary
/// and verifies that the ids stored in the trees and in the merged dictionary are the same set.
/// </summary>
/// <typeparam name="TLabel">Type of the label stored with each point.</typeparam>
/// <param name="ch">Channel used to create the exceptions raised by this method.</param>
/// <param name="data">Data view to read.</param>
/// <param name="featureIndex">Index of the feature column.</param>
/// <param name="labelIndex">Index of the label column.</param>
/// <param name="idIndex">Index of the id column, or -1 when there is none. When given, the column
/// must be a non-vector column of kind <c>DataKind.I8</c>.</param>
/// <param name="weightIndex">Index of the weight column.</param>
/// <param name="outLabelsWeights">Receives the merged dictionary id -> (label, weight).</param>
/// <param name="args">Options; <c>numThreads</c>, <c>seed</c> and <c>colId</c> are read here, the
/// whole object is also handed to <c>BuildKDTree</c>.</param>
/// <returns>A <c>NearestNeighborsTrees</c> wrapping the non-empty trees.</returns>
/// <remarks>
/// An exception created by <paramref name="ch"/> is thrown when the id column has the wrong type,
/// when no chunk produced any point, when the numbers of non-empty trees and non-empty
/// dictionaries differ, when an id appears twice, or when tree ids and dictionary ids differ.
/// </remarks>
public static NearestNeighborsTrees NearestNeighborsBuild<TLabel>(IChannel ch, IDataView data,
    int featureIndex, int labelIndex, int idIndex, int weightIndex,
    out Dictionary<long, Tuple<TLabel, float>> outLabelsWeights, NearestNeighborsArguments args)
    where TLabel : IComparable<TLabel>
{
    // Columns to activate in the cursors.
    var indexes = new HashSet<int>() { featureIndex, labelIndex, weightIndex, idIndex };

    if (idIndex != -1)
    {
        var colType = data.Schema[idIndex].Type;
        if (colType.IsVector() || colType.RawKind() != DataKind.I8)
        {
            throw ch.Except("Column '{0}' must be of type '{1}' not '{2}'", args.colId, DataKind.I8, colType);
        }
    }

    int nt = args.numThreads ?? 1;
    Random rand = RandomUtils.Create(args.seed);
    var cursors = (nt == 1)
        ? new RowCursor[] { data.GetRowCursor(i => indexes.Contains(i), rand) }
        : data.GetRowCursorSet(i => indexes.Contains(i), nt, rand);

    KdTree[] kdtrees;
    Dictionary<long, Tuple<TLabel, float>>[] labelsWeights;
    if (nt == 1)
    {
        labelsWeights = new Dictionary<long, Tuple<TLabel, float>>[1];
        kdtrees = new KdTree[]
        {
            BuildKDTree<TLabel>(data, cursors[0], featureIndex, labelIndex, idIndex, weightIndex, out labelsWeights[0], args)
        };
    }
    else
    {
        // Multithreading. We assume the distributed set of cursor is well distributed.
        // No KdTree will be much smaller than the others.
        Action[] ops = new Action[cursors.Length];
        kdtrees = new KdTree[cursors.Length];
        labelsWeights = new Dictionary<long, Tuple<TLabel, float>>[cursors.Length];
        for (int i = 0; i < ops.Length; ++i)
        {
            // Copy the loop variable so that every closure works on its own chunk.
            int chunkId = i;
            ops[i] = new Action(() =>
            {
                kdtrees[chunkId] = BuildKDTree<TLabel>(data, cursors[chunkId], featureIndex, labelIndex, idIndex, weightIndex,
                                                       out labelsWeights[chunkId], args);
            });
        }
        Parallel.Invoke(new ParallelOptions() { MaxDegreeOfParallelism = cursors.Length }, ops);
    }

    // Drop the chunks which did not receive any row.
    kdtrees = kdtrees.Where(c => c.Any()).ToArray();
    labelsWeights = labelsWeights.Where(c => c.Any()).ToArray();

    // Without this guard an empty input fails below with an index-out-of-range error.
    if (labelsWeights.Length == 0)
    {
        throw ch.Except("No data was found to build the nearest neighbors trees.");
    }

    // The merge below indexes both arrays with the same position, they must stay aligned.
    if (kdtrees.Length != labelsWeights.Length)
    {
        throw ch.Except("Mismatch between the number of non-empty trees ({0}) and the number of non-empty label sets ({1}).",
                        kdtrees.Length, labelsWeights.Length);
    }

    // Merge every chunk into the first dictionary. The keys of chunk i are shifted by the number
    // of entries merged so far; MoveId receives the same offset (presumably it shifts the ids
    // stored in the tree the same way; it is defined outside of this method).
    var merged = labelsWeights[0];
    long start = merged.Count;
    for (int i = 1; i < labelsWeights.Length; ++i)
    {
        kdtrees[i].MoveId(start);
        foreach (var pair in labelsWeights[i])
        {
            long newKey = pair.Key + start;
            if (merged.ContainsKey(newKey))
            {
                throw ch.Except("The same key appeared twice in two differents threads: {0}", newKey);
            }
            merged.Add(newKey, pair.Value);
        }
        start += labelsWeights[i].Count;
    }

    // Id checking.
    var labelId = merged.Select(c => c.Key).ToList();
    var treeId = new List<long>();
    for (int i = 0; i < kdtrees.Length; ++i)
    {
        treeId.AddRange(kdtrees[i].EnumeratePoints().Select(c => c.id));
    }

    var h1 = new HashSet<long>(labelId);
    var h2 = new HashSet<long>(treeId);
    if (h1.Count != labelId.Count)
    {
        throw ch.Except("Duplicated label ids.");
    }
    if (h2.Count != treeId.Count)
    {
        // This check is about the ids stored in the trees, not the label dictionary.
        throw ch.Except("Duplicated tree ids.");
    }
    if (h1.Count != h2.Count)
    {
        throw ch.Except("Mismatch (1) in ids.");
    }
    var inter = h1.Intersect(h2);
    if (inter.Count() != h1.Count)
    {
        throw ch.Except("Mismatch (2) in ids.");
    }
    // End.

    outLabelsWeights = merged;
    return new NearestNeighborsTrees(ch, kdtrees);
}
/// <summary>
/// Asserts that <c>_sourceCtx</c> and <c>_sourcePipe</c> are set, then returns the cursor set
/// produced by <c>_sourcePipe</c>.
/// </summary>
/// <param name="consolidator">Consolidator produced by <c>_sourcePipe.GetRowCursorSet</c>.</param>
/// <param name="predicate">Column selector, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_sourcePipe</c>.</returns>
public virtual IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Func<int, bool> predicate, int n, IRandom rand = null)
{
    _host.AssertValue(_sourceCtx, "_sourceCtx");
    _host.AssertValue(_sourcePipe, "_sourcePipe");

    var cursorSet = _sourcePipe.GetRowCursorSet(out consolidator, predicate, n, rand);
    return cursorSet;
}
/// <summary>
/// Forwards the call, with all arguments unchanged, to <c>_source</c>.
/// </summary>
/// <param name="columnsNeeded">Requested columns, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_source</c>.</returns>
public DataViewRowCursor[] GetRowCursorSet(IEnumerable<DataViewSchema.Column> columnsNeeded, int n, Random rand = null)
    => _source.GetRowCursorSet(columnsNeeded, n, rand);
/// <summary>
/// Calls <c>ComputeStatistics()</c>, asserts that <c>_input</c> is set, then returns the cursor
/// set produced by <c>_input</c>.
/// </summary>
/// <param name="predicate">Column selector, forwarded unchanged.</param>
/// <param name="n">Cursor count argument, forwarded unchanged.</param>
/// <param name="rand">Optional random generator, forwarded unchanged.</param>
/// <returns>The cursor set returned by <c>_input</c>.</returns>
public RowCursor[] GetRowCursorSet(Func<int, bool> predicate, int n, Random rand = null)
{
    // Statistics are computed before any cursor is handed out.
    ComputeStatistics();
    _host.AssertValue(_input, "_input");

    var cursorSet = _input.GetRowCursorSet(predicate, n, rand);
    return cursorSet;
}