/// <summary>
/// Moves the input cursor forward by one row. When the input delegate reports that the row
/// starts a new group, the rows of that group are read from the auxiliary group cursor,
/// passed to the parent one by one, and the shared state is updated.
/// </summary>
/// <returns><c>false</c> when the input cursor has no more rows; otherwise <c>true</c>.</returns>
protected override bool MoveNextCore()
{
    if (!_input.MoveNext())
    {
        return false;
    }

    // No new group reported for this row: the state is left untouched.
    if (!_newGroupInInputCursorDel())
    {
        return true;
    }

    // On the first step the group cursor still has to be started. On every later step it is
    // already positioned at the start of the next group.
    if (_groupCursor.State == CursorState.NotStarted)
    {
        // Both cursors should hold the same number of rows, so since _input.MoveNext()
        // succeeded, this one must succeed as well.
        bool startedFirstGroup = _groupCursor.MoveNext() && _newGroupInGroupCursorDel();
        Ch.Assert(startedFirstGroup);
    }

    // Consume the whole group from the auxiliary cursor. The delegate is only consulted
    // while the group cursor is not done.
    for (; ; )
    {
        if (_groupCursor.State == CursorState.Done || _newGroupInGroupCursorDel())
        {
            break;
        }

        TLabel currentLabel = default(TLabel);
        TScore currentScore = default(TScore);
        _labelGetter(ref currentLabel);
        _scoreGetter(ref currentScore);
        _parent.ProcessExample(_state, currentLabel, currentScore);
        _groupCursor.MoveNext();
    }

    _parent.UpdateState(_state);
    return true;
}
/// <summary>
/// Advances to the next output row. Each input row is emitted as many times as its replica
/// count says; rows whose replica count is zero or negative are skipped.
/// </summary>
/// <remarks>
/// The replica count comes from <c>_cache</c> (looked up by row id) when a cache exists.
/// Otherwise it is drawn with <c>NextPoisson</c> and capped at <c>_maxReplica</c>; when a class
/// getter is present, only rows whose class equals <c>_classValue</c> are drawn and all other
/// rows get a count of one. The same rule is applied to every input row; previously the
/// class getter was only honoured for rows read right after a skipped row.
/// </remarks>
/// <returns><c>false</c> when the input cursor is exhausted; otherwise <c>true</c>.</returns>
public bool MoveNext()
{
    // Still replaying copies of the current input row.
    if (_copy > 0)
    {
        --_copy;
        return true;
    }

    // Pull input rows until one of them has to be emitted at least once.
    do
    {
        if (!_inputCursor.MoveNext())
        {
            return false;
        }

        if (_cache != null)
        {
            // Cached counts are used as stored (no cap is applied here).
            _idGetter(ref _currentId);
            _copy = _cache[_currentId];
        }
        else
        {
            if (_classGetter == null)
            {
                _copy = NextPoisson(_lambda, _rand);
            }
            else
            {
                _classGetter(ref _currentCl);
                _copy = _currentCl.Equals(_classValue) ? NextPoisson(_lambda, _rand) : 1;
            }

            _copy = Math.Min(_copy, _maxReplica);
        }
    }
    while (_copy <= 0);

    // One copy is consumed by this call; the remaining ones are replayed by later calls.
    --_copy;
    return true;
}
/// <summary>
/// Moves to the next group: the leading cursor runs ahead to find where the group ends, then
/// the trailing cursor is advanced over the same rows while every non-null aggregator reads them.
/// </summary>
/// <returns><c>false</c> when the leading cursor was already done; otherwise <c>true</c>.</returns>
protected override bool MoveNextCore()
{
    // Start the leading cursor on the first call.
    if (_leadingCursor.State == CursorState.NotStarted)
    {
        _leadingCursor.MoveNext();
    }

    // The leading cursor hit the end of the data during the previous call.
    if (_leadingCursor.State == CursorState.Done)
    {
        return false;
    }

    // Run the leading cursor to the end of the group (or of the data), counting rows.
    int rowsInGroup = 0;
    for (; _leadingCursor.State == CursorState.Good && IsSameGroup(); _leadingCursor.MoveNext())
    {
        rowsInGroup++;
    }

    // A group can only be empty if the leading cursor immediately reaches the end of the
    // data, and that case was handled above.
    Ch.Assert(rowsInGroup > 0);

    // Let the trailing cursor catch up and fill all the aggregates.
    // REVIEW: this could be done lazily, but still all aggregators together.
    foreach (var aggregator in _aggregators)
    {
        if (aggregator != null)
        {
            aggregator.SetSize(rowsInGroup);
        }
    }

    for (int row = 0; row < rowsInGroup; row++)
    {
        bool moved = _trailingCursor.MoveNext();
        Ch.Assert(moved);

        foreach (var aggregator in _aggregators)
        {
            if (aggregator != null)
            {
                aggregator.ReadValue(row);
            }
        }
    }

    return true;
}
/// <summary>
/// Builds confusion matrices from the Count column of <paramref name="confusionMatrix"/>.
/// Rows are consumed in order; every <c>VectorSize</c> consecutive rows form one square matrix.
/// </summary>
/// <param name="env">Host environment used for assertions and exceptions.</param>
/// <param name="confusionMatrix">Data view expected to contain the Count vector column.</param>
/// <returns>One <see cref="ConfusionMatrix"/> per complete block of rows.</returns>
/// <exception cref="System.Exception">Thrown (via <c>env.Except</c>) when the Count column is missing.</exception>
internal static List<ConfusionMatrix> Create(IHostEnvironment env, IDataView confusionMatrix)
{
    Contracts.AssertValue(env);
    env.AssertValue(confusionMatrix);

    if (!confusionMatrix.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.Count, out int countColumn))
    {
        throw env.Except($"ConfusionMatrix data view did not contain a {nameof(MetricKinds.ColumnNames.Count)} column.");
    }

    // Class names come from the slot-name metadata of the Count column.
    var slots = default(VBuffer<ReadOnlyMemory<char>>);
    confusionMatrix.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countColumn, ref slots);
    var slotsValues = slots.GetValues();
    string[] classNames = new string[slotsValues.Length];
    for (int i = 0; i < slotsValues.Length; i++)
    {
        classNames[i] = slotsValues[i].ToString();
    }

    ColumnType type = confusionMatrix.Schema.GetColumnType(countColumn);
    env.Assert(type.IsVector);

    List<ConfusionMatrix> confusionMatrices = new List<ConfusionMatrix>();

    // The cursor is disposable; scope it so it is released even if reading a row throws.
    using (IRowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn))
    {
        ValueGetter<VBuffer<double>> countGetter = cursor.GetGetter<VBuffer<double>>(countColumn);
        VBuffer<double> countValues = default;

        int valuesRowIndex = 0;
        double[,] elements = null;
        while (cursor.MoveNext())
        {
            // First row of a block: start a fresh matrix.
            if (valuesRowIndex == 0)
            {
                elements = new double[type.VectorSize, type.VectorSize];
            }

            countGetter(ref countValues);
            // NOTE(review): GetValues() is indexed positionally, which presumably assumes a
            // dense buffer - confirm the Count column is never sparse.
            ReadOnlySpan<double> values = countValues.GetValues();
            for (int i = 0; i < values.Length; i++)
            {
                elements[valuesRowIndex, i] = values[i];
            }

            valuesRowIndex++;

            // Block complete: emit the matrix and start over.
            if (valuesRowIndex == type.VectorSize)
            {
                valuesRowIndex = 0;
                confusionMatrices.Add(new ConfusionMatrix(elements, classNames));
            }
        }
    }

    return confusionMatrices;
}
/// <summary>
/// Producer loop: waits for a requested row count on <c>_toProduce</c>, fills the pipes with
/// up to that many rows from <c>_input</c>, and posts the number of rows actually produced to
/// <c>_toConsume</c>. A posted value of zero acts as the end-of-data sentinel.
/// </summary>
/// <remarks>
/// Any exception is stored in <c>_producerTaskException</c> and the sentinel is posted so the
/// consumer gets a chance to check that field.
/// </remarks>
private async Task LoopProducerWorker()
{
    try
    {
        int slot = 0;
        while (true)
        {
            int wanted = await _toProduce.ReceiveAsync();
            if (wanted == 0)
            {
                // Some sort of early exit happened. Leave without posting even the sentinel
                // to the consumer, since nothing will be consumed any more.
                return;
            }

            Ch.Assert(wanted >= _blockSize);

            int produced = 0;
            while (produced < wanted)
            {
                Ch.Assert(0 <= slot & slot < _pipeIndices.Length);
                if (!_input.MoveNext())
                {
                    break;
                }

                int pipeIndex = _pipeIndices[slot++];
                Ch.Assert(0 <= pipeIndex & pipeIndex < _pipeIndices.Length);
                for (int p = 0; p < _pipes.Length; ++p)
                {
                    _pipes[p].Fill(pipeIndex);
                }

                // Wrap around once the end of the index table is reached.
                if (slot == _pipeIndices.Length)
                {
                    slot = 0;
                }

                ++produced;
            }

            PostAssert(_toConsume, produced);

            if (produced >= wanted)
            {
                continue;
            }

            // The cursor ran out of rows. Send the sentinel, then exit. This assumes the
            // receiver sees things in Post order, so the sentinel arrives after the last Post.
            // When no row was produced, the post above already was the sentinel.
            if (produced > 0)
            {
                PostAssert(_toConsume, 0);
            }

            return;
        }
    }
    catch (Exception ex)
    {
        _producerTaskException = ex;
        // Send the sentinel in this case as well; the field will be checked.
        PostAssert(_toConsume, 0);
    }
}
/// <summary>
/// Advances the input cursor and, when a row is available, calls <c>RetrieveNeighbors</c> for it.
/// </summary>
/// <returns><c>true</c> when the input cursor moved to a row; otherwise <c>false</c>.</returns>
public bool MoveNext()
{
    if (_inputCursor.MoveNext())
    {
        RetrieveNeighbors();
        return true;
    }

    return false;
}
/// <summary>
/// Advances within the current sub cursor; when it is exhausted (or absent), opens a sub
/// cursor for the next path. Paths whose loader or getters fail are logged and skipped.
/// </summary>
/// <returns><c>false</c> when <c>TryGetNextPathAndValues</c> yields no further path; otherwise <c>true</c>.</returns>
protected override bool MoveNextCore()
{
    for (; ; )
    {
        if (_subCursor != null)
        {
            if (_subCursor.MoveNext())
            {
                return true;
            }

            // The current sub cursor is exhausted: clean it up before moving on.
            _subCursor.Dispose();
            _subCursor = null;
        }

        if (!TryGetNextPathAndValues(out string path, out string relativePath, out List<string> values))
        {
            return false;
        }

        IDataLoader subLoader;
        try
        {
            // Load the sub cursor and reset the data.
            subLoader = _parent.CreateLoaderFromBytes(_parent._subLoaderBytes, new MultiFileSource(path));
        }
        catch (Exception e)
        {
            Ch.Warning($"Failed to load file {path} due to a loader exception. Moving on to the next file. Ex: {e.Message}");
            continue;
        }

        _subCursor = subLoader.GetRowCursor(col => _subActive[col]);

        try
        {
            UpdateSubGetters();
            UpdateColumnValues(relativePath, values);
        }
        catch (InvalidOperationException e)
        {
            // This file could not be set up, so drop its cursor; the next iteration moves on
            // to the following path.
            Ch.Warning(MessageSensitivity.Schema, e.Message);
            if (_subCursor != null)
            {
                _subCursor.Dispose();
                _subCursor = null;
            }
        }
    }
}
/// <summary>
/// Advances the input cursor until <c>RunLambda</c> accepts a row.
/// </summary>
/// <returns><c>true</c> when an accepted row was found; <c>false</c> when the input ran out first.</returns>
protected override bool MoveNextCore()
{
    while (_input.MoveNext())
    {
        bool accepted;
        RunLambda(out accepted);
        if (accepted)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Picks the number of rows per block. Without a byte bound the configured row bound is
/// returned as is. Otherwise rows are read from <paramref name="data"/> while the summed
/// per-column estimates stay within the byte bound (and within the row bound, if any).
/// </summary>
/// <param name="data">The data view to sample rows from.</param>
/// <param name="actives">The column codecs whose source columns are read.</param>
/// <returns>The chosen number of rows per block, never less than one.</returns>
private int RowsPerBlockHeuristic(IDataView data, ColumnCodec[] actives)
{
    // No size bound was set: fall back to the row bound.
    if (!_maxBytesPerBlock.HasValue)
    {
        // Argument validation should have ensured this.
        _host.Assert(_maxRowsPerBlock.HasValue && _maxRowsPerBlock.Value > 0);
        return _maxRowsPerBlock.Value;
    }

    long byteLimit = _maxBytesPerBlock.Value;

    // Columns to activate on the cursor, and an optional shuffling source.
    var sourceColumns = new HashSet<int>(actives.Select(codec => codec.SourceIndex));
    Random shuffleRand = data.CanShuffle ? new TauswortheHybrid(_host.Rand) : null;

    // EstimatorCore is generic in the column's raw type, so it is invoked through reflection.
    EstimatorDelegate estimatorCore = EstimatorCore<int>;
    MethodInfo genericEstimator = estimatorCore.GetMethodInfo().GetGenericMethodDefinition();

    using (IRowCursor cursor = data.GetRowCursor(sourceColumns.Contains, shuffleRand))
    {
        // Slot 1 receives the column; slots 2 and 3 are read back after each invocation.
        object[] invokeArgs = { cursor, null, null, null };
        var writers = new IValueWriter[actives.Length];
        var estimators = new Func<long>[actives.Length];
        for (int c = 0; c < actives.Length; ++c)
        {
            ColumnCodec column = actives[c];
            invokeArgs[1] = column;
            genericEstimator.MakeGenericMethod(column.Codec.Type.RawType).Invoke(this, invokeArgs);
            estimators[c] = (Func<long>)invokeArgs[2];
            writers[c] = (IValueWriter)invokeArgs[3];
        }

        // We can't really support more rows than this.
        int rowLimit = _maxRowsPerBlock ?? int.MaxValue;

        // Stop at the row limit, at the end of the data (one block per column in that case),
        // or at the first row whose total estimate exceeds the byte limit.
        int rows = 0;
        while (rows < rowLimit && cursor.MoveNext() && estimators.Sum(estimate => estimate()) <= byteLimit)
        {
            rows++;
        }

        // Possible that even a single row exceeds the "limit".
        return Math.Max(1, rows);
    }
}
/// <summary>
/// Writes an optional header line followed by one line per cursor row, and tracks the
/// smallest and largest value of <c>_dstBase</c> observed over the data lines.
/// </summary>
/// <param name="cursor">The cursor whose rows are written.</param>
/// <param name="count">Incremented once per data row written.</param>
/// <param name="minLen">Smallest <c>_dstBase</c> seen; <c>int.MaxValue</c> when no row was written.</param>
/// <param name="maxLen">Largest <c>_dstBase</c> seen; zero when no row was written.</param>
public void Run(IRowCursor cursor, ref long count, out int minLen, out int maxLen)
{
    minLen = int.MaxValue;
    maxLen = 0;

    // Both callbacks look at _col when invoked, so they follow the column being written.
    Action<StringBuilder, int> writeValue = (sb, index) => AppendItem(sb, index, _pipes[_col].Default);
    Action<StringBuilder, int> writeHeader = (sb, index) => AppendItem(sb, index, "");

    if (_hasHeader)
    {
        StartLine();
        while (_col < _pipes.Length)
        {
            int headerLen;
            _pipes[_col].WriteHeader(writeHeader, out headerLen);
            Contracts.Assert(headerLen >= 0);
            EndColumn(headerLen);
        }

        EndLine("\"\"");
        _writer.WriteLine();
    }

    while (cursor.MoveNext())
    {
        // Start a new line. This also starts the first column.
        StartLine();
        while (_col < _pipes.Length)
        {
            int dataLen;
            _pipes[_col].WriteData(writeValue, out dataLen);
            Contracts.Assert(dataLen >= 0);
            EndColumn(dataLen);
        }

        minLen = Math.Min(minLen, _dstBase);
        maxLen = Math.Max(maxLen, _dstBase);

        EndLine();
        _writer.WriteLine();
        count++;
    }
}
/// <summary>
/// Advances within the current source cursor; when it is exhausted, disposes it and opens a
/// cursor (and ID getter) on the next entry of <c>Sources</c>.
/// </summary>
/// <returns><c>false</c> when every source has been exhausted; otherwise <c>true</c>.</returns>
protected override bool MoveNextCore()
{
    Ch.AssertValue(_currentCursor);

    for (; ; )
    {
        if (_currentCursor.MoveNext())
        {
            return true;
        }

        // The current cursor is finished.
        _currentCursor.Dispose();
        _currentCursor = null;

        _currentSourceIndex++;
        if (_currentSourceIndex >= Sources.Length)
        {
            return false;
        }

        _currentCursor = Sources[_currentSourceIndex].GetRowCursor(c => IsColumnActive(c));
        _currentIdGetter = _currentCursor.GetIdGetter();
    }
}
/// <summary>
/// Builds a single confusion matrix from the Count column of <paramref name="confusionMatrix"/>:
/// each row of the data view becomes one row of the matrix, and the class names are taken
/// from the column's slot-name metadata.
/// </summary>
/// <param name="env">Host environment used for assertions and exceptions.</param>
/// <param name="confusionMatrix">Data view expected to contain the Count vector column.</param>
/// <returns>The confusion matrix.</returns>
/// <exception cref="System.Exception">Thrown (via <c>env.Except</c>) when the Count column is missing.</exception>
internal static ConfusionMatrix Create(IHostEnvironment env, IDataView confusionMatrix)
{
    Contracts.AssertValue(env);
    env.AssertValue(confusionMatrix);

    if (!confusionMatrix.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.Count, out int countColumn))
    {
        // The exception must actually be thrown; otherwise execution continues with column 0.
        throw env.Except($"ConfusionMatrix data view did not contain a {nameof(MetricKinds.ColumnNames.Count)} column.");
    }

    ColumnType type = confusionMatrix.Schema.GetColumnType(countColumn);
    env.Assert(type.IsVector);

    double[,] elements = new double[type.VectorSize, type.VectorSize];

    // The cursor is disposable; scope it so it is released even if reading a row throws.
    using (IRowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn))
    {
        ValueGetter<VBuffer<double>> countGetter = cursor.GetGetter<VBuffer<double>>(countColumn);
        VBuffer<double> countValues = default;

        int valuesRowIndex = 0;
        while (cursor.MoveNext())
        {
            countGetter(ref countValues);
            // NOTE(review): Values is indexed up to Length, which presumably assumes a dense
            // buffer - confirm the Count column is never sparse.
            for (int i = 0; i < countValues.Length; i++)
            {
                elements[valuesRowIndex, i] = countValues.Values[i];
            }

            valuesRowIndex++;
        }
    }

    var slots = default(VBuffer<DvText>);
    confusionMatrix.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countColumn, ref slots);
    string[] classNames = new string[slots.Count];
    for (int i = 0; i < slots.Count; i++)
    {
        classNames[i] = slots.Values[i].ToString();
    }

    return new ConfusionMatrix(elements, classNames);
}
/// <summary>
/// Reads every row of <paramref name="cursor"/> into a new <see cref="KdTree"/> and records
/// each row's label and weight under its id. The cursor is disposed before returning.
/// </summary>
/// <param name="data">Not used by this method.</param>
/// <param name="cursor">The cursor to read; owned (disposed) by this method.</param>
/// <param name="featureIndex">Column index of the feature vector.</param>
/// <param name="labelIndex">Column index of the label, or a negative value when there is none.</param>
/// <param name="idIndex">Column index of the id, or a negative value to use the running entry count as id.</param>
/// <param name="weightIndex">Column index of the weight, or a negative value to use a weight of one.</param>
/// <param name="labelsWeights">Receives the (label, weight) pair of every row, keyed by id.</param>
/// <param name="args">Supplies the distance and seed passed to the tree.</param>
/// <returns>The populated tree.</returns>
private static KdTree BuildKDTree<TLabel>(IDataView data, IRowCursor cursor, int featureIndex, int labelIndex, int idIndex, int weightIndex, out Dictionary<long, Tuple<TLabel, float>> labelsWeights, NearestNeighborsArguments args) where TLabel : IComparable<TLabel>
{
    using (cursor)
    {
        var getFeatures = cursor.GetGetter<VBuffer<float>>(featureIndex);
        var getLabel = labelIndex < 0 ? null : cursor.GetGetter<TLabel>(labelIndex);
        var getWeight = weightIndex < 0 ? null : cursor.GetGetter<float>(weightIndex);
        var getId = idIndex < 0 ? null : cursor.GetGetter<long>(idIndex);

        var tree = new KdTree(distance: args.distance, seed: args.seed);
        labelsWeights = new Dictionary<long, Tuple<TLabel, float>>();

        // These keep their previous value whenever the matching getter is absent.
        var featureBuffer = default(VBuffer<float>);
        var rowLabel = default(TLabel);
        float rowWeight = 1;
        long rowId = 0;

        while (cursor.MoveNext())
        {
            getFeatures(ref featureBuffer);

            if (getLabel != null)
            {
                getLabel(ref rowLabel);
            }

            if (getWeight != null)
            {
                getWeight(ref rowWeight);
            }

            if (getId == null)
            {
                // No id column: use the number of entries stored so far.
                rowId = labelsWeights.Count;
            }
            else
            {
                getId(ref rowId);
            }

            labelsWeights[rowId] = Tuple.Create(rowLabel, rowWeight);
            tree.Add(new PointIdFloat(rowId, featureBuffer, true));
        }

        return tree;
    }
}
/// <summary>
/// Advances the input cursor, reads the input value, and fills the shaking values with the
/// exhaustive algorithm.
/// </summary>
/// <returns><c>false</c> when the input cursor is exhausted; otherwise <c>true</c>.</returns>
/// <exception cref="System.Exception">Thrown (via <c>Contracts.Except</c>) for any algorithm other than exhaustive.</exception>
public bool MoveNext()
{
    if (!_inputCursor.MoveNext())
    {
        return false;
    }

    _inputGetter(ref _inputValue);

    // Only the exhaustive algorithm is handled here.
    if (_args.algo != ShakeInputAlgorithm.exhaustive)
    {
        throw Contracts.Except("Not available algo {0}", _args.algo);
    }

    FillShakingValuesExhaustive();
    return true;
}
/// <summary>
/// Walks <paramref name="cur"/> once and stores a replica count per row id in
/// <c>_cacheReplica</c>: a Poisson draw for rows whose class equals
/// <paramref name="valueClass"/>, and one for every other row.
/// </summary>
/// <param name="rand">Random source handed to <c>NextPoisson</c>.</param>
/// <param name="cur">The cursor to read ids and classes from.</param>
/// <param name="classColumn">Index of the class column.</param>
/// <param name="valueClass">The class value whose rows are resampled.</param>
/// <param name="ch">Channel that receives a warning when no row matched the class.</param>
void LoadCache<TClass>(IRandom rand, IRowCursor cur, int classColumn, TClass valueClass, IChannel ch)
{
    _cacheReplica = new Dictionary<UInt128, int>();

    // Per-class row counts (filled in, but not read back by this method).
    var classCounts = new Dictionary<TClass, long>();

    var getId = cur.GetIdGetter();
    var getClass = cur.GetGetter<TClass>(classColumn);
    var rowId = default(UInt128);
    var rowClass = default(TClass);
    long matching = 0;
    long others = 0;

    while (cur.MoveNext())
    {
        getClass(ref rowClass);
        getId(ref rowId);

        long seenSoFar;
        classCounts.TryGetValue(rowClass, out seenSoFar);
        classCounts[rowClass] = seenSoFar + 1;

        int replicas;
        if (rowClass.Equals(valueClass))
        {
            replicas = NextPoisson(_args.lambda, rand);
            ++matching;
        }
        else
        {
            replicas = 1;
            ++others;
        }

        _cacheReplica[rowId] = replicas;
    }

    if (matching == 0)
    {
        ch.Warning("Resample on a condition never happened: nbIn={0} nbOut={1}", matching, others);
    }
}
/// <summary>
/// Forwards to the wrapped input's <c>MoveNext</c>.
/// </summary>
/// <returns>Whatever the wrapped input returns.</returns>
public bool MoveNext() => _input.MoveNext();
/// <summary>
/// Compares a cursor against a data view column by column: one single-column cursor is
/// opened on <paramref name="view2"/> per column, all cursors are advanced in lock-step,
/// and values (and optionally row IDs) are compared on every row.
/// </summary>
/// <param name="curs1">Reference cursor; every column is expected to be active.</param>
/// <param name="view2">The view to compare against.</param>
/// <param name="exactTypes">Passed to <c>EqualTypes</c> when comparing column types.</param>
/// <param name="exactDoubles">Passed to <c>GetColumnComparer</c>.</param>
/// <param name="checkId">Whether row IDs are compared as well.</param>
/// <returns><c>true</c> when everything matches; otherwise the result of <c>Failed()</c>.</returns>
protected bool CheckSameValues(IRowCursor curs1, IDataView view2, bool exactTypes = true, bool exactDoubles = true, bool checkId = true)
{
    Contracts.Assert(curs1.Schema.ColumnCount == view2.Schema.ColumnCount);

    // Get a cursor for each column.
    int colLim = curs1.Schema.ColumnCount;
    var cursors = new IRowCursor[colLim];
    try
    {
        for (int col = 0; col < colLim; col++)
        {
            // curs1 should have all columns active (for simplicity of the code here).
            Contracts.Assert(curs1.IsColumnActive(col));

            // Capture a per-iteration copy: the loop variable itself is shared by all
            // iterations, so a predicate evaluated later would see its final value.
            int activeCol = col;
            cursors[col] = view2.GetRowCursor(c => c == activeCol);
        }

        // Get the comparison delegates for each column.
        Func<bool>[] comps = new Func<bool>[colLim];
        // We have also one ID comparison delegate for each cursor.
        Func<bool>[] idComps = new Func<bool>[cursors.Length];
        for (int col = 0; col < colLim; col++)
        {
            Contracts.Assert(cursors[col] != null);
            var type1 = curs1.Schema.GetColumnType(col);
            var type2 = cursors[col].Schema.GetColumnType(col);
            if (!EqualTypes(type1, type2, exactTypes))
            {
                Fail("Different types");
                return Failed();
            }

            comps[col] = GetColumnComparer(curs1, cursors[col], col, type1, exactDoubles);
            ValueGetter<UInt128> idGetter;
            idComps[col] = checkId ? GetIdComparer(curs1, cursors[col], out idGetter) : null;
        }

        for (; ; )
        {
            // Advance every cursor together; all of them must agree on whether a row exists.
            bool f1 = curs1.MoveNext();
            for (int col = 0; col < colLim; col++)
            {
                bool f2 = cursors[col].MoveNext();
                if (f1 != f2)
                {
                    if (f1)
                    {
                        Fail("Left has more rows at position: {0}", curs1.Position);
                    }
                    else
                    {
                        // Report the position of the cursor that actually disagreed.
                        Fail("Right {0} has more rows at position: {1}", col, cursors[col].Position);
                    }

                    return Failed();
                }
            }

            if (!f1)
            {
                return true;
            }

            for (int col = 0; col < colLim; col++)
            {
                Contracts.Assert(curs1.Position == cursors[col].Position);
                var comp = comps[col];
                if (comp != null && !comp())
                {
                    Fail("Different values in column {0} of row {1}", col, curs1.Position);
                    return Failed();
                }

                comp = idComps[col];
                if (comp != null && !comp())
                {
                    Fail("Different values in ID values for column {0} cursor of row {1}", col, curs1.Position);
                    return Failed();
                }
            }
        }
    }
    finally
    {
        // Dispose whichever per-column cursors were created, even on early exit.
        for (int col = 0; col < colLim; col++)
        {
            var c = cursors[col];
            if (c != null)
            {
                c.Dispose();
            }
        }
    }
}
/// <summary>
/// Creates the transform from user arguments: validates the hash-bit count, builds the
/// per-column settings and types, and, for the columns that requested it, scans the input
/// once to build the inverse-hash key-value metadata.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="args">The transform arguments, including the column specifications.</param>
/// <param name="input">The input data view.</param>
public HashTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column, input, TestType)
{
    if (args.HashBits < NumBitsMin || args.HashBits >= NumBitsLim)
    {
        throw Host.ExceptUserArg(nameof(args.HashBits), "hashBits should be between {0} and {1} inclusive", NumBitsMin, NumBitsLim - 1);
    }

    _exes = new ColInfoEx[Infos.Length];

    // Parallel lists: the columns that asked for an inverse hash, and their maximum counts.
    List<int> invertColumns = null;
    List<int> invertMaxCounts = null;
    for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
    {
        var column = args.Column[iinfo];
        _exes[iinfo] = new ColInfoEx(args, column);

        int requestedCount = GetAndVerifyInvertHashMaxCount(args, column, _exes[iinfo]);
        if (requestedCount > 0)
        {
            Utils.Add(ref invertColumns, iinfo);
            Utils.Add(ref invertMaxCounts, requestedCount);
        }
    }

    _types = InitColumnTypes();

    if (Utils.Size(invertColumns) > 0)
    {
        // Build the invert hashes for all columns for which it was requested.
        var sourceColumns = new HashSet<int>(invertColumns.Select(i => Infos[i].Source));
        using (IRowCursor srcCursor = input.GetRowCursor(sourceColumns.Contains))
        using (var ch = Host.Start("Invert hash building"))
        {
            var helpers = new InvertHashHelper[invertColumns.Count];
            Action disposer = null;
            for (int h = 0; h < helpers.Length; ++h)
            {
                int iinfo = invertColumns[h];
                Host.Assert(_types[iinfo].ItemType.KeyCount > 0);

                var dstGetter = GetGetterCore(ch, srcCursor, iinfo, out disposer);
                Host.Assert(disposer == null);

                helpers[h] = InvertHashHelper.Create(srcCursor, Infos[iinfo], _exes[iinfo], invertMaxCounts[h], dstGetter);
            }

            // Single pass over the data: every helper processes every row.
            while (srcCursor.MoveNext())
            {
                for (int h = 0; h < helpers.Length; ++h)
                {
                    helpers[h].Process();
                }
            }

            // Only the entries of the inverse-hash columns get filled in below.
            _keyValues = new VBuffer<DvText>[_exes.Length];
            _kvTypes = new ColumnType[_exes.Length];
            for (int h = 0; h < helpers.Length; ++h)
            {
                int iinfo = invertColumns[h];
                _keyValues[iinfo] = helpers[h].GetKeyValuesMetadata();
                Host.Assert(_keyValues[iinfo].Length == _types[iinfo].ItemType.KeyCount);
                _kvTypes[iinfo] = new VectorType(TextType.Instance, _keyValues[iinfo].Length);
            }

            ch.Done();
        }
    }

    SetMetadata();
}
/// <summary>
/// Advances the combined cursor by one row. When the current underlying cursor cannot move
/// any further, the next cursor in <c>cursors</c> is selected and positioned at offset -1.
/// </summary>
/// <returns><c>true</c> while the incremented <c>position</c> is below <c>Count</c>; otherwise <c>false</c>.</returns>
public bool MoveNext()
{
    currentCursor = cursors[cursorIndex];
    if (!currentCursor.MoveNext())
    {
        // This call is going to report the end anyway: do not step to a following cursor,
        // because after the last one there is none and indexing it would go out of range.
        if (position + 1 >= Count)
        {
            ++position;
            return false;
        }

        // Set to before the first of the next cursor in the set.
        // NOTE(review): the new cursor is only positioned at -1 and not advanced here;
        // presumably rows are read relative to cursorOffset elsewhere - confirm.
        currentCursor = cursors[++cursorIndex];
        cursorOffset = -1;
        currentCursor.MoveTo(cursorOffset);
    }

    return ++position < Count;
}
/// <summary>
/// Forwards to the wrapped cursor's <c>MoveNext</c>.
/// </summary>
/// <returns>Whatever the wrapped cursor returns.</returns>
public bool MoveNext() => _cursor.MoveNext();
/// <summary>
/// Forwards to the wrapped input's <c>MoveNext</c>.
/// </summary>
/// <returns>Whatever the wrapped input returns.</returns>
public bool MoveNext()
{
    bool moved = _input.MoveNext();
    return moved;
}
/// <summary>
/// Forwards to the wrapped input cursor's <c>MoveNext</c>.
/// </summary>
/// <returns>Whatever the wrapped input cursor returns.</returns>
public bool MoveNext() => _inputCursor.MoveNext();
/// <summary>
/// Advances two cursors in lock-step and checks that they yield the same rows: the same row
/// count, equal values in every column active on both, and optionally equal row IDs. Can
/// also verify that the IDs produced by <paramref name="curs1"/> do not repeat.
/// </summary>
/// <param name="curs1">The first cursor.</param>
/// <param name="curs2">The second cursor.</param>
/// <param name="exactTypes">Passed to <c>EqualTypes</c> when comparing column types.</param>
/// <param name="exactDoubles">Passed to <c>GetColumnComparer</c>.</param>
/// <param name="checkId">Whether row IDs of the two cursors are compared.</param>
/// <param name="checkIdCollisions">Whether repeated IDs are counted and reported as a failure.</param>
/// <returns>
/// <c>true</c> when all rows match and no ID collision was seen; <c>false</c> when only ID
/// collisions were found; otherwise the result of <c>Failed()</c>.
/// </returns>
protected bool CheckSameValues(IRowCursor curs1, IRowCursor curs2, bool exactTypes, bool exactDoubles, bool checkId, bool checkIdCollisions = true)
{
    Contracts.Assert(curs1.Schema.ColumnCount == curs2.Schema.ColumnCount);

    // Get the comparison delegates for each column that is active on both sides.
    int colLim = curs1.Schema.ColumnCount;
    Func<bool>[] comps = new Func<bool>[colLim];
    for (int col = 0; col < colLim; col++)
    {
        var f1 = curs1.IsColumnActive(col);
        var f2 = curs2.IsColumnActive(col);

        if (f1 && f2)
        {
            var type1 = curs1.Schema.GetColumnType(col);
            var type2 = curs2.Schema.GetColumnType(col);
            if (!EqualTypes(type1, type2, exactTypes))
            {
                Fail("Different types");
                return Failed();
            }

            comps[col] = GetColumnComparer(curs1, curs2, col, type1, exactDoubles);
        }
    }

    ValueGetter<UInt128> idGetter = null;
    Func<bool> idComp = checkId ? GetIdComparer(curs1, curs2, out idGetter) : null;
    HashSet<UInt128> idsSeen = null;
    if (checkIdCollisions && idGetter == null)
    {
        idGetter = curs1.GetIdGetter();
    }

    long idCollisions = 0;
    UInt128 id = default(UInt128);
    for (; ; )
    {
        bool f1 = curs1.MoveNext();
        bool f2 = curs2.MoveNext();
        if (f1 != f2)
        {
            if (f1)
            {
                Fail("Left has more rows at position: {0}", curs1.Position);
            }
            else
            {
                Fail("Right has more rows at position: {0}", curs2.Position);
            }

            return Failed();
        }

        if (!f1)
        {
            if (idCollisions > 0)
            {
                Fail("{0} id collisions among {1} items", idCollisions, Utils.Size(idsSeen) + idCollisions);
            }

            return idCollisions == 0;
        }

        if (checkIdCollisions)
        {
            idGetter(ref id);
            // Count every repeated ID so the totals in the message above are accurate.
            if (!Utils.Add(ref idsSeen, id))
            {
                idCollisions++;
            }
        }

        Contracts.Assert(curs1.Position == curs2.Position);

        for (int col = 0; col < colLim; col++)
        {
            var comp = comps[col];
            if (comp != null && !comp())
            {
                Fail("Different values in column {0} of row {1}", col, curs1.Position);
                return Failed();
            }
        }

        // The ID does not depend on the column, so compare it once per row.
        if (idComp != null && !idComp())
        {
            Fail("Different values in ID of row {0}", curs1.Position);
            return Failed();
        }
    }
}
/// <summary>
/// Reads every row of <paramref name="data"/>, writes it into one pipe per active column,
/// and hands each finished block to <paramref name="toCompress"/>. On success the row count
/// is stored in <c>_rowCount</c> and the collection is marked complete.
/// </summary>
/// <param name="toCompress">Collection that receives the finished blocks.</param>
/// <param name="data">The data view to read.</param>
/// <param name="activeColumns">The column codecs to write, one pipe each.</param>
/// <param name="rowsPerBlock">Number of rows per block; must be positive.</param>
/// <param name="sw">Stopwatch; only checked for null here.</param>
/// <param name="ch">Channel used for assertions.</param>
/// <param name="pch">Optional progress channel.</param>
/// <param name="exMarshaller">Receives any exception raised here and supplies the cancellation token.</param>
private void FetchWorker(BlockingCollection<Block> toCompress, IDataView data, ColumnCodec[] activeColumns, int rowsPerBlock, Stopwatch sw, IChannel ch, IProgressChannel pch, ExceptionMarshaller exMarshaller)
{
    Contracts.AssertValue(ch);
    Contracts.AssertValueOrNull(pch);
    ch.AssertValue(exMarshaller);
    try
    {
        ch.AssertValue(toCompress);
        ch.AssertValue(data);
        ch.AssertValue(activeColumns);
        ch.AssertValue(sw);
        ch.Assert(rowsPerBlock > 0);

        // The main thread handles fetching from the cursor, and storing it into blocks
        // passed to toCompress.
        var sourceColumns = new HashSet<int>(activeColumns.Select(col => col.SourceIndex));
        long blockIndex = 0;
        int rowsLeftInBlock = rowsPerBlock;
        using (IRowCursor cursor = data.GetRowCursor(sourceColumns.Contains))
        {
            var pipes = new WritePipe[activeColumns.Length];
            for (int p = 0; p < activeColumns.Length; ++p)
            {
                pipes[p] = WritePipe.Create(this, cursor, activeColumns[p]);
            }

            for (int p = 0; p < pipes.Length; ++p)
            {
                pipes[p].BeginBlock();
            }

            long rowsFetched = 0;
            pch?.SetHeader(new ProgressHeader(new[] { "rows" }), e => e.SetProgress(0, rowsFetched));

            while (cursor.MoveNext())
            {
                for (int p = 0; p < pipes.Length; ++p)
                {
                    pipes[p].FetchAndWrite();
                }

                rowsLeftInBlock -= 1;
                if (rowsLeftInBlock == 0)
                {
                    // Block is full: emit it for every column and start the next one.
                    for (int p = 0; p < pipes.Length; ++p)
                    {
                        // REVIEW: It may be better if EndBlock got moved to a different worker thread.
                        toCompress.Add(new Block(pipes[p].EndBlock(), p, blockIndex), exMarshaller.Token);
                        pipes[p].BeginBlock();
                    }

                    rowsLeftInBlock = rowsPerBlock;
                    blockIndex++;
                }

                rowsFetched++;
            }

            // Flush the final, partially filled block, if there is one.
            if (rowsLeftInBlock < rowsPerBlock)
            {
                for (int p = 0; p < pipes.Length; ++p)
                {
                    toCompress.Add(new Block(pipes[p].EndBlock(), p, blockIndex), exMarshaller.Token);
                }
            }

            Contracts.Assert(rowsFetched == (blockIndex + 1) * rowsPerBlock - rowsLeftInBlock);
            _rowCount = rowsFetched;
            pch?.Checkpoint(rowsFetched);
        }

        toCompress.CompleteAdding();
    }
    catch (Exception ex)
    {
        exMarshaller.Set("cursoring", ex);
    }
}