protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer) { Host.AssertValueOrNull(ch); Host.AssertValue(input); Host.Assert(0 <= iinfo && iinfo < Infos.Length); disposer = null; var typeSrc = Infos[iinfo].TypeSrc; var typeDst = _exes[iinfo].TypeDst; if (!typeDst.IsVector) { return(RowCursorUtils.GetGetterAs(typeDst, input, Infos[iinfo].Source)); } return(RowCursorUtils.GetVecGetterAs(typeDst.AsVector.ItemType, input, Infos[iinfo].Source)); }
public override void InitializeNextPass(IRow row, RoleMappedSchema schema) { Contracts.Assert(PassNum < 1); Contracts.AssertValue(schema.Label); var score = schema.GetUniqueColumn(MetadataUtils.Const.ScoreValueKind.Score); _labelGetter = RowCursorUtils.GetVecGetterAs <Float>(NumberType.Float, row, schema.Label.Index); _scoreGetter = row.GetGetter <VBuffer <Float> >(score.Index); Contracts.AssertValue(_labelGetter); Contracts.AssertValue(_scoreGetter); if (schema.Weight != null) { _weightGetter = row.GetGetter <Float>(schema.Weight.Index); } }
public static ValueGetter <VBuffer <Single> > GetLabelGetter(ISlotCursor cursor) { var type = cursor.GetSlotType().ItemType; if (type == NumberType.R4) { return(cursor.GetGetter <Single>()); } if (type == NumberType.R8 || type.IsBool) { return(GetVecGetterAs <Single>(NumberType.R4, cursor)); } Contracts.Check(type.IsKey, "Only floating point number, boolean, and key type values can be used as label."); Contracts.Assert(TestGetLabelGetter(type) == null); ulong keyMax = (ulong)type.KeyCount; if (keyMax == 0) { keyMax = ulong.MaxValue; } var getSrc = RowCursorUtils.GetVecGetterAs <ulong>(NumberType.U8, cursor); VBuffer <ulong> src = default(VBuffer <ulong>); return ((ref VBuffer <Single> dst) => { getSrc(ref src); // Unfortunately defaults in one to not translate to defaults of the other, // so this will not be sparsity preserving. Assume a dense output. Single[] vals = dst.Values; Utils.EnsureSize(ref vals, src.Length); foreach (var kv in src.Items(all: true)) { if (0 < kv.Value && kv.Value <= keyMax) { vals[kv.Key] = kv.Value - 1; } else { vals[kv.Key] = Single.NaN; } } dst = new VBuffer <Single>(src.Length, vals, dst.Indices); }); }
/// <summary> /// This is for the bagging case - vector input and outputs should be added. /// </summary> private ValueGetter <VBuffer <Float> > MakeGetterBag(IRow input, int iinfo) { Host.AssertValue(input); Host.Assert(Infos[iinfo].TypeSrc.IsVector); Host.Assert(Infos[iinfo].TypeSrc.ItemType.IsKey); Host.Assert(_bag[iinfo]); Host.Assert(Infos[iinfo].TypeSrc.ItemType.KeyCount == _types[iinfo].VectorSize); var info = Infos[iinfo]; int size = info.TypeSrc.ItemType.KeyCount; Host.Assert(size > 0); int cv = info.TypeSrc.VectorSize; Host.Assert(cv >= 0); var getSrc = RowCursorUtils.GetVecGetterAs <uint>(NumberType.U4, input, info.Source); var src = default(VBuffer <uint>); var bldr = BufferBuilder <float> .CreateDefault(); return ((ref VBuffer <Float> dst) => { bldr.Reset(size, false); getSrc(ref src); Host.Check(cv == 0 || src.Length == cv); // The indices are irrelevant in the bagging case. var values = src.Values; int count = src.Count; for (int slot = 0; slot < count; slot++) { uint key = values[slot] - 1; if (key < size) { bldr.AddFeature((int)key, 1); } } bldr.GetResult(ref dst); }); }
private static ValueGetter <Single> GetLabelGetterNotFloat(IRow cursor, int labelIndex) { var type = cursor.Schema.GetColumnType(labelIndex); Contracts.Assert(type != NumberType.R4 && type != NumberType.R8); // boolean type label mapping: True -> 1, False -> 0. if (type.IsBool) { var getBoolSrc = cursor.GetGetter <bool>(labelIndex); return ((ref Single dst) => { bool src = default; getBoolSrc(ref src); dst = Convert.ToSingle(src); }); } Contracts.Check(type.IsKey, "Only floating point number, boolean, and key type values can be used as label."); Contracts.Assert(TestGetLabelGetter(type) == null); ulong keyMax = (ulong)type.KeyCount; if (keyMax == 0) { keyMax = ulong.MaxValue; } var getSrc = RowCursorUtils.GetGetterAs <ulong>(NumberType.U8, cursor, labelIndex); return ((ref Single dst) => { ulong src = 0; getSrc(ref src); if (0 < src && src <= keyMax) { dst = src - 1; } else { dst = Single.NaN; } }); }
public override void InitializeNextPass(IRow row, RoleMappedSchema schema) { AssertValid(assertGetters: false); Host.AssertValue(row); Host.AssertValue(schema); if (_calculateDbi) { Host.AssertValue(schema.Feature); _featGetter = row.GetGetter <VBuffer <Single> >(schema.Feature.Index); } var score = schema.GetUniqueColumn(MetadataUtils.Const.ScoreValueKind.Score); Host.Assert(score.Type.VectorSize == _scoresArr.Length); _scoreGetter = row.GetGetter <VBuffer <Single> >(score.Index); if (PassNum == 0) { if (schema.Label != null) { _labelGetter = RowCursorUtils.GetLabelGetter(row, schema.Label.Index); } else { _labelGetter = (ref Single value) => value = Single.NaN; } if (schema.Weight != null) { _weightGetter = row.GetGetter <Single>(schema.Weight.Index); } } else { Host.Assert(PassNum == 1 && _calculateDbi); UnweightedCounters.InitializeSecondPass(_clusterCentroids); if (WeightedCounters != null) { WeightedCounters.InitializeSecondPass(_clusterCentroids); } } AssertValid(assertGetters: true); }
private static IColumn CloneColumnCore <T>(IColumn column) { Contracts.Assert(column is IColumn <T>); IRow meta = column.Metadata; if (meta != null) { meta = RowCursorUtils.CloneRow(meta); } var tcolumn = (IColumn <T>)column; if (!tcolumn.IsActive) { return(new InactiveImpl <T>(tcolumn.Name, meta, tcolumn.Type)); } T val = default(T); tcolumn.GetGetter()(ref val); return(GetColumn(tcolumn.Name, tcolumn.Type, ref val, meta)); }
/// <summary> /// This is for the singleton case. This should be equivalent to both Bag and Ord over /// a vector of size one. /// </summary> private ValueGetter <VBuffer <Float> > MakeGetterOne(IRow input, int iinfo) { Host.AssertValue(input); Host.Assert(Infos[iinfo].TypeSrc.IsKey); Host.Assert(Infos[iinfo].TypeSrc.KeyCount == _types[iinfo].VectorSize); int size = Infos[iinfo].TypeSrc.KeyCount; Host.Assert(size > 0); var getSrc = RowCursorUtils.GetGetterAs <uint>(NumberType.U4, input, Infos[iinfo].Source); var src = default(uint); return ((ref VBuffer <Float> dst) => { getSrc(ref src); if (src == 0 || src > size) { dst = new VBuffer <Float>(size, 0, dst.Values, dst.Indices); return; } var values = dst.Values; var indices = dst.Indices; if (Utils.Size(values) < 1) { values = new Float[1]; } if (Utils.Size(indices) < 1) { indices = new int[1]; } values[0] = 1; indices[0] = (int)src - 1; dst = new VBuffer <Float>(size, 1, values, indices); }); }
private Delegate MakeGetter(IChannel ch, IRow input, int iinfo, FinderDecorator decorator = null) { ch.Assert(_bindings.Infos[iinfo].SrcTypes.All(t => t.IsVector && t.ItemType.IsKey)); var info = _bindings.Infos[iinfo]; int srcCount = info.SrcIndices.Length; ValueGetter <VBuffer <uint> >[] getSrc = new ValueGetter <VBuffer <uint> > [srcCount]; for (int isrc = 0; isrc < srcCount; isrc++) { getSrc[isrc] = RowCursorUtils.GetVecGetterAs <uint>(NumberType.U4, input, info.SrcIndices[isrc]); } var src = default(VBuffer <uint>); var ngramIdFinder = GetNgramIdFinder(iinfo); if (decorator != null) { ngramIdFinder = decorator(iinfo, ngramIdFinder); } var bldr = new NgramBufferBuilder(_exes[iinfo].NgramLength, _exes[iinfo].SkipLength, _bindings.Types[iinfo].ValueCount, ngramIdFinder); var keyCounts = _bindings.Infos[iinfo].SrcTypes.Select( t => t.ItemType.KeyCount > 0 ? (uint)t.ItemType.KeyCount : uint.MaxValue).ToArray(); // REVIEW: Special casing the srcCount==1 case could potentially improve perf. ValueGetter <VBuffer <Float> > del = (ref VBuffer <Float> dst) => { bldr.Reset(); for (int i = 0; i < srcCount; i++) { getSrc[i](ref src); bldr.AddNgrams(ref src, i, keyCounts[i]); } bldr.GetResult(ref dst); }; return(del); }
public override Delegate[] CreateGetters(IRow input, Func <int, bool> activeCols, out Action disposer) { Host.Assert(LabelIndex >= 0); Host.Assert(ScoreIndex >= 0); disposer = null; long cachedPosition = -1; var label = default(VBuffer <Float>); var score = default(VBuffer <Float>); ValueGetter <VBuffer <Float> > nullGetter = (ref VBuffer <Float> vec) => vec = default(VBuffer <Float>); var labelGetter = activeCols(LabelOutput) || activeCols(L1Output) || activeCols(L2Output) || activeCols(DistCol) ? RowCursorUtils.GetVecGetterAs <Float>(NumberType.Float, input, LabelIndex) : nullGetter; var scoreGetter = activeCols(ScoreOutput) || activeCols(L1Output) || activeCols(L2Output) || activeCols(DistCol) ? input.GetGetter <VBuffer <Float> >(ScoreIndex) : nullGetter; Action updateCacheIfNeeded = () => { if (cachedPosition != input.Position) { labelGetter(ref label); scoreGetter(ref score); cachedPosition = input.Position; } }; var getters = new Delegate[5]; if (activeCols(LabelOutput)) { ValueGetter <VBuffer <Float> > labelFn = (ref VBuffer <Float> dst) => { updateCacheIfNeeded(); label.CopyTo(ref dst); }; getters[LabelOutput] = labelFn; } if (activeCols(ScoreOutput)) { ValueGetter <VBuffer <Float> > scoreFn = (ref VBuffer <Float> dst) => { updateCacheIfNeeded(); score.CopyTo(ref dst); }; getters[ScoreOutput] = scoreFn; } if (activeCols(L1Output)) { ValueGetter <double> l1Fn = (ref double dst) => { updateCacheIfNeeded(); dst = VectorUtils.L1Distance(ref label, ref score); }; getters[L1Output] = l1Fn; } if (activeCols(L2Output)) { ValueGetter <double> l2Fn = (ref double dst) => { updateCacheIfNeeded(); dst = VectorUtils.L2DistSquared(ref label, ref score); }; getters[L2Output] = l2Fn; } if (activeCols(DistCol)) { ValueGetter <double> distFn = (ref double dst) => { updateCacheIfNeeded(); dst = MathUtils.Sqrt(VectorUtils.L2DistSquared(ref label, ref score)); }; getters[DistCol] = distFn; } return(getters); }
/// <summary> /// This is for the indicator (non-bagging) case - vector input and outputs should be concatenated. /// </summary> private ValueGetter <VBuffer <Float> > MakeGetterInd(IRow input, int iinfo) { Host.AssertValue(input); Host.Assert(Infos[iinfo].TypeSrc.IsVector); Host.Assert(Infos[iinfo].TypeSrc.ItemType.IsKey); Host.Assert(!_bag[iinfo]); var info = Infos[iinfo]; int size = info.TypeSrc.ItemType.KeyCount; Host.Assert(size > 0); int cv = info.TypeSrc.VectorSize; Host.Assert(cv >= 0); Host.Assert(_types[iinfo].VectorSize == size * cv); var getSrc = RowCursorUtils.GetVecGetterAs <uint>(NumberType.U4, input, info.Source); var src = default(VBuffer <uint>); return ((ref VBuffer <Float> dst) => { getSrc(ref src); int lenSrc = src.Length; Host.Check(lenSrc == cv || cv == 0); // Since we generate values in order, no need for a builder. var valuesDst = dst.Values; var indicesDst = dst.Indices; int lenDst = checked (size * lenSrc); int cntSrc = src.Count; if (Utils.Size(valuesDst) < cntSrc) { valuesDst = new Float[cntSrc]; } if (Utils.Size(indicesDst) < cntSrc) { indicesDst = new int[cntSrc]; } var values = src.Values; int count = 0; if (src.IsDense) { Host.Assert(lenSrc == cntSrc); for (int slot = 0; slot < cntSrc; slot++) { Host.Assert(count < cntSrc); uint key = values[slot] - 1; if (key >= (uint)size) { continue; } valuesDst[count] = 1; indicesDst[count++] = slot * size + (int)key; } } else { var indices = src.Indices; for (int islot = 0; islot < cntSrc; islot++) { Host.Assert(count < cntSrc); uint key = values[islot] - 1; if (key >= (uint)size) { continue; } valuesDst[count] = 1; indicesDst[count++] = indices[islot] * size + (int)key; } } dst = new VBuffer <Float>(lenDst, count, valuesDst, indicesDst); }); }
public override Delegate[] CreateGetters(IRow input, Func <int, bool> activeCols, out Action disposer) { Host.Assert(LabelIndex >= 0); Host.Assert(ScoreIndex >= 0); disposer = null; long cachedPosition = -1; Float label = 0; var score = default(VBuffer <Float>); var l1 = VBufferUtils.CreateDense <Double>(_scoreSize); ValueGetter <Float> nanGetter = (ref Float value) => value = Single.NaN; var labelGetter = activeCols(L1Col) || activeCols(L2Col) ? RowCursorUtils.GetLabelGetter(input, LabelIndex) : nanGetter; ValueGetter <VBuffer <Float> > scoreGetter; if (activeCols(L1Col) || activeCols(L2Col)) { scoreGetter = input.GetGetter <VBuffer <Float> >(ScoreIndex); } else { scoreGetter = (ref VBuffer <Float> dst) => dst = default(VBuffer <Float>); } Action updateCacheIfNeeded = () => { if (cachedPosition != input.Position) { labelGetter(ref label); scoreGetter(ref score); var lab = (Double)label; foreach (var s in score.Items(all: true)) { l1.Values[s.Key] = Math.Abs(lab - s.Value); } cachedPosition = input.Position; } }; var getters = new Delegate[2]; if (activeCols(L1Col)) { ValueGetter <VBuffer <Double> > l1Fn = (ref VBuffer <Double> dst) => { updateCacheIfNeeded(); l1.CopyTo(ref dst); }; getters[L1Col] = l1Fn; } if (activeCols(L2Col)) { VBufferUtils.PairManipulator <Double, Double> sqr = (int slot, Double x, ref Double y) => y = x * x; ValueGetter <VBuffer <Double> > l2Fn = (ref VBuffer <Double> dst) => { updateCacheIfNeeded(); dst = new VBuffer <Double>(_scoreSize, 0, dst.Values, dst.Indices); VBufferUtils.ApplyWith(ref l1, ref dst, sqr); }; getters[L2Col] = l2Fn; } return(getters); }
protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer) { Host.AssertValueOrNull(ch); Host.AssertValue(input); Host.Assert(0 <= iinfo && iinfo < Infos.Length); Host.Assert(Infos[iinfo].TypeSrc.IsVector); Host.Assert(Infos[iinfo].TypeSrc.ItemType.IsKey); disposer = null; var getSrc = RowCursorUtils.GetVecGetterAs <uint>(NumberType.U4, input, Infos[iinfo].Source); var src = default(VBuffer <uint>); var bldr = new NgramBufferBuilder(_exes[iinfo].NgramLength, _exes[iinfo].SkipLength, _ngramMaps[iinfo].Count, GetNgramIdFinder(iinfo)); var keyCount = (uint)Infos[iinfo].TypeSrc.ItemType.KeyCount; if (keyCount == 0) { keyCount = uint.MaxValue; } ValueGetter <VBuffer <Float> > del; switch (_exes[iinfo].Weighting) { case WeightingCriteria.TfIdf: Host.AssertValue(_invDocFreqs[iinfo]); del = (ref VBuffer <Float> dst) => { getSrc(ref src); if (!bldr.IsEmpty) { bldr.Reset(); bldr.AddNgrams(ref src, 0, keyCount); bldr.GetResult(ref dst); VBufferUtils.Apply(ref dst, (int i, ref Float v) => v = (Float)(v * _invDocFreqs[iinfo][i])); } else { dst = new VBuffer <Float>(0, dst.Values, dst.Indices); } }; break; case WeightingCriteria.Idf: Host.AssertValue(_invDocFreqs[iinfo]); del = (ref VBuffer <Float> dst) => { getSrc(ref src); if (!bldr.IsEmpty) { bldr.Reset(); bldr.AddNgrams(ref src, 0, keyCount); bldr.GetResult(ref dst); VBufferUtils.Apply(ref dst, (int i, ref Float v) => v = v >= 1 ? (Float)_invDocFreqs[iinfo][i] : 0); } else { dst = new VBuffer <Float>(0, dst.Values, dst.Indices); } }; break; case WeightingCriteria.Tf: del = (ref VBuffer <Float> dst) => { getSrc(ref src); if (!bldr.IsEmpty) { bldr.Reset(); bldr.AddNgrams(ref src, 0, keyCount); bldr.GetResult(ref dst); } else { dst = new VBuffer <Float>(0, dst.Values, dst.Indices); } }; break; default: throw Host.Except("Unsupported weighting criteria"); } return(del); }
private SequencePool[] Train(Arguments args, IDataView trainingData, out double[][] invDocFreqs) { // Contains the maximum number of grams to store in the dictionary, for each level of ngrams, // from 1 (in position 0) up to ngramLength (in position ngramLength-1) var lims = new int[Infos.Length][]; for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { var all = args.Column[iinfo].AllLengths ?? args.AllLengths; var ngramLength = _exes[iinfo].NgramLength; var maxNumTerms = Utils.Size(args.Column[iinfo].MaxNumTerms) > 0 ? args.Column[iinfo].MaxNumTerms : args.MaxNumTerms; if (!all) { Host.CheckUserArg(Utils.Size(maxNumTerms) == 0 || Utils.Size(maxNumTerms) == 1 && maxNumTerms[0] > 0, nameof(args.MaxNumTerms)); lims[iinfo] = new int[ngramLength]; lims[iinfo][ngramLength - 1] = Utils.Size(maxNumTerms) == 0 ? Arguments.DefaultMaxTerms : maxNumTerms[0]; } else { Host.CheckUserArg(Utils.Size(maxNumTerms) <= ngramLength, nameof(args.MaxNumTerms)); Host.CheckUserArg(Utils.Size(maxNumTerms) == 0 || maxNumTerms.All(i => i >= 0) && maxNumTerms[maxNumTerms.Length - 1] > 0, nameof(args.MaxNumTerms)); var extend = Utils.Size(maxNumTerms) == 0 ? Arguments.DefaultMaxTerms : maxNumTerms[maxNumTerms.Length - 1]; lims[iinfo] = Utils.BuildArray(ngramLength, i => i < Utils.Size(maxNumTerms) ? maxNumTerms[i] : extend); } } var helpers = new NgramBufferBuilder[Infos.Length]; var getters = new ValueGetter <VBuffer <uint> > [Infos.Length]; var src = new VBuffer <uint> [Infos.Length]; // Keep track of how many grams are in the pool for each value of n. Position // i in _counts counts how many (i+1)-grams are in the pool for column iinfo. var counts = new int[Infos.Length][]; var ngramMaps = new SequencePool[Infos.Length]; bool[] activeInput = new bool[trainingData.Schema.ColumnCount]; foreach (var info in Infos) { activeInput[info.Source] = true; } using (var cursor = trainingData.GetRowCursor(col => activeInput[col])) using (var pch = Host.StartProgressChannel("Building n-gram dictionary")) { for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { Host.Assert(Infos[iinfo].TypeSrc.IsVector && Infos[iinfo].TypeSrc.ItemType.IsKey); var ngramLength = _exes[iinfo].NgramLength; var skipLength = _exes[iinfo].SkipLength; getters[iinfo] = RowCursorUtils.GetVecGetterAs <uint>(NumberType.U4, cursor, Infos[iinfo].Source); src[iinfo] = default(VBuffer <uint>); counts[iinfo] = new int[ngramLength]; ngramMaps[iinfo] = new SequencePool(); // Note: GetNgramIdFinderAdd will control how many ngrams of a specific length will // be added (using lims[iinfo]), therefore we set slotLim to the maximum helpers[iinfo] = new NgramBufferBuilder(ngramLength, skipLength, Utils.ArrayMaxSize, GetNgramIdFinderAdd(counts[iinfo], lims[iinfo], ngramMaps[iinfo], _exes[iinfo].RequireIdf(), Host)); } int cInfoFull = 0; bool[] infoFull = new bool[Infos.Length]; invDocFreqs = new double[Infos.Length][]; long totalDocs = 0; Double rowCount = trainingData.GetRowCount(true) ?? Double.NaN; var buffers = new VBuffer <float> [Infos.Length]; pch.SetHeader(new ProgressHeader(new[] { "Total n-grams" }, new[] { "documents" }), e => e.SetProgress(0, totalDocs, rowCount)); while (cInfoFull < Infos.Length && cursor.MoveNext()) { totalDocs++; for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { getters[iinfo](ref src[iinfo]); var keyCount = (uint)Infos[iinfo].TypeSrc.ItemType.KeyCount; if (keyCount == 0) { keyCount = uint.MaxValue; } if (!infoFull[iinfo]) { if (_exes[iinfo].RequireIdf()) { helpers[iinfo].Reset(); } helpers[iinfo].AddNgrams(ref src[iinfo], 0, keyCount); if (_exes[iinfo].RequireIdf()) { int totalNgrams = counts[iinfo].Sum(); Utils.EnsureSize(ref invDocFreqs[iinfo], totalNgrams); helpers[iinfo].GetResult(ref buffers[iinfo]); foreach (var pair in buffers[iinfo].Items()) { if (pair.Value >= 1) { invDocFreqs[iinfo][pair.Key] += 1; } } } } AssertValid(counts[iinfo], lims[iinfo], ngramMaps[iinfo]); } } pch.Checkpoint(counts.Sum(c => c.Sum()), totalDocs); for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { for (int i = 0; i < Utils.Size(invDocFreqs[iinfo]); i++) { if (invDocFreqs[iinfo][i] != 0) { invDocFreqs[iinfo][i] = Math.Log(totalDocs / invDocFreqs[iinfo][i]); } } } for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { AssertValid(counts[iinfo], lims[iinfo], ngramMaps[iinfo]); int ngramLength = _exes[iinfo].NgramLength; for (int i = 0; i < ngramLength; i++) { _exes[iinfo].NonEmptyLevels[i] = counts[iinfo][i] > 0; } } return(ngramMaps); } }
// The multi-output regression evaluator prints only the per-label metrics for each fold. protected override void PrintFoldResultsCore(IChannel ch, Dictionary <string, IDataView> metrics) { IDataView fold; if (!metrics.TryGetValue(MetricKinds.OverallMetrics, out fold)) { throw ch.Except("No overall metrics found"); } int isWeightedCol; bool needWeighted = fold.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.IsWeighted, out isWeightedCol); int stratCol; bool hasStrats = fold.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratCol, out stratCol); int stratVal; bool hasStratVals = fold.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratVal, out stratVal); ch.Assert(hasStrats == hasStratVals); var colCount = fold.Schema.ColumnCount; var vBufferGetters = new ValueGetter <VBuffer <double> > [colCount]; using (var cursor = fold.GetRowCursor(col => true)) { DvBool isWeighted = DvBool.False; ValueGetter <DvBool> isWeightedGetter; if (needWeighted) { isWeightedGetter = cursor.GetGetter <DvBool>(isWeightedCol); } else { isWeightedGetter = (ref DvBool dst) => dst = DvBool.False; } ValueGetter <uint> stratGetter; if (hasStrats) { var type = cursor.Schema.GetColumnType(stratCol); stratGetter = RowCursorUtils.GetGetterAs <uint>(type, cursor, stratCol); } else { stratGetter = (ref uint dst) => dst = 0; } int labelCount = 0; for (int i = 0; i < fold.Schema.ColumnCount; i++) { if (fold.Schema.IsHidden(i) || (needWeighted && i == isWeightedCol) || (hasStrats && (i == stratCol || i == stratVal))) { continue; } var type = fold.Schema.GetColumnType(i); if (type.IsKnownSizeVector && type.ItemType == NumberType.R8) { vBufferGetters[i] = cursor.GetGetter <VBuffer <double> >(i); if (labelCount == 0) { labelCount = type.VectorSize; } else { ch.Check(labelCount == type.VectorSize, "All vector metrics should contain the same number of slots"); } } } var labelNames = new DvText[labelCount]; for (int j = 0; j < labelCount; j++) { labelNames[j] = new DvText(string.Format("Label_{0}", j)); } var sb = new StringBuilder(); sb.AppendLine("Per-label metrics:"); sb.AppendFormat("{0,12} ", " "); for (int i = 0; i < labelCount; i++) { sb.AppendFormat(" {0,20}", labelNames[i]); } sb.AppendLine(); VBuffer <Double> metricVals = default(VBuffer <Double>); bool foundWeighted = !needWeighted; bool foundUnweighted = false; uint strat = 0; while (cursor.MoveNext()) { isWeightedGetter(ref isWeighted); if (foundWeighted && isWeighted.IsTrue || foundUnweighted && isWeighted.IsFalse) { throw ch.Except("Multiple {0} rows found in overall metrics data view", isWeighted.IsTrue ? "weighted" : "unweighted"); } if (isWeighted.IsTrue) { foundWeighted = true; } else { foundUnweighted = true; } stratGetter(ref strat); if (strat > 0) { continue; } for (int i = 0; i < colCount; i++) { if (vBufferGetters[i] != null) { vBufferGetters[i](ref metricVals); ch.Assert(metricVals.Length == labelCount); sb.AppendFormat("{0}{1,12}:", isWeighted.IsTrue ? "Weighted " : "", fold.Schema.GetColumnName(i)); foreach (var metric in metricVals.Items(all: true)) { sb.AppendFormat(" {0,20:G20}", metric.Value); } sb.AppendLine(); } } if (foundUnweighted && foundWeighted) { break; } } ch.Assert(foundUnweighted && foundWeighted); ch.Info(sb.ToString()); } }
protected override void PrintFoldResultsCore(IChannel ch, Dictionary <string, IDataView> metrics) { IDataView top; if (!metrics.TryGetValue(AnomalyDetectionEvaluator.TopKResults, out top)) { throw Host.Except("Did not find the top-k results data view"); } var sb = new StringBuilder(); using (var cursor = top.GetRowCursor(col => true)) { int index; if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Instance, out index)) { throw Host.Except("Data view does not contain the 'Instance' column"); } var instanceGetter = cursor.GetGetter <ReadOnlyMemory <char> >(index); if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.AnomalyScore, out index)) { throw Host.Except("Data view does not contain the 'Anomaly Score' column"); } var scoreGetter = cursor.GetGetter <Single>(index); if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Label, out index)) { throw Host.Except("Data view does not contain the 'Label' column"); } var labelGetter = cursor.GetGetter <Single>(index); bool hasRows = false; while (cursor.MoveNext()) { if (!hasRows) { sb.AppendFormat("{0} Top-scored Results", _topScored); sb.AppendLine(); sb.AppendLine("================================================="); sb.AppendLine("Instance Anomaly Score Labeled"); hasRows = true; } var name = default(ReadOnlyMemory <char>); Single score = 0; Single label = 0; instanceGetter(ref name); scoreGetter(ref score); labelGetter(ref label); sb.AppendFormat("{0,-10}{1,12:G4}{2,12}", name, score, label); sb.AppendLine(); } } if (sb.Length > 0) { ch.Info(MessageSensitivity.UserData, sb.ToString()); } IDataView overall; if (!metrics.TryGetValue(MetricKinds.OverallMetrics, out overall)) { throw Host.Except("No overall metrics found"); } // Find the number of anomalies, and the thresholds. int numAnomIndex; if (!overall.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.OverallMetrics.NumAnomalies, out numAnomIndex)) { throw Host.Except("Could not find the 'NumAnomalies' column"); } int stratCol; var hasStrat = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratCol, out stratCol); int stratVal; bool hasStratVals = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratVal, out stratVal); Contracts.Assert(hasStrat == hasStratVals); long numAnomalies = 0; using (var cursor = overall.GetRowCursor(col => col == numAnomIndex || (hasStrat && col == stratCol))) { var numAnomGetter = cursor.GetGetter <long>(numAnomIndex); ValueGetter <uint> stratGetter = null; if (hasStrat) { var type = cursor.Schema.GetColumnType(stratCol); stratGetter = RowCursorUtils.GetGetterAs <uint>(type, cursor, stratCol); } bool foundRow = false; while (cursor.MoveNext()) { uint strat = 0; if (stratGetter != null) { stratGetter(ref strat); } if (strat > 0) { continue; } if (foundRow) { throw Host.Except("Found multiple non-stratified rows in overall results data view"); } foundRow = true; numAnomGetter(ref numAnomalies); } } var kFormatName = string.Format(FoldDrAtKFormat, _k); var pFormatName = string.Format(FoldDrAtPFormat, _p); var numAnomName = string.Format(FoldDrAtNumAnomaliesFormat, numAnomalies); (string Source, string Name)[] cols =
protected override void PrintFoldResultsCore(IChannel ch, Dictionary <string, IDataView> metrics) { IDataView top; if (!metrics.TryGetValue(AnomalyDetectionEvaluator.TopKResults, out top)) { throw Host.Except("Did not find the top-k results data view"); } var sb = new StringBuilder(); using (var cursor = top.GetRowCursor(col => true)) { int index; if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Instance, out index)) { throw Host.Except("Data view does not contain the 'Instance' column"); } var instanceGetter = cursor.GetGetter <DvText>(index); if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.AnomalyScore, out index)) { throw Host.Except("Data view does not contain the 'Anomaly Score' column"); } var scoreGetter = cursor.GetGetter <Single>(index); if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Label, out index)) { throw Host.Except("Data view does not contain the 'Label' column"); } var labelGetter = cursor.GetGetter <Single>(index); bool hasRows = false; while (cursor.MoveNext()) { if (!hasRows) { sb.AppendFormat("{0} Top-scored Results", _topScored); sb.AppendLine(); sb.AppendLine("================================================="); sb.AppendLine("Instance Anomaly Score Labeled"); hasRows = true; } var name = default(DvText); Single score = 0; Single label = 0; instanceGetter(ref name); scoreGetter(ref score); labelGetter(ref label); sb.AppendFormat("{0,-10}{1,12:G4}{2,12}", name, score, label); sb.AppendLine(); } } if (sb.Length > 0) { ch.Info(MessageSensitivity.UserData, sb.ToString()); } IDataView overall; if (!metrics.TryGetValue(MetricKinds.OverallMetrics, out overall)) { throw Host.Except("No overall metrics found"); } // Find the number of anomalies, and the thresholds. int numAnomIndex; if (!overall.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.OverallMetrics.NumAnomalies, out numAnomIndex)) { throw Host.Except("Could not find the 'NumAnomalies' column"); } int stratCol; var hasStrat = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratCol, out stratCol); int stratVal; bool hasStratVals = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratVal, out stratVal); Contracts.Assert(hasStrat == hasStratVals); DvInt8 numAnomalies = 0; using (var cursor = overall.GetRowCursor(col => col == numAnomIndex || (hasStrat && col == stratCol))) { var numAnomGetter = cursor.GetGetter <DvInt8>(numAnomIndex); ValueGetter <uint> stratGetter = null; if (hasStrat) { var type = cursor.Schema.GetColumnType(stratCol); stratGetter = RowCursorUtils.GetGetterAs <uint>(type, cursor, stratCol); } bool foundRow = false; while (cursor.MoveNext()) { uint strat = 0; if (stratGetter != null) { stratGetter(ref strat); } if (strat > 0) { continue; } if (foundRow) { throw Host.Except("Found multiple non-stratified rows in overall results data view"); } foundRow = true; numAnomGetter(ref numAnomalies); } } var args = new ChooseColumnsTransform.Arguments(); var cols = new List <ChooseColumnsTransform.Column>() { new ChooseColumnsTransform.Column() { Name = string.Format(FoldDrAtKFormat, _k), Source = AnomalyDetectionEvaluator.OverallMetrics.DrAtK }, new ChooseColumnsTransform.Column() { Name = string.Format(FoldDrAtPFormat, _p), Source = AnomalyDetectionEvaluator.OverallMetrics.DrAtPFpr }, new ChooseColumnsTransform.Column() { Name = string.Format(FoldDrAtNumAnomaliesFormat, numAnomalies), Source = AnomalyDetectionEvaluator.OverallMetrics.DrAtNumPos }, new ChooseColumnsTransform.Column() { Name = AnomalyDetectionEvaluator.OverallMetrics.ThreshAtK }, new ChooseColumnsTransform.Column() { Name = AnomalyDetectionEvaluator.OverallMetrics.ThreshAtP }, new ChooseColumnsTransform.Column() { Name = AnomalyDetectionEvaluator.OverallMetrics.ThreshAtNumPos }, new ChooseColumnsTransform.Column() { Name = BinaryClassifierEvaluator.Auc } }; args.Column = cols.ToArray(); IDataView fold = new ChooseColumnsTransform(Host, args, overall); string weightedFold; ch.Info(MetricWriter.GetPerFoldResults(Host, fold, out weightedFold)); }