/// <summary>
/// Populates the metric properties by fetching each named value from
/// <paramref name="overallResult"/>, and builds the confusion matrix from
/// <paramref name="confusionMatrix"/>.
/// </summary>
/// <param name="host">Host passed through to the fetch and confusion-matrix helpers.</param>
/// <param name="overallResult">Row holding the overall metric columns.</param>
/// <param name="topKPredictionCount">Stored as-is; top-K accuracy is only fetched when this is positive.</param>
/// <param name="confusionMatrix">Data view handed to <c>MetricWriter.GetConfusionMatrix</c>.</param>
internal MulticlassClassificationMetrics(IHost host, DataViewRow overallResult, int topKPredictionCount, IDataView confusionMatrix)
{
    // Small helper so each scalar metric is read the same way.
    double ReadMetric(string columnName)
    {
        return RowCursorUtils.Fetch<double>(host, overallResult, columnName);
    }

    MicroAccuracy = ReadMetric(MulticlassClassificationEvaluator.AccuracyMicro);
    MacroAccuracy = ReadMetric(MulticlassClassificationEvaluator.AccuracyMacro);
    LogLoss = ReadMetric(MulticlassClassificationEvaluator.LogLoss);
    LogLossReduction = ReadMetric(MulticlassClassificationEvaluator.LogLossReduction);

    TopKPredictionCount = topKPredictionCount;
    if (topKPredictionCount > 0)
    {
        TopKAccuracy = ReadMetric(MulticlassClassificationEvaluator.TopKAccuracy);
    }

    var logLossByClass = RowCursorUtils.Fetch<VBuffer<double>>(
        host, overallResult, MulticlassClassificationEvaluator.PerClassLogLoss);
    PerClassLogLoss = logLossByClass.DenseValues().ToImmutableArray();

    // The length of the per-class log-loss vector is passed on to the confusion-matrix builder.
    ConfusionMatrix = MetricWriter.GetConfusionMatrix(host, confusionMatrix, binary: false, logLossByClass.Length);
}
/// <summary>
/// Creates a <see cref="Single"/> label getter for a column whose type is boolean or key.
/// Booleans are converted with <see cref="Convert.ToSingle(bool)"/>; key values in
/// (0, keyMax] map to <c>value - 1</c> and anything else maps to <see cref="Single.NaN"/>.
/// </summary>
/// <param name="cursor">Row whose schema and getters are used.</param>
/// <param name="labelIndex">Index of the label column in the row's schema.</param>
/// <returns>A getter writing the converted label into its <c>ref</c> argument.</returns>
private static ValueGetter<Single> GetLabelGetterNotFloat(DataViewRow cursor, int labelIndex)
{
    var labelType = cursor.Schema[labelIndex].Type;
    Contracts.Assert(labelType != NumberDataViewType.Single && labelType != NumberDataViewType.Double);

    // boolean type label mapping: True -> 1, False -> 0.
    if (labelType is BooleanDataViewType)
    {
        var readBool = cursor.GetGetter<bool>(labelIndex);
        return (ref Single dst) =>
        {
            bool flag = default;
            readBool(ref flag);
            dst = Convert.ToSingle(flag);
        };
    }

    if (labelType is KeyType keyType)
    {
        Contracts.Assert(TestGetLabelGetter(labelType) == null);

        // A count of zero is treated as "no upper bound" on the key value.
        ulong declaredCount = (ulong)keyType.Count;
        ulong keyMax = declaredCount == 0 ? ulong.MaxValue : declaredCount;

        var readKey = RowCursorUtils.GetGetterAs<ulong>(NumberDataViewType.UInt64, cursor, labelIndex);
        return (ref Single dst) =>
        {
            ulong raw = 0;
            readKey(ref raw);
            bool inRange = raw > 0 && raw <= keyMax;
            dst = inRange ? (Single)(raw - 1) : Single.NaN;
        };
    }

    throw Contracts.Except("Only floating point number, boolean, and key type values can be used as label.");
}
/// <summary>
/// Opens a cursor over <c>Source</c> restricted to the input columns reported by
/// <c>GetActive</c>, and wraps it in this transform's <c>Cursor</c>.
/// </summary>
/// <param name="columnsNeeded">Output columns the caller wants active.</param>
/// <param name="rand">Optional random instance, forwarded to the source cursor.</param>
protected override RowCursor GetRowCursorCore(IEnumerable<Schema.Column> columnsNeeded, Random rand = null)
{
    Contracts.AssertValueOrNull(rand);

    var outputPredicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);
    var bindings = GetBindings();
    var active = GetActive(bindings, outputPredicate, out Func<int, bool> inputPred, out Func<int, bool> predicateMapper);

    // Only the source columns selected by the input predicate are requested.
    var sourceColumns = Source.Schema.Where(column => inputPred(column.Index));
    var sourceCursor = Source.GetRowCursor(sourceColumns, rand);
    return new Cursor(Host, this, sourceCursor, active, predicateMapper);
}
/// <summary>
/// Creates a getter producing the slot's label values as a <c>VBuffer&lt;Single&gt;</c>.
/// Single item types use the cursor's getter directly, double and boolean go through
/// <c>GetVecGetterAs</c>, and key values in (0, keyMax] map to <c>value - 1</c> with
/// everything else mapped to <see cref="Single.NaN"/>.
/// </summary>
/// <param name="cursor">Slot cursor whose item type selects the conversion.</param>
internal static ValueGetter<VBuffer<Single>> GetLabelGetter(SlotCursor cursor)
{
    var itemType = cursor.GetSlotType().ItemType;

    if (itemType == NumberDataViewType.Single)
    {
        return cursor.GetGetter<Single>();
    }

    if (itemType == NumberDataViewType.Double || itemType is BooleanDataViewType)
    {
        return GetVecGetterAs<Single>(NumberDataViewType.Single, cursor);
    }

    if (itemType is KeyType keyType)
    {
        Contracts.Assert(TestGetLabelGetter(itemType) == null);

        // A count of zero is treated as "no upper bound" on the key value.
        ulong declaredCount = (ulong)keyType.Count;
        ulong keyMax = declaredCount == 0 ? ulong.MaxValue : declaredCount;

        var readKeys = RowCursorUtils.GetVecGetterAs<ulong>(NumberDataViewType.UInt64, cursor);

        // Source buffer is captured by the returned delegate and reused on each call.
        VBuffer<ulong> keys = default(VBuffer<ulong>);
        return (ref VBuffer<Single> dst) =>
        {
            readKeys(ref keys);

            // Unfortunately defaults in one do not translate to defaults of the other,
            // so this will not be sparsity preserving. Assume a dense output.
            var editor = VBufferEditor.Create(ref dst, keys.Length);
            foreach (var slot in keys.Items(all: true))
            {
                bool inRange = slot.Value > 0 && slot.Value <= keyMax;
                editor.Values[slot.Key] = inRange ? (Single)(slot.Value - 1) : Single.NaN;
            }

            dst = editor.Commit();
        };
    }

    throw Contracts.Except("Only floating point number, boolean, and key type values can be used as label.");
}
/// <summary>
/// Sets up the label, score and (when a weight column is present) weight getters
/// for <paramref name="row"/>.
/// </summary>
/// <param name="row">Row the getters are bound to.</param>
/// <param name="schema">Role-mapped schema supplying the label, score and weight columns.</param>
internal override void InitializeNextPass(Row row, RoleMappedSchema schema)
{
    Contracts.Assert(PassNum < 1);
    Contracts.Assert(schema.Label.HasValue);

    var scoreColumn = schema.GetUniqueColumn(MetadataUtils.Const.ScoreValueKind.Score);
    var labelColumnIndex = schema.Label.Value.Index;

    _labelGetter = RowCursorUtils.GetVecGetterAs<Float>(NumberType.Float, row, labelColumnIndex);
    _scoreGetter = row.GetGetter<VBuffer<Float>>(scoreColumn.Index);
    Contracts.AssertValue(_labelGetter);
    Contracts.AssertValue(_scoreGetter);

    // The weight getter is only assigned when the schema has a weight column.
    if (!schema.Weight.HasValue)
    {
        return;
    }

    _weightGetter = row.GetGetter<Float>(schema.Weight.Value.Index);
}
/// <summary>
/// Opens one cursor per source and combines them into a single <c>Cursor</c>.
/// Sources with no input predicate get the cursor from <c>GetMinimumCursor</c>;
/// the others get a cursor over the columns selected by their predicate.
/// </summary>
/// <param name="columnsNeeded">Output columns the caller wants active.</param>
/// <param name="rand">Validated as value-or-null; the source cursors are opened with <c>null</c>.</param>
public DataViewRowCursor GetRowCursor(IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand = null)
{
    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, Schema);
    _host.CheckValueOrNull(rand);

    var srcPredicates = _zipBinding.GetInputPredicates(predicate);

    // REVIEW: if we know the row counts, we could only open cursor if it has needed columns, and have the
    // outer cursor handle the early stopping. If we don't know row counts, we need to open all the cursors because
    // we don't know which one will be the shortest.
    // One reason this is not done currently is because the API has 'somewhat mutable' data views, so potentially this
    // optimization might backfire.
    var srcCursors = _sources
        .Select((view, position) =>
        {
            var sourcePredicate = srcPredicates[position];
            if (sourcePredicate == null)
            {
                return GetMinimumCursor(view);
            }

            var wantedColumns = view.Schema.Where(column => sourcePredicate(column.Index));
            return view.GetRowCursor(wantedColumns, null);
        })
        .ToArray();

    return new Cursor(this, srcCursors, predicate);
}
/// <summary>
/// Populates the metric properties by fetching each named value from
/// <paramref name="overallResult"/>.
/// </summary>
/// <param name="ectx">Exception context passed through to the fetch helper.</param>
/// <param name="overallResult">Row holding the overall metric columns.</param>
/// <param name="topK">Stored as-is; top-K accuracy is only fetched when this is positive.</param>
internal MultiClassClassifierMetrics(IExceptionContext ectx, DataViewRow overallResult, int topK)
{
    // Small helper so each scalar metric is read the same way.
    double ReadMetric(string columnName)
    {
        return RowCursorUtils.Fetch<double>(ectx, overallResult, columnName);
    }

    MicroAccuracy = ReadMetric(MultiClassClassifierEvaluator.AccuracyMicro);
    MacroAccuracy = ReadMetric(MultiClassClassifierEvaluator.AccuracyMacro);
    LogLoss = ReadMetric(MultiClassClassifierEvaluator.LogLoss);
    LogLossReduction = ReadMetric(MultiClassClassifierEvaluator.LogLossReduction);

    TopK = topK;
    if (topK > 0)
    {
        TopKAccuracy = ReadMetric(MultiClassClassifierEvaluator.TopKAccuracy);
    }

    var logLossByClass = RowCursorUtils.Fetch<VBuffer<double>>(
        ectx, overallResult, MultiClassClassifierEvaluator.PerClassLogLoss);
    PerClassLogLoss = logLossByClass.DenseValues().ToImmutableArray();
}
/// <summary>
/// Creates a <see cref="Single"/> label getter for a column whose type is boolean or key.
/// Booleans are converted with <see cref="Convert.ToSingle(bool)"/>; key values in
/// (0, keyMax] map to <c>value - 1</c> and anything else maps to <see cref="Single.NaN"/>.
/// </summary>
/// <param name="cursor">Row whose schema and getters are used.</param>
/// <param name="labelIndex">Index of the label column in the row's schema.</param>
/// <returns>A getter writing the converted label into its <c>ref</c> argument.</returns>
private static ValueGetter<Single> GetLabelGetterNotFloat(Row cursor, int labelIndex)
{
    var labelType = cursor.Schema[labelIndex].Type;
    Contracts.Assert(labelType != NumberType.R4 && labelType != NumberType.R8);

    // boolean type label mapping: True -> 1, False -> 0.
    if (labelType.IsBool)
    {
        var readBool = cursor.GetGetter<bool>(labelIndex);
        return (ref Single dst) =>
        {
            bool flag = default;
            readBool(ref flag);
            dst = Convert.ToSingle(flag);
        };
    }

    Contracts.Check(labelType.IsKey, "Only floating point number, boolean, and key type values can be used as label.");
    Contracts.Assert(TestGetLabelGetter(labelType) == null);

    // A key count of zero is treated as "no upper bound" on the key value.
    ulong declaredCount = (ulong)labelType.KeyCount;
    ulong keyMax = declaredCount == 0 ? ulong.MaxValue : declaredCount;

    var readKey = RowCursorUtils.GetGetterAs<ulong>(NumberType.U8, cursor, labelIndex);
    return (ref Single dst) =>
    {
        ulong raw = 0;
        readKey(ref raw);
        bool inRange = raw > 0 && raw <= keyMax;
        dst = inRange ? (Single)(raw - 1) : Single.NaN;
    };
}
/// <summary>
/// Populates the metric properties by fetching each named value from
/// <paramref name="overallResult"/>; the per-class log-loss vector is copied
/// into a freshly allocated array.
/// </summary>
/// <param name="ectx">Exception context passed through to the fetch helper.</param>
/// <param name="overallResult">Row holding the overall metric columns.</param>
/// <param name="topK">Stored as-is; top-K accuracy is only fetched when this is positive.</param>
internal MultiClassClassifierMetrics(IExceptionContext ectx, Row overallResult, int topK)
{
    // Small helper so each scalar metric is read the same way.
    double ReadMetric(string columnName)
    {
        return RowCursorUtils.Fetch<double>(ectx, overallResult, columnName);
    }

    AccuracyMicro = ReadMetric(MultiClassClassifierEvaluator.AccuracyMicro);
    AccuracyMacro = ReadMetric(MultiClassClassifierEvaluator.AccuracyMacro);
    LogLoss = ReadMetric(MultiClassClassifierEvaluator.LogLoss);
    LogLossReduction = ReadMetric(MultiClassClassifierEvaluator.LogLossReduction);

    TopK = topK;
    if (topK > 0)
    {
        TopKAccuracy = ReadMetric(MultiClassClassifierEvaluator.TopKAccuracy);
    }

    var logLossByClass = RowCursorUtils.Fetch<VBuffer<double>>(
        ectx, overallResult, MultiClassClassifierEvaluator.PerClassLogLoss);

    // Allocate the destination first, then copy the buffer contents into it.
    PerClassLogLoss = new double[logLossByClass.Length];
    logLossByClass.CopyTo(PerClassLogLoss);
}
/// <summary>
/// Returns a cursor over the requested columns. When none of the new columns are
/// active, the source cursor is wrapped directly; otherwise the work is delegated
/// to <c>GetRowCursorCore</c>.
/// </summary>
/// <param name="columnsNeeded">Output columns the caller wants active.</param>
/// <param name="rand">Validated as value-or-null; the source cursor is opened with <c>null</c>.</param>
public DataViewRowCursor GetRowCursor(IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand = null)
{
    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);
    Host.CheckValueOrNull(rand);

    var bindings = GetBindings();
    if (bindings.AnyNewColumnsActive(predicate))
    {
        return GetRowCursorCore(predicate);
    }

    // If we aren't selecting any of the output columns, don't construct our cursor.
    // Note that because we cannot support random due to the inherently
    // stratified nature, neither can we allow the base data to be shuffled,
    // even if it supports shuffling.
    var activeInput = bindings.GetActiveInput(predicate);
    var sourceColumns = Source.Schema.Where(
        column => column.Index < activeInput.Length && activeInput[column.Index]);
    var sourceCursor = Source.GetRowCursor(sourceColumns, null);
    return new BindingsWrappedRowCursor(Host, sourceCursor, bindings);
}
/// <summary>
/// Requests a cursor set from <c>Source</c> for the needed input columns and wraps
/// each returned cursor in this transform's <c>Cursor</c>.
/// </summary>
/// <param name="columnsNeeded">Output columns the caller wants active.</param>
/// <param name="n">Requested number of cursors, forwarded to the source.</param>
/// <param name="rand">Optional random instance, forwarded to the source.</param>
/// <returns>One wrapping cursor per source cursor.</returns>
public override DataViewRowCursor[] GetRowCursorSet(IEnumerable<DataViewSchema.Column> columnsNeeded, int n, Random rand = null)
{
    Host.CheckValueOrNull(rand);

    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);
    Func<int, bool> inputPred = GetActive(predicate, out bool[] active);

    var sourceColumns = Source.Schema.Where(column => inputPred(column.Index));
    var sourceCursors = Source.GetRowCursorSet(sourceColumns, n, rand);
    Host.AssertNonEmpty(sourceCursors);

    // No need to split if this is given 1 input cursor.
    var wrapped = new DataViewRowCursor[sourceCursors.Length];
    for (int idx = 0; idx < wrapped.Length; idx++)
    {
        wrapped[idx] = new Cursor(this, sourceCursors[idx], active);
    }

    return wrapped;
}
/// <summary>
/// Requests a cursor set from <c>Source</c> for the columns this transform depends on,
/// optionally splits a lone source cursor, and wraps each result in this transform's
/// <c>Cursor</c>.
/// </summary>
/// <param name="columnsNeeded">Output columns the caller wants active.</param>
/// <param name="n">Requested number of cursors.</param>
/// <param name="rand">Optional random instance, forwarded to the source.</param>
/// <returns>One wrapping cursor per input cursor.</returns>
public sealed override RowCursor[] GetRowCursorSet(IEnumerable<Schema.Column> columnsNeeded, int n, Random rand = null)
{
    Host.CheckValueOrNull(rand);

    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);
    var inputPred = _bindings.GetDependencies(predicate);
    var active = _bindings.GetActive(predicate);

    var sourceColumns = Source.Schema.Where(column => inputPred(column.Index));
    var inputs = Source.GetRowCursorSet(sourceColumns, n, rand);
    Host.AssertNonEmpty(inputs);

    // The source returned a single cursor but more were requested: split it
    // when WantParallelCursors says so for this predicate.
    bool gotSingleCursor = inputs.Length == 1;
    if (gotSingleCursor && n > 1 && WantParallelCursors(predicate))
    {
        inputs = DataViewUtils.CreateSplitCursors(Host, inputs[0], n);
    }
    Host.AssertNonEmpty(inputs);

    var wrapped = new RowCursor[inputs.Length];
    for (int idx = 0; idx < wrapped.Length; idx++)
    {
        wrapped[idx] = new Cursor(Host, this, inputs[idx], active);
    }

    return wrapped;
}
/// <summary>
/// Returns a cursor over the requested columns, preferring a consolidating cursor
/// unless <c>ShouldUseParallelCursors</c> explicitly returns <c>false</c>; otherwise
/// (or when that attempt fails) falls back to <c>GetRowCursorCore</c>.
/// </summary>
/// <param name="columnsNeeded">Output columns the caller wants active.</param>
/// <param name="rand">Optional random instance; only used when <c>CanShuffle</c> is true.</param>
public RowCursor GetRowCursor(IEnumerable<Schema.Column> columnsNeeded, Random rand = null)
{
    Host.CheckValueOrNull(rand);

    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

    // The random instance is dropped when shuffling is not supported.
    Random rng = null;
    if (CanShuffle)
    {
        rng = rand;
    }

    bool? useParallel = ShouldUseParallelCursors(predicate);

    // When useParallel is null, let the input decide, so go ahead and ask for parallel.
    // When the input wants to be split, this puts the consolidation after this transform
    // instead of before. This is likely to produce better performance, for example, when
    // this is RangeFilter.
    bool tryParallel = useParallel ?? true;
    if (tryParallel && DataViewUtils.TryCreateConsolidatingCursor(out RowCursor consolidated, this, columnsNeeded, Host, rng))
    {
        return consolidated;
    }

    return GetRowCursorCore(columnsNeeded, rng);
}
/// <summary>
/// Requests a cursor set from <c>Source</c> for the columns this transform depends on
/// and wraps each returned cursor in a <c>Cursor</c> built from the bindings.
/// </summary>
/// <param name="columnsNeeded">Output columns the caller wants active.</param>
/// <param name="n">Requested number of cursors, forwarded to the source.</param>
/// <param name="rand">Optional random instance, forwarded to the source.</param>
/// <returns>One wrapping cursor per source cursor.</returns>
public sealed override RowCursor[] GetRowCursorSet(IEnumerable<Schema.Column> columnsNeeded, int n, Random rand = null)
{
    Host.CheckValueOrNull(rand);

    var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);
    var inputPred = _bindings.GetDependencies(predicate);
    var active = _bindings.GetActive(predicate);

    var sourceColumns = Source.Schema.Where(column => inputPred(column.Index));
    var sourceCursors = Source.GetRowCursorSet(sourceColumns, n, rand);
    Host.AssertNonEmpty(sourceCursors);

    // No need to split if this is given 1 input cursor.
    var wrapped = new RowCursor[sourceCursors.Length];
    for (int idx = 0; idx < wrapped.Length; idx++)
    {
        wrapped[idx] = new Cursor(Host, _bindings, sourceCursors[idx], active);
    }

    return wrapped;
}
private protected override void PrintFoldResultsCore(IChannel ch, Dictionary <string, IDataView> metrics) { IDataView top; if (!metrics.TryGetValue(AnomalyDetectionEvaluator.TopKResults, out top)) { throw Host.Except("Did not find the top-k results data view"); } var sb = new StringBuilder(); using (var cursor = top.GetRowCursor(col => true)) { int index; if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Instance, out index)) { throw Host.Except("Data view does not contain the 'Instance' column"); } var instanceGetter = cursor.GetGetter <ReadOnlyMemory <char> >(index); if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.AnomalyScore, out index)) { throw Host.Except("Data view does not contain the 'Anomaly Score' column"); } var scoreGetter = cursor.GetGetter <Single>(index); if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Label, out index)) { throw Host.Except("Data view does not contain the 'Label' column"); } var labelGetter = cursor.GetGetter <Single>(index); bool hasRows = false; while (cursor.MoveNext()) { if (!hasRows) { sb.AppendFormat("{0} Top-scored Results", _topScored); sb.AppendLine(); sb.AppendLine("================================================="); sb.AppendLine("Instance Anomaly Score Labeled"); hasRows = true; } var name = default(ReadOnlyMemory <char>); Single score = 0; Single label = 0; instanceGetter(ref name); scoreGetter(ref score); labelGetter(ref label); sb.AppendFormat("{0,-10}{1,12:G4}{2,12}", name, score, label); sb.AppendLine(); } } if (sb.Length > 0) { ch.Info(MessageSensitivity.UserData, sb.ToString()); } IDataView overall; if (!metrics.TryGetValue(MetricKinds.OverallMetrics, out overall)) { throw Host.Except("No overall metrics found"); } // Find the number of anomalies, and the thresholds. 
int numAnomIndex; if (!overall.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.OverallMetrics.NumAnomalies, out numAnomIndex)) { throw Host.Except("Could not find the 'NumAnomalies' column"); } int stratCol; var hasStrat = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratCol, out stratCol); int stratVal; bool hasStratVals = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratVal, out stratVal); Contracts.Assert(hasStrat == hasStratVals); long numAnomalies = 0; using (var cursor = overall.GetRowCursor(col => col == numAnomIndex || (hasStrat && col == stratCol))) { var numAnomGetter = cursor.GetGetter <long>(numAnomIndex); ValueGetter <uint> stratGetter = null; if (hasStrat) { var type = cursor.Schema[stratCol].Type; stratGetter = RowCursorUtils.GetGetterAs <uint>(type, cursor, stratCol); } bool foundRow = false; while (cursor.MoveNext()) { uint strat = 0; if (stratGetter != null) { stratGetter(ref strat); } if (strat > 0) { continue; } if (foundRow) { throw Host.Except("Found multiple non-stratified rows in overall results data view"); } foundRow = true; numAnomGetter(ref numAnomalies); } } var kFormatName = string.Format(FoldDrAtKFormat, _k); var pFormatName = string.Format(FoldDrAtPFormat, _p); var numAnomName = string.Format(FoldDrAtNumAnomaliesFormat, numAnomalies); (string Source, string Name)[] cols =
private protected override Delegate[] CreateGettersCore(Row input, Func <int, bool> activeCols, out Action disposer) { Host.Assert(LabelIndex >= 0); Host.Assert(ScoreIndex >= 0); disposer = null; long cachedPosition = -1; var label = default(VBuffer <Float>); var score = default(VBuffer <Float>); ValueGetter <VBuffer <Float> > nullGetter = (ref VBuffer <Float> vec) => vec = default(VBuffer <Float>); var labelGetter = activeCols(LabelOutput) || activeCols(L1Output) || activeCols(L2Output) || activeCols(DistCol) ? RowCursorUtils.GetVecGetterAs <Float>(NumberType.Float, input, LabelIndex) : nullGetter; var scoreGetter = activeCols(ScoreOutput) || activeCols(L1Output) || activeCols(L2Output) || activeCols(DistCol) ? input.GetGetter <VBuffer <Float> >(ScoreIndex) : nullGetter; Action updateCacheIfNeeded = () => { if (cachedPosition != input.Position) { labelGetter(ref label); scoreGetter(ref score); cachedPosition = input.Position; } }; var getters = new Delegate[5]; if (activeCols(LabelOutput)) { ValueGetter <VBuffer <Float> > labelFn = (ref VBuffer <Float> dst) => { updateCacheIfNeeded(); label.CopyTo(ref dst); }; getters[LabelOutput] = labelFn; } if (activeCols(ScoreOutput)) { ValueGetter <VBuffer <Float> > scoreFn = (ref VBuffer <Float> dst) => { updateCacheIfNeeded(); score.CopyTo(ref dst); }; getters[ScoreOutput] = scoreFn; } if (activeCols(L1Output)) { ValueGetter <double> l1Fn = (ref double dst) => { updateCacheIfNeeded(); dst = VectorUtils.L1Distance(in label, in score); }; getters[L1Output] = l1Fn; }