Exemplo n.º 1
0
        /// <summary>
        /// Fills in the multiclass metric properties by reading named columns out of the overall-results row.
        /// </summary>
        /// <param name="host">Passed to the fetch helper and to the confusion-matrix builder.</param>
        /// <param name="overallResult">Row from which every metric column is fetched.</param>
        /// <param name="topKPredictionCount">Stored as-is; the top-K accuracy column is read only when this is positive.</param>
        /// <param name="confusionMatrix">Data view forwarded to <c>MetricWriter.GetConfusionMatrix</c>.</param>
        internal MulticlassClassificationMetrics(IHost host, DataViewRow overallResult, int topKPredictionCount, IDataView confusionMatrix)
        {
            // One generic reader covers both the scalar metrics and the per-class vector.
            T Read<T>(string columnName)
            {
                return RowCursorUtils.Fetch<T>(host, overallResult, columnName);
            }

            MicroAccuracy = Read<double>(MulticlassClassificationEvaluator.AccuracyMicro);
            MacroAccuracy = Read<double>(MulticlassClassificationEvaluator.AccuracyMacro);
            LogLoss = Read<double>(MulticlassClassificationEvaluator.LogLoss);
            LogLossReduction = Read<double>(MulticlassClassificationEvaluator.LogLossReduction);

            TopKPredictionCount = topKPredictionCount;
            if (topKPredictionCount > 0)
            {
                // The top-K column is not touched for a non-positive count.
                TopKAccuracy = Read<double>(MulticlassClassificationEvaluator.TopKAccuracy);
            }

            VBuffer<double> classLosses = Read<VBuffer<double>>(MulticlassClassificationEvaluator.PerClassLogLoss);
            PerClassLogLoss = classLosses.DenseValues().ToImmutableArray();

            // The length of the per-class log-loss vector is passed as the last argument.
            ConfusionMatrix = MetricWriter.GetConfusionMatrix(host, confusionMatrix, binary: false, classLosses.Length);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a <see cref="Single"/> label getter for a label column that is neither Single nor Double.
        /// Boolean labels are converted with <see cref="Convert.ToSingle(bool)"/>; key labels in
        /// the range 1..count become zero-based values, and any other key value becomes NaN.
        /// </summary>
        /// <param name="cursor">Row whose schema and getters are used.</param>
        /// <param name="labelIndex">Index of the label column in the row's schema.</param>
        /// <returns>A getter that writes the converted label into its <c>ref</c> argument.</returns>
        private static ValueGetter<Single> GetLabelGetterNotFloat(DataViewRow cursor, int labelIndex)
        {
            var labelType = cursor.Schema[labelIndex].Type;

            // Floating-point labels are expected to be handled by the caller.
            Contracts.Assert(labelType != NumberDataViewType.Single && labelType != NumberDataViewType.Double);

            // Boolean labels: True -> 1, False -> 0.
            if (labelType is BooleanDataViewType)
            {
                var boolGetter = cursor.GetGetter<bool>(labelIndex);
                return (ref Single dst) =>
                {
                    bool flag = default;
                    boolGetter(ref flag);
                    dst = Convert.ToSingle(flag);
                };
            }

            var keyType = labelType as KeyType;
            if (keyType is null)
            {
                throw Contracts.Except("Only floating point number, boolean, and key type values can be used as label.");
            }

            Contracts.Assert(TestGetLabelGetter(labelType) == null);

            // A count of zero is treated as "no upper bound".
            ulong declaredCount = (ulong)keyType.Count;
            ulong upperBound = declaredCount == 0 ? ulong.MaxValue : declaredCount;

            var rawGetter = RowCursorUtils.GetGetterAs<ulong>(NumberDataViewType.UInt64, cursor, labelIndex);

            return (ref Single dst) =>
            {
                ulong raw = 0;
                rawGetter(ref raw);

                // Valid keys are shifted down by one; zero and out-of-range keys map to NaN.
                dst = (0 < raw && raw <= upperBound) ? raw - 1 : Single.NaN;
            };
        }
Exemplo n.º 3
0
        /// <summary>
        /// Opens a cursor over the source restricted to the input columns that the requested
        /// output columns depend on, and wraps it in this transform's <c>Cursor</c>.
        /// </summary>
        /// <param name="columnsNeeded">Output columns the caller wants active.</param>
        /// <param name="rand">Optional random source, forwarded to the source cursor.</param>
        protected override RowCursor GetRowCursorCore(IEnumerable<Schema.Column> columnsNeeded, Random rand = null)
        {
            Contracts.AssertValueOrNull(rand);

            var outputPredicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);
            var bindings = GetBindings();

            // GetActive also reports which input columns are required and how predicates map across.
            var activeColumns = GetActive(
                bindings,
                outputPredicate,
                out Func<int, bool> inputPredicate,
                out Func<int, bool> mappedPredicate);

            var requiredInputs = Source.Schema.Where(column => inputPredicate(column.Index));
            var sourceCursor = Source.GetRowCursor(requiredInputs, rand);

            return new Cursor(Host, this, sourceCursor, activeColumns, mappedPredicate);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a getter that produces the slot's values as a vector of <see cref="Single"/> labels.
        /// Single items are read directly, Double and Boolean items go through <c>GetVecGetterAs</c>,
        /// and key items in the range 1..count become zero-based values with everything else mapped to NaN.
        /// </summary>
        /// <param name="cursor">Slot cursor that supplies the item type and the underlying getter.</param>
        /// <returns>A getter that fills a <c>VBuffer&lt;Single&gt;</c> with the converted labels.</returns>
        internal static ValueGetter<VBuffer<Single>> GetLabelGetter(SlotCursor cursor)
        {
            var itemType = cursor.GetSlotType().ItemType;

            if (itemType == NumberDataViewType.Single)
            {
                return cursor.GetGetter<Single>();
            }

            bool isConvertible = itemType == NumberDataViewType.Double || itemType is BooleanDataViewType;
            if (isConvertible)
            {
                return GetVecGetterAs<Single>(NumberDataViewType.Single, cursor);
            }

            var keyType = itemType as KeyType;
            if (keyType is null)
            {
                throw Contracts.Except("Only floating point number, boolean, and key type values can be used as label.");
            }

            Contracts.Assert(TestGetLabelGetter(itemType) == null);

            // A count of zero is treated as "no upper bound".
            ulong declaredCount = (ulong)keyType.Count;
            ulong upperBound = declaredCount == 0 ? ulong.MaxValue : declaredCount;

            var keyGetter = RowCursorUtils.GetVecGetterAs<ulong>(NumberDataViewType.UInt64, cursor);

            // Declared outside the lambda so the same buffer is reused on every call.
            var keyBuffer = default(VBuffer<ulong>);

            return (ref VBuffer<Single> dst) =>
            {
                keyGetter(ref keyBuffer);

                // Unfortunately defaults in one do not translate to defaults of the other,
                // so this will not be sparsity preserving. Assume a dense output.
                var editor = VBufferEditor.Create(ref dst, keyBuffer.Length);
                foreach (var slot in keyBuffer.Items(all: true))
                {
                    ulong key = slot.Value;
                    editor.Values[slot.Key] = (0 < key && key <= upperBound) ? key - 1 : Single.NaN;
                }

                dst = editor.Commit();
            };
        }
Exemplo n.º 5
0
            /// <summary>
            /// Sets up the label and score getters, and the weight getter when the schema has a
            /// weight column, against the given row.
            /// </summary>
            /// <param name="row">Row from which the getters are obtained.</param>
            /// <param name="schema">Role-mapped schema identifying the label, score and weight columns.</param>
            internal override void InitializeNextPass(Row row, RoleMappedSchema schema)
            {
                Contracts.Assert(PassNum < 1);
                Contracts.Assert(schema.Label.HasValue);

                var scoreColumn = schema.GetUniqueColumn(MetadataUtils.Const.ScoreValueKind.Score);
                var labelColumnIndex = schema.Label.Value.Index;

                // The label is read as a Float vector; the score is fetched with its native vector getter.
                _labelGetter = RowCursorUtils.GetVecGetterAs<Float>(NumberType.Float, row, labelColumnIndex);
                _scoreGetter = row.GetGetter<VBuffer<Float>>(scoreColumn.Index);
                Contracts.AssertValue(_labelGetter);
                Contracts.AssertValue(_scoreGetter);

                if (!schema.Weight.HasValue)
                {
                    return;
                }

                _weightGetter = row.GetGetter<Float>(schema.Weight.Value.Index);
            }
        /// <summary>
        /// Opens one cursor per source data view and combines them in a single <c>Cursor</c>.
        /// Sources with no input predicate get the cursor returned by <c>GetMinimumCursor</c>;
        /// the others are opened on just the columns their predicate selects.
        /// </summary>
        /// <param name="columnsNeeded">Columns of this view's schema the caller wants active.</param>
        /// <param name="rand">Validated as value-or-null; the source cursors are opened with <c>null</c> instead.</param>
        public DataViewRowCursor GetRowCursor(IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand = null)
        {
            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, Schema);

            _host.CheckValueOrNull(rand);

            var srcPredicates = _zipBinding.GetInputPredicates(predicate);

            // REVIEW: with known row counts we could open a cursor only on sources that have needed
            // columns and let the outer cursor handle early stopping. With unknown row counts every
            // cursor must be opened, because we cannot tell which source will be the shortest.
            // One reason this is not done today is that the API has 'somewhat mutable' data views,
            // so the optimization might backfire.
            var srcCursors = _sources
                .Select((view, position) =>
                {
                    var needed = srcPredicates[position];
                    if (needed == null)
                    {
                        return GetMinimumCursor(view);
                    }

                    return view.GetRowCursor(view.Schema.Where(column => needed(column.Index)), null);
                })
                .ToArray();

            return new Cursor(this, srcCursors, predicate);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Fills in the multiclass metric properties by reading named columns out of the overall-results row.
        /// </summary>
        /// <param name="ectx">Exception context passed to the fetch helper.</param>
        /// <param name="overallResult">Row from which every metric column is fetched.</param>
        /// <param name="topK">Stored as-is; the top-K accuracy column is read only when this is positive.</param>
        internal MultiClassClassifierMetrics(IExceptionContext ectx, DataViewRow overallResult, int topK)
        {
            // One generic reader covers both the scalar metrics and the per-class vector.
            T Read<T>(string columnName)
            {
                return RowCursorUtils.Fetch<T>(ectx, overallResult, columnName);
            }

            MicroAccuracy = Read<double>(MultiClassClassifierEvaluator.AccuracyMicro);
            MacroAccuracy = Read<double>(MultiClassClassifierEvaluator.AccuracyMacro);
            LogLoss = Read<double>(MultiClassClassifierEvaluator.LogLoss);
            LogLossReduction = Read<double>(MultiClassClassifierEvaluator.LogLossReduction);

            TopK = topK;
            if (topK > 0)
                TopKAccuracy = Read<double>(MultiClassClassifierEvaluator.TopKAccuracy);

            VBuffer<double> classLosses = Read<VBuffer<double>>(MultiClassClassifierEvaluator.PerClassLogLoss);
            PerClassLogLoss = classLosses.DenseValues().ToImmutableArray();
        }
Exemplo n.º 8
0
        /// <summary>
        /// Builds a <see cref="Single"/> label getter for a label column that is neither R4 nor R8.
        /// Boolean labels are converted with <see cref="Convert.ToSingle(bool)"/>; key labels in
        /// the range 1..count become zero-based values, and any other key value becomes NaN.
        /// </summary>
        /// <param name="cursor">Row whose schema and getters are used.</param>
        /// <param name="labelIndex">Index of the label column in the row's schema.</param>
        /// <returns>A getter that writes the converted label into its <c>ref</c> argument.</returns>
        private static ValueGetter<Single> GetLabelGetterNotFloat(Row cursor, int labelIndex)
        {
            var labelType = cursor.Schema[labelIndex].Type;

            // Floating-point labels are expected to be handled by the caller.
            Contracts.Assert(labelType != NumberType.R4 && labelType != NumberType.R8);

            // Boolean labels: True -> 1, False -> 0.
            if (labelType.IsBool)
            {
                var boolGetter = cursor.GetGetter<bool>(labelIndex);
                return (ref Single dst) =>
                {
                    bool flag = default;
                    boolGetter(ref flag);
                    dst = Convert.ToSingle(flag);
                };
            }

            Contracts.Check(labelType.IsKey, "Only floating point number, boolean, and key type values can be used as label.");
            Contracts.Assert(TestGetLabelGetter(labelType) == null);

            // A key count of zero is treated as "no upper bound".
            ulong declaredCount = (ulong)labelType.KeyCount;
            ulong upperBound = declaredCount == 0 ? ulong.MaxValue : declaredCount;

            var rawGetter = RowCursorUtils.GetGetterAs<ulong>(NumberType.U8, cursor, labelIndex);

            return (ref Single dst) =>
            {
                ulong raw = 0;
                rawGetter(ref raw);

                // Valid keys are shifted down by one; zero and out-of-range keys map to NaN.
                dst = (0 < raw && raw <= upperBound) ? raw - 1 : Single.NaN;
            };
        }
Exemplo n.º 9
0
        /// <summary>
        /// Fills in the multiclass metric properties by reading named columns out of the overall-results row.
        /// The per-class log-loss vector is copied into a newly allocated array of the same length.
        /// </summary>
        /// <param name="ectx">Exception context passed to the fetch helper.</param>
        /// <param name="overallResult">Row from which every metric column is fetched.</param>
        /// <param name="topK">Stored as-is; the top-K accuracy column is read only when this is positive.</param>
        internal MultiClassClassifierMetrics(IExceptionContext ectx, Row overallResult, int topK)
        {
            double ReadMetric(string columnName)
            {
                return RowCursorUtils.Fetch<double>(ectx, overallResult, columnName);
            }

            AccuracyMicro = ReadMetric(MultiClassClassifierEvaluator.AccuracyMicro);
            AccuracyMacro = ReadMetric(MultiClassClassifierEvaluator.AccuracyMacro);
            LogLoss = ReadMetric(MultiClassClassifierEvaluator.LogLoss);
            LogLossReduction = ReadMetric(MultiClassClassifierEvaluator.LogLossReduction);

            TopK = topK;
            if (topK > 0)
                TopKAccuracy = ReadMetric(MultiClassClassifierEvaluator.TopKAccuracy);

            var classLosses = RowCursorUtils.Fetch<VBuffer<double>>(ectx, overallResult, MultiClassClassifierEvaluator.PerClassLogLoss);

            // Allocate the destination first, then let the buffer copy itself into it.
            PerClassLogLoss = new double[classLosses.Length];
            classLosses.CopyTo(PerClassLogLoss);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Returns a cursor over the requested columns. When at least one of this transform's new
        /// columns is active it delegates to <c>GetRowCursorCore</c>; otherwise it wraps a plain
        /// source cursor without constructing this transform's own cursor.
        /// </summary>
        /// <param name="columnsNeeded">Output columns the caller wants active.</param>
        /// <param name="rand">Validated as value-or-null; the source cursor is opened with <c>null</c> instead.</param>
        public DataViewRowCursor GetRowCursor(IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand = null)
        {
            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

            Host.CheckValueOrNull(rand);

            var bindings = GetBindings();
            if (bindings.AnyNewColumnsActive(predicate))
            {
                return GetRowCursorCore(predicate);
            }

            // None of the output columns are selected, so our own cursor is not constructed.
            // Because random cannot be supported due to the inherently stratified nature,
            // the base data must not be shuffled either, even if it supports shuffling.
            var inputIsActive = bindings.GetActiveInput(predicate);
            var selectedInputs = Source.Schema.Where(column => inputIsActive.Length > column.Index && inputIsActive[column.Index]);
            var sourceCursor = Source.GetRowCursor(selectedInputs, null);

            return new BindingsWrappedRowCursor(Host, sourceCursor, bindings);
        }
Exemplo n.º 11
0
            /// <summary>
            /// Requests a cursor set from the source on the required input columns and wraps each
            /// returned cursor in this transform's <c>Cursor</c>.
            /// </summary>
            /// <param name="columnsNeeded">Output columns the caller wants active.</param>
            /// <param name="n">Cursor count forwarded to the source.</param>
            /// <param name="rand">Optional random source, forwarded to the source.</param>
            /// <returns>One wrapping cursor per cursor returned by the source.</returns>
            public override DataViewRowCursor[] GetRowCursorSet(IEnumerable<DataViewSchema.Column> columnsNeeded, int n, Random rand = null)
            {
                Host.CheckValueOrNull(rand);

                var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);
                Func<int, bool> inputPredicate = GetActive(predicate, out bool[] active);

                var requiredInputs = Source.Schema.Where(column => inputPredicate(column.Index));
                var sourceCursors = Source.GetRowCursorSet(requiredInputs, n, rand);
                Host.AssertNonEmpty(sourceCursors);

                // No need to split if this is given 1 input cursor.
                var wrapped = new DataViewRowCursor[sourceCursors.Length];
                int slot = 0;
                foreach (var sourceCursor in sourceCursors)
                {
                    wrapped[slot++] = new Cursor(this, sourceCursor, active);
                }

                return wrapped;
            }
Exemplo n.º 12
0
        /// <summary>
        /// Requests a cursor set from the source on the columns the bindings depend on, splits a
        /// lone source cursor when more than one was requested and <c>WantParallelCursors</c>
        /// agrees, and wraps each resulting cursor in this transform's <c>Cursor</c>.
        /// </summary>
        /// <param name="columnsNeeded">Output columns the caller wants active.</param>
        /// <param name="n">Cursor count, forwarded to the source and to the splitter.</param>
        /// <param name="rand">Optional random source, forwarded to the source.</param>
        /// <returns>One wrapping cursor per input cursor.</returns>
        public sealed override RowCursor[] GetRowCursorSet(IEnumerable<Schema.Column> columnsNeeded, int n, Random rand = null)
        {
            Host.CheckValueOrNull(rand);

            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

            var dependsOn = _bindings.GetDependencies(predicate);
            var active = _bindings.GetActive(predicate);

            var requiredInputs = Source.Schema.Where(column => dependsOn(column.Index));
            var sourceCursors = Source.GetRowCursorSet(requiredInputs, n, rand);
            Host.AssertNonEmpty(sourceCursors);

            // The source gave a single cursor but the caller asked for several: split it here if worthwhile.
            bool shouldSplit = sourceCursors.Length == 1 && n > 1 && WantParallelCursors(predicate);
            if (shouldSplit)
            {
                sourceCursors = DataViewUtils.CreateSplitCursors(Host, sourceCursors[0], n);
            }
            Host.AssertNonEmpty(sourceCursors);

            var wrapped = new RowCursor[sourceCursors.Length];
            int slot = 0;
            foreach (var sourceCursor in sourceCursors)
            {
                wrapped[slot++] = new Cursor(Host, this, sourceCursor, active);
            }

            return wrapped;
        }
Exemplo n.º 13
0
        /// <summary>
        /// Returns a cursor over the requested columns, trying a consolidating cursor first unless
        /// <c>ShouldUseParallelCursors</c> explicitly answers <c>false</c>.
        /// </summary>
        /// <param name="columnsNeeded">Output columns the caller wants active.</param>
        /// <param name="rand">Optional random source; replaced by <c>null</c> when <c>CanShuffle</c> is false.</param>
        public RowCursor GetRowCursor(IEnumerable<Schema.Column> columnsNeeded, Random rand = null)
        {
            Host.CheckValueOrNull(rand);

            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

            Random shuffleRand = CanShuffle ? rand : null;
            bool? parallelHint = ShouldUseParallelCursors(predicate);

            // A null hint means "let the input decide", so go ahead and ask for parallel.
            // When the input wants to be split, this puts the consolidation after this transform
            // instead of before. This is likely to produce better performance, for example, when
            // this is RangeFilter.
            bool tryParallel = parallelHint.GetValueOrDefault(true);
            if (tryParallel
                && DataViewUtils.TryCreateConsolidatingCursor(out RowCursor consolidated, this, columnsNeeded, Host, shuffleRand))
            {
                return consolidated;
            }

            return GetRowCursorCore(columnsNeeded, shuffleRand);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Requests a cursor set from the source on the columns the bindings depend on and wraps
        /// each returned cursor in this transform's <c>Cursor</c>.
        /// </summary>
        /// <param name="columnsNeeded">Output columns the caller wants active.</param>
        /// <param name="n">Cursor count forwarded to the source.</param>
        /// <param name="rand">Optional random source, forwarded to the source.</param>
        /// <returns>One wrapping cursor per cursor returned by the source.</returns>
        public sealed override RowCursor[] GetRowCursorSet(IEnumerable<Schema.Column> columnsNeeded, int n, Random rand = null)
        {
            Host.CheckValueOrNull(rand);

            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema);

            var dependsOn = _bindings.GetDependencies(predicate);
            var active = _bindings.GetActive(predicate);

            var requiredInputs = Source.Schema.Where(column => dependsOn(column.Index));
            var sourceCursors = Source.GetRowCursorSet(requiredInputs, n, rand);
            Host.AssertNonEmpty(sourceCursors);

            // No need to split if this is given 1 input cursor.
            var wrapped = new RowCursor[sourceCursors.Length];
            int slot = 0;
            foreach (var sourceCursor in sourceCursors)
            {
                wrapped[slot++] = new Cursor(Host, _bindings, sourceCursor, active);
            }

            return wrapped;
        }
        private protected override void PrintFoldResultsCore(IChannel ch, Dictionary <string, IDataView> metrics)
        {
            IDataView top;

            if (!metrics.TryGetValue(AnomalyDetectionEvaluator.TopKResults, out top))
            {
                throw Host.Except("Did not find the top-k results data view");
            }
            var sb = new StringBuilder();

            using (var cursor = top.GetRowCursor(col => true))
            {
                int index;
                if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Instance, out index))
                {
                    throw Host.Except("Data view does not contain the 'Instance' column");
                }
                var instanceGetter = cursor.GetGetter <ReadOnlyMemory <char> >(index);
                if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.AnomalyScore, out index))
                {
                    throw Host.Except("Data view does not contain the 'Anomaly Score' column");
                }
                var scoreGetter = cursor.GetGetter <Single>(index);
                if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Label, out index))
                {
                    throw Host.Except("Data view does not contain the 'Label' column");
                }
                var labelGetter = cursor.GetGetter <Single>(index);

                bool hasRows = false;
                while (cursor.MoveNext())
                {
                    if (!hasRows)
                    {
                        sb.AppendFormat("{0} Top-scored Results", _topScored);
                        sb.AppendLine();
                        sb.AppendLine("=================================================");
                        sb.AppendLine("Instance    Anomaly Score     Labeled");
                        hasRows = true;
                    }
                    var    name  = default(ReadOnlyMemory <char>);
                    Single score = 0;
                    Single label = 0;
                    instanceGetter(ref name);
                    scoreGetter(ref score);
                    labelGetter(ref label);
                    sb.AppendFormat("{0,-10}{1,12:G4}{2,12}", name, score, label);
                    sb.AppendLine();
                }
            }
            if (sb.Length > 0)
            {
                ch.Info(MessageSensitivity.UserData, sb.ToString());
            }

            IDataView overall;

            if (!metrics.TryGetValue(MetricKinds.OverallMetrics, out overall))
            {
                throw Host.Except("No overall metrics found");
            }

            // Find the number of anomalies, and the thresholds.
            int numAnomIndex;

            if (!overall.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.OverallMetrics.NumAnomalies, out numAnomIndex))
            {
                throw Host.Except("Could not find the 'NumAnomalies' column");
            }

            int  stratCol;
            var  hasStrat = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratCol, out stratCol);
            int  stratVal;
            bool hasStratVals = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratVal, out stratVal);

            Contracts.Assert(hasStrat == hasStratVals);
            long numAnomalies = 0;

            using (var cursor = overall.GetRowCursor(col => col == numAnomIndex ||
                                                     (hasStrat && col == stratCol)))
            {
                var numAnomGetter = cursor.GetGetter <long>(numAnomIndex);
                ValueGetter <uint> stratGetter = null;
                if (hasStrat)
                {
                    var type = cursor.Schema[stratCol].Type;
                    stratGetter = RowCursorUtils.GetGetterAs <uint>(type, cursor, stratCol);
                }
                bool foundRow = false;
                while (cursor.MoveNext())
                {
                    uint strat = 0;
                    if (stratGetter != null)
                    {
                        stratGetter(ref strat);
                    }
                    if (strat > 0)
                    {
                        continue;
                    }
                    if (foundRow)
                    {
                        throw Host.Except("Found multiple non-stratified rows in overall results data view");
                    }
                    foundRow = true;
                    numAnomGetter(ref numAnomalies);
                }
            }

            var kFormatName = string.Format(FoldDrAtKFormat, _k);
            var pFormatName = string.Format(FoldDrAtPFormat, _p);
            var numAnomName = string.Format(FoldDrAtNumAnomaliesFormat, numAnomalies);

            (string Source, string Name)[] cols =
Exemplo n.º 16
0
        private protected override Delegate[] CreateGettersCore(Row input, Func <int, bool> activeCols, out Action disposer)
        {
            Host.Assert(LabelIndex >= 0);
            Host.Assert(ScoreIndex >= 0);

            disposer = null;

            long cachedPosition = -1;
            var  label          = default(VBuffer <Float>);
            var  score          = default(VBuffer <Float>);

            ValueGetter <VBuffer <Float> > nullGetter = (ref VBuffer <Float> vec) => vec = default(VBuffer <Float>);
            var labelGetter = activeCols(LabelOutput) || activeCols(L1Output) || activeCols(L2Output) || activeCols(DistCol)
                ? RowCursorUtils.GetVecGetterAs <Float>(NumberType.Float, input, LabelIndex)
                : nullGetter;
            var scoreGetter = activeCols(ScoreOutput) || activeCols(L1Output) || activeCols(L2Output) || activeCols(DistCol)
                ? input.GetGetter <VBuffer <Float> >(ScoreIndex)
                : nullGetter;
            Action updateCacheIfNeeded =
                () =>
            {
                if (cachedPosition != input.Position)
                {
                    labelGetter(ref label);
                    scoreGetter(ref score);
                    cachedPosition = input.Position;
                }
            };

            var getters = new Delegate[5];

            if (activeCols(LabelOutput))
            {
                ValueGetter <VBuffer <Float> > labelFn =
                    (ref VBuffer <Float> dst) =>
                {
                    updateCacheIfNeeded();
                    label.CopyTo(ref dst);
                };
                getters[LabelOutput] = labelFn;
            }
            if (activeCols(ScoreOutput))
            {
                ValueGetter <VBuffer <Float> > scoreFn =
                    (ref VBuffer <Float> dst) =>
                {
                    updateCacheIfNeeded();
                    score.CopyTo(ref dst);
                };
                getters[ScoreOutput] = scoreFn;
            }
            if (activeCols(L1Output))
            {
                ValueGetter <double> l1Fn =
                    (ref double dst) =>
                {
                    updateCacheIfNeeded();
                    dst = VectorUtils.L1Distance(in label, in score);
                };
                getters[L1Output] = l1Fn;
            }