/// <summary>
        /// Scalar case: builds the getter for a single key-typed source value. On every call the
        /// key is read as a <c>uint</c>, the buffer builder is reset to the column's bit count and
        /// <c>EncodeValueToBinary</c> fills it before the result is moved into the destination.
        /// </summary>
        /// <param name="input">Row that supplies the source column.</param>
        /// <param name="iinfo">Index of the column info being produced.</param>
        /// <returns>A getter writing a vector whose length equals the column's bits-per-key.</returns>
        private ValueGetter <VBuffer <float> > MakeGetterOne(IRow input, int iinfo)
        {
            Host.AssertValue(input);
            Host.Assert(Infos[iinfo].TypeSrc.IsKey);

            int bitCount = _bitsPerKey[iinfo];
            int vectorSize = _types[iinfo].VectorSize;

            // The declared output size must agree with the configured number of bits.
            Host.Assert(bitCount == vectorSize);
            Host.Assert(vectorSize > 0);

            var keyGetter = RowCursorUtils.GetGetterAs<uint>(NumberType.U4, input, Infos[iinfo].Source);
            uint key = default(uint);
            var builder = new BufferBuilder<float>(R4Adder.Instance);

            // 'key' and 'builder' are captured, so they are reused across calls of the getter.
            ValueGetter<VBuffer<float>> getter = (ref VBuffer<float> dst) =>
            {
                keyGetter(ref key);
                builder.Reset(bitCount, false);
                EncodeValueToBinary(builder, key, bitCount, 0);
                builder.GetResult(ref dst);

                Contracts.Assert(dst.Length == bitCount);
            };
            return getter;
        }
Example #2
0
            /// <summary>
            /// Creates a getter that maps the label column to a zero-based class index.
            /// Key labels map value k (1..KeyCount) to k - 1; numeric labels map positive values
            /// to 1 and non-positive values to 0. Anything else (key 0, out-of-range keys, NaN)
            /// maps to -1.
            /// </summary>
            /// <param name="row">Row that supplies the label column.</param>
            /// <param name="col">Index of the label column.</param>
            /// <param name="labelCardinality">Number of classes: the key count, or 2 for numeric labels.</param>
            private ValueGetter <int> GetLabelGetter(IRow row, int col, out int labelCardinality)
            {
                // The label column type is checked as part of args validation.
                var labelType = row.Schema.GetColumnType(col);
                Host.Assert(labelType.IsKey || labelType.IsNumber);

                if (!labelType.IsKey)
                {
                    // REVIEW: replace with trainable binning for numeric value
                    labelCardinality = 2; // any numeric column is split into 0 and 1

                    double value = 0;
                    var valueGetter = RowCursorUtils.GetGetterAs<double>(NumberType.R8, row, col);
                    return (ref int dst) =>
                    {
                        valueGetter(ref value);
                        // Both comparisons are false for NaN, which therefore maps to -1.
                        dst = value > 0 ? 1 : (value <= 0 ? 0 : -1);
                    };
                }

                Host.Assert(labelType.KeyCount > 0);
                labelCardinality = labelType.KeyCount;

                int keyCount = labelType.KeyCount;
                ulong key = 0;
                var keyGetter = RowCursorUtils.GetGetterAs<ulong>(NumberType.U8, row, col);
                return (ref int dst) =>
                {
                    keyGetter(ref key);
                    // Valid keys lie in 1..keyCount; 0 means missing and naturally becomes -1.
                    // Values above keyCount are not expected but are also treated as invalid.
                    dst = key <= (ulong)keyCount ? (int)key - 1 : -1;
                };
            }
Example #3
0
            /// <summary>
            /// Aggregates the metrics of all folds and forwards them to <c>SendTelemetryMetricCore</c>.
            /// For every fold only the first data view of the dictionary is read. Each numeric column
            /// is summed (over all rows of all folds) under the key "TLC_" + column name and the sum
            /// is divided by the number of folds; non-numeric columns are reported as NaN.
            /// </summary>
            /// <param name="metricValues">One dictionary of metric data views per fold.</param>
            protected void SendTelemetryMetric(Dictionary <string, IDataView>[] metricValues)
            {
                var metricSums = new Dictionary<string, double>();

                foreach (Dictionary<string, IDataView> mValue in metricValues)
                {
                    using (var cursor = mValue.First().Value.GetRowCursor(col => true))
                    {
                        // Column names, types and getters do not change from row to row, so they are
                        // resolved once per cursor instead of once per row. A null getter marks a
                        // non-numeric column.
                        int columnCount = cursor.Schema.ColumnCount;
                        var metricNames = new string[columnCount];
                        var getters = new ValueGetter<double>[columnCount];
                        for (int col = 0; col < columnCount; col++)
                        {
                            metricNames[col] = "TLC_" + cursor.Schema.GetColumnName(col);
                            if (cursor.Schema.GetColumnType(col).IsNumber)
                            {
                                getters[col] = RowCursorUtils.GetGetterAs<double>(NumberType.R8, cursor, col);
                            }
                        }

                        while (cursor.MoveNext())
                        {
                            for (int col = 0; col < columnCount; col++)
                            {
                                string name = metricNames[col];
                                var getter = getters[col];
                                if (getter == null)
                                {
                                    // Non-numeric metrics cannot be averaged.
                                    metricSums[name] = Double.NaN;
                                    continue;
                                }

                                double metricValue = 0;
                                getter(ref metricValue);

                                // Single lookup instead of ContainsKey followed by the indexer.
                                double runningSum;
                                metricSums[name] = metricSums.TryGetValue(name, out runningSum)
                                    ? runningSum + metricValue
                                    : metricValue;
                            }
                        }
                    }
                }

                // With no folds, metricSums is empty and the division below never runs.
                var averages = new Dictionary<string, double>(metricSums.Count);
                foreach (var pair in metricSums)
                {
                    averages.Add(pair.Key, pair.Value / metricValues.Length);
                }
                SendTelemetryMetricCore(Host, averages);
            }
        /// <summary>
        /// Returns the getter for output column <paramref name="iinfo"/>, converting the source
        /// column to the destination type recorded in <c>_exes</c>. Scalar destinations go through
        /// <c>RowCursorUtils.GetGetterAs</c>; vector destinations go through
        /// <c>RowCursorUtils.GetVecGetterAs</c> using the destination item type.
        /// </summary>
        /// <param name="ch">Optional channel; may be null.</param>
        /// <param name="input">Row that supplies the source column.</param>
        /// <param name="iinfo">Index of the column info; must lie in [0, Infos.Length).</param>
        /// <param name="disposer">Always set to null; this method registers no cleanup.</param>
        protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer)
        {
            Host.AssertValueOrNull(ch);
            Host.AssertValue(input);
            Host.Assert(0 <= iinfo && iinfo < Infos.Length);
            disposer = null;

            // Only the destination type selects the conversion path.
            var typeDst = _exes[iinfo].TypeDst;

            if (!typeDst.IsVector)
            {
                return RowCursorUtils.GetGetterAs(typeDst, input, Infos[iinfo].Source);
            }
            return RowCursorUtils.GetVecGetterAs(typeDst.AsVector.ItemType, input, Infos[iinfo].Source);
        }
Example #5
0
        /// <summary>
        /// Creates a <see cref="Single"/> label getter for label columns that are neither R4 nor R8.
        /// Boolean labels are converted with <see cref="Convert.ToSingle(bool)"/>; key labels map
        /// value k to k - 1, and a key of 0 or above the key count becomes NaN.
        /// </summary>
        /// <param name="cursor">Row that supplies the label column.</param>
        /// <param name="labelIndex">Index of the label column.</param>
        private static ValueGetter <Single> GetLabelGetterNotFloat(IRow cursor, int labelIndex)
        {
            var labelType = cursor.Schema.GetColumnType(labelIndex);
            Contracts.Assert(labelType != NumberType.R4 && labelType != NumberType.R8);

            // boolean type label mapping: True -> 1, False -> 0.
            if (labelType.IsBool)
            {
                var boolGetter = cursor.GetGetter<bool>(labelIndex);
                return (ref Single dst) =>
                {
                    bool flag = false;
                    boolGetter(ref flag);
                    dst = Convert.ToSingle(flag);
                };
            }

            Contracts.Check(labelType.IsKey, "Only floating point number, boolean, and key type values can be used as label.");
            Contracts.Assert(TestGetLabelGetter(labelType) == null);

            // A key count of zero lifts the upper bound to the largest ulong value.
            int keyCount = labelType.KeyCount;
            ulong upperBound = keyCount == 0 ? ulong.MaxValue : (ulong)keyCount;

            var keyGetter = RowCursorUtils.GetGetterAs<ulong>(NumberType.U8, cursor, labelIndex);
            return (ref Single dst) =>
            {
                ulong key = 0;
                keyGetter(ref key);
                if (key == 0 || key > upperBound)
                {
                    dst = Single.NaN;
                    return;
                }
                dst = key - 1;
            };
        }
        /// <summary>
        /// Singleton case: turns one key value into a sparse indicator vector of length KeyCount.
        /// A valid key k (1..KeyCount) yields a single explicit entry of 1 at index k - 1; a key of
        /// 0 or above KeyCount yields a vector with no explicit entries. The source comment states
        /// this should match both Bag and Ord over a vector of size one.
        /// </summary>
        /// <param name="input">Row that supplies the source column.</param>
        /// <param name="iinfo">Index of the column info being produced.</param>
        private ValueGetter <VBuffer <Float> > MakeGetterOne(IRow input, int iinfo)
        {
            Host.AssertValue(input);
            Host.Assert(Infos[iinfo].TypeSrc.IsKey);
            Host.Assert(Infos[iinfo].TypeSrc.KeyCount == _types[iinfo].VectorSize);

            int keyCount = Infos[iinfo].TypeSrc.KeyCount;
            Host.Assert(keyCount > 0);

            var keyGetter = RowCursorUtils.GetGetterAs<uint>(NumberType.U4, input, Infos[iinfo].Source);
            uint key = default(uint);

            return (ref VBuffer<Float> dst) =>
            {
                keyGetter(ref key);

                bool isValidKey = key != 0 && key <= keyCount;
                if (isValidKey)
                {
                    // Reuse the destination's arrays when they can hold one entry.
                    var oneValue = Utils.Size(dst.Values) < 1 ? new Float[1] : dst.Values;
                    var oneIndex = Utils.Size(dst.Indices) < 1 ? new int[1] : dst.Indices;
                    oneValue[0] = 1;
                    oneIndex[0] = (int)key - 1;

                    dst = new VBuffer<Float>(keyCount, 1, oneValue, oneIndex);
                }
                else
                {
                    // Missing or out-of-range key: no explicit entries, existing arrays kept.
                    dst = new VBuffer<Float>(keyCount, 0, dst.Values, dst.Indices);
                }
            };
        }
Example #7
0
        protected override void PrintFoldResultsCore(IChannel ch, Dictionary <string, IDataView> metrics)
        {
            IDataView top;

            if (!metrics.TryGetValue(AnomalyDetectionEvaluator.TopKResults, out top))
            {
                throw Host.Except("Did not find the top-k results data view");
            }
            var sb = new StringBuilder();

            using (var cursor = top.GetRowCursor(col => true))
            {
                int index;
                if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Instance, out index))
                {
                    throw Host.Except("Data view does not contain the 'Instance' column");
                }
                var instanceGetter = cursor.GetGetter <ReadOnlyMemory <char> >(index);
                if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.AnomalyScore, out index))
                {
                    throw Host.Except("Data view does not contain the 'Anomaly Score' column");
                }
                var scoreGetter = cursor.GetGetter <Single>(index);
                if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Label, out index))
                {
                    throw Host.Except("Data view does not contain the 'Label' column");
                }
                var labelGetter = cursor.GetGetter <Single>(index);

                bool hasRows = false;
                while (cursor.MoveNext())
                {
                    if (!hasRows)
                    {
                        sb.AppendFormat("{0} Top-scored Results", _topScored);
                        sb.AppendLine();
                        sb.AppendLine("=================================================");
                        sb.AppendLine("Instance    Anomaly Score     Labeled");
                        hasRows = true;
                    }
                    var    name  = default(ReadOnlyMemory <char>);
                    Single score = 0;
                    Single label = 0;
                    instanceGetter(ref name);
                    scoreGetter(ref score);
                    labelGetter(ref label);
                    sb.AppendFormat("{0,-10}{1,12:G4}{2,12}", name, score, label);
                    sb.AppendLine();
                }
            }
            if (sb.Length > 0)
            {
                ch.Info(MessageSensitivity.UserData, sb.ToString());
            }

            IDataView overall;

            if (!metrics.TryGetValue(MetricKinds.OverallMetrics, out overall))
            {
                throw Host.Except("No overall metrics found");
            }

            // Find the number of anomalies, and the thresholds.
            int numAnomIndex;

            if (!overall.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.OverallMetrics.NumAnomalies, out numAnomIndex))
            {
                throw Host.Except("Could not find the 'NumAnomalies' column");
            }

            int  stratCol;
            var  hasStrat = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratCol, out stratCol);
            int  stratVal;
            bool hasStratVals = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratVal, out stratVal);

            Contracts.Assert(hasStrat == hasStratVals);
            long numAnomalies = 0;

            using (var cursor = overall.GetRowCursor(col => col == numAnomIndex ||
                                                     (hasStrat && col == stratCol)))
            {
                var numAnomGetter = cursor.GetGetter <long>(numAnomIndex);
                ValueGetter <uint> stratGetter = null;
                if (hasStrat)
                {
                    var type = cursor.Schema.GetColumnType(stratCol);
                    stratGetter = RowCursorUtils.GetGetterAs <uint>(type, cursor, stratCol);
                }
                bool foundRow = false;
                while (cursor.MoveNext())
                {
                    uint strat = 0;
                    if (stratGetter != null)
                    {
                        stratGetter(ref strat);
                    }
                    if (strat > 0)
                    {
                        continue;
                    }
                    if (foundRow)
                    {
                        throw Host.Except("Found multiple non-stratified rows in overall results data view");
                    }
                    foundRow = true;
                    numAnomGetter(ref numAnomalies);
                }
            }

            var kFormatName = string.Format(FoldDrAtKFormat, _k);
            var pFormatName = string.Format(FoldDrAtPFormat, _p);
            var numAnomName = string.Format(FoldDrAtNumAnomaliesFormat, numAnomalies);

            (string Source, string Name)[] cols =
        // The multi-output regression evaluator prints only the per-label metrics for each fold:
        // every known-size R8 vector column of the overall metrics view becomes one line, with one
        // value per label. Rows with a non-zero stratification value are not printed.
        protected override void PrintFoldResultsCore(IChannel ch, Dictionary <string, IDataView> metrics)
        {
            IDataView fold;
            if (!metrics.TryGetValue(MetricKinds.OverallMetrics, out fold))
            {
                throw ch.Except("No overall metrics found");
            }

            var schema = fold.Schema;

            int weightCol;
            bool hasWeightCol = schema.TryGetColumnIndex(MetricKinds.ColumnNames.IsWeighted, out weightCol);
            int stratCol;
            bool hasStratCol = schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratCol, out stratCol);
            int stratValCol;
            bool hasStratValCol = schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratVal, out stratValCol);
            ch.Assert(hasStratCol == hasStratValCol);

            int columnCount = schema.ColumnCount;
            // Non-null only for the columns that hold per-label metric vectors.
            var vectorGetters = new ValueGetter<VBuffer<double>>[columnCount];

            using (var cursor = fold.GetRowCursor(col => true))
            {
                // Without an IsWeighted column every row counts as unweighted.
                ValueGetter<DvBool> weightFlagGetter = (ref DvBool dst) => dst = DvBool.False;
                if (hasWeightCol)
                {
                    weightFlagGetter = cursor.GetGetter<DvBool>(weightCol);
                }

                // Without a stratification column every row reports stratum 0.
                ValueGetter<uint> stratGetter = (ref uint dst) => dst = 0;
                if (hasStratCol)
                {
                    var stratType = cursor.Schema.GetColumnType(stratCol);
                    stratGetter = RowCursorUtils.GetGetterAs<uint>(stratType, cursor, stratCol);
                }

                int labelCount = 0;
                for (int c = 0; c < columnCount; c++)
                {
                    bool isBookkeeping = (hasWeightCol && c == weightCol) ||
                                         (hasStratCol && (c == stratCol || c == stratValCol));
                    if (schema.IsHidden(c) || isBookkeeping)
                    {
                        continue;
                    }

                    var colType = schema.GetColumnType(c);
                    if (!(colType.IsKnownSizeVector && colType.ItemType == NumberType.R8))
                    {
                        continue;
                    }

                    vectorGetters[c] = cursor.GetGetter<VBuffer<double>>(c);
                    if (labelCount != 0)
                    {
                        ch.Check(labelCount == colType.VectorSize, "All vector metrics should contain the same number of slots");
                    }
                    else
                    {
                        labelCount = colType.VectorSize;
                    }
                }

                var report = new StringBuilder();
                report.AppendLine("Per-label metrics:");
                report.AppendFormat("{0,12} ", " ");
                for (int j = 0; j < labelCount; j++)
                {
                    var labelName = new DvText(string.Format("Label_{0}", j));
                    report.AppendFormat(" {0,20}", labelName);
                }
                report.AppendLine();

                var rowValues = default(VBuffer<double>);
                DvBool weightFlag = DvBool.False;
                bool seenWeighted = !hasWeightCol;
                bool seenUnweighted = false;
                uint stratum = 0;
                while (cursor.MoveNext())
                {
                    // The duplicate check runs before the stratification filter, as in the
                    // original statement order.
                    weightFlagGetter(ref weightFlag);
                    if ((seenWeighted && weightFlag.IsTrue) || (seenUnweighted && weightFlag.IsFalse))
                    {
                        throw ch.Except("Multiple {0} rows found in overall metrics data view",
                                        weightFlag.IsTrue ? "weighted" : "unweighted");
                    }
                    if (weightFlag.IsTrue)
                    {
                        seenWeighted = true;
                    }
                    else
                    {
                        seenUnweighted = true;
                    }

                    stratGetter(ref stratum);
                    if (stratum > 0)
                    {
                        continue;
                    }

                    for (int c = 0; c < columnCount; c++)
                    {
                        var getter = vectorGetters[c];
                        if (getter == null)
                        {
                            continue;
                        }

                        getter(ref rowValues);
                        ch.Assert(rowValues.Length == labelCount);

                        report.AppendFormat("{0}{1,12}:", weightFlag.IsTrue ? "Weighted " : "", schema.GetColumnName(c));
                        foreach (var slot in rowValues.Items(all: true))
                        {
                            report.AppendFormat(" {0,20:G20}", slot.Value);
                        }
                        report.AppendLine();
                    }

                    // Stop once one weighted and one unweighted row have been seen.
                    if (seenUnweighted && seenWeighted)
                    {
                        break;
                    }
                }
                ch.Assert(seenUnweighted && seenWeighted);
                ch.Info(report.ToString());
            }
        }
Example #9
0
        /// <summary>
        /// Prints the results of one fold for anomaly detection: first a table of the top-scored
        /// instances (if the top-k view has any rows), then the overall metrics with the detection
        /// rate columns renamed to include k, p and the number of anomalies.
        /// </summary>
        /// <param name="ch">Channel that receives the printed output.</param>
        /// <param name="metrics">Metric data views; must contain the top-k and overall views.</param>
        protected override void PrintFoldResultsCore(IChannel ch, Dictionary <string, IDataView> metrics)
        {
            IDataView topK;
            if (!metrics.TryGetValue(AnomalyDetectionEvaluator.TopKResults, out topK))
            {
                throw Host.Except("Did not find the top-k results data view");
            }

            var table = new StringBuilder();
            using (var cursor = topK.GetRowCursor(col => true))
            {
                // Each column is looked up and bound in turn, so failures surface in column order.
                int instanceCol;
                if (!topK.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Instance, out instanceCol))
                {
                    throw Host.Except("Data view does not contain the 'Instance' column");
                }
                var getInstance = cursor.GetGetter<DvText>(instanceCol);

                int scoreCol;
                if (!topK.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.AnomalyScore, out scoreCol))
                {
                    throw Host.Except("Data view does not contain the 'Anomaly Score' column");
                }
                var getScore = cursor.GetGetter<Single>(scoreCol);

                int labelCol;
                if (!topK.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.Label, out labelCol))
                {
                    throw Host.Except("Data view does not contain the 'Label' column");
                }
                var getLabel = cursor.GetGetter<Single>(labelCol);

                bool headerWritten = false;
                while (cursor.MoveNext())
                {
                    // The header is emitted only when there is at least one row.
                    if (!headerWritten)
                    {
                        table.AppendFormat("{0} Top-scored Results", _topScored).AppendLine();
                        table.AppendLine("=================================================");
                        table.AppendLine("Instance    Anomaly Score     Labeled");
                        headerWritten = true;
                    }

                    var instance = default(DvText);
                    Single anomalyScore = 0;
                    Single labeled = 0;
                    getInstance(ref instance);
                    getScore(ref anomalyScore);
                    getLabel(ref labeled);
                    table.AppendFormat("{0,-10}{1,12:G4}{2,12}", instance, anomalyScore, labeled).AppendLine();
                }
            }
            if (table.Length > 0)
            {
                ch.Info(MessageSensitivity.UserData, table.ToString());
            }

            IDataView overall;
            if (!metrics.TryGetValue(MetricKinds.OverallMetrics, out overall))
            {
                throw Host.Except("No overall metrics found");
            }

            // Find the number of anomalies, and the thresholds.
            int anomalyCountCol;
            if (!overall.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.OverallMetrics.NumAnomalies, out anomalyCountCol))
            {
                throw Host.Except("Could not find the 'NumAnomalies' column");
            }

            int stratCol;
            bool hasStratCol = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratCol, out stratCol);
            int stratValCol;
            bool hasStratValCol = overall.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratVal, out stratValCol);
            Contracts.Assert(hasStratCol == hasStratValCol);

            DvInt8 anomalyCount = 0;
            using (var cursor = overall.GetRowCursor(col => col == anomalyCountCol ||
                                                     (hasStratCol && col == stratCol)))
            {
                var getAnomalyCount = cursor.GetGetter<DvInt8>(anomalyCountCol);
                ValueGetter<uint> getStrat = hasStratCol
                    ? RowCursorUtils.GetGetterAs<uint>(cursor.Schema.GetColumnType(stratCol), cursor, stratCol)
                    : null;

                bool seenOverallRow = false;
                while (cursor.MoveNext())
                {
                    // Rows with a non-zero stratification value are skipped.
                    if (getStrat != null)
                    {
                        uint strat = 0;
                        getStrat(ref strat);
                        if (strat > 0)
                        {
                            continue;
                        }
                    }

                    if (seenOverallRow)
                    {
                        throw Host.Except("Found multiple non-stratified rows in overall results data view");
                    }
                    seenOverallRow = true;
                    getAnomalyCount(ref anomalyCount);
                }
            }

            // The detection-rate columns are renamed to carry their parameters; the threshold and
            // AUC columns are selected under their own names.
            var args = new ChooseColumnsTransform.Arguments();
            args.Column = new ChooseColumnsTransform.Column[]
            {
                new ChooseColumnsTransform.Column
                {
                    Name   = string.Format(FoldDrAtKFormat, _k),
                    Source = AnomalyDetectionEvaluator.OverallMetrics.DrAtK
                },
                new ChooseColumnsTransform.Column
                {
                    Name   = string.Format(FoldDrAtPFormat, _p),
                    Source = AnomalyDetectionEvaluator.OverallMetrics.DrAtPFpr
                },
                new ChooseColumnsTransform.Column
                {
                    Name   = string.Format(FoldDrAtNumAnomaliesFormat, anomalyCount),
                    Source = AnomalyDetectionEvaluator.OverallMetrics.DrAtNumPos
                },
                new ChooseColumnsTransform.Column { Name = AnomalyDetectionEvaluator.OverallMetrics.ThreshAtK },
                new ChooseColumnsTransform.Column { Name = AnomalyDetectionEvaluator.OverallMetrics.ThreshAtP },
                new ChooseColumnsTransform.Column { Name = AnomalyDetectionEvaluator.OverallMetrics.ThreshAtNumPos },
                new ChooseColumnsTransform.Column { Name = BinaryClassifierEvaluator.Auc }
            };

            IDataView selected = new ChooseColumnsTransform(Host, args, overall);
            string weightedFold;
            ch.Info(MetricWriter.GetPerFoldResults(Host, selected, out weightedFold));
        }