/// <summary>
 /// Creates a loader that loads SVM-light like files, where features are identified by their names.
 /// </summary>
 /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
 /// <param name="numberOfRows">The number of rows from the sample to be used for determining the set of feature names.</param>
 /// <param name="dataSample">A data sample to be used for determining the set of feature names.</param>
 public static SvmLightLoader CreateSvmLightLoaderWithFeatureNames(this DataOperationsCatalog catalog,
                                                                   long? numberOfRows = null,
                                                                   IMultiStreamSource dataSample = null)
 {
     // Configure the loader to interpret feature indices as names rather than numeric positions.
     var options = new SvmLightLoader.Options()
     {
         NumberOfRows = numberOfRows,
         FeatureIndices = SvmLightLoader.FeatureIndices.Names
     };
     return new SvmLightLoader(CatalogUtils.GetEnvironment(catalog), options, dataSample);
 }
        /// <summary>
        /// Picks the column used to stratify the split and materializes the split column.
        /// An explicitly configured stratification column wins; otherwise a group column whose
        /// type is a key with known cardinality (if one exists) is used.
        /// </summary>
        /// <param name="ch">Channel used while resolving the group column.</param>
        /// <param name="input">The input data view.</param>
        /// <param name="output">Receives the data view (possibly augmented with the split column).</param>
        /// <returns>The name of the created split column.</returns>
        private string GetSplitColumn(IChannel ch, IDataView input, ref IDataView output)
        {
            var schema = input.Schema;
            output = input;

            string stratColumn = null;
            if (!string.IsNullOrWhiteSpace(ImplOptions.StratificationColumn))
            {
                stratColumn = ImplOptions.StratificationColumn;
            }
            else
            {
                // No explicit stratification column: fall back to the group column when it is
                // a key type with known cardinality.
                var groupColumn = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(ImplOptions.GroupColumn), ImplOptions.GroupColumn, DefaultColumnNames.GroupId);
                if (groupColumn != null
                    && schema.TryGetColumnIndex(groupColumn, out var groupIndex)
                    && schema[groupIndex].Type.GetKeyCount() > 0)
                {
                    stratColumn = groupColumn;
                }
            }

            return DataOperationsCatalog.CreateSplitColumn(Host, ref output, stratColumn);
        }
        /// <summary>
        /// Runs permutation feature importance (PFI) for a ranking predictor and packages the
        /// per-feature metric statistics into an <see cref="IDataView"/>, one row per named
        /// feature slot. The DCG/NDCG vector columns are re-registered in the schema with their
        /// known sizes before the view is created.
        /// </summary>
        private static IDataView GetRankingMetrics(
            IHostEnvironment env,
            IPredictor predictor,
            RoleMappedData roleMappedData,
            PermutationFeatureImportanceArguments input)
        {
            // Resolve the feature, label and group-id column names from the role mappings.
            var roles             = roleMappedData.Schema.GetColumnRoleNames();
            var featureColumnName = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).Value;
            var labelColumnName   = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).Value;
            var groupIdColumnName = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Group.Value).Value;
            var pred = new RankingPredictionTransformer<IPredictorProducing<float>>(
                env, predictor as IPredictorProducing<float>, roleMappedData.Data.Schema, featureColumnName);
            var rankingCatalog     = new RankingCatalog(env);
            var permutationMetrics = rankingCatalog
                                     .PermutationFeatureImportance(pred,
                                                                   roleMappedData.Data,
                                                                   labelColumnName: labelColumnName,
                                                                   rowGroupColumnName: groupIdColumnName,
                                                                   useFeatureWeightFilter: input.UseFeatureWeightFilter,
                                                                   numberOfExamplesToUse: input.NumberOfExamplesToUse,
                                                                   permutationCount: input.PermutationCount);

            var slotNames = GetSlotNames(roleMappedData.Schema);

            Contracts.Assert(slotNames.Length == permutationMetrics.Length,
                             "Mismatch between number of feature slots and number of features permuted.");

            var metrics = new List<RankingMetrics>();
            for (int i = 0; i < permutationMetrics.Length; i++)
            {
                // Slots without a name cannot be reported; skip them.
                if (string.IsNullOrWhiteSpace(slotNames[i]))
                {
                    continue;
                }
                var pMetric = permutationMetrics[i];
                metrics.Add(new RankingMetrics
                {
                    FeatureName = slotNames[i],
                    DiscountedCumulativeGains                 = pMetric.DiscountedCumulativeGains.Select(x => x.Mean).ToArray(),
                    DiscountedCumulativeGainsStdErr           = pMetric.DiscountedCumulativeGains.Select(x => x.StandardError).ToArray(),
                    NormalizedDiscountedCumulativeGains       = pMetric.NormalizedDiscountedCumulativeGains.Select(x => x.Mean).ToArray(),
                    NormalizedDiscountedCumulativeGainsStdErr = pMetric.NormalizedDiscountedCumulativeGains.Select(x => x.StandardError).ToArray()
                });
            }

            var dataOps = new DataOperationsCatalog(env);

            // BUGFIX: metrics.First() previously threw InvalidOperationException when every slot
            // name was blank (nothing collected). Return the empty view directly in that case.
            if (metrics.Count == 0)
                return dataOps.LoadFromEnumerable(metrics);

            // Convert unknown size vectors to known size, using the first row's lengths.
            var metric = metrics[0];
            SchemaDefinition schema = SchemaDefinition.Create(typeof(RankingMetrics));

            ConvertVectorToKnownSize(nameof(metric.DiscountedCumulativeGains), metric.DiscountedCumulativeGains.Length, ref schema);
            ConvertVectorToKnownSize(nameof(metric.NormalizedDiscountedCumulativeGains), metric.NormalizedDiscountedCumulativeGains.Length, ref schema);
            ConvertVectorToKnownSize(nameof(metric.DiscountedCumulativeGainsStdErr), metric.DiscountedCumulativeGainsStdErr.Length, ref schema);
            ConvertVectorToKnownSize(nameof(metric.NormalizedDiscountedCumulativeGainsStdErr), metric.NormalizedDiscountedCumulativeGainsStdErr.Length, ref schema);

            return dataOps.LoadFromEnumerable(metrics, schema);
        }
Example #4
0
        /// <summary>
        /// Splits the input dataset into train and test partitions according to the requested fraction.
        /// A split column is created (honoring the optional stratification column), rows whose split
        /// value falls in [0, Fraction) form the train set and the complement forms the test set.
        /// </summary>
        public static Output Split(IHostEnvironment env, Input input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(ModuleName);

            host.CheckValue(input, nameof(input));
            host.Check(0 < input.Fraction && input.Fraction < 1, "The fraction must be in the interval (0,1).");

            EntryPointUtils.CheckInputArgs(host, input);

            var data     = input.Data;
            var splitCol = DataOperationsCatalog.CreateSplitColumn(env, ref data, input.StratificationColumn);

            // Builds one partition: the range [0, Fraction) or its complement, with the
            // synthetic split column dropped from the result.
            IDataView MakePartition(bool complement)
            {
                var filtered = new RangeFilter(host, new RangeFilter.Options
                {
                    Column = splitCol,
                    Min = 0,
                    Max = input.Fraction,
                    Complement = complement
                }, data);
                return ColumnSelectingTransformer.CreateDrop(host, filtered, splitCol);
            }

            return new Output()
            {
                TrainData = MakePartition(false),
                TestData = MakePartition(true)
            };
        }
Example #5
0
 /// <summary>
 /// Create a new <see cref="IDataView"/> over an enumerable of items of a user-defined type.
 /// The caller keeps ownership of <paramref name="data"/>; the resulting data view never
 /// alters its contents. Because <see cref="IDataView"/> is assumed immutable, the caller is
 /// expected to support repeated enumeration of <paramref name="data"/> yielding the same
 /// results, unless the data will only be cursored once.
 ///
 /// A typical streaming usage: create the data view that lazily loads data as needed, apply
 /// pre-trained transformations to it, then cursor through it for transformation results.
 /// </summary>
 /// <typeparam name="TRow">The user-defined item type.</typeparam>
 /// <param name="catalog">The context to use for data view creation.</param>
 /// <param name="data">The data to wrap around.</param>
 /// <param name="schemaDefinition">The optional schema definition of the data view to create. If <c>null</c>,
 /// the schema definition is inferred from <typeparamref name="TRow"/>.</param>
 /// <returns>The constructed <see cref="IDataView"/>.</returns>
 public static IDataView ReadFromEnumerable <TRow>(this DataOperationsCatalog catalog, IEnumerable <TRow> data, SchemaDefinition schemaDefinition = null)
     where TRow : class
 {
     var env = catalog.Environment;
     env.CheckValue(data, nameof(data));
     env.CheckValueOrNull(schemaDefinition);
     return DataViewConstructionUtils.CreateFromEnumerable(env, data, schemaDefinition);
 }
Example #6
0
 /// <summary>
 /// Writes the given data preview to <paramref name="writer"/> as a Markdown table:
 /// a header row of column names, the separator row, then one row per previewed data row.
 /// </summary>
 /// <param name="_">Unused catalog receiver; present only to expose this as an extension method.</param>
 /// <param name="preview">The preview whose schema and rows are rendered.</param>
 /// <param name="writer">The destination text writer.</param>
 public static void SaveAsMd(this DataOperationsCatalog _, DataDebuggerPreview preview, TextWriter writer)
 {
     writer.WriteLine($"| {String.Join(" | ", preview.Schema.Select(s => s.Name))} |");
     writer.WriteLine($"| {String.Join(" | ", preview.Schema.Select(s => "------"))} |");
     foreach (var row in preview.RowView)
     {
         // BUGFIX: a null cell value used to throw NullReferenceException; render it as an empty cell.
         writer.WriteLine($" | {String.Join(" | ", row.Values.Select(x => x.Value?.ToString() ?? string.Empty))} | ");
     }
 }
 /// <summary>
 /// Creates a loader that loads SVM-light format files. <see cref="SvmLightLoader"/>.
 /// </summary>
 /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
 /// <param name="numberOfRows">The number of rows from the sample to be used for determining the number of features.</param>
 /// <param name="inputSize">The number of features in the Features column. If 0 is specified, the
 /// loader will determine it by looking at the file sample given in <paramref name="dataSample"/>.</param>
 /// <param name="zeroBased">Set to true when the file contains zero-based feature indices,
 /// false when they are one-based.</param>
 /// <param name="dataSample">A data sample to be used for determining the number of features in the Features column.</param>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[LoadingSvmLight](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadingSvmLight.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public static SvmLightLoader CreateSvmLightLoader(this DataOperationsCatalog catalog,
                                                   long? numberOfRows = null,
                                                   int inputSize = 0,
                                                   bool zeroBased = false,
                                                   IMultiStreamSource dataSample = null)
 {
     // Map the boolean flag onto the loader's indexing enum and bundle all settings.
     var options = new SvmLightLoader.Options()
     {
         InputSize = inputSize,
         NumberOfRows = numberOfRows,
         FeatureIndices = zeroBased
             ? SvmLightLoader.FeatureIndices.ZeroBased
             : SvmLightLoader.FeatureIndices.OneBased
     };
     return new SvmLightLoader(CatalogUtils.GetEnvironment(catalog), options, dataSample);
 }
        /// <summary>
        /// Runs permutation feature importance (PFI) for a regression predictor and packages the
        /// per-feature metric statistics into an <see cref="IDataView"/>, one row per named feature slot.
        /// </summary>
        private static IDataView GetRegressionMetrics(
            IHostEnvironment env,
            IPredictor predictor,
            RoleMappedData roleMappedData,
            PermutationFeatureImportanceArguments input)
        {
            // Resolve the feature and label column names from the role mappings.
            var roles = roleMappedData.Schema.GetColumnRoleNames();
            var featureColumn = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).Value;
            var labelColumn = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).Value;

            var transformer = new RegressionPredictionTransformer<IPredictorProducing<float>>(
                env, predictor as IPredictorProducing<float>, roleMappedData.Data.Schema, featureColumn);

            var permutationMetrics = new RegressionCatalog(env).PermutationFeatureImportance(
                transformer,
                roleMappedData.Data,
                labelColumnName: labelColumn,
                useFeatureWeightFilter: input.UseFeatureWeightFilter,
                numberOfExamplesToUse: input.NumberOfExamplesToUse,
                permutationCount: input.PermutationCount);

            var slotNames = GetSlotNames(roleMappedData.Schema);

            Contracts.Assert(slotNames.Length == permutationMetrics.Length,
                             "Mismatch between number of feature slots and number of features permuted.");

            var metrics = new List<RegressionMetrics>();
            for (int slot = 0; slot < permutationMetrics.Length; slot++)
            {
                // Slots without a name cannot be reported; skip them.
                if (string.IsNullOrWhiteSpace(slotNames[slot]))
                    continue;

                var m = permutationMetrics[slot];
                metrics.Add(new RegressionMetrics
                {
                    FeatureName = slotNames[slot],
                    MeanAbsoluteError = m.MeanAbsoluteError.Mean,
                    MeanAbsoluteErrorStdErr = m.MeanAbsoluteError.StandardError,
                    MeanSquaredError = m.MeanSquaredError.Mean,
                    MeanSquaredErrorStdErr = m.MeanSquaredError.StandardError,
                    RootMeanSquaredError = m.RootMeanSquaredError.Mean,
                    RootMeanSquaredErrorStdErr = m.RootMeanSquaredError.StandardError,
                    LossFunction = m.LossFunction.Mean,
                    LossFunctionStdErr = m.LossFunction.StandardError,
                    RSquared = m.RSquared.Mean,
                    RSquaredStdErr = m.RSquared.StandardError
                });
            }

            return new DataOperationsCatalog(env).LoadFromEnumerable(metrics);
        }
        /// <summary>
        /// Load a <see cref="IDataView"/> from a text file containing features specified by feature names,
        /// using <see cref="SvmLightLoader"/>.
        /// </summary>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
        /// <param name="path">The path to the file.</param>
        /// <param name="numberOfRows">The number of rows from the sample to be used for determining the set of feature names.</param>
        public static IDataView LoadFromSvmLightFileWithFeatureNames(this DataOperationsCatalog catalog,
                                                                     string path,
                                                                     long? numberOfRows = null)
        {
            Contracts.CheckNonEmpty(path, nameof(path));
            // Consistency/robustness: fail fast with a clear message when the file is missing,
            // matching the validation done by the sibling SVM-light load overloads in this file.
            if (!File.Exists(path))
            {
                throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
            }

            var file   = new MultiFileSource(path);
            var loader = catalog.CreateSvmLightLoaderWithFeatureNames(numberOfRows, file);

            return loader.Load(file);
        }
        /// <summary>
        /// Load a <see cref="IDataView"/> from a text file using <see cref="SvmLightLoader"/>.
        /// </summary>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
        /// <param name="path">The path to the file.</param>
        /// <param name="numberOfRows">The number of rows from the sample to be used for determining the number of features.</param>
        /// <param name="inputSize">The number of features in the Features column. If 0 is specified, the
        /// loader will determine it by looking at the file given in <paramref name="path"/>.</param>
        /// <param name="zeroBased">If the file contains zero-based indices, this parameter should be set to true. If they are one-based
        /// it should be set to false.</param>
        public static IDataView LoadFromSvmLightFile(this DataOperationsCatalog catalog,
                                                     string path,
                                                     long? numberOfRows = null,
                                                     int inputSize = 0,
                                                     bool zeroBased = false)
        {
            Contracts.CheckNonEmpty(path, nameof(path));
            // Consistency/robustness: fail fast with a clear message when the file is missing,
            // matching the validation done by the sibling SVM-light load overloads in this file.
            if (!File.Exists(path))
            {
                throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
            }

            var file   = new MultiFileSource(path);
            var loader = catalog.CreateSvmLightLoader(numberOfRows, inputSize, zeroBased, file);

            return loader.Load(file);
        }
        /// <summary>
        /// Load a <see cref="IDataView"/> from a text file whose features are specified by name,
        /// using <see cref="SvmLightLoader"/>.
        /// </summary>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
        /// <param name="path">The path to the file.</param>
        /// <param name="numberOfRows">The number of rows from the sample to be used for determining the set of feature names.</param>
        public static IDataView LoadFromSvmLightFileWithFeatureNames(this DataOperationsCatalog catalog,
                                                                     string path,
                                                                     long? numberOfRows = null)
        {
            Contracts.CheckNonEmpty(path, nameof(path));
            // Fail fast when the file is missing rather than letting the loader surface a less
            // specific error later.
            if (!File.Exists(path))
                throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);

            var source = new MultiFileSource(path);
            return catalog.CreateSvmLightLoaderWithFeatureNames(numberOfRows, source).Load(source);
        }
        /// <summary>
        /// Load a <see cref="IDataView"/> from a text file using <see cref="SvmLightLoader"/>.
        /// </summary>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
        /// <param name="path">The path to the file.</param>
        /// <param name="numberOfRows">The number of rows from the sample to be used for determining the number of features.</param>
        /// <param name="inputSize">The number of features in the Features column. When 0, the
        /// loader determines it by inspecting the file given in <paramref name="path"/>.</param>
        /// <param name="zeroBased">True when the file contains zero-based feature indices,
        /// false when they are one-based.</param>
        public static IDataView LoadFromSvmLightFile(this DataOperationsCatalog catalog,
                                                     string path,
                                                     long? numberOfRows = null,
                                                     int inputSize = 0,
                                                     bool zeroBased = false)
        {
            Contracts.CheckNonEmpty(path, nameof(path));
            // Fail fast when the file is missing rather than letting the loader surface a less
            // specific error later.
            if (!File.Exists(path))
                throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);

            var source = new MultiFileSource(path);
            return catalog.CreateSvmLightLoader(numberOfRows, inputSize, zeroBased, source).Load(source);
        }
Example #13
0
        /// <summary>
        /// Splits the input dataset into cross-validation folds. For fold i of n, rows whose split
        /// value falls in [i/n, (i+1)/n) form the test set and the complement forms the train set;
        /// the synthetic split column is dropped from every output.
        /// </summary>
        public static Output Split(IHostEnvironment env, Input input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(ModuleName);

            host.CheckValue(input, nameof(input));
            // Robustness: fewer than 2 folds is degenerate (n == 0 makes the per-fold fraction
            // infinite and yields empty outputs; n == 1 leaves every train set empty). Fail fast,
            // mirroring the fraction validation in the train/test Split entry point.
            host.Check(input.NumFolds >= 2, "The number of folds must be at least 2.");

            EntryPointUtils.CheckInputArgs(host, input);

            var data = input.Data;

            var splitCol = DataOperationsCatalog.CreateSplitColumn(env, ref data, input.StratificationColumn);

            int n      = input.NumFolds;
            var output = new Output
            {
                TrainData = new IDataView[n],
                TestData  = new IDataView[n]
            };

            // Construct per-fold datasets.
            double fraction = 1.0 / n;

            for (int i = 0; i < n; i++)
            {
                // Train set: everything outside [i/n, (i+1)/n).
                var trainData = new RangeFilter(host,
                                                new RangeFilter.Options {
                    Column = splitCol, Min = i * fraction, Max = (i + 1) * fraction, Complement = true
                }, data);
                output.TrainData[i] = ColumnSelectingTransformer.CreateDrop(host, trainData, splitCol);

                // Test set: everything inside [i/n, (i+1)/n).
                var testData = new RangeFilter(host,
                                               new RangeFilter.Options {
                    Column = splitCol, Min = i * fraction, Max = (i + 1) * fraction, Complement = false
                }, data);
                output.TestData[i] = ColumnSelectingTransformer.CreateDrop(host, testData, splitCol);
            }

            return(output);
        }
        /// <summary>
        /// Save the <see cref="IDataView"/> in SVM-light format. Four columns can be saved: a label and a features column,
        /// and optionally a group ID column and an example weight column.
        /// </summary>
        /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
        /// <param name="data">The data view to save.</param>
        /// <param name="stream">The stream to write to.</param>
        /// <param name="zeroBasedIndexing">Whether to index the features starting at 0 or at 1.</param>
        /// <param name="binaryLabel">If set to true, saves 1 for positive labels, -1 for non-positive labels and 0 for NaN.
        /// Otherwise, saves the value of the label in the data view.</param>
        /// <param name="labelColumnName">The name of the column to be saved as the label column.</param>
        /// <param name="featureColumnName">The name of the column to be saved as the features column.</param>
        /// <param name="rowGroupColumnName">The name of the column to be saved as the group ID column. If null, a group ID column
        /// will not be saved.</param>
        /// <param name="exampleWeightColumnName">The name of the column to be saved as the weight column. If null, a weight column
        /// will not be saved.</param>
        public static void SaveInSvmLightFormat(this DataOperationsCatalog catalog,
                                                IDataView data,
                                                Stream stream,
                                                bool zeroBasedIndexing         = false,
                                                bool binaryLabel               = false,
                                                string labelColumnName         = DefaultColumnNames.Label,
                                                string featureColumnName       = DefaultColumnNames.Features,
                                                string rowGroupColumnName      = null,
                                                string exampleWeightColumnName = null)
        {
            // Translate the friendly parameters into the saver's argument object.
            var saver = new SvmLightSaver(CatalogUtils.GetEnvironment(catalog), new SvmLightSaver.Arguments()
            {
                Zero                    = zeroBasedIndexing,
                Binary                  = binaryLabel,
                LabelColumnName         = labelColumnName,
                FeatureColumnName       = featureColumnName,
                RowGroupColumnName      = rowGroupColumnName,
                ExampleWeightColumnName = exampleWeightColumnName
            });

            // Pass every column index of the view to the saver.
            var columnIndices = data.Schema.Select(col => col.Index).ToArray();
            saver.SaveData(stream, data, columnIndices);
        }
Example #15
0
 /// <summary>
 /// Split the dataset into the train set and test set according to the given fraction.
 /// Respects the <paramref name="stratificationColumn"/> if provided.
 /// </summary>
 /// <typeparam name="T">The tuple describing the data schema.</typeparam>
 /// <param name="catalog">The training catalog.</param>
 /// <param name="data">The dataset to split.</param>
 /// <param name="testFraction">The fraction of data to go into the test set.</param>
 /// <param name="stratificationColumn">Optional selector for the column to use as a stratification column. If two examples share the same value of the <paramref name="stratificationColumn"/>
 /// (if provided), they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from train to the test set.
 /// If this optional parameter is not provided, a stratification columns will be generated, and its values will be random numbers .</param>
 /// <param name="seed">Optional parameter used in combination with the <paramref name="stratificationColumn"/>.
 /// If the <paramref name="stratificationColumn"/> is not provided, the random numbers generated to create it, will use this seed as value.
 /// And if it is not provided, the default value will be used.</param>
 /// <returns>A pair of datasets, for the train and test set.</returns>
 public static (DataView <T> trainSet, DataView <T> testSet) TrainTestSplit <T>(this DataOperationsCatalog catalog,
                                                                                DataView <T> data, double testFraction = 0.1, Func <T, PipelineColumn> stratificationColumn = null, uint?seed = null)
 {
 /// <summary>
 /// Configures a loader for text files.
 /// </summary>
 /// <typeparam name="TShape">The type shape parameter, which must be a valid-schema shape. In practice
 /// this is generally not explicitly specified by the user, but inferred from the return type of
 /// <paramref name="func"/>, which takes an input <see cref="Context"/> and uses it to compose a
 /// shape-type instance describing what the columns are and how to load them from the file.</typeparam>
 /// <param name="catalog">The catalog.</param>
 /// <param name="func">The delegate that describes what fields to read from the text file, as well as
 /// describing their input type. The delegate is fed a <see cref="Context"/>, and the user composes a
 /// shape type with <see cref="PipelineColumn"/> instances out of that <see cref="Context"/>.
 /// The resulting data will have columns with the names corresponding to their names in the shape type.</param>
 /// <param name="files">Input files.</param>
 /// <param name="hasHeader">Data file has header with feature names.</param>
 /// <param name="separator">Text field separator.</param>
 /// <param name="allowQuoting">Whether the input may include quoted values, which can contain separator
 /// characters, colons, and distinguish empty values from missing values. When true, consecutive separators
 /// denote a missing value and an empty value is denoted by <c>""</c>. When false, consecutive separators
 /// denote an empty value.</param>
 /// <param name="allowSparse">Whether the input may include sparse representations.</param>
 /// <param name="trimWhitspace">Remove trailing whitespace from lines.</param>
 /// <returns>A configured statically-typed loader for text files.</returns>
 public static DataLoader <IMultiStreamSource, TShape> CreateTextLoader <[IsShape] TShape>(
     this DataOperationsCatalog catalog, Func <Context, TShape> func, IMultiStreamSource files = null,
     bool hasHeader     = false, char separator = '\t', bool allowQuoting = true, bool allowSparse = true,
     bool trimWhitspace = false)
 {
     // Delegate to the shared loader factory. Note the argument order there differs from this
     // method's parameter order: separator precedes hasHeader.
     return CreateLoader(catalog.Environment, func, files, separator, hasHeader, allowQuoting, allowSparse, trimWhitspace);
 }
Example #17
0
 /// <summary>
 /// Returns the host environment backing the given catalog, checking the catalog reference first.
 /// </summary>
 public static IHostEnvironment GetEnvironment(this DataOperationsCatalog catalog)
 {
     return Contracts.CheckRef(catalog, nameof(catalog)).Environment;
 }
        /// <summary>
        /// Runs permutation feature importance (PFI) for a binary classification predictor and
        /// packages the per-feature metric statistics into an <see cref="IDataView"/>, one row
        /// per named feature slot.
        /// </summary>
        private static IDataView GetBinaryMetrics(
            IHostEnvironment env,
            IPredictor predictor,
            RoleMappedData roleMappedData,
            PermutationFeatureImportanceArguments input)
        {
            // Resolve the feature and label column names from the role mappings.
            var roles = roleMappedData.Schema.GetColumnRoleNames();
            var featureColumn = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).Value;
            var labelColumn = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).Value;

            var transformer = new BinaryPredictionTransformer<IPredictorProducing<float>>(
                env, predictor as IPredictorProducing<float>, roleMappedData.Data.Schema, featureColumn);

            var permutationMetrics = new BinaryClassificationCatalog(env).PermutationFeatureImportance(
                transformer,
                roleMappedData.Data,
                labelColumnName: labelColumn,
                useFeatureWeightFilter: input.UseFeatureWeightFilter,
                numberOfExamplesToUse: input.NumberOfExamplesToUse,
                permutationCount: input.PermutationCount);

            var slotNames = GetSlotNames(roleMappedData.Schema);

            Contracts.Assert(slotNames.Length == permutationMetrics.Length,
                             "Mismatch between number of feature slots and number of features permuted.");

            var metrics = new List<BinaryMetrics>();
            for (int slot = 0; slot < permutationMetrics.Length; slot++)
            {
                // Slots without a name cannot be reported; skip them.
                if (string.IsNullOrWhiteSpace(slotNames[slot]))
                    continue;

                var m = permutationMetrics[slot];
                metrics.Add(new BinaryMetrics
                {
                    FeatureName = slotNames[slot],
                    AreaUnderRocCurve = m.AreaUnderRocCurve.Mean,
                    AreaUnderRocCurveStdErr = m.AreaUnderRocCurve.StandardError,
                    Accuracy = m.Accuracy.Mean,
                    AccuracyStdErr = m.Accuracy.StandardError,
                    PositivePrecision = m.PositivePrecision.Mean,
                    PositivePrecisionStdErr = m.PositivePrecision.StandardError,
                    PositiveRecall = m.PositiveRecall.Mean,
                    PositiveRecallStdErr = m.PositiveRecall.StandardError,
                    NegativePrecision = m.NegativePrecision.Mean,
                    NegativePrecisionStdErr = m.NegativePrecision.StandardError,
                    NegativeRecall = m.NegativeRecall.Mean,
                    NegativeRecallStdErr = m.NegativeRecall.StandardError,
                    F1Score = m.F1Score.Mean,
                    F1ScoreStdErr = m.F1Score.StandardError,
                    AreaUnderPrecisionRecallCurve = m.AreaUnderPrecisionRecallCurve.Mean,
                    AreaUnderPrecisionRecallCurveStdErr = m.AreaUnderPrecisionRecallCurve.StandardError
                });
            }

            return new DataOperationsCatalog(env).LoadFromEnumerable(metrics);
        }
        /// <summary>
        /// Computes permutation feature importance for a multiclass classifier and
        /// materializes the per-feature metric deltas as an <see cref="IDataView"/>.
        /// </summary>
        /// <param name="env">The host environment used to construct catalogs and transformers.</param>
        /// <param name="predictor">The trained predictor; expected to produce <see cref="VBuffer{T}"/> of <see cref="float"/> scores.</param>
        /// <param name="roleMappedData">Data with feature/label role mappings; supplies the schema and the rows to permute.</param>
        /// <param name="input">PFI arguments (weight filtering, example count, permutation count).</param>
        /// <returns>An <see cref="IDataView"/> with one row per named feature slot.</returns>
        private static IDataView GetMulticlassMetrics(
            IHostEnvironment env,
            IPredictor predictor,
            RoleMappedData roleMappedData,
            PermutationFeatureImportanceArguments input)
        {
            // Recover the feature and label column names from the role mappings.
            var roles             = roleMappedData.Schema.GetColumnRoleNames();
            var featureColumnName = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).First().Value;
            var labelColumnName   = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).First().Value;
            var pred = new MulticlassPredictionTransformer <IPredictorProducing <VBuffer <float> > >(
                env, predictor as IPredictorProducing <VBuffer <float> >, roleMappedData.Data.Schema, featureColumnName, labelColumnName);
            var multiclassCatalog  = new MulticlassClassificationCatalog(env);
            var permutationMetrics = multiclassCatalog
                                     .PermutationFeatureImportance(pred,
                                                                   roleMappedData.Data,
                                                                   labelColumnName: labelColumnName,
                                                                   useFeatureWeightFilter: input.UseFeatureWeightFilter,
                                                                   numberOfExamplesToUse: input.NumberOfExamplesToUse,
                                                                   permutationCount: input.PermutationCount);

            var slotNames = GetSlotNames(roleMappedData.Schema);

            Contracts.Assert(slotNames.Length == permutationMetrics.Length,
                             "Mismatch between number of feature slots and number of features permuted.");

            List <MulticlassMetrics> metrics = new List <MulticlassMetrics>();

            for (int i = 0; i < permutationMetrics.Length; i++)
            {
                // Unnamed slots carry no identifiable feature; skip them.
                if (string.IsNullOrWhiteSpace(slotNames[i]))
                {
                    continue;
                }
                var pMetric = permutationMetrics[i];
                metrics.Add(new MulticlassMetrics
                {
                    FeatureName            = slotNames[i],
                    MacroAccuracy          = pMetric.MacroAccuracy.Mean,
                    MacroAccuracyStdErr    = pMetric.MacroAccuracy.StandardError,
                    MicroAccuracy          = pMetric.MicroAccuracy.Mean,
                    MicroAccuracyStdErr    = pMetric.MicroAccuracy.StandardError,
                    LogLoss                = pMetric.LogLoss.Mean,
                    LogLossStdErr          = pMetric.LogLoss.StandardError,
                    LogLossReduction       = pMetric.LogLossReduction.Mean,
                    LogLossReductionStdErr = pMetric.LogLossReduction.StandardError,
                    TopKAccuracy           = pMetric.TopKAccuracy.Mean,
                    TopKAccuracyStdErr     = pMetric.TopKAccuracy.StandardError,
                    PerClassLogLoss        = pMetric.PerClassLogLoss.Select(x => x.Mean).ToArray(),
                    PerClassLogLossStdErr  = pMetric.PerClassLogLoss.Select(x => x.StandardError).ToArray()
                });
            }

            // Convert unknown size vectors to known size. Only possible when at least one
            // row survived the slot-name filter; previously metrics.First() threw
            // InvalidOperationException on an empty list.
            SchemaDefinition schema = SchemaDefinition.Create(typeof(MulticlassMetrics));
            if (metrics.Count > 0)
            {
                var metric = metrics[0];
                ConvertVectorToKnownSize(nameof(metric.PerClassLogLoss), metric.PerClassLogLoss.Length, ref schema);
                ConvertVectorToKnownSize(nameof(metric.PerClassLogLossStdErr), metric.PerClassLogLossStdErr.Length, ref schema);
            }

            var dataOps = new DataOperationsCatalog(env);
            var result  = dataOps.LoadFromEnumerable(metrics, schema);

            return(result);
        }
 /// <summary>
 /// Wraps an XPO query result as an <see cref="IDataView"/> so it can be consumed by ML.NET.
 /// Builds a <see cref="DevExpress.Xpo.XPView"/> over <paramref name="ObjectType"/> using the
 /// given property list and criteria, then exposes the named text and label properties.
 /// </summary>
 public static IDataView LoadFromXpoObject(this DataOperationsCatalog Instance, Session session, Type ObjectType, string Properties, CriteriaOperator Criteria, string TextPropertyName, string LabelPropertyName)
 {
     var xpoView = new DevExpress.Xpo.XPView(session, ObjectType, Properties, Criteria);
     return new XpoInputObjectDataView(xpoView, TextPropertyName, LabelPropertyName);
 }
 /// <summary>
 /// Loads team statistics from a comma-separated text file that has a header row.
 /// </summary>
 /// <param name="data">The <see cref="DataOperationsCatalog"/> this extension operates on.</param>
 /// <param name="file">Path to the CSV file to load.</param>
 public static IDataView ReadTeamStatistics(this DataOperationsCatalog data, string file)
     => data.LoadFromTextFile <TeamStatistics>(file, separatorChar: ',', hasHeader: true);