/// <summary>
/// Creates a loader that loads SVM-light like files, where features are specified by their names.
/// </summary>
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="numberOfRows">The number of rows from the sample to be used for determining the set of feature names.</param>
/// <param name="dataSample">A data sample to be used for determining the set of feature names.</param>
public static SvmLightLoader CreateSvmLightLoaderWithFeatureNames(this DataOperationsCatalog catalog,
    long? numberOfRows = null,
    IMultiStreamSource dataSample = null)
    => new SvmLightLoader(CatalogUtils.GetEnvironment(catalog),
        new SvmLightLoader.Options()
        {
            NumberOfRows = numberOfRows,
            FeatureIndices = SvmLightLoader.FeatureIndices.Names
        }, dataSample);
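// A minimal usage sketch for the loader above. The "mlContext" instance and
// the "sample.svm" path are assumptions for illustration; mlContext.Data is
// the DataOperationsCatalog this extension method targets.
var namedLoader = mlContext.Data.CreateSvmLightLoaderWithFeatureNames(
    numberOfRows: 100,                                // scan up to 100 rows to collect feature names
    dataSample: new MultiFileSource("sample.svm"));   // sample used only for schema inference
IDataView namedData = namedLoader.Load(new MultiFileSource("sample.svm"));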
private string GetSplitColumn(IChannel ch, IDataView input, ref IDataView output)
{
    // The stratification column and/or group column, if they exist at all, must be present at this point.
    var schema = input.Schema;
    output = input;

    // If no stratification column was specified, but we have a group column of type Single, Double or
    // Key (contiguous), use it.
    string stratificationColumn = null;
    if (!string.IsNullOrWhiteSpace(ImplOptions.StratificationColumn))
    {
        stratificationColumn = ImplOptions.StratificationColumn;
    }
    else
    {
        string group = TrainUtils.MatchNameOrDefaultOrNull(ch, schema,
            nameof(ImplOptions.GroupColumn), ImplOptions.GroupColumn, DefaultColumnNames.GroupId);
        int index;
        if (group != null && schema.TryGetColumnIndex(group, out index))
        {
            // Check if the group column is a key type with known cardinality.
            var type = schema[index].Type;
            if (type.GetKeyCount() > 0)
                stratificationColumn = group;
        }
    }

    var splitColumn = DataOperationsCatalog.CreateSplitColumn(Host, ref output, stratificationColumn);
    return splitColumn;
}
private static IDataView GetRankingMetrics(
    IHostEnvironment env,
    IPredictor predictor,
    RoleMappedData roleMappedData,
    PermutationFeatureImportanceArguments input)
{
    var roles = roleMappedData.Schema.GetColumnRoleNames();
    var featureColumnName = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).First().Value;
    var labelColumnName = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).First().Value;
    var groupIdColumnName = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Group.Value).First().Value;

    var pred = new RankingPredictionTransformer<IPredictorProducing<float>>(
        env, predictor as IPredictorProducing<float>, roleMappedData.Data.Schema, featureColumnName);

    var rankingCatalog = new RankingCatalog(env);
    var permutationMetrics = rankingCatalog
        .PermutationFeatureImportance(pred,
            roleMappedData.Data,
            labelColumnName: labelColumnName,
            rowGroupColumnName: groupIdColumnName,
            useFeatureWeightFilter: input.UseFeatureWeightFilter,
            numberOfExamplesToUse: input.NumberOfExamplesToUse,
            permutationCount: input.PermutationCount);

    var slotNames = GetSlotNames(roleMappedData.Schema);
    Contracts.Assert(slotNames.Length == permutationMetrics.Length,
        "Mismatch between number of feature slots and number of features permuted.");

    List<RankingMetrics> metrics = new List<RankingMetrics>();
    for (int i = 0; i < permutationMetrics.Length; i++)
    {
        if (string.IsNullOrWhiteSpace(slotNames[i]))
            continue;
        var pMetric = permutationMetrics[i];
        metrics.Add(new RankingMetrics
        {
            FeatureName = slotNames[i],
            DiscountedCumulativeGains = pMetric.DiscountedCumulativeGains.Select(x => x.Mean).ToArray(),
            DiscountedCumulativeGainsStdErr = pMetric.DiscountedCumulativeGains.Select(x => x.StandardError).ToArray(),
            NormalizedDiscountedCumulativeGains = pMetric.NormalizedDiscountedCumulativeGains.Select(x => x.Mean).ToArray(),
            NormalizedDiscountedCumulativeGainsStdErr = pMetric.NormalizedDiscountedCumulativeGains.Select(x => x.StandardError).ToArray()
        });
    }

    // Convert unknown size vectors to known size.
    var metric = metrics.First();
    SchemaDefinition schema = SchemaDefinition.Create(typeof(RankingMetrics));
    ConvertVectorToKnownSize(nameof(metric.DiscountedCumulativeGains), metric.DiscountedCumulativeGains.Length, ref schema);
    ConvertVectorToKnownSize(nameof(metric.NormalizedDiscountedCumulativeGains), metric.NormalizedDiscountedCumulativeGains.Length, ref schema);
    ConvertVectorToKnownSize(nameof(metric.DiscountedCumulativeGainsStdErr), metric.DiscountedCumulativeGainsStdErr.Length, ref schema);
    ConvertVectorToKnownSize(nameof(metric.NormalizedDiscountedCumulativeGainsStdErr), metric.NormalizedDiscountedCumulativeGainsStdErr.Length, ref schema);

    var dataOps = new DataOperationsCatalog(env);
    return dataOps.LoadFromEnumerable(metrics, schema);
}
public static Output Split(IHostEnvironment env, Input input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(ModuleName);
    host.CheckValue(input, nameof(input));
    host.Check(0 < input.Fraction && input.Fraction < 1, "The fraction must be in the interval (0,1).");
    EntryPointUtils.CheckInputArgs(host, input);

    var data = input.Data;
    var splitCol = DataOperationsCatalog.CreateSplitColumn(env, ref data, input.StratificationColumn);

    // Rows whose split-column value falls in [0, Fraction) become the train set;
    // the complement becomes the test set. The temporary split column is dropped afterwards.
    IDataView trainData = new RangeFilter(host,
        new RangeFilter.Options { Column = splitCol, Min = 0, Max = input.Fraction, Complement = false }, data);
    trainData = ColumnSelectingTransformer.CreateDrop(host, trainData, splitCol);

    IDataView testData = new RangeFilter(host,
        new RangeFilter.Options { Column = splitCol, Min = 0, Max = input.Fraction, Complement = true }, data);
    testData = ColumnSelectingTransformer.CreateDrop(host, testData, splitCol);

    return new Output() { TrainData = trainData, TestData = testData };
}
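// A hedged sketch of the user-facing equivalent: end users typically reach
// this entry point through DataOperationsCatalog.TrainTestSplit rather than
// calling Split directly. "mlContext", "data", the 0.2 fraction, and the
// "GroupId" column name are assumptions for illustration.
var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.2,
    samplingKeyColumnName: "GroupId");   // optional; rows sharing a key land in the same partition
IDataView train = split.TrainSet;
IDataView test = split.TestSet;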
/// <summary>
/// Create a new <see cref="IDataView"/> over an enumerable of the items of a user-defined type.
/// The user maintains ownership of the <paramref name="data"/> and the resulting data view will
/// never alter the contents of the <paramref name="data"/>.
/// Since <see cref="IDataView"/> is assumed to be immutable, the user is expected to support
/// multiple enumerations of the <paramref name="data"/> that would return the same results, unless
/// the user knows that the data will only be cursored once.
///
/// One typical usage for a streaming data view could be: create the data view that lazily loads data
/// as needed, then apply pre-trained transformations to it and cursor through it for transformation
/// results.
/// </summary>
/// <typeparam name="TRow">The user-defined item type.</typeparam>
/// <param name="catalog">The context to use for data view creation.</param>
/// <param name="data">The data to wrap around.</param>
/// <param name="schemaDefinition">The optional schema definition of the data view to create. If <c>null</c>,
/// the schema definition is inferred from <typeparamref name="TRow"/>.</param>
/// <returns>The constructed <see cref="IDataView"/>.</returns>
public static IDataView ReadFromEnumerable<TRow>(this DataOperationsCatalog catalog, IEnumerable<TRow> data,
    SchemaDefinition schemaDefinition = null)
    where TRow : class
{
    catalog.Environment.CheckValue(data, nameof(data));
    catalog.Environment.CheckValueOrNull(schemaDefinition);
    return DataViewConstructionUtils.CreateFromEnumerable(catalog.Environment, data, schemaDefinition);
}
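// A minimal sketch of wrapping an in-memory collection as a data view. The
// HousingData class and its values are hypothetical. LoadFromEnumerable is
// the current name of this API; ReadFromEnumerable above is the older one.
public class HousingData
{
    public float Size { get; set; }
    public float Price { get; set; }
}

var samples = new List<HousingData>
{
    new HousingData { Size = 1.1f, Price = 1.2f },
    new HousingData { Size = 1.9f, Price = 2.3f }
};
IDataView dataView = mlContext.Data.LoadFromEnumerable(samples);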
public static void SaveAsMd(this DataOperationsCatalog _, DataDebuggerPreview preview, TextWriter writer)
{
    // Header row, markdown separator row, then one row per previewed example.
    writer.WriteLine($"| {string.Join(" | ", preview.Schema.Select(s => s.Name))} |");
    writer.WriteLine($"| {string.Join(" | ", preview.Schema.Select(s => "------"))} |");
    foreach (var row in preview.RowView)
        writer.WriteLine($"| {string.Join(" | ", row.Values.Select(x => x.Value.ToString()))} |");
}
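// Usage sketch: render the first rows of a data view as a markdown table.
// Preview() materializes a capped number of rows (100 by default).
// "mlContext" and "dataView" are assumed from the earlier examples.
mlContext.Data.SaveAsMd(dataView.Preview(), Console.Out);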
/// <summary>
/// Creates a loader that loads SVM-light format files, using <see cref="SvmLightLoader"/>.
/// </summary>
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="inputSize">The number of features in the Features column. If 0 is specified, the
/// loader will determine it by looking at the file sample given in <paramref name="dataSample"/>.</param>
/// <param name="numberOfRows">The number of rows from the sample to be used for determining the number of features.</param>
/// <param name="zeroBased">If the file contains zero-based indices, this parameter should be set to true. If they are one-based,
/// it should be set to false.</param>
/// <param name="dataSample">A data sample to be used for determining the number of features in the Features column.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LoadingSvmLight](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadingSvmLight.cs)]
/// ]]>
/// </format>
/// </example>
public static SvmLightLoader CreateSvmLightLoader(this DataOperationsCatalog catalog,
    long? numberOfRows = null,
    int inputSize = 0,
    bool zeroBased = false,
    IMultiStreamSource dataSample = null)
    => new SvmLightLoader(CatalogUtils.GetEnvironment(catalog),
        new SvmLightLoader.Options()
        {
            InputSize = inputSize,
            NumberOfRows = numberOfRows,
            FeatureIndices = zeroBased
                ? SvmLightLoader.FeatureIndices.ZeroBased
                : SvmLightLoader.FeatureIndices.OneBased
        }, dataSample);
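// Usage sketch. An SVM-light line looks like "1 1:0.5 3:2.0 # comment":
// a label followed by index:value pairs for the non-zero features. The
// "train.svm" path is hypothetical.
var svmLoader = mlContext.Data.CreateSvmLightLoader(
    inputSize: 10,        // declare 10 feature slots instead of inferring from a sample
    zeroBased: false);    // indices in the file start at 1
IDataView svmData = svmLoader.Load(new MultiFileSource("train.svm"));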
private static IDataView GetRegressionMetrics(
    IHostEnvironment env,
    IPredictor predictor,
    RoleMappedData roleMappedData,
    PermutationFeatureImportanceArguments input)
{
    var roles = roleMappedData.Schema.GetColumnRoleNames();
    var featureColumnName = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).First().Value;
    var labelColumnName = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).First().Value;

    var pred = new RegressionPredictionTransformer<IPredictorProducing<float>>(
        env, predictor as IPredictorProducing<float>, roleMappedData.Data.Schema, featureColumnName);

    var regressionCatalog = new RegressionCatalog(env);
    var permutationMetrics = regressionCatalog
        .PermutationFeatureImportance(pred,
            roleMappedData.Data,
            labelColumnName: labelColumnName,
            useFeatureWeightFilter: input.UseFeatureWeightFilter,
            numberOfExamplesToUse: input.NumberOfExamplesToUse,
            permutationCount: input.PermutationCount);

    var slotNames = GetSlotNames(roleMappedData.Schema);
    Contracts.Assert(slotNames.Length == permutationMetrics.Length,
        "Mismatch between number of feature slots and number of features permuted.");

    List<RegressionMetrics> metrics = new List<RegressionMetrics>();
    for (int i = 0; i < permutationMetrics.Length; i++)
    {
        if (string.IsNullOrWhiteSpace(slotNames[i]))
            continue;
        var pMetric = permutationMetrics[i];
        metrics.Add(new RegressionMetrics
        {
            FeatureName = slotNames[i],
            MeanAbsoluteError = pMetric.MeanAbsoluteError.Mean,
            MeanAbsoluteErrorStdErr = pMetric.MeanAbsoluteError.StandardError,
            MeanSquaredError = pMetric.MeanSquaredError.Mean,
            MeanSquaredErrorStdErr = pMetric.MeanSquaredError.StandardError,
            RootMeanSquaredError = pMetric.RootMeanSquaredError.Mean,
            RootMeanSquaredErrorStdErr = pMetric.RootMeanSquaredError.StandardError,
            LossFunction = pMetric.LossFunction.Mean,
            LossFunctionStdErr = pMetric.LossFunction.StandardError,
            RSquared = pMetric.RSquared.Mean,
            RSquaredStdErr = pMetric.RSquared.StandardError
        });
    }

    var dataOps = new DataOperationsCatalog(env);
    return dataOps.LoadFromEnumerable(metrics);
}
/// <summary>
/// Load an <see cref="IDataView"/> from a text file containing features specified by feature names,
/// using <see cref="SvmLightLoader"/>.
/// </summary>
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="path">The path to the file.</param>
/// <param name="numberOfRows">The number of rows from the sample to be used for determining the set of feature names.</param>
public static IDataView LoadFromSvmLightFileWithFeatureNames(this DataOperationsCatalog catalog, string path,
    long? numberOfRows = null)
{
    Contracts.CheckNonEmpty(path, nameof(path));
    var file = new MultiFileSource(path);
    var loader = catalog.CreateSvmLightLoaderWithFeatureNames(numberOfRows, file);
    return loader.Load(file);
}
/// <summary>
/// Load an <see cref="IDataView"/> from a text file using <see cref="SvmLightLoader"/>.
/// </summary>
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="path">The path to the file.</param>
/// <param name="inputSize">The number of features in the Features column. If 0 is specified, the
/// loader will determine it by looking at the file given in <paramref name="path"/>.</param>
/// <param name="zeroBased">If the file contains zero-based indices, this parameter should be set to true. If they are one-based,
/// it should be set to false.</param>
/// <param name="numberOfRows">The number of rows from the sample to be used for determining the number of features.</param>
public static IDataView LoadFromSvmLightFile(this DataOperationsCatalog catalog, string path,
    long? numberOfRows = null, int inputSize = 0, bool zeroBased = false)
{
    Contracts.CheckNonEmpty(path, nameof(path));
    var file = new MultiFileSource(path);
    var loader = catalog.CreateSvmLightLoader(numberOfRows, inputSize, zeroBased, file);
    return loader.Load(file);
}
/// <summary>
/// Load an <see cref="IDataView"/> from a text file containing features specified by feature names,
/// using <see cref="SvmLightLoader"/>.
/// </summary>
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="path">The path to the file.</param>
/// <param name="numberOfRows">The number of rows from the sample to be used for determining the set of feature names.</param>
public static IDataView LoadFromSvmLightFileWithFeatureNames(this DataOperationsCatalog catalog, string path,
    long? numberOfRows = null)
{
    Contracts.CheckNonEmpty(path, nameof(path));
    if (!File.Exists(path))
        throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);

    var file = new MultiFileSource(path);
    var loader = catalog.CreateSvmLightLoaderWithFeatureNames(numberOfRows, file);
    return loader.Load(file);
}
/// <summary>
/// Load an <see cref="IDataView"/> from a text file using <see cref="SvmLightLoader"/>.
/// </summary>
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="path">The path to the file.</param>
/// <param name="inputSize">The number of features in the Features column. If 0 is specified, the
/// loader will determine it by looking at the file given in <paramref name="path"/>.</param>
/// <param name="zeroBased">If the file contains zero-based indices, this parameter should be set to true. If they are one-based,
/// it should be set to false.</param>
/// <param name="numberOfRows">The number of rows from the sample to be used for determining the number of features.</param>
public static IDataView LoadFromSvmLightFile(this DataOperationsCatalog catalog, string path,
    long? numberOfRows = null, int inputSize = 0, bool zeroBased = false)
{
    Contracts.CheckNonEmpty(path, nameof(path));
    if (!File.Exists(path))
        throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);

    var file = new MultiFileSource(path);
    var loader = catalog.CreateSvmLightLoader(numberOfRows, inputSize, zeroBased, file);
    return loader.Load(file);
}
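// Usage sketch: the one-call form that wraps loader creation and loading.
// The "train.svm" path is hypothetical.
IDataView svmView = mlContext.Data.LoadFromSvmLightFile("train.svm", zeroBased: false);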
public static Output Split(IHostEnvironment env, Input input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(ModuleName);
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    var data = input.Data;
    var splitCol = DataOperationsCatalog.CreateSplitColumn(env, ref data, input.StratificationColumn);
    int n = input.NumFolds;
    var output = new Output
    {
        TrainData = new IDataView[n],
        TestData = new IDataView[n]
    };

    // Construct per-fold datasets: fold i tests on rows whose split-column value
    // falls in [i/n, (i+1)/n) and trains on the complement.
    double fraction = 1.0 / n;
    for (int i = 0; i < n; i++)
    {
        var trainData = new RangeFilter(host,
            new RangeFilter.Options { Column = splitCol, Min = i * fraction, Max = (i + 1) * fraction, Complement = true }, data);
        output.TrainData[i] = ColumnSelectingTransformer.CreateDrop(host, trainData, splitCol);

        var testData = new RangeFilter(host,
            new RangeFilter.Options { Column = splitCol, Min = i * fraction, Max = (i + 1) * fraction, Complement = false }, data);
        output.TestData[i] = ColumnSelectingTransformer.CreateDrop(host, testData, splitCol);
    }

    return output;
}
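// Hedged sketch of the user-facing equivalent: CrossValidationSplit produces
// the same fold structure. "mlContext" and "data" are assumptions.
var folds = mlContext.Data.CrossValidationSplit(data, numberOfFolds: 5);
foreach (var fold in folds)
{
    IDataView foldTrain = fold.TrainSet;   // 4/5 of the rows for this fold
    IDataView foldTest = fold.TestSet;     // the remaining 1/5
}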
/// <summary>
/// Save the <see cref="IDataView"/> in SVM-light format. Four columns can be saved: a label and a features column,
/// and optionally a group ID column and an example weight column.
/// </summary>
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="data">The data view to save.</param>
/// <param name="stream">The stream to write to.</param>
/// <param name="zeroBasedIndexing">Whether to index the features starting at 0 or at 1.</param>
/// <param name="binaryLabel">If set to true, saves 1 for positive labels, -1 for non-positive labels and 0 for NaN.
/// Otherwise, saves the value of the label in the data view.</param>
/// <param name="labelColumnName">The name of the column to be saved as the label column.</param>
/// <param name="featureColumnName">The name of the column to be saved as the features column.</param>
/// <param name="rowGroupColumnName">The name of the column to be saved as the group ID column. If null, a group ID column
/// will not be saved.</param>
/// <param name="exampleWeightColumnName">The name of the column to be saved as the weight column. If null, a weight column
/// will not be saved.</param>
public static void SaveInSvmLightFormat(this DataOperationsCatalog catalog,
    IDataView data,
    Stream stream,
    bool zeroBasedIndexing = false,
    bool binaryLabel = false,
    string labelColumnName = DefaultColumnNames.Label,
    string featureColumnName = DefaultColumnNames.Features,
    string rowGroupColumnName = null,
    string exampleWeightColumnName = null)
{
    var args = new SvmLightSaver.Arguments()
    {
        Zero = zeroBasedIndexing,
        Binary = binaryLabel,
        LabelColumnName = labelColumnName,
        FeatureColumnName = featureColumnName,
        RowGroupColumnName = rowGroupColumnName,
        ExampleWeightColumnName = exampleWeightColumnName
    };

    var saver = new SvmLightSaver(CatalogUtils.GetEnvironment(catalog), args);
    saver.SaveData(stream, data, data.Schema.Select(col => col.Index).ToArray());
}
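// Usage sketch: write a data view back to disk in SVM-light format.
// The "output.svm" path is hypothetical; "svmData" is assumed from the
// loader example above.
using (var outStream = File.Create("output.svm"))
    mlContext.Data.SaveInSvmLightFormat(svmData, outStream, binaryLabel: true);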
/// <summary>
/// Split the dataset into the train set and test set according to the given fraction.
/// Respects the <paramref name="stratificationColumn"/> if provided.
/// </summary>
/// <typeparam name="T">The tuple describing the data schema.</typeparam>
/// <param name="catalog">The training catalog.</param>
/// <param name="data">The dataset to split.</param>
/// <param name="testFraction">The fraction of data to go into the test set.</param>
/// <param name="stratificationColumn">Optional selector for the column to use as a stratification column. If two examples share the same value of the <paramref name="stratificationColumn"/>
/// (if provided), they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from the train set to the test set.
/// If this optional parameter is not provided, a stratification column will be generated, and its values will be random numbers.</param>
/// <param name="seed">Optional parameter used in combination with the <paramref name="stratificationColumn"/>.
/// If the <paramref name="stratificationColumn"/> is not provided, the random numbers generated to create it will use this seed.
/// If the seed is not provided, a default value will be used.</param>
/// <returns>A pair of datasets, for the train and test set.</returns>
public static (DataView<T> trainSet, DataView<T> testSet) TrainTestSplit<T>(this DataOperationsCatalog catalog,
    DataView<T> data, double testFraction = 0.1, Func<T, PipelineColumn> stratificationColumn = null, uint? seed = null)
{
/// <summary>
/// Configures a loader for text files.
/// </summary>
/// <typeparam name="TShape">The type shape parameter, which must be a valid schema shape. As a practical
/// matter this is generally not explicitly defined by the user, but is instead inferred from the return
/// type of the <paramref name="func"/>, where one takes an input <see cref="Context"/> and uses it to compose
/// a shape-type instance describing what the columns are and how to load them from the file.</typeparam>
/// <param name="catalog">The catalog.</param>
/// <param name="func">The delegate that describes what fields to read from the text file, as well as
/// describing their input type. The way in which it works is that the delegate is fed a <see cref="Context"/>,
/// and the user composes a shape type with <see cref="PipelineColumn"/> instances out of that <see cref="Context"/>.
/// The resulting data will have columns with the names corresponding to their names in the shape type.</param>
/// <param name="files">Input files.</param>
/// <param name="hasHeader">Whether the data file has a header with feature names.</param>
/// <param name="separator">Text field separator.</param>
/// <param name="allowQuoting">Whether the input may include quoted values, which can contain separator
/// characters, colons, and distinguish empty values from missing values. When true, consecutive separators
/// denote a missing value and an empty value is denoted by <c>""</c>. When false, consecutive separators
/// denote an empty value.</param>
/// <param name="allowSparse">Whether the input may include sparse representations.</param>
/// <param name="trimWhitspace">Remove trailing whitespace from lines.</param>
/// <returns>A configured statically-typed loader for text files.</returns>
public static DataLoader<IMultiStreamSource, TShape> CreateTextLoader<[IsShape] TShape>(
    this DataOperationsCatalog catalog, Func<Context, TShape> func, IMultiStreamSource files = null,
    bool hasHeader = false, char separator = '\t', bool allowQuoting = true, bool allowSparse = true,
    bool trimWhitspace = false)
    => CreateLoader(catalog.Environment, func, files, separator, hasHeader, allowQuoting, allowSparse, trimWhitspace);
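// A hedged sketch of the statically-typed loader, assuming the static-pipe
// Context methods LoadBool/LoadFloat and a DataLoader.Load(files) call.
// Column ordinals and the "data.csv" path are assumptions.
var textLoader = mlContext.Data.CreateTextLoader(c => (
        label: c.LoadBool(0),           // column 0: boolean label
        features: c.LoadFloat(1, 9)),   // columns 1-9: a float vector
    hasHeader: true, separator: ',');
var staticData = textLoader.Load(new MultiFileSource("data.csv"));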
public static IHostEnvironment GetEnvironment(this DataOperationsCatalog catalog)
    => Contracts.CheckRef(catalog, nameof(catalog)).Environment;
private static IDataView GetBinaryMetrics(
    IHostEnvironment env,
    IPredictor predictor,
    RoleMappedData roleMappedData,
    PermutationFeatureImportanceArguments input)
{
    var roles = roleMappedData.Schema.GetColumnRoleNames();
    var featureColumnName = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).First().Value;
    var labelColumnName = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).First().Value;

    var pred = new BinaryPredictionTransformer<IPredictorProducing<float>>(
        env, predictor as IPredictorProducing<float>, roleMappedData.Data.Schema, featureColumnName);

    var binaryCatalog = new BinaryClassificationCatalog(env);
    var permutationMetrics = binaryCatalog
        .PermutationFeatureImportance(pred,
            roleMappedData.Data,
            labelColumnName: labelColumnName,
            useFeatureWeightFilter: input.UseFeatureWeightFilter,
            numberOfExamplesToUse: input.NumberOfExamplesToUse,
            permutationCount: input.PermutationCount);

    var slotNames = GetSlotNames(roleMappedData.Schema);
    Contracts.Assert(slotNames.Length == permutationMetrics.Length,
        "Mismatch between number of feature slots and number of features permuted.");

    List<BinaryMetrics> metrics = new List<BinaryMetrics>();
    for (int i = 0; i < permutationMetrics.Length; i++)
    {
        if (string.IsNullOrWhiteSpace(slotNames[i]))
            continue;
        var pMetric = permutationMetrics[i];
        metrics.Add(new BinaryMetrics
        {
            FeatureName = slotNames[i],
            AreaUnderRocCurve = pMetric.AreaUnderRocCurve.Mean,
            AreaUnderRocCurveStdErr = pMetric.AreaUnderRocCurve.StandardError,
            Accuracy = pMetric.Accuracy.Mean,
            AccuracyStdErr = pMetric.Accuracy.StandardError,
            PositivePrecision = pMetric.PositivePrecision.Mean,
            PositivePrecisionStdErr = pMetric.PositivePrecision.StandardError,
            PositiveRecall = pMetric.PositiveRecall.Mean,
            PositiveRecallStdErr = pMetric.PositiveRecall.StandardError,
            NegativePrecision = pMetric.NegativePrecision.Mean,
            NegativePrecisionStdErr = pMetric.NegativePrecision.StandardError,
            NegativeRecall = pMetric.NegativeRecall.Mean,
            NegativeRecallStdErr = pMetric.NegativeRecall.StandardError,
            F1Score = pMetric.F1Score.Mean,
            F1ScoreStdErr = pMetric.F1Score.StandardError,
            AreaUnderPrecisionRecallCurve = pMetric.AreaUnderPrecisionRecallCurve.Mean,
            AreaUnderPrecisionRecallCurveStdErr = pMetric.AreaUnderPrecisionRecallCurve.StandardError
        });
    }

    var dataOps = new DataOperationsCatalog(env);
    return dataOps.LoadFromEnumerable(metrics);
}
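// Hedged sketch of the user-facing path this helper mirrors: permutation
// feature importance over a trained binary classifier. "model" (a trained
// BinaryPredictionTransformer) and "testData" are assumptions.
var pfi = mlContext.BinaryClassification.PermutationFeatureImportance(
    model, testData, permutationCount: 10);
// Each entry holds a mean and standard error per permuted feature, e.g.:
var aucDrop = pfi[0].AreaUnderRocCurve.Mean;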
private static IDataView GetMulticlassMetrics(
    IHostEnvironment env,
    IPredictor predictor,
    RoleMappedData roleMappedData,
    PermutationFeatureImportanceArguments input)
{
    var roles = roleMappedData.Schema.GetColumnRoleNames();
    var featureColumnName = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).First().Value;
    var labelColumnName = roles.Where(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).First().Value;

    var pred = new MulticlassPredictionTransformer<IPredictorProducing<VBuffer<float>>>(
        env, predictor as IPredictorProducing<VBuffer<float>>, roleMappedData.Data.Schema, featureColumnName, labelColumnName);

    var multiclassCatalog = new MulticlassClassificationCatalog(env);
    var permutationMetrics = multiclassCatalog
        .PermutationFeatureImportance(pred,
            roleMappedData.Data,
            labelColumnName: labelColumnName,
            useFeatureWeightFilter: input.UseFeatureWeightFilter,
            numberOfExamplesToUse: input.NumberOfExamplesToUse,
            permutationCount: input.PermutationCount);

    var slotNames = GetSlotNames(roleMappedData.Schema);
    Contracts.Assert(slotNames.Length == permutationMetrics.Length,
        "Mismatch between number of feature slots and number of features permuted.");

    List<MulticlassMetrics> metrics = new List<MulticlassMetrics>();
    for (int i = 0; i < permutationMetrics.Length; i++)
    {
        if (string.IsNullOrWhiteSpace(slotNames[i]))
            continue;
        var pMetric = permutationMetrics[i];
        metrics.Add(new MulticlassMetrics
        {
            FeatureName = slotNames[i],
            MacroAccuracy = pMetric.MacroAccuracy.Mean,
            MacroAccuracyStdErr = pMetric.MacroAccuracy.StandardError,
            MicroAccuracy = pMetric.MicroAccuracy.Mean,
            MicroAccuracyStdErr = pMetric.MicroAccuracy.StandardError,
            LogLoss = pMetric.LogLoss.Mean,
            LogLossStdErr = pMetric.LogLoss.StandardError,
            LogLossReduction = pMetric.LogLossReduction.Mean,
            LogLossReductionStdErr = pMetric.LogLossReduction.StandardError,
            TopKAccuracy = pMetric.TopKAccuracy.Mean,
            TopKAccuracyStdErr = pMetric.TopKAccuracy.StandardError,
            PerClassLogLoss = pMetric.PerClassLogLoss.Select(x => x.Mean).ToArray(),
            PerClassLogLossStdErr = pMetric.PerClassLogLoss.Select(x => x.StandardError).ToArray()
        });
    }

    // Convert unknown size vectors to known size.
    var metric = metrics.First();
    SchemaDefinition schema = SchemaDefinition.Create(typeof(MulticlassMetrics));
    ConvertVectorToKnownSize(nameof(metric.PerClassLogLoss), metric.PerClassLogLoss.Length, ref schema);
    ConvertVectorToKnownSize(nameof(metric.PerClassLogLossStdErr), metric.PerClassLogLossStdErr.Length, ref schema);

    var dataOps = new DataOperationsCatalog(env);
    return dataOps.LoadFromEnumerable(metrics, schema);
}
public static IDataView LoadFromXpoObject(this DataOperationsCatalog Instance, Session session, Type ObjectType,
    string Properties, CriteriaOperator Criteria, string TextPropertyName, string LabelPropertyName)
{
    var view = new DevExpress.Xpo.XPView(session, ObjectType, Properties, Criteria);
    return new XpoInputObjectDataView(view, TextPropertyName, LabelPropertyName);
}
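// Hedged usage sketch for the DevExpress XPO bridge above. The Ticket
// persistent class, the property names, and "mlContext" are assumptions,
// not part of the original code.
IDataView xpoData = mlContext.Data.LoadFromXpoObject(
    session,               // an open DevExpress.Xpo.Session
    typeof(Ticket),        // hypothetical XPO persistent type
    "Subject;Category",    // properties to project into the XPView
    null,                  // no filter criteria
    TextPropertyName: "Subject",
    LabelPropertyName: "Category");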
public static IDataView ReadTeamStatistics(this DataOperationsCatalog data, string file)
{
    return data.LoadFromTextFile<TeamStatistics>(file, separatorChar: ',', hasHeader: true);
}
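// A minimal sketch of what the TeamStatistics row class might look like for
// the reader above; the column names and ordinals are assumptions, not the
// original definition.
public class TeamStatistics
{
    [LoadColumn(0)]
    public string TeamName { get; set; }

    [LoadColumn(1)]
    public float GoalsScored { get; set; }

    [LoadColumn(2)]
    public float GoalsConceded { get; set; }
}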