/// <summary>
        /// Creates a <see cref="NormalizingEstimator"/> that rescales a column using the
        /// <see cref="NormalizingEstimator.NormalizationMode.Binning"/> mode, in which values are assigned into bins with equal density.
        /// </summary>
        /// <param name="catalog">The transform catalog.</param>
        /// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
        /// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>, <paramref name="outputColumnName"/> is used as the source.</param>
        /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
        /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
        /// <param name="maximumBinCount">Maximum number of bins (a power of 2 is recommended).</param>
        /// <returns>A <see cref="NormalizingEstimator"/> built from a single set of binning column options.</returns>
        public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog,
            string outputColumnName,
            string inputColumnName = null,
            long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
            bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
            int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount)
        {
            // The column options are created before the environment is resolved from the catalog.
            var binningOptions = new NormalizingEstimator.BinningColumnOptions(
                outputColumnName, inputColumnName, maximumExampleCount, fixZero, maximumBinCount);
            var env = CatalogUtils.GetEnvironment(catalog);

            return new NormalizingEstimator(env, binningOptions);
        }
예제 #2
0
        /// <summary>
        /// Create a <see cref="CategoricalImputerEstimator"/>, which replaces the missing values of each listed column with the most frequent value.
        /// Floats, Doubles, and Strings are supported; a string is treated as "missing" when it is empty.
        /// </summary>
        /// <param name="catalog">Transform catalog.</param>
        /// <param name="columns">The <see cref="InputOutputColumnPair"/>s to impute. A pair whose input column name is <see langword="null"/> reads from its output column.</param>
        /// <returns><see cref="CategoricalImputerEstimator"/></returns>
        public static CategoricalImputerEstimator ImputeCategories(this TransformsCatalog catalog, params InputOutputColumnPair[] columns)
        {
            var imputerOptions = new CategoricalImputerEstimator.Options();

            // Translate every input/output pair into the estimator's own column description.
            imputerOptions.Columns = columns
                .Select(pair => new CategoricalImputerEstimator.Column
                {
                    Name = pair.OutputColumnName,
                    Source = pair.InputColumnName ?? pair.OutputColumnName
                })
                .ToArray();

            var env = CatalogUtils.GetEnvironment(catalog);
            return new CategoricalImputerEstimator(env, imputerOptions);
        }
        /// <summary>
        /// Creates a <see cref="NormalizingEstimator"/> that rescales a column using the
        /// <see cref="NormalizingEstimator.NormalizationMode.SupervisedBinning"/> mode, in which values are assigned into bins
        /// based on correlation with the <paramref name="labelColumnName"/> column.
        /// </summary>
        /// <param name="catalog">The transform catalog.</param>
        /// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
        /// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>, <paramref name="outputColumnName"/> is used as the source.</param>
        /// <param name="labelColumnName">Name of the label column for supervised binning.</param>
        /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
        /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
        /// <param name="maximumBinCount">Maximum number of bins (a power of 2 is recommended).</param>
        /// <param name="mininimumExamplesPerBin">Minimum number of examples per bin.</param>
        /// <returns>A <see cref="NormalizingEstimator"/> built from a single set of supervised-binning column options.</returns>
        public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog,
            string outputColumnName,
            string inputColumnName = null,
            string labelColumnName = DefaultColumnNames.Label,
            long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
            bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
            int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount,
            int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize)
        {
            // The column options are created before the environment is resolved from the catalog.
            var supervisedOptions = new NormalizingEstimator.SupervisedBinningColumOptions(
                outputColumnName,
                inputColumnName,
                labelColumnName,
                maximumExampleCount,
                fixZero,
                maximumBinCount,
                mininimumExamplesPerBin);
            var env = CatalogUtils.GetEnvironment(catalog);

            return new NormalizingEstimator(env, supervisedOptions);
        }
예제 #4
0
        /// <summary>
        /// Builds an estimator chain out of the transforms that <c>TransformInferenceApi.InferTransforms</c> suggests for
        /// <paramref name="data"/> and <paramref name="label"/>.
        /// </summary>
        /// <param name="catalog">The transform catalog this extension method is attached to.</param>
        /// <param name="data">The data passed to the transform inference.</param>
        /// <param name="label">The label passed to the transform inference.</param>
        /// <returns>An <see cref="EstimatorChain{TLastTransformer}"/> with each suggested estimator appended in enumeration order.</returns>
        public static IEstimator<ITransformer> InferTransforms(this TransformsCatalog catalog, IDataView data, string label)
        {
            // NOTE(review): catalog is not read here; inference runs against a freshly created MLContext — confirm this is intended.
            var context = new MLContext();
            var suggestions = TransformInferenceApi.InferTransforms(context, data, label);

            var chain = new EstimatorChain<ITransformer>();
            foreach (var suggestedEstimator in suggestions.Select(suggestion => suggestion.Estimator))
            {
                chain = chain.Append(suggestedEstimator);
            }

            return chain;
        }
예제 #5
0
        /// <summary>
        /// Create a <see cref="CategoricalImputerEstimator"/>, which replaces the missing values of a single column with the most frequent value.
        /// Floats, Doubles, and Strings are supported; a string is treated as "missing" when it is empty.
        /// </summary>
        /// <param name="catalog">Transform catalog.</param>
        /// <param name="outputColumnName">Output column name.</param>
        /// <param name="inputColumnName">Input column name; when <see langword="null"/>, <paramref name="outputColumnName"/> is used.</param>
        /// <returns><see cref="CategoricalImputerEstimator"/></returns>
        public static CategoricalImputerEstimator ImputeCategories(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null)
        {
            var imputerOptions = new CategoricalImputerEstimator.Options();

            // A single-element column list describing the one column to impute.
            imputerOptions.Columns = new[]
            {
                new CategoricalImputerEstimator.Column
                {
                    Name = outputColumnName,
                    Source = inputColumnName ?? outputColumnName
                }
            };

            var env = CatalogUtils.GetEnvironment(catalog);
            return new CategoricalImputerEstimator(env, imputerOptions);
        }
예제 #6
0
        /// <summary>
        /// Transforms a categorical column into a set of features made up of the count of each label class,
        /// the log-odds for each label class, and the back-off indicator.
        /// </summary>
        /// <param name="catalog">The transforms catalog.</param>
        /// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>. Must not be empty.</param>
        /// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/> or empty, <paramref name="outputColumnName"/> is used as the source.</param>
        /// <param name="labelColumn">The name of the label column.</param>
        /// <param name="builder">The builder that creates the count tables from the training data. When <see langword="null"/>, a new <c>CMCountTableBuilder</c> is used.</param>
        /// <param name="priorCoefficient">The coefficient with which to apply the prior smoothing to the features.</param>
        /// <param name="laplaceScale">The Laplacian noise diversity/scale-parameter. Recommended values are between 0 and 1. Note that the noise
        /// will only be applied if the estimator is part of an <see cref="EstimatorChain{TLastTransformer}"/>, when fitting the next estimator in the chain.</param>
        /// <param name="numberOfBits">The number of bits to hash the input into. Must be between 1 and 31, inclusive.</param>
        /// <param name="combine">In case the input is a vector column, indicates whether the values should be combined into a single hash to create a single
        /// count table, or be left as a vector of hashes with multiple count tables.</param>
        /// <param name="hashingSeed">The seed used for hashing the input columns.</param>
        /// <returns>A <see cref="CountTargetEncodingEstimator"/> configured for the single column.</returns>
        public static CountTargetEncodingEstimator CountTargetEncode(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null,
            string labelColumn = DefaultColumnNames.Label,
            CountTableBuilderBase builder = null,
            float priorCoefficient = CountTableTransformer.Defaults.PriorCoefficient,
            float laplaceScale = CountTableTransformer.Defaults.LaplaceScale,
            int numberOfBits = HashingEstimator.Defaults.NumberOfBits,
            bool combine = HashingEstimator.Defaults.Combine,
            uint hashingSeed = HashingEstimator.Defaults.Seed)
        {
            var env = CatalogUtils.GetEnvironment(catalog);
            env.CheckNonEmpty(outputColumnName, nameof(outputColumnName));

            // Fall back to the output column as the source when no input column was named.
            if (string.IsNullOrEmpty(inputColumnName))
            {
                inputColumnName = outputColumnName;
            }

            // Fall back to a CMCountTableBuilder when the caller supplied no builder.
            if (builder == null)
            {
                builder = new CMCountTableBuilder();
            }

            var singleColumn = new[]
            {
                new CountTableEstimator.ColumnOptions(outputColumnName, inputColumnName, builder, priorCoefficient, laplaceScale)
            };

            return new CountTargetEncodingEstimator(env, labelColumn, singleColumn, numberOfBits, combine, hashingSeed);
        }
예제 #7
0
        /// <summary>
        /// Transforms categorical columns into sets of features made up of the count of each label class,
        /// the log-odds for each label class, and the back-off indicator.
        /// </summary>
        /// <param name="catalog">The transforms catalog.</param>
        /// <param name="columns">The input and output columns.</param>
        /// <param name="labelColumn">The name of the label column.</param>
        /// <param name="builder">The builder that creates the count tables from the training data. When <see langword="null"/>, a new <c>CMCountTableBuilder</c> is used.</param>
        /// <param name="priorCoefficient">The coefficient with which to apply the prior smoothing to the features.</param>
        /// <param name="laplaceScale">The Laplacian noise diversity/scale-parameter. Recommended values are between 0 and 1. Note that the noise
        /// will only be applied if the estimator is part of an <see cref="EstimatorChain{TLastTransformer}"/>, when fitting the next estimator in the chain.</param>
        /// <param name="sharedTable">Indicates whether to keep counts for all columns and slots in one shared count table. If true, the keys in the count table
        /// will include a hash of the column and slot indices.</param>
        /// <param name="numberOfBits">The number of bits to hash the input into. Must be between 1 and 31, inclusive.</param>
        /// <param name="combine">In case the input is a vector column, indicates whether the values should be combined into a single hash to create a single
        /// count table, or be left as a vector of hashes with multiple count tables.</param>
        /// <param name="hashingSeed">The seed used for hashing the input columns.</param>
        /// <returns>A <see cref="CountTargetEncodingEstimator"/> built from shared or per-column options, depending on <paramref name="sharedTable"/>.</returns>
        public static CountTargetEncodingEstimator CountTargetEncode(this TransformsCatalog catalog,
            InputOutputColumnPair[] columns,
            string labelColumn = DefaultColumnNames.Label,
            CountTableBuilderBase builder = null,
            float priorCoefficient = CountTableTransformer.Defaults.PriorCoefficient,
            float laplaceScale = CountTableTransformer.Defaults.LaplaceScale,
            bool sharedTable = CountTableTransformer.Defaults.SharedTable,
            int numberOfBits = HashingEstimator.Defaults.NumberOfBits,
            bool combine = HashingEstimator.Defaults.Combine,
            uint hashingSeed = HashingEstimator.Defaults.Seed)
        {
            var env = CatalogUtils.GetEnvironment(catalog);
            env.CheckValue(columns, nameof(columns));

            // Fall back to a CMCountTableBuilder when the caller supplied no builder.
            if (builder == null)
            {
                builder = new CMCountTableBuilder();
            }

            int columnCount = columns.Length;

            if (!sharedTable)
            {
                // Non-shared: the builder is attached to each column's options.
                var perColumnOptions = new CountTableEstimator.ColumnOptions[columnCount];
                for (int index = 0; index < columnCount; index++)
                {
                    var pair = columns[index];
                    perColumnOptions[index] = new CountTableEstimator.ColumnOptions(
                        pair.OutputColumnName, pair.InputColumnName, builder, priorCoefficient, laplaceScale);
                }

                return new CountTargetEncodingEstimator(env, labelColumn, perColumnOptions,
                    numberOfBits: numberOfBits, combine: combine, hashingSeed: hashingSeed);
            }

            // Shared: the column options carry no builder; it is handed to the estimator once.
            var sharedOptions = new CountTableEstimator.SharedColumnOptions[columnCount];
            for (int index = 0; index < columnCount; index++)
            {
                var pair = columns[index];
                sharedOptions[index] = new CountTableEstimator.SharedColumnOptions(
                    pair.OutputColumnName, pair.InputColumnName, priorCoefficient, laplaceScale);
            }

            return new CountTargetEncodingEstimator(env, labelColumn, sharedOptions, builder, numberOfBits, combine, hashingSeed);
        }
예제 #8
0
        /// <summary>
        /// Fits an ONNX scoring estimator on <paramref name="input"/>'s data, applies it to that same data,
        /// and returns both the transformed view and a model wrapping it.
        /// </summary>
        /// <param name="env">The host environment used to validate the arguments and create the host.</param>
        /// <param name="input">The ONNX transform arguments (columns, model file, device settings, and data).</param>
        /// <returns>An <see cref="OnnxTransformOutput"/> holding the transformed data and the corresponding transform model.</returns>
        public static OnnxTransformOutput ApplyOnnxModel(IHostEnvironment env, OnnxTransformInput input)
        {
            var host = EntryPointUtils.CheckArgsAndCreateHost(env, "OnnxTransform", input);

            // Missing column lists are replaced by empty arrays.
            var sourceColumns = input.InputColumns ?? Array.Empty<string>();
            var resultColumns = input.OutputColumns ?? Array.Empty<string>();

            var catalog = new TransformsCatalog(host);
            var estimator = OnnxCatalog.ApplyOnnxModel(
                catalog,
                resultColumns,
                sourceColumns,
                input.ModelFile,
                input.GpuDeviceId,
                input.FallbackToCpu);

            // Fit and transform both run against the same input data.
            var fitted = estimator.Fit(input.Data);
            var scoredView = fitted.Transform(input.Data);

            return new OnnxTransformOutput
            {
                Model = new TransformModelImpl(host, scoredView, input.Data),
                OutputData = scoredView
            };
        }
 /// <summary>
 /// Creates the <see cref="ConversionTransforms"/> instance for <paramref name="owner"/>; all initialization is delegated to the base constructor.
 /// </summary>
 /// <param name="owner">The <see cref="TransformsCatalog"/> forwarded to the base constructor.</param>
 internal ConversionTransforms(TransformsCatalog owner) : base(owner)
 {
 }
 /// <summary>
 /// Creates the <see cref="CategoricalTransforms"/> instance for <paramref name="owner"/>; all initialization is delegated to the base constructor.
 /// </summary>
 /// <param name="owner">The <see cref="TransformsCatalog"/> forwarded to the base constructor.</param>
 internal CategoricalTransforms(TransformsCatalog owner) : base(owner)
 {
 }
 /// <summary>
 /// Initializes the sub-catalog with the environment of <paramref name="owner"/>.
 /// </summary>
 /// <param name="owner">The <see cref="TransformsCatalog"/> whose <c>Environment</c> is stored on this instance.</param>
 protected SubCatalogBase(TransformsCatalog owner)
 {
     // NOTE(review): owner is dereferenced without a null check — presumably callers always pass a valid catalog; confirm.
     Environment = owner.Environment;
 }
 /// <summary>
 /// Creates the <see cref="FeatureSelectionTransforms"/> instance for <paramref name="owner"/>; all initialization is delegated to the base constructor.
 /// </summary>
 /// <param name="owner">The <see cref="TransformsCatalog"/> forwarded to the base constructor.</param>
 internal FeatureSelectionTransforms(TransformsCatalog owner) : base(owner)
 {
 }
 /// <summary>
 /// Creates the <see cref="ProjectionTransforms"/> instance for <paramref name="owner"/>; all initialization is delegated to the base constructor.
 /// </summary>
 /// <param name="owner">The <see cref="TransformsCatalog"/> forwarded to the base constructor.</param>
 internal ProjectionTransforms(TransformsCatalog owner) : base(owner)
 {
 }
 /// <summary>
 /// Creates the <see cref="TextTransforms"/> instance for <paramref name="owner"/>; all initialization is delegated to the base constructor.
 /// </summary>
 /// <param name="owner">The <see cref="TransformsCatalog"/> forwarded to the base constructor.</param>
 internal TextTransforms(TransformsCatalog owner) : base(owner)
 {
 }
예제 #15
0
 /// <summary>
 /// Creates the <see cref="ConversionTransforms"/> instance for <paramref name="owner"/>; all initialization is delegated to the base constructor.
 /// </summary>
 /// <param name="owner">The <see cref="TransformsCatalog"/> forwarded to the base constructor.</param>
 public ConversionTransforms(TransformsCatalog owner) : base(owner)
 {
 }
예제 #16
0
 /// <summary>
 /// Creates a new <see cref="MissingValueReplacingEstimator"/> for a single column.
 /// </summary>
 /// <param name="catalog">The transform's catalog.</param>
 /// <param name="inputColumn">The name of the input column.</param>
 /// <param name="outputColumn">The optional name of the output column.
 /// If not provided, the <paramref name="inputColumn"/> will be replaced with the results of the transforms.</param>
 /// <param name="replacementKind">The type of replacement to use, as specified in <see cref="MissingValueReplacingTransformer.ColumnInfo.ReplacementMode"/>.</param>
 /// <returns>The configured <see cref="MissingValueReplacingEstimator"/>.</returns>
 public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog,
     string inputColumn,
     string outputColumn = null,
     MissingValueReplacingTransformer.ColumnInfo.ReplacementMode replacementKind = MissingValueReplacingEstimator.Defaults.ReplacementMode)
 {
     var env = CatalogUtils.GetEnvironment(catalog);
     return new MissingValueReplacingEstimator(env, inputColumn, outputColumn, replacementKind);
 }
예제 #17
0
 /// <summary>
 /// Transforms categorical columns into sets of features made up of the count of each label class,
 /// the log-odds for each label class, and the back-off indicator, starting from previously trained counts.
 /// </summary>
 /// <param name="catalog">The transforms catalog.</param>
 /// <param name="columns">The input and output columns.</param>
 /// <param name="initialCounts">A previously trained count table containing initial counts.</param>
 /// <param name="labelColumn">The name of the label column.</param>
 /// <returns>A <see cref="CountTargetEncodingEstimator"/> created from <paramref name="initialCounts"/> and <paramref name="columns"/>.</returns>
 public static CountTargetEncodingEstimator CountTargetEncode(this TransformsCatalog catalog,
     InputOutputColumnPair[] columns,
     CountTargetEncodingTransformer initialCounts,
     string labelColumn = "Label")
     => new CountTargetEncodingEstimator(CatalogUtils.GetEnvironment(catalog), labelColumn, initialCounts, columns);
예제 #18
0
 /// <summary>
 /// Create a <see cref="TimeSeriesImputerEstimator"/>, which imputes missing rows and column data per grain. The imputation strategy is applied to
 /// a filtered list of columns in the IDataView. Columns that are excluded will have the default value for that data type used when a row
 /// is imputed. Currently only float/double/string columns are supported for imputation strategies, and an empty string is considered "missing" for the
 /// purpose of this estimator.
 /// </summary>
 /// <param name="catalog">The transform catalog.</param>
 /// <param name="timeSeriesColumn">Column representing the time series. Should be of type <see cref="long"/> or <see cref="System.DateTime"/>.</param>
 /// <param name="grainColumns">List of columns to use as grains.</param>
 /// <param name="filterColumns">List of columns to filter. If <paramref name="filterMode"/> is <see cref="TimeSeriesImputerEstimator.FilterMode.Exclude"/>, then columns in the list will be ignored.
 /// If <paramref name="filterMode"/> is <see cref="TimeSeriesImputerEstimator.FilterMode.Include"/>, then the columns in the list are the only columns imputed.</param>
 /// <param name="filterMode">Whether the list <paramref name="filterColumns"/> should include or exclude those columns.</param>
 /// <param name="imputeMode">Mode of imputation for missing values in a column. Defaults to forward fill when not passed.</param>
 /// <param name="suppressTypeErrors">Suppress the errors that would occur if a column and impute mode are incompatible. If true, will skip the column and use the default value. If false, will stop and throw an error.</param>
 /// <returns>The configured <see cref="TimeSeriesImputerEstimator"/>.</returns>
 public static TimeSeriesImputerEstimator ReplaceMissingTimeSeriesValues(this TransformsCatalog catalog, string timeSeriesColumn,
     string[] grainColumns,
     string[] filterColumns,
     TimeSeriesImputerEstimator.FilterMode filterMode = TimeSeriesImputerEstimator.FilterMode.Exclude,
     TimeSeriesImputerEstimator.ImputationStrategy imputeMode = TimeSeriesImputerEstimator.ImputationStrategy.ForwardFill,
     bool suppressTypeErrors = false)
 {
     var env = CatalogUtils.GetEnvironment(catalog);
     return new TimeSeriesImputerEstimator(
         env, timeSeriesColumn, grainColumns, filterColumns, filterMode, imputeMode, suppressTypeErrors);
 }
예제 #19
0
 /// <summary>
 /// Create a <see cref="TimeSeriesImputerEstimator"/>, which imputes missing rows and column data per grain. Operates on all columns in the IDataView.
 /// Currently only float/double/string columns are supported for imputation strategies, and an empty string is considered "missing" for the
 /// purpose of this estimator. Other column types will have the default value placed if a row is imputed.
 /// </summary>
 /// <param name="catalog">The transform catalog.</param>
 /// <param name="timeSeriesColumn">Column representing the time series. Should be of type <see cref="long"/> or <see cref="System.DateTime"/>.</param>
 /// <param name="grainColumns">List of columns to use as grains.</param>
 /// <param name="imputeMode">Mode of imputation for missing values in a column. Defaults to forward fill when not passed.</param>
 /// <returns>The configured <see cref="TimeSeriesImputerEstimator"/>.</returns>
 public static TimeSeriesImputerEstimator ReplaceMissingTimeSeriesValues(this TransformsCatalog catalog, string timeSeriesColumn, string[] grainColumns,
     TimeSeriesImputerEstimator.ImputationStrategy imputeMode = TimeSeriesImputerEstimator.ImputationStrategy.ForwardFill)
 {
     var env = CatalogUtils.GetEnvironment(catalog);

     // This overload passes no filter columns, the NoFilter mode, and always suppresses type errors.
     return new TimeSeriesImputerEstimator(
         env,
         timeSeriesColumn,
         grainColumns,
         null,
         TimeSeriesImputerEstimator.FilterMode.NoFilter,
         imputeMode,
         true);
 }
예제 #20
0
 /// <summary>
 /// Creates the <see cref="ProjectionTransforms"/> instance for <paramref name="owner"/>; all initialization is delegated to the base constructor.
 /// </summary>
 /// <param name="owner">The <see cref="TransformsCatalog"/> forwarded to the base constructor.</param>
 public ProjectionTransforms(TransformsCatalog owner) : base(owner)
 {
 }
예제 #21
0
 /// <summary>
 /// Creates the <see cref="TextTransforms"/> instance for <paramref name="owner"/>; all initialization is delegated to the base constructor.
 /// </summary>
 /// <param name="owner">The <see cref="TransformsCatalog"/> forwarded to the base constructor.</param>
 public TextTransforms(TransformsCatalog owner) : base(owner)
 {
 }
예제 #22
0
 /// <summary>
 /// Transforms a categorical column into a set of features made up of the count of each label class,
 /// the log-odds for each label class, and the back-off indicator, starting from previously trained counts.
 /// </summary>
 /// <param name="catalog">The transforms catalog.</param>
 /// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
 /// <param name="initialCounts">A previously trained count table containing initial counts.</param>
 /// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>, <paramref name="outputColumnName"/> is used as the source.</param>
 /// <param name="labelColumn">The name of the label column.</param>
 /// <returns>A <see cref="CountTargetEncodingEstimator"/> created from <paramref name="initialCounts"/> for the single column pair.</returns>
 public static CountTargetEncodingEstimator CountTargetEncode(this TransformsCatalog catalog, string outputColumnName,
     CountTargetEncodingTransformer initialCounts,
     string inputColumnName = null,
     string labelColumn = "Label")
 {
     var env = CatalogUtils.GetEnvironment(catalog);

     // Wrap the single column in a one-element pair array.
     var singlePair = new[] { new InputOutputColumnPair(outputColumnName, inputColumnName) };

     return new CountTargetEncodingEstimator(env, labelColumn, initialCounts, singlePair);
 }
 /// <summary>
 /// Create a <see cref="DateTimeEstimator"/>, which splits the input column specified by <paramref name="inputColumnName"/>
 /// into all its individual datetime components. The input column must be of type Int64, representing the number of seconds since the Unix epoch.
 /// This transformer will append the <paramref name="columnPrefix"/> to all the output columns. If you specify a country,
 /// holiday details will be looked up for that country as well.
 /// </summary>
 /// <param name="catalog">Transform catalog.</param>
 /// <param name="inputColumnName">Input column name.</param>
 /// <param name="columnPrefix">Prefix to add to the generated columns.</param>
 /// <param name="country">Country name to get holiday details for.</param>
 /// <returns><see cref="DateTimeEstimator"/></returns>
 public static DateTimeEstimator FeaturizeDateTime(this TransformsCatalog catalog,
     string inputColumnName,
     string columnPrefix,
     DateTimeEstimator.HolidayList country = DateTimeEstimator.HolidayList.None)
 {
     var env = CatalogUtils.GetEnvironment(catalog);
     return new DateTimeEstimator(env, inputColumnName, columnPrefix, country);
 }
예제 #24
0
 /// <summary>
 /// Creates the <see cref="FeatureSelectionTransforms"/> instance for <paramref name="owner"/>; all initialization is delegated to the base constructor.
 /// </summary>
 /// <param name="owner">The <see cref="TransformsCatalog"/> forwarded to the base constructor.</param>
 public FeatureSelectionTransforms(TransformsCatalog owner) : base(owner)
 {
 }
예제 #25
0
 /// <summary>
 /// Creates a new <see cref="MissingValueReplacingEstimator"/> from per-column settings.
 /// </summary>
 /// <param name="catalog">The transform's catalog.</param>
 /// <param name="columns">The names of the columns to use, along with the per-column transformation configuration.</param>
 /// <returns>The configured <see cref="MissingValueReplacingEstimator"/>.</returns>
 public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params MissingValueReplacingTransformer.ColumnInfo[] columns)
 {
     var env = CatalogUtils.GetEnvironment(catalog);
     return new MissingValueReplacingEstimator(env, columns);
 }
예제 #26
0
 /// <summary>
 /// Returns the <c>Environment</c> of <paramref name="catalog"/> after passing the catalog through <c>Contracts.CheckRef</c>.
 /// </summary>
 /// <param name="catalog">The transform catalog whose environment is requested.</param>
 /// <returns>The catalog's <see cref="IHostEnvironment"/>.</returns>
 public static IHostEnvironment GetEnvironment(this TransformsCatalog catalog)
 {
     // NOTE(review): CheckRef presumably rejects a null catalog before it is dereferenced — confirm against Contracts.
     var checkedCatalog = Contracts.CheckRef(catalog, nameof(catalog));
     return checkedCatalog.Environment;
 }