/// <summary>
/// Creates a <see cref="NormalizingEstimator"/> that rescales a column according to the
/// <see cref="NormalizingEstimator.NormalizationMode.Binning"/> mode, where values are
/// assigned into bins with equal density.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
/// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param>
/// <returns>A <see cref="NormalizingEstimator"/> configured with a single binning column description.</returns>
public static NormalizingEstimator NormalizeBinning(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
    int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount)
{
    // Describe the column first, then resolve the environment from the catalog.
    var binning = new NormalizingEstimator.BinningColumnOptions(
        outputColumnName,
        inputColumnName,
        maximumExampleCount,
        fixZero,
        maximumBinCount);

    var env = CatalogUtils.GetEnvironment(catalog);
    return new NormalizingEstimator(env, binning);
}
/// <summary>
/// Create a <see cref="CategoricalImputerEstimator"/>, which fills in the missing values in a column with the most frequent value.
/// Supports Floats, Doubles, and Strings. A string is assumed "missing" if it is empty.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="columns">The <see cref="InputOutputColumnPair"/> entries to fill in missing values for. A pair whose
/// input column name is <see langword="null"/> uses its output column name as the source.</param>
/// <returns>The configured <see cref="CategoricalImputerEstimator"/>.</returns>
public static CategoricalImputerEstimator ImputeCategories(this TransformsCatalog catalog, params InputOutputColumnPair[] columns)
{
    // Map every input/output pair onto the estimator's own column description.
    var imputerColumns =
        from pair in columns
        select new CategoricalImputerEstimator.Column
        {
            Name = pair.OutputColumnName,
            Source = pair.InputColumnName ?? pair.OutputColumnName
        };

    var options = new CategoricalImputerEstimator.Options
    {
        Columns = imputerColumns.ToArray(),
    };

    return new CategoricalImputerEstimator(CatalogUtils.GetEnvironment(catalog), options);
}
/// <summary>
/// Creates a <see cref="NormalizingEstimator"/> that rescales a column according to the
/// <see cref="NormalizingEstimator.NormalizationMode.SupervisedBinning"/> mode, where values are
/// assigned into bins based on correlation with the <paramref name="labelColumnName"/> column.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="labelColumnName">Name of the label column for supervised binning.</param>
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
/// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param>
/// <param name="mininimumExamplesPerBin">Minimum number of examples per bin.</param>
/// <returns>A <see cref="NormalizingEstimator"/> configured with a single supervised-binning column description.</returns>
public static NormalizingEstimator NormalizeSupervisedBinning(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null,
    string labelColumnName = DefaultColumnNames.Label,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
    int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount,
    int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize)
{
    // Describe the column first, then resolve the environment from the catalog.
    var supervisedBinning = new NormalizingEstimator.SupervisedBinningColumOptions(
        outputColumnName,
        inputColumnName,
        labelColumnName,
        maximumExampleCount,
        fixZero,
        maximumBinCount,
        mininimumExamplesPerBin);

    var env = CatalogUtils.GetEnvironment(catalog);
    return new NormalizingEstimator(env, supervisedBinning);
}
/// <summary>
/// Chains every estimator suggested by <c>TransformInferenceApi.InferTransforms</c> for the given
/// data and label into a single pipeline.
/// </summary>
/// <param name="catalog">The transform catalog this extension method is attached to. The method body does not read it.</param>
/// <param name="data">The data handed to transform inference.</param>
/// <param name="label">The label argument handed to transform inference.</param>
/// <returns>An <see cref="EstimatorChain{TLastTransformer}"/> holding the suggested estimators in the order they were enumerated.</returns>
public static IEstimator<ITransformer> InferTransforms(this TransformsCatalog catalog, IDataView data, string label)
{
    // NOTE(review): a brand-new MLContext is created here instead of using the catalog's environment.
    var context = new MLContext();
    var suggestions = TransformInferenceApi.InferTransforms(context, data, label);

    var chain = new EstimatorChain<ITransformer>();
    foreach (var suggested in suggestions.Select(s => s.Estimator))
    {
        chain = chain.Append(suggested);
    }

    return chain;
}
/// <summary>
/// Create a <see cref="CategoricalImputerEstimator"/>, which fills in the missing values in a column with the most frequent value.
/// Supports Floats, Doubles, and Strings. A string is assumed "missing" if it is empty.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="outputColumnName">Output column name.</param>
/// <param name="inputColumnName">Input column name; when <see langword="null"/>, <paramref name="outputColumnName"/> is used as the source.</param>
/// <returns>The configured <see cref="CategoricalImputerEstimator"/>.</returns>
public static CategoricalImputerEstimator ImputeCategories(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null)
{
    // Single column description; the source falls back to the output name.
    var column = new CategoricalImputerEstimator.Column
    {
        Name = outputColumnName,
        Source = inputColumnName ?? outputColumnName
    };

    var options = new CategoricalImputerEstimator.Options
    {
        Columns = new[] { column }
    };

    return new CategoricalImputerEstimator(CatalogUtils.GetEnvironment(catalog), options);
}
/// <summary>
/// Transforms a categorical column into a set of features that includes the count of each label class,
/// the log-odds for each label class and the back-off indicator.
/// </summary>
/// <param name="catalog">The transforms catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. Must be non-empty.</param>
/// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/> or empty, the value of
/// <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="labelColumn">The name of the label column.</param>
/// <param name="builder">The builder that creates the count tables from the training data. When <see langword="null"/>,
/// a <see cref="CMCountTableBuilder"/> is created.</param>
/// <param name="priorCoefficient">The coefficient with which to apply the prior smoothing to the features.</param>
/// <param name="laplaceScale">The Laplacian noise diversity/scale-parameter. Recommended values are between 0 and 1. Note that the noise
/// will only be applied if the estimator is part of an <see cref="EstimatorChain{TLastTransformer}"/>, when fitting the next estimator in the chain.</param>
/// <param name="numberOfBits">The number of bits to hash the input into. Must be between 1 and 31, inclusive.</param>
/// <param name="combine">In case the input is a vector column, indicates whether the values should be combined into a single hash to create a single
/// count table, or be left as a vector of hashes with multiple count tables.</param>
/// <param name="hashingSeed">The seed used for hashing the input columns.</param>
/// <returns>A <see cref="CountTargetEncodingEstimator"/> configured for the single column.</returns>
public static CountTargetEncodingEstimator CountTargetEncode(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null,
    string labelColumn = DefaultColumnNames.Label,
    CountTableBuilderBase builder = null,
    float priorCoefficient = CountTableTransformer.Defaults.PriorCoefficient,
    float laplaceScale = CountTableTransformer.Defaults.LaplaceScale,
    int numberOfBits = HashingEstimator.Defaults.NumberOfBits,
    bool combine = HashingEstimator.Defaults.Combine,
    uint hashingSeed = HashingEstimator.Defaults.Seed)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    env.CheckNonEmpty(outputColumnName, nameof(outputColumnName));

    // The output column doubles as the source when no input column is supplied.
    var sourceColumn = string.IsNullOrEmpty(inputColumnName) ? outputColumnName : inputColumnName;

    // Fall back to a CMCountTableBuilder when the caller did not pass a builder.
    var tableBuilder = builder ?? new CMCountTableBuilder();

    var columnOptions = new[]
    {
        new CountTableEstimator.ColumnOptions(outputColumnName, sourceColumn, tableBuilder, priorCoefficient, laplaceScale)
    };

    return new CountTargetEncodingEstimator(env, labelColumn, columnOptions, numberOfBits, combine, hashingSeed);
}
/// <summary>
/// Transforms categorical columns into a set of features that includes the count of each label class,
/// the log-odds for each label class and the back-off indicator.
/// </summary>
/// <param name="catalog">The transforms catalog.</param>
/// <param name="columns">The input and output columns. Must not be <see langword="null"/>.</param>
/// <param name="labelColumn">The name of the label column.</param>
/// <param name="builder">The builder that creates the count tables from the training data. When <see langword="null"/>,
/// a <see cref="CMCountTableBuilder"/> is created.</param>
/// <param name="priorCoefficient">The coefficient with which to apply the prior smoothing to the features.</param>
/// <param name="laplaceScale">The Laplacian noise diversity/scale-parameter. Recommended values are between 0 and 1. Note that the noise
/// will only be applied if the estimator is part of an <see cref="EstimatorChain{TLastTransformer}"/>, when fitting the next estimator in the chain.</param>
/// <param name="sharedTable">Indicates whether to keep counts for all columns and slots in one shared count table. If true, the keys in the count table
/// will include a hash of the column and slot indices.</param>
/// <param name="numberOfBits">The number of bits to hash the input into. Must be between 1 and 31, inclusive.</param>
/// <param name="combine">In case the input is a vector column, indicates whether the values should be combined into a single hash to create a single
/// count table, or be left as a vector of hashes with multiple count tables.</param>
/// <param name="hashingSeed">The seed used for hashing the input columns.</param>
/// <returns>A <see cref="CountTargetEncodingEstimator"/> built from either shared or per-column options, depending on <paramref name="sharedTable"/>.</returns>
public static CountTargetEncodingEstimator CountTargetEncode(
    this TransformsCatalog catalog,
    InputOutputColumnPair[] columns,
    string labelColumn = DefaultColumnNames.Label,
    CountTableBuilderBase builder = null,
    float priorCoefficient = CountTableTransformer.Defaults.PriorCoefficient,
    float laplaceScale = CountTableTransformer.Defaults.LaplaceScale,
    bool sharedTable = CountTableTransformer.Defaults.SharedTable,
    int numberOfBits = HashingEstimator.Defaults.NumberOfBits,
    bool combine = HashingEstimator.Defaults.Combine,
    uint hashingSeed = HashingEstimator.Defaults.Seed)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    env.CheckValue(columns, nameof(columns));

    // Fall back to a CMCountTableBuilder when the caller did not pass a builder.
    var tableBuilder = builder ?? new CMCountTableBuilder();

    if (!sharedTable)
    {
        // Per-column tables: the builder travels with each column's options.
        var perColumn = new CountTableEstimator.ColumnOptions[columns.Length];
        int slot = 0;
        foreach (var pair in columns)
        {
            perColumn[slot++] = new CountTableEstimator.ColumnOptions(
                pair.OutputColumnName,
                pair.InputColumnName,
                tableBuilder,
                priorCoefficient,
                laplaceScale);
        }

        return new CountTargetEncodingEstimator(
            env,
            labelColumn,
            perColumn,
            numberOfBits: numberOfBits,
            combine: combine,
            hashingSeed: hashingSeed);
    }

    // Shared table: the builder is handed to the estimator once, not to each column.
    var shared = new CountTableEstimator.SharedColumnOptions[columns.Length];
    int index = 0;
    foreach (var pair in columns)
    {
        shared[index++] = new CountTableEstimator.SharedColumnOptions(
            pair.OutputColumnName,
            pair.InputColumnName,
            priorCoefficient,
            laplaceScale);
    }

    return new CountTargetEncodingEstimator(env, labelColumn, shared, tableBuilder, numberOfBits, combine, hashingSeed);
}
/// <summary>
/// Entry point that builds an ONNX scoring estimator from <paramref name="input"/>, fits it on the input data,
/// and returns both the transformed data and a transform model wrapping it.
/// </summary>
/// <param name="env">The host environment used to validate the arguments and create the host.</param>
/// <param name="input">The entry-point arguments: input/output column names, model file, GPU device id,
/// CPU fallback flag and the data to fit and transform.</param>
/// <returns>An <see cref="OnnxTransformOutput"/> whose <c>OutputData</c> is the transformed view and whose
/// <c>Model</c> is a <see cref="TransformModelImpl"/> built from that view and the input data.</returns>
public static OnnxTransformOutput ApplyOnnxModel(IHostEnvironment env, OnnxTransformInput input)
{
    var host = EntryPointUtils.CheckArgsAndCreateHost(env, "OnnxTransform", input);

    // Missing column lists are treated as empty lists.
    var sourceColumns = input.InputColumns ?? Array.Empty<string>();
    var resultColumns = input.OutputColumns ?? Array.Empty<string>();

    var estimator = OnnxCatalog.ApplyOnnxModel(
        new TransformsCatalog(host),
        resultColumns,
        sourceColumns,
        input.ModelFile,
        input.GpuDeviceId,
        input.FallbackToCpu);

    // Fit and apply on the same data so the resulting view can back the returned model.
    var transformer = estimator.Fit(input.Data);
    var scored = transformer.Transform(input.Data);

    var output = new OnnxTransformOutput();
    output.Model = new TransformModelImpl(host, scored, input.Data);
    output.OutputData = scored;
    return output;
}
/// <summary>
/// Creates a <c>ConversionTransforms</c> instance, passing <paramref name="owner"/> through to the base constructor.
/// </summary>
/// <param name="owner">The <see cref="TransformsCatalog"/> handed to the base class.</param>
internal ConversionTransforms(TransformsCatalog owner)
    : base(owner)
{
}
/// <summary>
/// Creates a <c>CategoricalTransforms</c> instance, passing <paramref name="owner"/> through to the base constructor.
/// </summary>
/// <param name="owner">The <see cref="TransformsCatalog"/> handed to the base class.</param>
internal CategoricalTransforms(TransformsCatalog owner)
    : base(owner)
{
}
/// <summary>
/// Initializes the instance by copying the environment of the supplied catalog into <c>Environment</c>.
/// </summary>
/// <param name="owner">The <see cref="TransformsCatalog"/> whose <c>Environment</c> is captured. Must not be <see langword="null"/>.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="owner"/> is <see langword="null"/>.</exception>
protected SubCatalogBase(TransformsCatalog owner)
{
    // Fail with a descriptive argument exception instead of a bare NullReferenceException
    // when dereferencing a null owner below.
    if (owner == null)
    {
        throw new System.ArgumentNullException(nameof(owner));
    }

    Environment = owner.Environment;
}
/// <summary>
/// Creates a <c>FeatureSelectionTransforms</c> instance, passing <paramref name="owner"/> through to the base constructor.
/// </summary>
/// <param name="owner">The <see cref="TransformsCatalog"/> handed to the base class.</param>
internal FeatureSelectionTransforms(TransformsCatalog owner)
    : base(owner)
{
}
/// <summary>
/// Creates a <c>ProjectionTransforms</c> instance, passing <paramref name="owner"/> through to the base constructor.
/// </summary>
/// <param name="owner">The <see cref="TransformsCatalog"/> handed to the base class.</param>
internal ProjectionTransforms(TransformsCatalog owner)
    : base(owner)
{
}
/// <summary>
/// Creates a <c>TextTransforms</c> instance, passing <paramref name="owner"/> through to the base constructor.
/// </summary>
/// <param name="owner">The <see cref="TransformsCatalog"/> handed to the base class.</param>
internal TextTransforms(TransformsCatalog owner)
    : base(owner)
{
}
/// <summary>
/// Creates a <c>ConversionTransforms</c> instance, passing <paramref name="owner"/> through to the base constructor.
/// </summary>
/// <param name="owner">The <see cref="TransformsCatalog"/> handed to the base class.</param>
public ConversionTransforms(TransformsCatalog owner)
    : base(owner)
{
}
/// <summary>
/// Creates a <see cref="MissingValueReplacingEstimator"/> for a single column.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="inputColumn">The name of the input column.</param>
/// <param name="outputColumn">The optional name of the output column.
/// If not provided, the <paramref name="inputColumn"/> will be replaced with the results of the transforms.</param>
/// <param name="replacementKind">The type of replacement to use as specified in
/// <see cref="MissingValueReplacingTransformer.ColumnInfo.ReplacementMode"/>.</param>
/// <returns>The configured <see cref="MissingValueReplacingEstimator"/>.</returns>
public static MissingValueReplacingEstimator ReplaceMissingValues(
    this TransformsCatalog catalog,
    string inputColumn,
    string outputColumn = null,
    MissingValueReplacingTransformer.ColumnInfo.ReplacementMode replacementKind = MissingValueReplacingEstimator.Defaults.ReplacementMode)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new MissingValueReplacingEstimator(env, inputColumn, outputColumn, replacementKind);
}
/// <summary>
/// Transforms categorical columns into a set of features that includes the count of each label class,
/// the log-odds for each label class and the back-off indicator, starting from previously trained counts.
/// </summary>
/// <param name="catalog">The transforms catalog.</param>
/// <param name="columns">The input and output columns.</param>
/// <param name="initialCounts">A previously trained count table containing initial counts.</param>
/// <param name="labelColumn">The name of the label column.</param>
/// <returns>A <see cref="CountTargetEncodingEstimator"/> constructed from the environment, label column, initial counts and columns.</returns>
public static CountTargetEncodingEstimator CountTargetEncode(
    this TransformsCatalog catalog,
    InputOutputColumnPair[] columns,
    CountTargetEncodingTransformer initialCounts,
    string labelColumn = "Label")
    => new CountTargetEncodingEstimator(
        CatalogUtils.GetEnvironment(catalog),
        labelColumn,
        initialCounts,
        columns);
/// <summary>
/// Create a <see cref="TimeSeriesImputerEstimator"/>, which imputes missing rows and column data per grain. The imputation
/// strategy is applied to a filtered list of columns in the IDataView. Columns that are excluded will have the default value
/// for that data type used when a row is imputed. Currently only float/double/string columns are supported for imputation
/// strategies, and an empty string is considered "missing" for the purpose of this estimator.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="timeSeriesColumn">Column representing the time series. Should be of type <see cref="long"/> or <see cref="System.DateTime"/>.</param>
/// <param name="grainColumns">List of columns to use as grains.</param>
/// <param name="filterColumns">List of columns to filter. If <paramref name="filterMode"/> is
/// <see cref="TimeSeriesImputerEstimator.FilterMode.Exclude"/> then columns in the list will be ignored.
/// If <paramref name="filterMode"/> is <see cref="TimeSeriesImputerEstimator.FilterMode.Include"/> then columns in the list are the only columns imputed.</param>
/// <param name="filterMode">Whether the list <paramref name="filterColumns"/> should include or exclude those columns.</param>
/// <param name="imputeMode">Mode of imputation for missing values in column. If not passed defaults to forward fill.</param>
/// <param name="suppressTypeErrors">Suppress the errors that would occur if a column and impute mode are incompatible.
/// If true, will skip the column and use the default value. If false, will stop and throw an error.</param>
/// <returns>The configured <see cref="TimeSeriesImputerEstimator"/>.</returns>
public static TimeSeriesImputerEstimator ReplaceMissingTimeSeriesValues(
    this TransformsCatalog catalog,
    string timeSeriesColumn,
    string[] grainColumns,
    string[] filterColumns,
    TimeSeriesImputerEstimator.FilterMode filterMode = TimeSeriesImputerEstimator.FilterMode.Exclude,
    TimeSeriesImputerEstimator.ImputationStrategy imputeMode = TimeSeriesImputerEstimator.ImputationStrategy.ForwardFill,
    bool suppressTypeErrors = false)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new TimeSeriesImputerEstimator(
        env,
        timeSeriesColumn,
        grainColumns,
        filterColumns,
        filterMode,
        imputeMode,
        suppressTypeErrors);
}
/// <summary>
/// Create a <see cref="TimeSeriesImputerEstimator"/>, which imputes missing rows and column data per grain. Operates on all
/// columns in the IDataView. Currently only float/double/string columns are supported for imputation strategies, and an
/// empty string is considered "missing" for the purpose of this estimator. Other column types will have the default value
/// placed if a row is imputed.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="timeSeriesColumn">Column representing the time series. Should be of type <see cref="long"/> or <see cref="System.DateTime"/>.</param>
/// <param name="grainColumns">List of columns to use as grains.</param>
/// <param name="imputeMode">Mode of imputation for missing values in column. If not passed defaults to forward fill.</param>
/// <returns>The configured <see cref="TimeSeriesImputerEstimator"/>.</returns>
public static TimeSeriesImputerEstimator ReplaceMissingTimeSeriesValues(
    this TransformsCatalog catalog,
    string timeSeriesColumn,
    string[] grainColumns,
    TimeSeriesImputerEstimator.ImputationStrategy imputeMode = TimeSeriesImputerEstimator.ImputationStrategy.ForwardFill)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // No filter list is supplied, the filter mode is NoFilter, and type errors are always suppressed.
    return new TimeSeriesImputerEstimator(
        env,
        timeSeriesColumn,
        grainColumns,
        null,
        TimeSeriesImputerEstimator.FilterMode.NoFilter,
        imputeMode,
        true);
}
/// <summary>
/// Creates a <c>ProjectionTransforms</c> instance, passing <paramref name="owner"/> through to the base constructor.
/// </summary>
/// <param name="owner">The <see cref="TransformsCatalog"/> handed to the base class.</param>
public ProjectionTransforms(TransformsCatalog owner)
    : base(owner)
{
}
/// <summary>
/// Creates a <c>TextTransforms</c> instance, passing <paramref name="owner"/> through to the base constructor.
/// </summary>
/// <param name="owner">The <see cref="TransformsCatalog"/> handed to the base class.</param>
public TextTransforms(TransformsCatalog owner)
    : base(owner)
{
}
/// <summary>
/// Transforms a categorical column into a set of features that includes the count of each label class,
/// the log-odds for each label class and the back-off indicator, starting from previously trained counts.
/// </summary>
/// <param name="catalog">The transforms catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="initialCounts">A previously trained count table containing initial counts.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="labelColumn">The name of the label column.</param>
/// <returns>A <see cref="CountTargetEncodingEstimator"/> constructed for a single input/output column pair.</returns>
public static CountTargetEncodingEstimator CountTargetEncode(
    this TransformsCatalog catalog,
    string outputColumnName,
    CountTargetEncodingTransformer initialCounts,
    string inputColumnName = null,
    string labelColumn = "Label")
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // Wrap the single column in a one-element array of pairs for the estimator.
    var singlePair = new[] { new InputOutputColumnPair(outputColumnName, inputColumnName) };

    return new CountTargetEncodingEstimator(env, labelColumn, initialCounts, singlePair);
}
/// <summary>
/// Create a <see cref="DateTimeEstimator"/>, which splits up the input column specified by <paramref name="inputColumnName"/>
/// into all its individual datetime components. Input column must be of type Int64 representing the number of seconds since
/// the unix epoch. This transformer will append the <paramref name="columnPrefix"/> to all the output columns. If you specify
/// a country, holiday details will be looked up for that country as well.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="inputColumnName">Input column name.</param>
/// <param name="columnPrefix">Prefix to add to the generated columns.</param>
/// <param name="country">Country name to get holiday details for.</param>
/// <returns>The configured <see cref="DateTimeEstimator"/>.</returns>
public static DateTimeEstimator FeaturizeDateTime(
    this TransformsCatalog catalog,
    string inputColumnName,
    string columnPrefix,
    DateTimeEstimator.HolidayList country = DateTimeEstimator.HolidayList.None)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new DateTimeEstimator(env, inputColumnName, columnPrefix, country);
}
/// <summary>
/// Creates a <c>FeatureSelectionTransforms</c> instance, passing <paramref name="owner"/> through to the base constructor.
/// </summary>
/// <param name="owner">The <see cref="TransformsCatalog"/> handed to the base class.</param>
public FeatureSelectionTransforms(TransformsCatalog owner)
    : base(owner)
{
}
/// <summary>
/// Creates a <see cref="MissingValueReplacingEstimator"/> from per-column descriptions.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">The names of the columns to use, and the per-column transformation configuration.</param>
/// <returns>The configured <see cref="MissingValueReplacingEstimator"/>.</returns>
public static MissingValueReplacingEstimator ReplaceMissingValues(
    this TransformsCatalog catalog,
    params MissingValueReplacingTransformer.ColumnInfo[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new MissingValueReplacingEstimator(env, columns);
}
/// <summary>
/// Returns the <c>Environment</c> of the given catalog after passing the catalog through
/// <c>Contracts.CheckRef</c>.
/// </summary>
/// <param name="catalog">The catalog whose environment is requested.</param>
/// <returns>The <see cref="IHostEnvironment"/> exposed by <paramref name="catalog"/>.</returns>
public static IHostEnvironment GetEnvironment(this TransformsCatalog catalog)
{
    // CheckRef validates the reference and hands it back for the property read.
    var checkedCatalog = Contracts.CheckRef(catalog, nameof(catalog));
    return checkedCatalog.Environment;
}