TransformsCatalog C# (CSharp) 코드 예제들

예제 #1

0

파일 보기

파일: TransformsCatalogExtensions.cs 프로젝트: yswenli/machinelearning

        /// <summary>
        /// Creates a <see cref="NormalizingEstimator"/> that rescales a single column using the
        /// <see cref="NormalizingEstimator.NormalizationMode.Binning"/> mode, where values are assigned into bins with equal density.
        /// </summary>
        /// <param name="catalog">The transform catalog whose environment is used to create the estimator.</param>
        /// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
        /// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>, <paramref name="outputColumnName"/> is used as the source.</param>
        /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
        /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
        /// <param name="maximumBinCount">Maximum number of bins (a power of 2 is recommended).</param>
        /// <returns>A <see cref="NormalizingEstimator"/> configured with one binning column.</returns>
        public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog,
                                                            string outputColumnName, string inputColumnName = null,
                                                            long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
                                                            bool fixZero             = NormalizingEstimator.Defaults.EnsureZeroUntouched,
                                                            int maximumBinCount      = NormalizingEstimator.Defaults.MaximumBinCount)
        {
            // Describe the single column first, then resolve the environment from the catalog.
            var binning = new NormalizingEstimator.BinningColumnOptions(
                outputColumnName,
                inputColumnName,
                maximumExampleCount,
                fixZero,
                maximumBinCount);
            var env = CatalogUtils.GetEnvironment(catalog);

            return new NormalizingEstimator(env, binning);
        }

예제 #2

0

파일 보기

        /// <summary>
        /// Create a <see cref="CategoricalImputerEstimator"/>, which fills in the missing values in a column with the most frequent value.
        /// Supports Floats, Doubles, and Strings.
        /// A string is assumed "missing" if it is empty.
        /// </summary>
        /// <param name="catalog">The transform catalog whose environment is used to create the estimator.</param>
        /// <param name="columns">The <see cref="InputOutputColumnPair"/>s to fill in missing values for. A pair whose input column name
        /// is <see langword="null"/> uses its output column name as the source.</param>
        /// <returns>A <see cref="CategoricalImputerEstimator"/> configured with one column entry per pair in <paramref name="columns"/>.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="columns"/> is <see langword="null"/>.</exception>
        public static CategoricalImputerEstimator ImputeCategories(this TransformsCatalog catalog, params InputOutputColumnPair[] columns)
        {
            // Fail fast with the caller-visible parameter name. Without this guard the LINQ call below
            // reports a null "source", which does not correspond to any argument of this method.
            if (columns == null)
            {
                throw new System.ArgumentNullException(nameof(columns));
            }

            var options = new CategoricalImputerEstimator.Options
            {
                Columns = columns.Select(x => new CategoricalImputerEstimator.Column
                {
                    Name = x.OutputColumnName,
                    // Fall back to the output name so the column is transformed in place.
                    Source = x.InputColumnName ?? x.OutputColumnName
                }).ToArray(),
            };

            return(new CategoricalImputerEstimator(CatalogUtils.GetEnvironment(catalog), options));
        }

예제 #3

0

파일 보기

파일: TransformsCatalogExtensions.cs 프로젝트: yswenli/machinelearning

        /// <summary>
        /// Creates a <see cref="NormalizingEstimator"/> that rescales a single column using the
        /// <see cref="NormalizingEstimator.NormalizationMode.SupervisedBinning"/> mode, where values are assigned into bins
        /// based on correlation with the <paramref name="labelColumnName"/> column.
        /// </summary>
        /// <param name="catalog">The transform catalog whose environment is used to create the estimator.</param>
        /// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
        /// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>, <paramref name="outputColumnName"/> is used as the source.</param>
        /// <param name="labelColumnName">Name of the label column for supervised binning.</param>
        /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
        /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
        /// <param name="maximumBinCount">Maximum number of bins (a power of 2 is recommended).</param>
        /// <param name="mininimumExamplesPerBin">Minimum number of examples per bin.</param>
        /// <returns>A <see cref="NormalizingEstimator"/> configured with one supervised-binning column.</returns>
        public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog,
                                                                      string outputColumnName, string inputColumnName = null,
                                                                      string labelColumnName      = DefaultColumnNames.Label,
                                                                      long maximumExampleCount    = NormalizingEstimator.Defaults.MaximumExampleCount,
                                                                      bool fixZero                = NormalizingEstimator.Defaults.EnsureZeroUntouched,
                                                                      int maximumBinCount         = NormalizingEstimator.Defaults.MaximumBinCount,
                                                                      int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize)
        {
            // Describe the single column first, then resolve the environment from the catalog.
            var supervisedBinning = new NormalizingEstimator.SupervisedBinningColumOptions(
                outputColumnName,
                inputColumnName,
                labelColumnName,
                maximumExampleCount,
                fixZero,
                maximumBinCount,
                mininimumExamplesPerBin);
            var env = CatalogUtils.GetEnvironment(catalog);

            return new NormalizingEstimator(env, supervisedBinning);
        }

예제 #4

0

파일 보기

파일: MLContextExtensions.cs 프로젝트: srsaggam/AutoMLDotNet

        /// <summary>
        /// Builds an estimator chain from the transforms suggested by <see cref="TransformInferenceApi"/> for the given data.
        /// </summary>
        /// <param name="catalog">The transforms catalog being extended. NOTE(review): this argument is never read; the inference
        /// call runs against a newly created <see cref="MLContext"/> instead - confirm this is intended.</param>
        /// <param name="data">The data handed to the transform inference.</param>
        /// <param name="label">The label argument handed to the transform inference (presumably the label column name).</param>
        /// <returns>A chain containing the estimator of every suggested transform, appended in the order they were suggested.</returns>
        public static IEstimator <ITransformer> InferTransforms(this TransformsCatalog catalog, IDataView data, string label)
        {
            var context     = new MLContext();
            var suggestions = TransformInferenceApi.InferTransforms(context, data, label);

            // Fold the suggested estimators, in order, onto an initially empty chain.
            return suggestions
                .Select(suggestion => suggestion.Estimator)
                .Aggregate(new EstimatorChain<ITransformer>(), (chain, next) => chain.Append(next));
        }

예제 #5

0

파일 보기

        /// <summary>
        /// Creates a <see cref="CategoricalImputerEstimator"/> for a single column, which fills in the missing values in that column
        /// with the most frequent value. Supports Floats, Doubles, and Strings. A string is assumed "missing" if it is empty.
        /// </summary>
        /// <param name="catalog">The transform catalog whose environment is used to create the estimator.</param>
        /// <param name="outputColumnName">Name of the output column.</param>
        /// <param name="inputColumnName">Name of the input column. When <see langword="null"/>, <paramref name="outputColumnName"/> is used as the source.</param>
        /// <returns>A <see cref="CategoricalImputerEstimator"/> configured with exactly one column entry.</returns>
        public static CategoricalImputerEstimator ImputeCategories(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null)
        {
            var column = new CategoricalImputerEstimator.Column
            {
                Name = outputColumnName,
                // Fall back to the output name so the column is transformed in place.
                Source = inputColumnName ?? outputColumnName
            };

            var options = new CategoricalImputerEstimator.Options
            {
                Columns = new[] { column }
            };

            return new CategoricalImputerEstimator(CatalogUtils.GetEnvironment(catalog), options);
        }

예제 #6

0

파일 보기

        /// <summary>
        /// Transforms a categorical column into a set of features that includes the count of each label class,
        /// the log-odds for each label class and the back-off indicator.
        /// </summary>
        /// <param name="catalog">The transforms catalog.</param>
        /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. Must not be empty.</param>
        /// <param name="inputColumnName">Name of the column to transform. If <see langword="null"/> or empty, the value of <paramref name="outputColumnName"/> is used as source.</param>
        /// <param name="labelColumn">The name of the label column.</param>
        /// <param name="builder">The builder that creates the count tables from the training data. If <see langword="null"/>, a new <see cref="CMCountTableBuilder"/> is used.</param>
        /// <param name="priorCoefficient">The coefficient with which to apply the prior smoothing to the features.</param>
        /// <param name="laplaceScale">The Laplacian noise diversity/scale-parameter. Recommended values are between 0 and 1. Note that the noise
        /// will only be applied if the estimator is part of an <see cref="EstimatorChain{TLastTransformer}"/>, when fitting the next estimator in the chain.</param>
        /// <param name="numberOfBits">The number of bits to hash the input into. Must be between 1 and 31, inclusive.</param>
        /// <param name="combine">In case the input is a vector column, indicates whether the values should be combined into a single hash to create a single
        /// count table, or be left as a vector of hashes with multiple count tables.</param>
        /// <param name="hashingSeed">The seed used for hashing the input columns.</param>
        /// <returns>A <see cref="CountTargetEncodingEstimator"/> configured for the single given column.</returns>
        public static CountTargetEncodingEstimator CountTargetEncode(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null,
                                                                     string labelColumn            = DefaultColumnNames.Label,
                                                                     CountTableBuilderBase builder = null,
                                                                     float priorCoefficient        = CountTableTransformer.Defaults.PriorCoefficient,
                                                                     float laplaceScale            = CountTableTransformer.Defaults.LaplaceScale,
                                                                     int numberOfBits = HashingEstimator.Defaults.NumberOfBits,
                                                                     bool combine     = HashingEstimator.Defaults.Combine,
                                                                     uint hashingSeed = HashingEstimator.Defaults.Seed)
        {
            var host = CatalogUtils.GetEnvironment(catalog);
            host.CheckNonEmpty(outputColumnName, nameof(outputColumnName));

            // An absent or empty input name means the column is encoded in place.
            string sourceColumn;
            if (string.IsNullOrEmpty(inputColumnName))
            {
                sourceColumn = outputColumnName;
            }
            else
            {
                sourceColumn = inputColumnName;
            }

            var tableBuilder = builder ?? new CMCountTableBuilder();

            var columnOptions = new[]
            {
                new CountTableEstimator.ColumnOptions(outputColumnName, sourceColumn, tableBuilder, priorCoefficient, laplaceScale)
            };

            return new CountTargetEncodingEstimator(host, labelColumn, columnOptions, numberOfBits, combine, hashingSeed);
        }

예제 #7

0

파일 보기

        /// <summary>
        /// Transforms categorical columns into a set of features that includes the count of each label class,
        /// the log-odds for each label class and the back-off indicator.
        /// </summary>
        /// <param name="catalog">The transforms catalog.</param>
        /// <param name="columns">The input and output columns. Must not be <see langword="null"/>.</param>
        /// <param name="labelColumn">The name of the label column.</param>
        /// <param name="builder">The builder that creates the count tables from the training data. If <see langword="null"/>, a new <see cref="CMCountTableBuilder"/> is used.</param>
        /// <param name="priorCoefficient">The coefficient with which to apply the prior smoothing to the features.</param>
        /// <param name="laplaceScale">The Laplacian noise diversity/scale-parameter. Recommended values are between 0 and 1. Note that the noise
        /// will only be applied if the estimator is part of an <see cref="EstimatorChain{TLastTransformer}"/>, when fitting the next estimator in the chain.</param>
        /// <param name="sharedTable">Indicates whether to keep counts for all columns and slots in one shared count table. If true, the keys in the count table
        /// will include a hash of the column and slot indices.</param>
        /// <param name="numberOfBits">The number of bits to hash the input into. Must be between 1 and 31, inclusive.</param>
        /// <param name="combine">In case the input is a vector column, indicates whether the values should be combined into a single hash to create a single
        /// count table, or be left as a vector of hashes with multiple count tables.</param>
        /// <param name="hashingSeed">The seed used for hashing the input columns.</param>
        /// <returns>A <see cref="CountTargetEncodingEstimator"/> built from shared column options plus the builder when
        /// <paramref name="sharedTable"/> is true, and from per-column options that each carry the builder otherwise.</returns>
        public static CountTargetEncodingEstimator CountTargetEncode(this TransformsCatalog catalog,
                                                                     InputOutputColumnPair[] columns, string labelColumn = DefaultColumnNames.Label,
                                                                     CountTableBuilderBase builder = null,
                                                                     float priorCoefficient        = CountTableTransformer.Defaults.PriorCoefficient,
                                                                     float laplaceScale            = CountTableTransformer.Defaults.LaplaceScale,
                                                                     bool sharedTable = CountTableTransformer.Defaults.SharedTable,
                                                                     int numberOfBits = HashingEstimator.Defaults.NumberOfBits,
                                                                     bool combine     = HashingEstimator.Defaults.Combine,
                                                                     uint hashingSeed = HashingEstimator.Defaults.Seed)
        {
            var host = CatalogUtils.GetEnvironment(catalog);
            host.CheckValue(columns, nameof(columns));

            var tableBuilder = builder ?? new CMCountTableBuilder();
            var count        = columns.Length;

            if (!sharedTable)
            {
                // Per-column tables: every column option carries the builder itself.
                var perColumn = new CountTableEstimator.ColumnOptions[count];
                for (int index = 0; index < count; index++)
                {
                    var pair = columns[index];
                    perColumn[index] = new CountTableEstimator.ColumnOptions(
                        pair.OutputColumnName, pair.InputColumnName, tableBuilder, priorCoefficient, laplaceScale);
                }

                return new CountTargetEncodingEstimator(host, labelColumn, perColumn, numberOfBits: numberOfBits, combine: combine, hashingSeed: hashingSeed);
            }

            // Shared table: the builder is handed to the estimator once rather than to each column.
            var shared = new CountTableEstimator.SharedColumnOptions[count];
            for (int index = 0; index < count; index++)
            {
                var pair = columns[index];
                shared[index] = new CountTableEstimator.SharedColumnOptions(
                    pair.OutputColumnName, pair.InputColumnName, priorCoefficient, laplaceScale);
            }

            return new CountTargetEncodingEstimator(host, labelColumn, shared, tableBuilder, numberOfBits, combine, hashingSeed);
        }

예제 #8

0

파일 보기

        /// <summary>
        /// Entry point that fits an ONNX scoring estimator on <c>input.Data</c> and applies the fitted transformer to that same data.
        /// </summary>
        /// <param name="env">The host environment used to validate the arguments and create the host.</param>
        /// <param name="input">The entry-point arguments: input/output column names, model file, GPU device id, CPU fallback flag and the data.
        /// Column lists that are <see langword="null"/> are treated as empty.</param>
        /// <returns>An <see cref="OnnxTransformOutput"/> holding the transformed data view and a transform model created from it.</returns>
        public static OnnxTransformOutput ApplyOnnxModel(IHostEnvironment env, OnnxTransformInput input)
        {
            var host = EntryPointUtils.CheckArgsAndCreateHost(env, "OnnxTransform", input);

            // Substitute empty arrays for missing column lists.
            var sourceColumns = input.InputColumns ?? Array.Empty<string>();
            var resultColumns = input.OutputColumns ?? Array.Empty<string>();

            var catalog   = new TransformsCatalog(host);
            var estimator = OnnxCatalog.ApplyOnnxModel(
                catalog,
                resultColumns,
                sourceColumns,
                input.ModelFile,
                input.GpuDeviceId,
                input.FallbackToCpu);

            // The same data is used both to fit the estimator and as the input of the resulting transformer.
            var transformer = estimator.Fit(input.Data);
            var scored      = transformer.Transform(input.Data);

            var output = new OnnxTransformOutput();
            output.Model      = new TransformModelImpl(host, scored, input.Data);
            output.OutputData = scored;
            return output;
        }

예제 #9

0

파일 보기