/// <summary>
        /// Creates an SDCA regression trainer from options built out of the sweep parameters.
        /// </summary>
        /// <param name="mlContext">Context whose regression trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label column name used when building the options.</param>
        /// <returns>The estimator returned by <c>Regression.Trainers.Sdca</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo)
        {
            var sdcaOptions = TrainerExtensionUtil.CreateOptions<SdcaRegressionTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);

            return mlContext.Regression.Trainers.Sdca(sdcaOptions);
        }
예제 #2
0
        /// <summary>
        /// Creates a one-versus-all multiclass trainer around the binary learner
        /// produced by <c>_binaryLearnerCatalogItem</c>.
        /// </summary>
        /// <param name="mlContext">Context whose multiclass trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to the binary learner.</param>
        /// <param name="columnInfo">Supplies the label column name.</param>
        /// <param name="validationSet">Not used by this implementation.</param>
        /// <returns>The estimator returned by <c>MulticlassClassification.Trainers.OneVersusAll</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo,
                                                IDataView validationSet)
        {
            var createdLearner = _binaryLearnerCatalogItem.CreateInstance(mlContext, sweepParams, columnInfo);

            // The "as" cast yields null when the created learner is not a LinearSvmTrainer.
            var linearSvm = createdLearner as LinearSvmTrainer;

            return mlContext.MulticlassClassification.Trainers.OneVersusAll(
                linearSvm,
                labelColumnName: columnInfo.LabelColumnName);
        }
        /// <summary>
        /// Creates a linear SVM binary classification trainer from options built out of the sweep parameters.
        /// </summary>
        /// <param name="mlContext">Context whose binary classification trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label column name used when building the options.</param>
        /// <param name="validationSet">Not used by this implementation.</param>
        /// <returns>The estimator returned by <c>BinaryClassification.Trainers.LinearSvm</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo,
                                                IDataView validationSet)
        {
            var svmOptions = TrainerExtensionUtil.CreateOptions<LinearSvmTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);

            return mlContext.BinaryClassification.Trainers.LinearSvm(svmOptions);
        }
        /// <summary>
        /// Creates a symbolic SGD logistic regression binary trainer from options built out of the sweep parameters.
        /// </summary>
        /// <param name="mlContext">Context whose binary classification trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label column name used when building the options.</param>
        /// <returns>The estimator returned by <c>BinaryClassification.Trainers.SymbolicSgdLogisticRegression</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo)
        {
            var symbolicSgdOptions = TrainerExtensionUtil.CreateOptions<SymbolicSgdLogisticRegressionBinaryTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);

            return mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression(symbolicSgdOptions);
        }
        /// <summary>
        /// Creates an online gradient descent regression trainer from options built out of the sweep parameters.
        /// </summary>
        /// <param name="mlContext">Context whose regression trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label column name used when building the options.</param>
        /// <param name="validationSet">Not used by this implementation.</param>
        /// <returns>The estimator returned by <c>Regression.Trainers.OnlineGradientDescent</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo,
                                                IDataView validationSet)
        {
            var ogdOptions = TrainerExtensionUtil.CreateOptions<OnlineGradientDescentTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);

            return mlContext.Regression.Trainers.OnlineGradientDescent(ogdOptions);
        }
예제 #6
0
        /// <summary>
        /// Verifies that the training data exists, is not empty, and that its columns have supported item types.
        /// </summary>
        /// <param name="trainData">The training data to check.</param>
        /// <param name="columnInformation">Names of the label, user ID, item ID and group ID columns;
        /// columns with these names are exempt from the feature type check.</param>
        /// <exception cref="ArgumentNullException"><paramref name="trainData"/> is <see langword="null"/>.</exception>
        /// <exception cref="ArgumentException">The data is reported empty, the features column is not of
        /// single-precision item type, or a feature column has an unsupported item type.</exception>
        private static void ValidateTrainData(IDataView trainData, ColumnInformation columnInformation)
        {
            if (trainData == null)
            {
                throw new ArgumentNullException(nameof(trainData), "Training data cannot be null");
            }

            var hasNoRows = DatasetDimensionsUtil.IsDataViewEmpty(trainData);
            if (hasNoRows)
            {
                throw new ArgumentException("Training data has 0 rows", nameof(trainData));
            }

            foreach (var column in trainData.Schema)
            {
                var itemType = column.Type.GetItemType();

                // The default features column must hold single-precision values.
                if (column.Name == DefaultColumnNames.Features && itemType != NumberDataViewType.Single)
                {
                    throw new ArgumentException($"{DefaultColumnNames.Features} column must be of data type {NumberDataViewType.Single}", nameof(trainData));
                }

                // Label and ID columns are not subject to the feature type restriction.
                var isExemptColumn = column.Name == columnInformation.LabelColumnName ||
                                     column.Name == columnInformation.UserIdColumnName ||
                                     column.Name == columnInformation.ItemIdColumnName ||
                                     column.Name == columnInformation.GroupIdColumnName;
                if (isExemptColumn)
                {
                    continue;
                }

                var isSupportedType = itemType == BooleanDataViewType.Instance ||
                                      itemType == NumberDataViewType.Single ||
                                      itemType == TextDataViewType.Instance;
                if (!isSupportedType)
                {
                    var message = $"Only supported feature column types are " +
                                  $"{BooleanDataViewType.Instance}, {NumberDataViewType.Single}, and {TextDataViewType.Instance}. " +
                                  $"Please change the feature column {column.Name} of type {column.Type} to one of " +
                                  $"the supported types.";
                    throw new ArgumentException(message, nameof(trainData));
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Runs an experiment against an explicit training set and validation set.
        /// </summary>
        /// <param name="trainData">The training data.</param>
        /// <param name="columnInfo">Column information; a new default instance is used when <see langword="null"/>.</param>
        /// <param name="validationData">The validation data.</param>
        /// <param name="preFeaturizer">Optional estimator fit on the training data and then applied to both data sets.</param>
        /// <param name="progressHandler">Progress handler forwarded to the experiment execution.</param>
        /// <returns>The result of the experiment execution.</returns>
        private ExperimentResult<TMetrics> ExecuteTrainValidate(IDataView trainData,
                                                                ColumnInformation columnInfo,
                                                                IDataView validationData,
                                                                IEstimator<ITransformer> preFeaturizer,
                                                                IProgress<RunDetail<TMetrics>> progressHandler)
        {
            if (columnInfo == null)
            {
                columnInfo = new ColumnInformation();
            }

            UserInputValidationUtil.ValidateExperimentExecuteArgs(trainData, columnInfo, validationData, _task);

            // Fit the pre-featurizer on the training data only, then transform both data sets with it.
            ITransformer fittedPreFeaturizer = null;
            if (preFeaturizer != null)
            {
                fittedPreFeaturizer = preFeaturizer.Fit(trainData);
                trainData = fittedPreFeaturizer.Transform(trainData);
                validationData = fittedPreFeaturizer.Transform(validationData);
            }

            var trainValidateRunner = new TrainValidateRunner<TMetrics>(
                Context,
                trainData,
                validationData,
                columnInfo.GroupIdColumnName,
                columnInfo.LabelColumnName,
                MetricsAgent,
                preFeaturizer,
                fittedPreFeaturizer,
                _logger);
            var datasetColumns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainData, columnInfo);

            return Execute(columnInfo, datasetColumns, preFeaturizer, progressHandler, trainValidateRunner);
        }
예제 #8
0
        /// <summary>
        /// Runs a cross-validation experiment over the supplied train/validation folds.
        /// </summary>
        /// <param name="trainDatasets">Training data for each fold; the first element is used for input validation and column inspection.</param>
        /// <param name="columnInfo">Column information; a new default instance is used when <see langword="null"/>.</param>
        /// <param name="validationDatasets">Validation data for each fold; the first element is used for input validation.</param>
        /// <param name="preFeaturizer">Optional pre-featurizer passed to <c>ApplyPreFeaturizerCrossVal</c>.</param>
        /// <param name="progressHandler">Progress handler forwarded to the experiment.</param>
        /// <returns>All run details together with the best run.</returns>
        private CrossValidationExperimentResult<TMetrics> ExecuteCrossVal(IDataView[] trainDatasets,
                                                                          ColumnInformation columnInfo,
                                                                          IDataView[] validationDatasets,
                                                                          IEstimator<ITransformer> preFeaturizer,
                                                                          IProgress<CrossValidationRunDetail<TMetrics>> progressHandler)
        {
            if (columnInfo == null)
            {
                columnInfo = new ColumnInformation();
            }

            UserInputValidationUtil.ValidateExperimentExecuteArgs(trainDatasets[0], columnInfo, validationDatasets[0], _task);

            // Apply the pre-featurizer; the fold arrays are replaced by the returned ones.
            ITransformer[] foldTransforms;
            (trainDatasets, validationDatasets, foldTransforms) =
                ApplyPreFeaturizerCrossVal(trainDatasets, validationDatasets, preFeaturizer);

            var crossValRunner = new CrossValRunner<TMetrics>(
                Context,
                trainDatasets,
                validationDatasets,
                MetricsAgent,
                preFeaturizer,
                foldTransforms,
                columnInfo.GroupIdColumnName,
                columnInfo.LabelColumnName,
                _logger);
            var datasetColumns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainDatasets[0], columnInfo);

            // Run the experiment and collect the details of every pipeline that was run.
            var crossValExperiment = new Experiment<CrossValidationRunDetail<TMetrics>, TMetrics>(
                Context,
                _task,
                OptimizingMetricInfo,
                progressHandler,
                Settings,
                MetricsAgent,
                _trainerAllowList,
                datasetColumns,
                crossValRunner,
                _logger);
            var allRuns = crossValExperiment.Execute();

            var winningRun = GetBestCrossValRun(allRuns);

            return new CrossValidationExperimentResult<TMetrics>(allRuns, winningRun);
        }
        /// <summary>
        /// Creates a one-versus-all multiclass trainer around the binary learner
        /// produced by <c>_binaryLearnerCatalogItem</c>.
        /// </summary>
        /// <param name="mlContext">Context whose multiclass trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to the binary learner.</param>
        /// <param name="columnInfo">Supplies the label column name.</param>
        /// <returns>The estimator returned by <c>MulticlassClassification.Trainers.OneVersusAll</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo)
        {
            var createdLearner = _binaryLearnerCatalogItem.CreateInstance(mlContext, sweepParams, columnInfo);

            // The "as" cast yields null when the created learner is not the expected trainer type.
            var symbolicSgd = createdLearner as SymbolicSgdLogisticRegressionBinaryTrainer;

            return mlContext.MulticlassClassification.Trainers.OneVersusAll(
                symbolicSgd,
                labelColumnName: columnInfo.LabelColumnName);
        }
        /// <summary>
        /// Creates an SDCA maximum entropy multiclass trainer from options built out of the sweep parameters.
        /// </summary>
        /// <param name="mlContext">Context whose multiclass trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label column name used when building the options.</param>
        /// <returns>The estimator returned by <c>MulticlassClassification.Trainers.SdcaMaximumEntropy</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo)
        {
            var maxEntropyOptions = TrainerExtensionUtil.CreateOptions<SdcaMaximumEntropyMulticlassTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);

            return mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(maxEntropyOptions);
        }
예제 #11
0
 /// <summary>
 /// Infers information about the columns of the dataset file found at <paramref name="path"/>.
 /// </summary>
 /// <param name="path">Path to a dataset file.</param>
 /// <param name="columnInformation">Column information for the dataset. When <see langword="null"/>, a new default instance is used.</param>
 /// <param name="separatorChar">The character that separates data elements in a row. If <see langword="null"/>, AutoML will try to infer this value.</param>
 /// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string. If <see langword="null"/>, AutoML will try to infer this value.</param>
 /// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format. If <see langword="null"/>, AutoML will try to infer this value.</param>
 /// <param name="trimWhitespace">Whether trailing whitespace should be removed from dataset file lines.</param>
 /// <param name="groupColumns">Whether to group original columns of the dataset file (when possible) into vector columns in the resulting data structures. See <see cref="TextLoader.Range"/> for more information.</param>
 /// <returns>Information inferred about the columns in the provided dataset.</returns>
 /// <remarks>
 /// Inference covers the name, data type, and purpose of each column.
 /// The returned <see cref="ColumnInferenceResults.TextLoaderOptions" /> can be used to
 /// instantiate a <see cref="TextLoader" />, which in turn can produce an <see cref="IDataView"/>
 /// to feed into an AutoML experiment or to use elsewhere in the ML.NET ecosystem
 /// (e.g. in <see cref="IEstimator{TTransformer}.Fit(IDataView)"/>).
 /// The <see cref="ColumnInformation"/> holds the inferred purpose of each column in the dataset
 /// (for instance: is the column categorical, numeric, or text data? should it be ignored?).
 /// It can be inspected and modified (or kept as is) before being used by an AutoML experiment.
 /// </remarks>
 public ColumnInferenceResults InferColumns(string path,
                                            ColumnInformation columnInformation,
                                            char? separatorChar = null,
                                            bool? allowQuoting = null,
                                            bool? allowSparse = null,
                                            bool trimWhitespace = false,
                                            bool groupColumns = true)
 {
     if (columnInformation == null)
     {
         columnInformation = new ColumnInformation();
     }

     UserInputValidationUtil.ValidateInferColumnsArgs(path, columnInformation);

     return ColumnInferenceApi.InferColumns(
         _context,
         path,
         columnInformation,
         separatorChar,
         allowQuoting,
         allowSparse,
         trimWhitespace,
         groupColumns);
 }
예제 #12
0
        /// <summary>
        /// Creates an image classification multiclass trainer.
        /// </summary>
        /// <param name="mlContext">Context whose multiclass trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Not used; <see langword="null"/> is passed to <c>TrainerExtensionUtil.CreateOptions</c> instead.</param>
        /// <param name="columnInfo">Supplies the label column name used when building the options.</param>
        /// <param name="validationSet">Not used by this implementation.</param>
        /// <returns>The estimator returned by <c>MulticlassClassification.Trainers.ImageClassification</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo,
                                                IDataView validationSet)
        {
            var imageOptions = TrainerExtensionUtil.CreateOptions<Options>(null, columnInfo.LabelColumnName);

            return mlContext.MulticlassClassification.Trainers.ImageClassification(imageOptions);
        }
예제 #13
0
 /// <summary>
 /// Validates the arguments of an experiment execution by running, in order, the training data,
 /// column information, and validation data checks.
 /// </summary>
 /// <param name="trainData">The training data.</param>
 /// <param name="columnInformation">Column information for the dataset.</param>
 /// <param name="validationData">The validation data, checked against <paramref name="trainData"/>.</param>
 /// <param name="task">The task kind passed to the column information check.</param>
 public static void ValidateExperimentExecuteArgs(IDataView trainData,
                                                  ColumnInformation columnInformation,
                                                  IDataView validationData,
                                                  TaskKind task)
 {
     // Order matters: the training data is checked before anything that reads it.
     ValidateTrainData(trainData, columnInformation);

     ValidateColumnInformation(trainData, columnInformation, task);

     ValidateValidationData(trainData, validationData);
 }
        /// <summary>
        /// Looks up the purpose assigned to a column name by the given column information.
        /// </summary>
        /// <param name="columnInfo">The column information to search.</param>
        /// <param name="columnName">The column name to look up.</param>
        /// <returns>
        /// The first matching purpose, checked in this order: label, weight, sampling key, categorical,
        /// numeric, text, ignored, user ID, group ID, item ID, image path; or <see langword="null"/>
        /// when nothing matches.
        /// </returns>
        internal static ColumnPurpose? GetColumnPurpose(this ColumnInformation columnInfo, string columnName)
        {
            ColumnPurpose? purpose = null;

            // The order of these checks decides which purpose wins when a name matches more than one.
            if (columnName == columnInfo.LabelColumnName)
            {
                purpose = ColumnPurpose.Label;
            }
            else if (columnName == columnInfo.ExampleWeightColumnName)
            {
                purpose = ColumnPurpose.Weight;
            }
            else if (columnName == columnInfo.SamplingKeyColumnName)
            {
                purpose = ColumnPurpose.SamplingKey;
            }
            else if (columnInfo.CategoricalColumnNames.Contains(columnName))
            {
                purpose = ColumnPurpose.CategoricalFeature;
            }
            else if (columnInfo.NumericColumnNames.Contains(columnName))
            {
                purpose = ColumnPurpose.NumericFeature;
            }
            else if (columnInfo.TextColumnNames.Contains(columnName))
            {
                purpose = ColumnPurpose.TextFeature;
            }
            else if (columnInfo.IgnoredColumnNames.Contains(columnName))
            {
                purpose = ColumnPurpose.Ignore;
            }
            else if (columnName == columnInfo.UserIdColumnName)
            {
                purpose = ColumnPurpose.UserId;
            }
            else if (columnName == columnInfo.GroupIdColumnName)
            {
                purpose = ColumnPurpose.GroupId;
            }
            else if (columnName == columnInfo.ItemIdColumnName)
            {
                purpose = ColumnPurpose.ItemId;
            }
            else if (columnInfo.ImagePathColumnNames.Contains(columnName))
            {
                purpose = ColumnPurpose.ImagePath;
            }

            return purpose;
        }
예제 #15
0
        /// <summary>
        /// Creates a FastTree ranking trainer, using the group ID column as the row group column.
        /// </summary>
        /// <param name="mlContext">Context whose ranking trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label and group ID column names.</param>
        /// <param name="validationSet">Not used by this implementation.</param>
        /// <returns>The estimator returned by <c>Ranking.Trainers.FastTree</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo,
                                                IDataView validationSet)
        {
            var rankingOptions = TrainerExtensionUtil.CreateOptions<FastTreeRankingTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);
            rankingOptions.RowGroupColumnName = columnInfo.GroupIdColumnName;

            return mlContext.Ranking.Trainers.FastTree(rankingOptions);
        }
예제 #16
0
        /// <summary>
        /// Creates an L-BFGS maximum entropy multiclass trainer, carrying over the example weight column.
        /// </summary>
        /// <param name="mlContext">Context whose multiclass trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label and example weight column names.</param>
        /// <param name="validationSet">Not used by this implementation.</param>
        /// <returns>The estimator returned by <c>MulticlassClassification.Trainers.LbfgsMaximumEntropy</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo,
                                                IDataView validationSet)
        {
            var lbfgsOptions = TrainerExtensionUtil.CreateOptions<LbfgsMaximumEntropyMulticlassTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);
            lbfgsOptions.ExampleWeightColumnName = columnInfo.ExampleWeightColumnName;

            return mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(lbfgsOptions);
        }
예제 #17
0
 /// <summary>
 /// Creates a LightGBM ranking trainer, using the group ID column as the row group column.
 /// </summary>
 /// <param name="mlContext">Context whose ranking trainer catalog produces the trainer.</param>
 /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateLightGbmOptions</c>.</param>
 /// <param name="columnInfo">Column information forwarded to the options builder; also supplies the group ID column name.</param>
 /// <param name="validationSet">Not used by this implementation.</param>
 /// <returns>The estimator returned by <c>Ranking.Trainers.LightGbm</c>.</returns>
 public ITrainerEstimator CreateInstance(MLContext mlContext,
                                         IEnumerable<SweepableParam> sweepParams,
                                         ColumnInformation columnInfo,
                                         IDataView validationSet)
 {
     LightGbmRankingTrainer.Options rankingOptions =
         TrainerExtensionUtil.CreateLightGbmOptions<LightGbmRankingTrainer.Options,
                                                    float,
                                                    RankingPredictionTransformer<LightGbmRankingModelParameters>,
                                                    LightGbmRankingModelParameters>(sweepParams, columnInfo);
     rankingOptions.RowGroupColumnName = columnInfo.GroupIdColumnName;

     return mlContext.Ranking.Trainers.LightGbm(rankingOptions);
 }
        /// <summary>
        /// Creates a FastTree binary classification trainer, carrying over the example weight column.
        /// </summary>
        /// <param name="mlContext">Context whose binary classification trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label and example weight column names.</param>
        /// <param name="validationSet">Not used by this implementation.</param>
        /// <returns>The estimator returned by <c>BinaryClassification.Trainers.FastTree</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo,
                                                IDataView validationSet)
        {
            var fastTreeOptions = TrainerExtensionUtil.CreateOptions<FastTreeBinaryTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);
            fastTreeOptions.ExampleWeightColumnName = columnInfo.ExampleWeightColumnName;

            return mlContext.BinaryClassification.Trainers.FastTree(fastTreeOptions);
        }
        /// <summary>
        /// Creates an L-BFGS Poisson regression trainer, carrying over the example weight column.
        /// </summary>
        /// <param name="mlContext">Context whose regression trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label and example weight column names.</param>
        /// <param name="validationSet">Not used by this implementation.</param>
        /// <returns>The estimator returned by <c>Regression.Trainers.LbfgsPoissonRegression</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo,
                                                IDataView validationSet)
        {
            var poissonOptions = TrainerExtensionUtil.CreateOptions<LbfgsPoissonRegressionTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);
            poissonOptions.ExampleWeightColumnName = columnInfo.ExampleWeightColumnName;

            return mlContext.Regression.Trainers.LbfgsPoissonRegression(poissonOptions);
        }
        /// <summary>
        /// Creates a calibrated SGD binary classification trainer, carrying over the example weight column.
        /// </summary>
        /// <param name="mlContext">Context whose binary classification trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label and example weight column names.</param>
        /// <returns>The estimator returned by <c>BinaryClassification.Trainers.SgdCalibrated</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo)
        {
            var sgdOptions = TrainerExtensionUtil.CreateOptions<SgdCalibratedTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);
            sgdOptions.ExampleWeightColumnName = columnInfo.ExampleWeightColumnName;

            return mlContext.BinaryClassification.Trainers.SgdCalibrated(sgdOptions);
        }
        /// <summary>
        /// Creates a FastTree Tweedie regression trainer, carrying over the example weight column.
        /// </summary>
        /// <param name="mlContext">Context whose regression trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label and example weight column names.</param>
        /// <returns>The estimator returned by <c>Regression.Trainers.FastTreeTweedie</c>.</returns>
        public ITrainerEstimator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo)
        {
            var tweedieOptions = TrainerExtensionUtil.CreateOptions<FastTreeTweedieTrainer.Options>(
                sweepParams,
                columnInfo.LabelColumnName);
            tweedieOptions.ExampleWeightColumnName = columnInfo.ExampleWeightColumnName;

            return mlContext.Regression.Trainers.FastTreeTweedie(tweedieOptions);
        }
예제 #22
0
        /// <summary>
        /// Executes an AutoML experiment using an explicit validation set and a label column name.
        /// </summary>
        /// <param name="trainData">The training data to be used by the AutoML experiment.</param>
        /// <param name="validationData">The validation data to be used by the AutoML experiment.</param>
        /// <param name="labelColumnName">The name of the label column.</param>
        /// <param name="preFeaturizer">Pre-featurizer that AutoML will apply to the data during an
        /// experiment. (The pre-featurizer is fit only on the training data split to produce a
        /// trained transform, which is then applied to both the training data split and the
        /// corresponding validation data split.)</param>
        /// <param name="progressHandler">A user-defined object that implements
        /// the <see cref="IProgress{T}"/> interface. AutoML will invoke
        /// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
        /// course of the experiment.
        /// </param>
        /// <returns>The experiment result.</returns>
        /// <remarks>
        /// Depending on the size of your data, the AutoML experiment could take a long time to execute.
        /// This overload wraps <paramref name="labelColumnName"/> in a new <see cref="ColumnInformation"/>
        /// and delegates to the overload that accepts column information.
        /// </remarks>
        public ExperimentResult<TMetrics> Execute(IDataView trainData,
                                                  IDataView validationData,
                                                  string labelColumnName = DefaultColumnNames.Label,
                                                  IEstimator<ITransformer> preFeaturizer = null,
                                                  IProgress<RunDetail<TMetrics>> progressHandler = null)
        {
            var labelOnlyColumnInfo = new ColumnInformation();
            labelOnlyColumnInfo.LabelColumnName = labelColumnName;

            return Execute(trainData, validationData, labelOnlyColumnInfo, preFeaturizer, progressHandler);
        }
        /// <summary>
        /// Executes a binary classification experiment using an explicit validation set and a label column name.
        /// </summary>
        /// <param name="trainData">The training data.</param>
        /// <param name="validationData">The validation data.</param>
        /// <param name="labelColumnName">The name of the label column.</param>
        /// <param name="preFeaturizer">Optional pre-featurizer forwarded to the column-information overload.</param>
        /// <param name="progressHandler">Optional progress handler forwarded to the column-information overload.</param>
        /// <returns>The experiment result.</returns>
        public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData,
                                                                              IDataView validationData,
                                                                              string labelColumnName = "Label",
                                                                              IEstimator<ITransformer> preFeaturizer = null,
                                                                              IProgress<RunDetail<BinaryClassificationMetrics>> progressHandler = null)
        {
            var labelOnlyColumnInfo = new ColumnInformation();
            labelOnlyColumnInfo.LabelColumnName = labelColumnName;

            return Execute(trainData, validationData, labelOnlyColumnInfo, preFeaturizer, progressHandler);
        }
예제 #24
0
        /// <summary>
        /// Executes an AutoML experiment using cross validation.
        /// </summary>
        /// <param name="trainData">The training data to be used by the AutoML experiment.</param>
        /// <param name="numberOfCVFolds">The number of cross validation folds into which the training data should be divided when fitting a model.</param>
        /// <param name="columnInformation">Column information for the dataset. When non-null, its sampling key column name is used for the split.</param>
        /// <param name="preFeaturizer">Pre-featurizer that AutoML will apply to the data during an
        /// experiment. (The pre-featurizer is fit only on the training data split to produce a
        /// trained transform, which is then applied to both the training data split and the
        /// corresponding validation data split.)</param>
        /// <param name="progressHandler">A user-defined object that implements
        /// the <see cref="IProgress{T}"/> interface. AutoML will invoke
        /// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
        /// course of the experiment.
        /// </param>
        /// <returns>The cross validation experiment result.</returns>
        /// <remarks>
        /// Depending on the size of your data, the AutoML experiment could take a long time to execute.
        /// </remarks>
        public CrossValidationExperimentResult<TMetrics> Execute(IDataView trainData,
                                                                 uint numberOfCVFolds,
                                                                 ColumnInformation columnInformation = null,
                                                                 IEstimator<ITransformer> preFeaturizer = null,
                                                                 IProgress<CrossValidationRunDetail<TMetrics>> progressHandler = null)
        {
            UserInputValidationUtil.ValidateNumberOfCVFoldsArg(numberOfCVFolds);

            // The fold count is validated before the data is split.
            var folds = SplitUtil.CrossValSplit(
                Context,
                trainData,
                numberOfCVFolds,
                columnInformation?.SamplingKeyColumnName);

            return ExecuteCrossVal(
                folds.trainDatasets,
                columnInformation,
                folds.validationDatasets,
                preFeaturizer,
                progressHandler);
        }
예제 #25
0
        /// <summary>
        /// Creates a matrix factorization recommendation trainer.
        /// </summary>
        /// <param name="mlContext">Context whose recommendation trainer catalog produces the trainer.</param>
        /// <param name="sweepParams">Hyperparameter values forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
        /// <param name="columnInfo">Supplies the label, user ID (matrix column index) and item ID (matrix row index) column names.</param>
        /// <param name="validationSet">Not used by this implementation.</param>
        /// <returns>The estimator returned by <c>Recommendation().Trainers.MatrixFactorization</c>.</returns>
        // NOTE(review): the return type is spelled "ITrainerEsitmator" while sibling extensions use
        // "ITrainerEstimator" - presumably a file-level alias; confirm before renaming.
        public ITrainerEsitmator CreateInstance(MLContext mlContext,
                                                IEnumerable<SweepableParam> sweepParams,
                                                ColumnInformation columnInfo,
                                                IDataView validationSet)
        {
            var factorizationOptions = TrainerExtensionUtil.CreateOptions<MatrixFactorizationTrainer.Options>(sweepParams);

            factorizationOptions.LabelColumnName = columnInfo.LabelColumnName;
            factorizationOptions.MatrixColumnIndexColumnName = columnInfo.UserIdColumnName;
            factorizationOptions.MatrixRowIndexColumnName = columnInfo.ItemIdColumnName;

            return mlContext.Recommendation().Trainers.MatrixFactorization(factorizationOptions);
        }
예제 #26
0
 /// <summary>
 /// Executes an AutoML experiment using the supplied column information.
 /// </summary>
 /// <param name="trainData">The training data to be used by the AutoML experiment.</param>
 /// <param name="validationData">The validation data to be used by the AutoML experiment.
 /// When <see langword="null"/>, the call is forwarded to the overload that takes no validation data.</param>
 /// <param name="columnInformation">Column information for the dataset.</param>
 /// <param name="preFeaturizer">Pre-featurizer that AutoML will apply to the data during an
 /// experiment. (The pre-featurizer is fit only on the training data split to produce a
 /// trained transform, which is then applied to both the training data split and the
 /// corresponding validation data split.)</param>
 /// <param name="progressHandler">A user-defined object that implements
 /// the <see cref="IProgress{T}"/> interface. AutoML will invoke
 /// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
 /// course of the experiment.
 /// </param>
 /// <returns>The experiment result.</returns>
 /// <remarks>
 /// Depending on the size of your data, the AutoML experiment could take a long time to execute.
 /// </remarks>
 public virtual ExperimentResult<TMetrics> Execute(IDataView trainData,
                                                   IDataView validationData,
                                                   ColumnInformation columnInformation,
                                                   IEstimator<ITransformer> preFeaturizer = null,
                                                   IProgress<RunDetail<TMetrics>> progressHandler = null)
 {
     // Without validation data, defer to the overload that only takes training data.
     return validationData == null
         ? Execute(trainData, columnInformation, preFeaturizer, progressHandler)
         : ExecuteTrainValidate(trainData, columnInformation, validationData, preFeaturizer, progressHandler);
 }
        /// <summary>
        /// Infers column information for a dataset file when only the label column name is known.
        /// </summary>
        /// <param name="context">The ML context forwarded to the column-information overload.</param>
        /// <param name="path">Path to the dataset file.</param>
        /// <param name="labelColumn">Name stored as the label column in a new <see cref="ColumnInformation"/>.</param>
        /// <param name="separatorChar">Separator character, forwarded unchanged.</param>
        /// <param name="allowQuotedStrings">Quoting setting, forwarded unchanged.</param>
        /// <param name="supportSparse">Sparse-format setting, forwarded unchanged.</param>
        /// <param name="trimWhitespace">Whitespace trimming setting, forwarded unchanged.</param>
        /// <param name="groupColumns">Column grouping setting, forwarded unchanged.</param>
        /// <returns>The result of the column-information overload of <c>InferColumns</c>.</returns>
        public static ColumnInferenceResults InferColumns(MLContext context,
                                                          string path,
                                                          string labelColumn,
                                                          char? separatorChar,
                                                          bool? allowQuotedStrings,
                                                          bool? supportSparse,
                                                          bool trimWhitespace,
                                                          bool groupColumns)
        {
            var labelOnlyColumnInfo = new ColumnInformation();
            labelOnlyColumnInfo.LabelColumnName = labelColumn;

            return InferColumns(
                context,
                path,
                labelOnlyColumnInfo,
                separatorChar,
                allowQuotedStrings,
                supportSparse,
                trimWhitespace,
                groupColumns);
        }
        /// <summary>
        /// Executes a binary classification experiment using cross validation, given label and sampling key column names.
        /// </summary>
        /// <param name="trainData">The training data.</param>
        /// <param name="numberOfCVFolds">The number of cross validation folds.</param>
        /// <param name="labelColumnName">The name of the label column.</param>
        /// <param name="samplingKeyColumn">The name of the sampling key column, or <see langword="null"/>.</param>
        /// <param name="preFeaturizer">Optional pre-featurizer forwarded to the column-information overload.</param>
        /// <param name="progressHandler">Optional progress handler forwarded to the column-information overload.</param>
        /// <returns>The cross validation experiment result.</returns>
        public override CrossValidationExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData,
                                                                                             uint numberOfCVFolds,
                                                                                             string labelColumnName = "Label",
                                                                                             string samplingKeyColumn = null,
                                                                                             IEstimator<ITransformer> preFeaturizer = null,
                                                                                             IProgress<CrossValidationRunDetail<BinaryClassificationMetrics>> progressHandler = null)
        {
            var columnInfoFromNames = new ColumnInformation();
            columnInfoFromNames.LabelColumnName = labelColumnName;
            columnInfoFromNames.SamplingKeyColumnName = samplingKeyColumn;

            return Execute(trainData, numberOfCVFolds, columnInfoFromNames, preFeaturizer, progressHandler);
        }
        /// <summary>
        /// Builds a <see cref="ColumnInformation"/> from a property dictionary.
        /// </summary>
        /// <param name="props">Dictionary read via the <c>LabelColumn</c> key (indexer lookup, so the key
        /// must be present) and the <c>WeightColumn</c> key (optional lookup).</param>
        /// <returns>Column information with the label and example weight column names set; a value
        /// that is missing or not a string results in <see langword="null"/> for that name.</returns>
        public static ColumnInformation BuildColumnInfo(IDictionary<string, object> props)
        {
            var result = new ColumnInformation();

            var labelValue = props[LabelColumn];
            result.LabelColumnName = labelValue as string;

            // The lookup outcome is deliberately ignored: a missing key leaves weightValue null.
            object weightValue;
            props.TryGetValue(WeightColumn, out weightValue);
            result.ExampleWeightColumnName = weightValue as string;

            return result;
        }
예제 #30
0
 /// <summary>
 /// Initializes a suggested trainer from a trainer extension and optional hyperparameter values.
 /// </summary>
 /// <param name="mlContext">The ML context stored for later use.</param>
 /// <param name="trainerExtension">Extension that supplies the sweep ranges and identifies the trainer name.</param>
 /// <param name="columnInfo">Column information stored for later use.</param>
 /// <param name="hyperParamSet">Optional hyperparameter values passed to <c>SetHyperparamValues</c>.</param>
 internal SuggestedTrainer(MLContext mlContext,
                           ITrainerExtension trainerExtension,
                           ColumnInformation columnInfo,
                           ParameterSet hyperParamSet = null)
 {
     _mlContext = mlContext;
     _trainerExtension = trainerExtension;
     _columnInfo = columnInfo;

     SweepParams = trainerExtension.GetHyperparamSweepRanges();
     TrainerName = TrainerExtensionCatalog.GetTrainerName(trainerExtension);

     // Applied last, after the sweep ranges and trainer name have been assigned.
     SetHyperparamValues(hyperParamSet);
 }