/// <summary>
/// Creates an SDCA regression trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose regression trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; only <c>LabelColumnName</c> is read here.</param>
/// <returns>The trainer returned by <c>mlContext.Regression.Trainers.Sdca</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo)
{
    var labelName = columnInfo.LabelColumnName;
    var trainerOptions = TrainerExtensionUtil.CreateOptions<SdcaRegressionTrainer.Options>(sweepParams, labelName);

    return mlContext.Regression.Trainers.Sdca(trainerOptions);
}
/// <summary>
/// Creates a one-versus-all multiclass trainer that wraps the binary trainer produced by
/// <c>_binaryLearnerCatalogItem</c>.
/// </summary>
/// <param name="mlContext">Context used to build both the binary and the multiclass trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to the binary trainer factory.</param>
/// <param name="columnInfo">Column information; its <c>LabelColumnName</c> is passed to the one-versus-all trainer.</param>
/// <param name="validationSet">Not used by this implementation.</param>
/// <returns>The trainer returned by <c>OneVersusAll</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    var innerTrainer = _binaryLearnerCatalogItem.CreateInstance(mlContext, sweepParams, columnInfo);

    // 'as' yields null when the created trainer is not a LinearSvmTrainer; the value is passed on unchecked.
    var linearSvm = innerTrainer as LinearSvmTrainer;

    return mlContext.MulticlassClassification.Trainers.OneVersusAll(
        linearSvm,
        labelColumnName: columnInfo.LabelColumnName);
}
/// <summary>
/// Creates a linear SVM binary classification trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose binary classification trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; only <c>LabelColumnName</c> is read here.</param>
/// <param name="validationSet">Not used by this implementation.</param>
/// <returns>The trainer returned by <c>mlContext.BinaryClassification.Trainers.LinearSvm</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    var labelName = columnInfo.LabelColumnName;
    var svmOptions = TrainerExtensionUtil.CreateOptions<LinearSvmTrainer.Options>(sweepParams, labelName);

    return mlContext.BinaryClassification.Trainers.LinearSvm(svmOptions);
}
/// <summary>
/// Creates a symbolic SGD logistic regression binary trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose binary classification trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; only <c>LabelColumnName</c> is read here.</param>
/// <returns>The trainer returned by <c>SymbolicSgdLogisticRegression</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo)
{
    var labelName = columnInfo.LabelColumnName;
    var sgdOptions = TrainerExtensionUtil.CreateOptions<SymbolicSgdLogisticRegressionBinaryTrainer.Options>(sweepParams, labelName);

    return mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression(sgdOptions);
}
/// <summary>
/// Creates an online gradient descent regression trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose regression trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; only <c>LabelColumnName</c> is read here.</param>
/// <param name="validationSet">Not used by this implementation.</param>
/// <returns>The trainer returned by <c>mlContext.Regression.Trainers.OnlineGradientDescent</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    var labelName = columnInfo.LabelColumnName;
    var ogdOptions = TrainerExtensionUtil.CreateOptions<OnlineGradientDescentTrainer.Options>(sweepParams, labelName);

    return mlContext.Regression.Trainers.OnlineGradientDescent(ogdOptions);
}
/// <summary>
/// Validates the training data: it must be non-null, non-empty, and every column that is
/// not the label, user ID, item ID, or group ID column must have a supported item type.
/// </summary>
/// <param name="trainData">The training data to check.</param>
/// <param name="columnInformation">Supplies the names of the columns exempt from the feature-type check.</param>
/// <exception cref="ArgumentNullException"><paramref name="trainData"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The data is reported empty by <c>DatasetDimensionsUtil.IsDataViewEmpty</c>, the column named
/// <c>DefaultColumnNames.Features</c> does not have item type <c>NumberDataViewType.Single</c>,
/// or a non-exempt column has an unsupported item type.
/// </exception>
private static void ValidateTrainData(IDataView trainData, ColumnInformation columnInformation)
{
    if (trainData == null)
    {
        throw new ArgumentNullException(nameof(trainData), "Training data cannot be null");
    }

    if (DatasetDimensionsUtil.IsDataViewEmpty(trainData))
    {
        throw new ArgumentException("Training data has 0 rows", nameof(trainData));
    }

    foreach (var col in trainData.Schema)
    {
        var colName = col.Name;

        // A column carrying the default features name must hold Single values.
        if (colName == DefaultColumnNames.Features && col.Type.GetItemType() != NumberDataViewType.Single)
        {
            throw new ArgumentException($"{DefaultColumnNames.Features} column must be of data type {NumberDataViewType.Single}", nameof(trainData));
        }

        // Label / user ID / item ID / group ID columns are exempt from the feature-type restriction.
        var isExempt = colName == columnInformation.LabelColumnName
            || colName == columnInformation.UserIdColumnName
            || colName == columnInformation.ItemIdColumnName
            || colName == columnInformation.GroupIdColumnName;
        if (isExempt)
        {
            continue;
        }

        var isSupported = col.Type.GetItemType() == BooleanDataViewType.Instance
            || col.Type.GetItemType() == NumberDataViewType.Single
            || col.Type.GetItemType() == TextDataViewType.Instance;
        if (!isSupported)
        {
            throw new ArgumentException($"Only supported feature column types are " +
                $"{BooleanDataViewType.Instance}, {NumberDataViewType.Single}, and {TextDataViewType.Instance}. " +
                $"Please change the feature column {col.Name} of type {col.Type} to one of " +
                $"the supported types.", nameof(trainData));
        }
    }
}
/// <summary>
/// Runs an experiment against an explicit train/validation split.
/// </summary>
/// <param name="trainData">Training data; replaced by its transformed form when a pre-featurizer is given.</param>
/// <param name="columnInfo">Column information; a new <c>ColumnInformation</c> is used when null.</param>
/// <param name="validationData">Validation data; transformed with the fitted pre-featurizer when one is given.</param>
/// <param name="preFeaturizer">Optional estimator that is fit on <paramref name="trainData"/> only.</param>
/// <param name="progressHandler">Progress handler forwarded to <c>Execute</c>.</param>
/// <returns>The result of the inner <c>Execute</c> call.</returns>
private ExperimentResult<TMetrics> ExecuteTrainValidate(IDataView trainData, ColumnInformation columnInfo, IDataView validationData, IEstimator<ITransformer> preFeaturizer, IProgress<RunDetail<TMetrics>> progressHandler)
{
    columnInfo = columnInfo ?? new ColumnInformation();
    UserInputValidationUtil.ValidateExperimentExecuteArgs(trainData, columnInfo, validationData, _task);

    // Apply pre-featurizer: fit on the training data, then transform both splits with the fitted transform.
    ITransformer fittedPreprocessor = null;
    if (preFeaturizer != null)
    {
        fittedPreprocessor = preFeaturizer.Fit(trainData);
        trainData = fittedPreprocessor.Transform(trainData);
        validationData = fittedPreprocessor.Transform(validationData);
    }

    var trainValidateRunner = new TrainValidateRunner<TMetrics>(
        Context,
        trainData,
        validationData,
        columnInfo.GroupIdColumnName,
        columnInfo.LabelColumnName,
        MetricsAgent,
        preFeaturizer,
        fittedPreprocessor,
        _logger);

    // Column info is derived from the (possibly transformed) training data.
    var datasetColumns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainData, columnInfo);

    return Execute(columnInfo, datasetColumns, preFeaturizer, progressHandler, trainValidateRunner);
}
/// <summary>
/// Runs a cross-validation experiment over the supplied train/validation folds.
/// </summary>
/// <param name="trainDatasets">Training data of each fold; the first entry is used for argument validation and column inference.</param>
/// <param name="columnInfo">Column information; a new <c>ColumnInformation</c> is used when null.</param>
/// <param name="validationDatasets">Validation data of each fold; the first entry is used for argument validation.</param>
/// <param name="preFeaturizer">Optional pre-featurizer handed to <c>ApplyPreFeaturizerCrossVal</c>.</param>
/// <param name="progressHandler">Progress handler passed to the experiment.</param>
/// <returns>A result holding all run details and the run chosen by <c>GetBestCrossValRun</c>.</returns>
private CrossValidationExperimentResult<TMetrics> ExecuteCrossVal(IDataView[] trainDatasets, ColumnInformation columnInfo, IDataView[] validationDatasets, IEstimator<ITransformer> preFeaturizer, IProgress<CrossValidationRunDetail<TMetrics>> progressHandler)
{
    columnInfo = columnInfo ?? new ColumnInformation();
    UserInputValidationUtil.ValidateExperimentExecuteArgs(trainDatasets[0], columnInfo, validationDatasets[0], _task);

    // Apply pre-featurizer
    ITransformer[] fittedPreprocessors = null;
    (trainDatasets, validationDatasets, fittedPreprocessors) =
        ApplyPreFeaturizerCrossVal(trainDatasets, validationDatasets, preFeaturizer);

    var crossValRunner = new CrossValRunner<TMetrics>(
        Context,
        trainDatasets,
        validationDatasets,
        MetricsAgent,
        preFeaturizer,
        fittedPreprocessors,
        columnInfo.GroupIdColumnName,
        columnInfo.LabelColumnName,
        _logger);

    var datasetColumns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainDatasets[0], columnInfo);

    // Execute experiment & get all pipelines run
    var crossValExperiment = new Experiment<CrossValidationRunDetail<TMetrics>, TMetrics>(
        Context,
        _task,
        OptimizingMetricInfo,
        progressHandler,
        Settings,
        MetricsAgent,
        _trainerAllowList,
        datasetColumns,
        crossValRunner,
        _logger);
    var allRuns = crossValExperiment.Execute();
    var topRun = GetBestCrossValRun(allRuns);

    return new CrossValidationExperimentResult<TMetrics>(allRuns, topRun);
}
/// <summary>
/// Creates a one-versus-all multiclass trainer that wraps the binary trainer produced by
/// <c>_binaryLearnerCatalogItem</c>.
/// </summary>
/// <param name="mlContext">Context used to build both the binary and the multiclass trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to the binary trainer factory.</param>
/// <param name="columnInfo">Column information; its <c>LabelColumnName</c> is passed to the one-versus-all trainer.</param>
/// <returns>The trainer returned by <c>OneVersusAll</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo)
{
    var innerTrainer = _binaryLearnerCatalogItem.CreateInstance(mlContext, sweepParams, columnInfo);

    // 'as' yields null when the created trainer is not the expected type; the value is passed on unchecked.
    var symbolicSgd = innerTrainer as SymbolicSgdLogisticRegressionBinaryTrainer;

    return mlContext.MulticlassClassification.Trainers.OneVersusAll(
        symbolicSgd,
        labelColumnName: columnInfo.LabelColumnName);
}
/// <summary>
/// Creates an SDCA maximum entropy multiclass trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose multiclass trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; only <c>LabelColumnName</c> is read here.</param>
/// <returns>The trainer returned by <c>SdcaMaximumEntropy</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo)
{
    var labelName = columnInfo.LabelColumnName;
    var sdcaOptions = TrainerExtensionUtil.CreateOptions<SdcaMaximumEntropyMulticlassTrainer.Options>(sweepParams, labelName);

    return mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(sdcaOptions);
}
/// <summary>
/// Infers information about the columns of the dataset file located at <paramref name="path"/>.
/// </summary>
/// <param name="path">Path to a dataset file.</param>
/// <param name="columnInformation">Column information for the dataset. When <see langword="null"/>, a new <see cref="ColumnInformation"/> is used.</param>
/// <param name="separatorChar">The character separating data elements in a row. If <see langword="null"/>, AutoML will try to infer this value.</param>
/// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string. If <see langword="null"/>, AutoML will try to infer this value.</param>
/// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format. If <see langword="null"/>, AutoML will try to infer this value.</param>
/// <param name="trimWhitespace">Whether trailing whitespace should be removed from dataset file lines.</param>
/// <param name="groupColumns">Whether to group (when possible) original columns of the dataset file into vector columns in the resulting data structures. See <see cref="TextLoader.Range"/> for more information.</param>
/// <returns>Information inferred about the columns in the provided dataset.</returns>
/// <remarks>
/// Infers the name, data type, and purpose of each column.
/// The returned <see cref="ColumnInferenceResults.TextLoaderOptions" /> can be used to
/// instantiate a <see cref="TextLoader" />, which in turn can produce an <see cref="IDataView"/>
/// to feed into an AutoML experiment or to use elsewhere in the ML.NET ecosystem
/// (for example in <see cref="IEstimator{TTransformer}.Fit(IDataView)"/>).
/// The <see cref="ColumnInformation"/> contains the inferred purpose of each column in the dataset
/// (for instance: is the column categorical, numeric, or text data? Should the column be ignored?).
/// It can be inspected and modified (or kept as is) and then used by an AutoML experiment.
/// </remarks>
public ColumnInferenceResults InferColumns(string path, ColumnInformation columnInformation, char? separatorChar = null, bool? allowQuoting = null, bool? allowSparse = null, bool trimWhitespace = false, bool groupColumns = true)
{
    var effectiveColumnInfo = columnInformation ?? new ColumnInformation();
    UserInputValidationUtil.ValidateInferColumnsArgs(path, effectiveColumnInfo);

    return ColumnInferenceApi.InferColumns(
        _context,
        path,
        effectiveColumnInfo,
        separatorChar,
        allowQuoting,
        allowSparse,
        trimWhitespace,
        groupColumns);
}
/// <summary>
/// Creates an image classification multiclass trainer.
/// </summary>
/// <param name="mlContext">Context whose multiclass trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Not forwarded: <c>null</c> is passed to <c>TrainerExtensionUtil.CreateOptions</c> instead.</param>
/// <param name="columnInfo">Column information; only <c>LabelColumnName</c> is read here.</param>
/// <param name="validationSet">Not used by this implementation.</param>
/// <returns>The trainer returned by <c>ImageClassification</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    var labelName = columnInfo.LabelColumnName;

    // NOTE(review): sweepParams is ignored here and null is passed — confirm this is intentional.
    var imageOptions = TrainerExtensionUtil.CreateOptions<Options>(null, labelName);

    return mlContext.MulticlassClassification.Trainers.ImageClassification(imageOptions);
}
/// <summary>
/// Validates the arguments of an experiment execution by running the training-data,
/// column-information, and validation-data checks, in that order.
/// </summary>
/// <param name="trainData">Training data to validate.</param>
/// <param name="columnInformation">Column information checked against <paramref name="trainData"/>.</param>
/// <param name="validationData">Validation data checked against <paramref name="trainData"/>.</param>
/// <param name="task">Task kind forwarded to the column-information check.</param>
public static void ValidateExperimentExecuteArgs(IDataView trainData, ColumnInformation columnInformation, IDataView validationData, TaskKind task)
{
    // 1. The training data itself.
    ValidateTrainData(trainData, columnInformation);

    // 2. The column information, relative to the training data and task.
    ValidateColumnInformation(trainData, columnInformation, task);

    // 3. The validation data, relative to the training data.
    ValidateValidationData(trainData, validationData);
}
/// <summary>
/// Looks up the purpose assigned to <paramref name="columnName"/> in <paramref name="columnInfo"/>.
/// </summary>
/// <param name="columnInfo">Column information to search.</param>
/// <param name="columnName">Name of the column to look up.</param>
/// <returns>
/// The first matching purpose, checked in this order: label, weight, sampling key, categorical,
/// numeric, text, ignored, user ID, group ID, item ID, image path; <c>null</c> when nothing matches.
/// </returns>
internal static ColumnPurpose? GetColumnPurpose(this ColumnInformation columnInfo, string columnName)
{
    ColumnPurpose? purpose = null;

    // The chain order defines precedence when a name appears in more than one place.
    if (columnName == columnInfo.LabelColumnName)
    {
        purpose = ColumnPurpose.Label;
    }
    else if (columnName == columnInfo.ExampleWeightColumnName)
    {
        purpose = ColumnPurpose.Weight;
    }
    else if (columnName == columnInfo.SamplingKeyColumnName)
    {
        purpose = ColumnPurpose.SamplingKey;
    }
    else if (columnInfo.CategoricalColumnNames.Contains(columnName))
    {
        purpose = ColumnPurpose.CategoricalFeature;
    }
    else if (columnInfo.NumericColumnNames.Contains(columnName))
    {
        purpose = ColumnPurpose.NumericFeature;
    }
    else if (columnInfo.TextColumnNames.Contains(columnName))
    {
        purpose = ColumnPurpose.TextFeature;
    }
    else if (columnInfo.IgnoredColumnNames.Contains(columnName))
    {
        purpose = ColumnPurpose.Ignore;
    }
    else if (columnName == columnInfo.UserIdColumnName)
    {
        purpose = ColumnPurpose.UserId;
    }
    else if (columnName == columnInfo.GroupIdColumnName)
    {
        purpose = ColumnPurpose.GroupId;
    }
    else if (columnName == columnInfo.ItemIdColumnName)
    {
        purpose = ColumnPurpose.ItemId;
    }
    else if (columnInfo.ImagePathColumnNames.Contains(columnName))
    {
        purpose = ColumnPurpose.ImagePath;
    }

    return purpose;
}
/// <summary>
/// Creates a FastTree ranking trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose ranking trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; supplies the label and group ID column names.</param>
/// <param name="validationSet">Not used by this implementation.</param>
/// <returns>The trainer returned by <c>mlContext.Ranking.Trainers.FastTree</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    var rankingOptions = TrainerExtensionUtil.CreateOptions<FastTreeRankingTrainer.Options>(sweepParams, columnInfo.LabelColumnName);

    // The row group column is set from the group ID column.
    rankingOptions.RowGroupColumnName = columnInfo.GroupIdColumnName;

    return mlContext.Ranking.Trainers.FastTree(rankingOptions);
}
/// <summary>
/// Creates an L-BFGS maximum entropy multiclass trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose multiclass trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; supplies the label and example weight column names.</param>
/// <param name="validationSet">Not used by this implementation.</param>
/// <returns>The trainer returned by <c>LbfgsMaximumEntropy</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    var lbfgsOptions = TrainerExtensionUtil.CreateOptions<LbfgsMaximumEntropyMulticlassTrainer.Options>(sweepParams, columnInfo.LabelColumnName);

    // Carry the example weight column over to the trainer options.
    lbfgsOptions.ExampleWeightColumnName = columnInfo.ExampleWeightColumnName;

    return mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(lbfgsOptions);
}
/// <summary>
/// Creates a LightGBM ranking trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose ranking trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateLightGbmOptions</c>.</param>
/// <param name="columnInfo">Column information; forwarded to the options factory and used for the group ID column name.</param>
/// <param name="validationSet">Not used by this implementation.</param>
/// <returns>The trainer returned by <c>mlContext.Ranking.Trainers.LightGbm</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    LightGbmRankingTrainer.Options rankingOptions =
        TrainerExtensionUtil.CreateLightGbmOptions<
            LightGbmRankingTrainer.Options,
            float,
            RankingPredictionTransformer<LightGbmRankingModelParameters>,
            LightGbmRankingModelParameters>(sweepParams, columnInfo);

    // The row group column is set from the group ID column.
    rankingOptions.RowGroupColumnName = columnInfo.GroupIdColumnName;

    return mlContext.Ranking.Trainers.LightGbm(rankingOptions);
}
/// <summary>
/// Creates a FastTree binary classification trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose binary classification trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; supplies the label and example weight column names.</param>
/// <param name="validationSet">Not used by this implementation.</param>
/// <returns>The trainer returned by <c>mlContext.BinaryClassification.Trainers.FastTree</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    var fastTreeOptions = TrainerExtensionUtil.CreateOptions<FastTreeBinaryTrainer.Options>(sweepParams, columnInfo.LabelColumnName);

    // Carry the example weight column over to the trainer options.
    fastTreeOptions.ExampleWeightColumnName = columnInfo.ExampleWeightColumnName;

    return mlContext.BinaryClassification.Trainers.FastTree(fastTreeOptions);
}
/// <summary>
/// Creates an L-BFGS Poisson regression trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose regression trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; supplies the label and example weight column names.</param>
/// <param name="validationSet">Not used by this implementation.</param>
/// <returns>The trainer returned by <c>LbfgsPoissonRegression</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    var poissonOptions = TrainerExtensionUtil.CreateOptions<LbfgsPoissonRegressionTrainer.Options>(sweepParams, columnInfo.LabelColumnName);

    // Carry the example weight column over to the trainer options.
    poissonOptions.ExampleWeightColumnName = columnInfo.ExampleWeightColumnName;

    return mlContext.Regression.Trainers.LbfgsPoissonRegression(poissonOptions);
}
/// <summary>
/// Creates a calibrated SGD binary classification trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose binary classification trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; supplies the label and example weight column names.</param>
/// <returns>The trainer returned by <c>SgdCalibrated</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo)
{
    var sgdOptions = TrainerExtensionUtil.CreateOptions<SgdCalibratedTrainer.Options>(sweepParams, columnInfo.LabelColumnName);

    // Carry the example weight column over to the trainer options.
    sgdOptions.ExampleWeightColumnName = columnInfo.ExampleWeightColumnName;

    return mlContext.BinaryClassification.Trainers.SgdCalibrated(sgdOptions);
}
/// <summary>
/// Creates a FastTree Tweedie regression trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose regression trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; supplies the label and example weight column names.</param>
/// <returns>The trainer returned by <c>FastTreeTweedie</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo)
{
    var tweedieOptions = TrainerExtensionUtil.CreateOptions<FastTreeTweedieTrainer.Options>(sweepParams, columnInfo.LabelColumnName);

    // Carry the example weight column over to the trainer options.
    tweedieOptions.ExampleWeightColumnName = columnInfo.ExampleWeightColumnName;

    return mlContext.Regression.Trainers.FastTreeTweedie(tweedieOptions);
}
/// <summary>
/// Executes an AutoML experiment.
/// </summary>
/// <param name="trainData">The training data to be used by the AutoML experiment.</param>
/// <param name="validationData">The validation data to be used by the AutoML experiment.</param>
/// <param name="labelColumnName">The name of the label column.</param>
/// <param name="preFeaturizer">Pre-featurizer that AutoML will apply to the data during an
/// experiment. (The pre-featurizer is fit only on the training data split to produce a
/// trained transform, which is then applied to both the training data split and the
/// corresponding validation data split.)</param>
/// <param name="progressHandler">A user-defined object that implements
/// the <see cref="IProgress{T}"/> interface. AutoML will invoke the method
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>The experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// This overload wraps <paramref name="labelColumnName"/> in a <see cref="ColumnInformation"/>
/// and forwards to the overload that accepts column information.
/// </remarks>
public ExperimentResult<TMetrics> Execute(IDataView trainData, IDataView validationData, string labelColumnName = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
{
    var labelOnlyColumnInfo = new ColumnInformation();
    labelOnlyColumnInfo.LabelColumnName = labelColumnName;

    return Execute(trainData, validationData, labelOnlyColumnInfo, preFeaturizer, progressHandler);
}
/// <summary>
/// Executes a binary classification experiment on an explicit train/validation split,
/// identifying the label column by name.
/// </summary>
/// <param name="trainData">The training data.</param>
/// <param name="validationData">The validation data.</param>
/// <param name="labelColumnName">The name of the label column.</param>
/// <param name="preFeaturizer">Optional pre-featurizer forwarded to the column-information overload.</param>
/// <param name="progressHandler">Optional progress handler forwarded to the column-information overload.</param>
/// <returns>The result of the column-information overload of <c>Execute</c>.</returns>
public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData, IDataView validationData, string labelColumnName = "Label", IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<BinaryClassificationMetrics>> progressHandler = null)
{
    var labelOnlyColumnInfo = new ColumnInformation();
    labelOnlyColumnInfo.LabelColumnName = labelColumnName;

    return this.Execute(trainData, validationData, labelOnlyColumnInfo, preFeaturizer, progressHandler);
}
/// <summary>
/// Executes an AutoML experiment.
/// </summary>
/// <param name="trainData">The training data to be used by the AutoML experiment.</param>
/// <param name="numberOfCVFolds">The number of cross validation folds into which the training data should be divided when fitting a model.</param>
/// <param name="columnInformation">Column information for the dataset.</param>
/// <param name="preFeaturizer">Pre-featurizer that AutoML will apply to the data during an
/// experiment. (The pre-featurizer is fit only on the training data split to produce a
/// trained transform, which is then applied to both the training data split and the
/// corresponding validation data split.)</param>
/// <param name="progressHandler">A user-defined object that implements
/// the <see cref="IProgress{T}"/> interface. AutoML will invoke the method
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>The cross validation experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public CrossValidationExperimentResult<TMetrics> Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizer = null, IProgress<CrossValidationRunDetail<TMetrics>> progressHandler = null)
{
    UserInputValidationUtil.ValidateNumberOfCVFoldsArg(numberOfCVFolds);

    // columnInformation may be null here; in that case no sampling key column is passed to the splitter.
    var samplingKeyColumn = columnInformation?.SamplingKeyColumnName;
    var folds = SplitUtil.CrossValSplit(Context, trainData, numberOfCVFolds, samplingKeyColumn);

    return ExecuteCrossVal(
        folds.trainDatasets,
        columnInformation,
        folds.validationDatasets,
        preFeaturizer,
        progressHandler);
}
/// <summary>
/// Creates a matrix factorization recommendation trainer from the supplied sweep parameters.
/// </summary>
/// <param name="mlContext">Context whose recommendation trainer catalog builds the trainer.</param>
/// <param name="sweepParams">Sweep parameters forwarded to <c>TrainerExtensionUtil.CreateOptions</c>.</param>
/// <param name="columnInfo">Column information; supplies the label, user ID, and item ID column names.</param>
/// <param name="validationSet">Not used by this implementation.</param>
/// <returns>The trainer returned by <c>MatrixFactorization</c>.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    var options = TrainerExtensionUtil.CreateOptions<MatrixFactorizationTrainer.Options>(sweepParams);

    // The label column is assigned explicitly because this CreateOptions call takes only the sweep parameters.
    options.LabelColumnName = columnInfo.LabelColumnName;

    // User IDs index the matrix columns and item IDs index the matrix rows.
    options.MatrixColumnIndexColumnName = columnInfo.UserIdColumnName;
    options.MatrixRowIndexColumnName = columnInfo.ItemIdColumnName;

    return mlContext.Recommendation().Trainers.MatrixFactorization(options);
}
/// <summary>
/// Executes an AutoML experiment.
/// </summary>
/// <param name="trainData">The training data to be used by the AutoML experiment.</param>
/// <param name="validationData">The validation data to be used by the AutoML experiment.</param>
/// <param name="columnInformation">Column information for the dataset.</param>
/// <param name="preFeaturizer">Pre-featurizer that AutoML will apply to the data during an
/// experiment. (The pre-featurizer is fit only on the training data split to produce a
/// trained transform, which is then applied to both the training data split and the
/// corresponding validation data split.)</param>
/// <param name="progressHandler">A user-defined object that implements
/// the <see cref="IProgress{T}"/> interface. AutoML will invoke the method
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>The experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// When <paramref name="validationData"/> is <see langword="null"/>, the call is forwarded to the
/// overload that takes no validation data.
/// </remarks>
public virtual ExperimentResult<TMetrics> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
{
    if (validationData != null)
    {
        return ExecuteTrainValidate(trainData, columnInformation, validationData, preFeaturizer, progressHandler);
    }

    // No validation set supplied: fall back to the overload without one.
    return Execute(trainData, columnInformation, preFeaturizer, progressHandler);
}
/// <summary>
/// Infers column information for the dataset file at <paramref name="path"/>, identifying the
/// label column by name.
/// </summary>
/// <param name="context">The ML context forwarded to the column-information overload.</param>
/// <param name="path">Path to the dataset file.</param>
/// <param name="labelColumn">Name assigned to <c>ColumnInformation.LabelColumnName</c>.</param>
/// <param name="separatorChar">Separator character, forwarded unchanged.</param>
/// <param name="allowQuotedStrings">Quoting flag, forwarded unchanged.</param>
/// <param name="supportSparse">Sparse-format flag, forwarded unchanged.</param>
/// <param name="trimWhitespace">Whitespace-trimming flag, forwarded unchanged.</param>
/// <param name="groupColumns">Column-grouping flag, forwarded unchanged.</param>
/// <returns>The result of the column-information overload of <c>InferColumns</c>.</returns>
public static ColumnInferenceResults InferColumns(MLContext context, string path, string labelColumn, char? separatorChar, bool? allowQuotedStrings, bool? supportSparse, bool trimWhitespace, bool groupColumns)
{
    var labelOnlyColumnInfo = new ColumnInformation();
    labelOnlyColumnInfo.LabelColumnName = labelColumn;

    return InferColumns(
        context,
        path,
        labelOnlyColumnInfo,
        separatorChar,
        allowQuotedStrings,
        supportSparse,
        trimWhitespace,
        groupColumns);
}
/// <summary>
/// Executes a binary classification cross-validation experiment, identifying the label and
/// sampling key columns by name.
/// </summary>
/// <param name="trainData">The training data.</param>
/// <param name="numberOfCVFolds">Number of cross-validation folds.</param>
/// <param name="labelColumnName">The name of the label column.</param>
/// <param name="samplingKeyColumn">The name of the sampling key column, or null.</param>
/// <param name="preFeaturizer">Optional pre-featurizer forwarded to the column-information overload.</param>
/// <param name="progressHandler">Optional progress handler forwarded to the column-information overload.</param>
/// <returns>The result of the column-information overload of <c>Execute</c>.</returns>
public override CrossValidationExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData, uint numberOfCVFolds, string labelColumnName = "Label", string samplingKeyColumn = null, IEstimator<ITransformer> preFeaturizer = null, IProgress<CrossValidationRunDetail<BinaryClassificationMetrics>> progressHandler = null)
{
    var namedColumnInfo = new ColumnInformation();
    namedColumnInfo.LabelColumnName = labelColumnName;
    namedColumnInfo.SamplingKeyColumnName = samplingKeyColumn;

    return this.Execute(trainData, numberOfCVFolds, namedColumnInfo, preFeaturizer, progressHandler);
}
/// <summary>
/// Builds a <c>ColumnInformation</c> from a property bag.
/// </summary>
/// <param name="props">
/// Property bag. The <c>LabelColumn</c> key is read through the indexer (so it must be present);
/// the <c>WeightColumn</c> key is optional.
/// </param>
/// <returns>
/// Column information whose label and example weight column names are the corresponding
/// property values when they are strings, and null otherwise.
/// </returns>
public static ColumnInformation BuildColumnInfo(IDictionary<string, object> props)
{
    var result = new ColumnInformation();

    // Indexer access: a missing label entry surfaces as the dictionary's own exception.
    var labelValue = props[LabelColumn];
    result.LabelColumnName = labelValue as string;

    // Weight is optional; a missing key leaves weightValue null.
    object weightValue;
    props.TryGetValue(WeightColumn, out weightValue);
    result.ExampleWeightColumnName = weightValue as string;

    return result;
}
/// <summary>
/// Initializes a suggested trainer from a trainer extension and optional hyperparameter values.
/// </summary>
/// <param name="mlContext">ML context stored for later use.</param>
/// <param name="trainerExtension">Extension that supplies the sweep ranges and the trainer name.</param>
/// <param name="columnInfo">Column information stored for later use.</param>
/// <param name="hyperParamSet">Optional hyperparameter values handed to <c>SetHyperparamValues</c>.</param>
internal SuggestedTrainer(MLContext mlContext, ITrainerExtension trainerExtension, ColumnInformation columnInfo, ParameterSet hyperParamSet = null)
{
    // Capture constructor inputs.
    _mlContext = mlContext;
    _trainerExtension = trainerExtension;
    _columnInfo = columnInfo;

    // Derive sweep ranges and name from the extension.
    SweepParams = _trainerExtension.GetHyperparamSweepRanges();
    TrainerName = TrainerExtensionCatalog.GetTrainerName(_trainerExtension);

    // Runs last, after SweepParams has been assigned.
    SetHyperparamValues(hyperParamSet);
}