/// <summary> /// Performs train-test on a pipeline. /// </summary> /// <typeparam name="TInput">Class type that represents input schema.</typeparam> /// <typeparam name="TOutput">Class type that represents prediction schema.</typeparam> /// <param name="pipeline">Machine learning pipeline that contains <see cref="ILearningPipelineLoader"/>, /// transforms and at least one trainer.</param> /// <param name="testData"><see cref="ILearningPipelineLoader"/> that represents the test dataset.</param> /// <returns>Metrics and predictor model.</returns> public TrainTestEvaluatorOutput <TInput, TOutput> TrainTestEvaluate <TInput, TOutput>(LearningPipeline pipeline, ILearningPipelineLoader testData) where TInput : class where TOutput : class, new() { using (var environment = new TlcEnvironment()) { Experiment subGraph = environment.CreateExperiment(); ILearningPipelineStep step = null; List <ILearningPipelineLoader> loaders = new List <ILearningPipelineLoader>(); List <Var <ITransformModel> > transformModels = new List <Var <ITransformModel> >(); Var <ITransformModel> lastTransformModel = null; Var <IDataView> firstPipelineDataStep = null; Var <IPredictorModel> firstModel = null; ILearningPipelineItem firstTransform = null; foreach (ILearningPipelineItem currentItem in pipeline) { if (currentItem is ILearningPipelineLoader loader) { loaders.Add(loader); continue; } step = currentItem.ApplyStep(step, subGraph); if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null) { transformModels.Add(dataStep.Model); if (firstPipelineDataStep == null) { firstPipelineDataStep = dataStep.Data; firstTransform = currentItem; } }
/// <summary> /// Performs cross validation on a pipeline. /// </summary> /// <typeparam name="TInput">Class type that represents input schema.</typeparam> /// <typeparam name="TOutput">Class type that represents prediction schema.</typeparam> /// <param name="pipeline">Machine learning pipeline may contain loader, transforms and at least one trainer.</param> /// <returns>List containing metrics and predictor model for each fold</returns> public CrossValidationOutput <TInput, TOutput> CrossValidate <TInput, TOutput>(LearningPipeline pipeline) where TInput : class where TOutput : class, new() { var environment = new MLContext(); { Experiment subGraph = environment.CreateExperiment(); ILearningPipelineStep step = null; List <ILearningPipelineLoader> loaders = new List <ILearningPipelineLoader>(); List <Var <TransformModel> > transformModels = new List <Var <TransformModel> >(); Var <TransformModel> lastTransformModel = null; Var <IDataView> firstPipelineDataStep = null; Var <PredictorModel> firstModel = null; ILearningPipelineItem firstTransform = null; foreach (ILearningPipelineItem currentItem in pipeline) { if (currentItem is ILearningPipelineLoader loader) { loaders.Add(loader); continue; } step = currentItem.ApplyStep(step, subGraph); if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null) { transformModels.Add(dataStep.Model); if (firstPipelineDataStep == null) { firstPipelineDataStep = dataStep.Data; firstTransform = currentItem; } }
/// <summary>
/// Adds this in-memory collection data source to the experiment graph as the
/// pipeline's first step.
/// </summary>
/// <param name="previousStep">Must be null: a data source can only start a pipeline.</param>
/// <param name="experiment">The experiment graph to register the data view with.</param>
/// <returns>A pipeline step wrapping the imported data output port.</returns>
public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
{
    // A loader is always the head of the pipeline; nothing may precede it.
    Contracts.Assert(previousStep == null);

    _dataViewEntryPoint = new Data.DataViewReference();
    var dataViewOutput = experiment.Add(_dataViewEntryPoint);
    return new CollectionDataSourcePipelineStep(dataViewOutput.Data);
}
/// <summary>
/// Computes the quality metrics for the multi-class classification PredictionModel
/// using the specified data set.
/// </summary>
/// <param name="model">
/// The trained multi-class classification PredictionModel to be evaluated.
/// </param>
/// <param name="testData">
/// The test data that will be predicted and used to evaluate the model.
/// </param>
/// <returns>
/// A ClassificationMetrics instance that describes how well the model performed against the test data.
/// </returns>
public ClassificationMetrics Evaluate(PredictionModel model, ILearningPipelineLoader testData)
{
    var environment = new MLContext();
    environment.CheckValue(model, nameof(model));
    environment.CheckValue(testData, nameof(testData));

    Experiment experiment = environment.CreateExperiment();

    // The loader must yield a data step; anything else cannot feed the scorer.
    ILearningPipelineStep loaderStep = testData.ApplyStep(previousStep: null, experiment);
    var testDataOutput = loaderStep as ILearningPipelineDataStep;
    if (testDataOutput == null)
    {
        throw environment.Except($"The {nameof(ILearningPipelineLoader)} did not return a {nameof(ILearningPipelineDataStep)} from ApplyStep.");
    }

    // Score the test data with the trained model's transform, then feed the
    // scored data into this evaluator node.
    var scorer = new DatasetTransformScorer { Data = testDataOutput.Data, };
    DatasetTransformScorer.Output scoreOutput = experiment.Add(scorer);
    Data = scoreOutput.ScoredData;
    Output evaluationOutput = experiment.Add(this);

    experiment.Compile();
    experiment.SetInput(scorer.TransformModel, model.PredictorModel);
    testData.SetInput(environment, experiment);
    experiment.Run();

    IDataView overallMetrics = experiment.GetOutput(evaluationOutput.OverallMetrics);
    if (overallMetrics == null)
    {
        throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(ClassificationEvaluator)} Evaluate.");
    }

    IDataView confusionMatrix = experiment.GetOutput(evaluationOutput.ConfusionMatrix);
    if (confusionMatrix == null)
    {
        throw environment.Except($"Could not find ConfusionMatrix in the results returned in {nameof(ClassificationEvaluator)} Evaluate.");
    }

    // Exactly one metric set is expected for a single evaluation run.
    var metrics = ClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix);
    if (metrics.Count != 1)
    {
        throw environment.Except($"Exactly one metric set was expected but found {metrics.Count} metrics");
    }
    return metrics[0];
}
/// <summary>
/// Adds this text-file data source to the experiment graph as the pipeline's
/// first step.
/// </summary>
/// <param name="previousStep">Must be null: a loader can only start a pipeline.</param>
/// <param name="experiment">The experiment graph to register the text loader with.</param>
/// <returns>A pipeline step wrapping the loaded data output port.</returns>
public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
{
    // Loaders are always the head of the pipeline.
    Contracts.Assert(previousStep == null);

    ImportTextInput = new Data.TextLoader();
    // Carry the user-supplied schema through to the entry-point input.
    ImportTextInput.CustomSchema = CustomSchema;

    var loaderOutput = experiment.Add(ImportTextInput);
    return new TextLoaderPipelineStep(loaderOutput.Data);
}
/// <summary>
/// Wires this XGBoost binary trainer into the experiment graph, consuming the
/// data produced by the previous pipeline step (if any) as training data.
/// </summary>
/// <param name="previousStep">The preceding pipeline step; must be a data step when non-null.</param>
/// <param name="experiment">The experiment graph to add the trainer node to.</param>
/// <returns>A pipeline step wrapping the trainer's output.</returns>
public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
{
    if (previousStep != null)
    {
        // Only a data-producing step can feed a trainer.
        var dataStep = previousStep as ILearningPipelineDataStep;
        if (dataStep == null)
        {
            throw new InvalidOperationException($"{ nameof(XGBoostBinary)} only supports an { nameof(ILearningPipelineDataStep)} as an input.");
        }
        TrainingData = dataStep.Data;
    }

    Output output = EntryPointXGBoostBinary.Add(experiment, this);
    return new XGBoostBinaryPipelineStep(output);
}
/// <summary>
/// Computes the quality metrics for the PredictionModel using the specified data set.
/// </summary>
/// <param name="model">
/// The trained PredictionModel to be evaluated.
/// </param>
/// <param name="testData">
/// The test data that will be predicted and used to evaluate the model.
/// </param>
/// <returns>
/// A RegressionMetrics instance that describes how well the model performed against the test data.
/// </returns>
public RegressionMetrics Evaluate(PredictionModel model, ILearningPipelineLoader testData)
{
    using (var environment = new TlcEnvironment())
    {
        environment.CheckValue(model, nameof(model));
        environment.CheckValue(testData, nameof(testData));

        Experiment experiment = environment.CreateExperiment();

        // The loader must yield a data step; anything else cannot feed the scorer.
        ILearningPipelineStep testDataStep = testData.ApplyStep(previousStep: null, experiment);
        if (!(testDataStep is ILearningPipelineDataStep testDataOutput))
        {
            throw environment.Except($"The {nameof(ILearningPipelineLoader)} did not return a {nameof(ILearningPipelineDataStep)} from ApplyStep.");
        }

        // Score the test data with the trained model's transform, then feed the
        // scored data into this evaluator node.
        var datasetScorer = new DatasetTransformScorer
        {
            Data = testDataOutput.Data,
        };
        DatasetTransformScorer.Output scoreOutput = experiment.Add(datasetScorer);

        Data = scoreOutput.ScoredData;
        Output evaluateOutput = experiment.Add(this);

        experiment.Compile();
        experiment.SetInput(datasetScorer.TransformModel, model.PredictorModel);
        testData.SetInput(environment, experiment);
        experiment.Run();

        IDataView overallMetrics = experiment.GetOutput(evaluateOutput.OverallMetrics);
        if (overallMetrics == null)
        {
            throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(RegressionEvaluator)} Evaluate.");
        }

        return RegressionMetrics.FromOverallMetrics(environment, overallMetrics);
    }
}
/// <summary> /// Train the model using the ML components in the pipeline. /// </summary> public ExtendedPredictionModel Train() { using (var environment = new TlcEnvironment(seed: _seed, conc: _conc)) { Experiment experiment = environment.CreateExperiment(); ILearningPipelineStep step = null; List <ILearningPipelineLoader> loaders = new List <ILearningPipelineLoader>(); List <Var <ITransformModel> > transformModels = new List <Var <ITransformModel> >(); Var <ITransformModel> lastTransformModel = null; foreach (ILearningPipelineItem currentItem in this) { if (currentItem is ILearningPipelineLoader loader) { loaders.Add(loader); } step = currentItem.ApplyStep(step, experiment); if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null) { transformModels.Add(dataStep.Model); }
/// <summary>
/// Wires a One-Versus-All meta-trainer node into the experiment graph, using a
/// sub-graph that contains the underlying binary trainer.
/// </summary>
/// <param name="previousStep">The preceding pipeline step; must be a data step when non-null.</param>
/// <param name="experiment">The experiment graph to add the OVA node to.</param>
/// <returns>A pipeline step wrapping the OVA trainer's output.</returns>
public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
{
    // NOTE(review): the sub-graph is built in a temporary environment that is
    // disposed when this method returns, while the outer experiment keeps a
    // reference to the sub-graph — confirm this lifetime is intended.
    using (var env = new TlcEnvironment())
    {
        var subgraph = env.CreateExperiment();
        subgraph.Add(_trainer);
        var ova = new OneVersusAll();

        if (previousStep != null)
        {
            // Only a data-producing step can feed the OVA trainer.
            var dataStep = previousStep as ILearningPipelineDataStep;
            if (dataStep == null)
            {
                throw new InvalidOperationException($"{ nameof(OneVersusAll)} only supports an { nameof(ILearningPipelineDataStep)} as an input.");
            }

            _data = dataStep.Data;
            ova.TrainingData = dataStep.Data;
            ova.UseProbabilities = _useProbabilities;
            ova.Nodes = subgraph;
        }

        Output output = experiment.Add(ova);
        return new OvaPipelineStep(output);
    }
}
/// <summary> /// Train the model using the ML components in the pipeline. /// </summary> /// <typeparam name="TInput">Type of data instances the model will be trained on. It's a custom type defined by the user according to the structure of data. /// <para/> /// Please see https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet/get-started/windows for more details on input type. /// </typeparam> /// <typeparam name="TOutput">Ouput type. The prediction will be return based on this type. /// Please see https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet/get-started/windows for more details on output type. /// </typeparam> /// <returns>PredictionModel object. This is the model object used for prediction on new instances. </returns> public PredictionModel <TInput, TOutput> Train <TInput, TOutput>() where TInput : class where TOutput : class, new() { var environment = new MLContext(seed: _seed, conc: _conc); Experiment experiment = environment.CreateExperiment(); ILearningPipelineStep step = null; List <ILearningPipelineLoader> loaders = new List <ILearningPipelineLoader>(); List <Var <ITransformModel> > transformModels = new List <Var <ITransformModel> >(); Var <ITransformModel> lastTransformModel = null; foreach (ILearningPipelineItem currentItem in this) { if (currentItem is ILearningPipelineLoader loader) { loaders.Add(loader); } step = currentItem.ApplyStep(step, experiment); if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null) { transformModels.Add(dataStep.Model); }