/// <summary>
/// Computes the quality metrics for the multi-class classification PredictionModel
/// using the specified data set.
/// </summary>
/// <param name="model">
/// The trained multi-class classification PredictionModel to be evaluated.
/// </param>
/// <param name="testData">
/// The test data that will be predicted and used to evaluate the model.
/// </param>
/// <returns>
/// A ClassificationMetrics instance that describes how well the model performed against the test data.
/// </returns>
public ClassificationMetrics Evaluate(PredictionModel model, ILearningPipelineLoader testData)
{
    var environment = new MLContext();
    environment.CheckValue(model, nameof(model));
    environment.CheckValue(testData, nameof(testData));

    Experiment experiment = environment.CreateExperiment();

    // The loader forms the root of the experiment graph and must expose a data output.
    ILearningPipelineStep loaderStep = testData.ApplyStep(previousStep: null, experiment);
    var dataStep = loaderStep as ILearningPipelineDataStep;
    if (dataStep == null)
    {
        throw environment.Except($"The {nameof(ILearningPipelineLoader)} did not return a {nameof(ILearningPipelineDataStep)} from ApplyStep.");
    }

    // Score the test data with the trained model, then feed the scored data into this evaluator.
    var scorer = new DatasetTransformScorer { Data = dataStep.Data, };
    DatasetTransformScorer.Output scorerOutput = experiment.Add(scorer);
    Data = scorerOutput.ScoredData;
    Output evaluatorOutput = experiment.Add(this);

    experiment.Compile();
    experiment.SetInput(scorer.TransformModel, model.PredictorModel);
    testData.SetInput(environment, experiment);
    experiment.Run();

    IDataView overallMetrics = experiment.GetOutput(evaluatorOutput.OverallMetrics);
    if (overallMetrics == null)
    {
        throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(ClassificationEvaluator)} Evaluate.");
    }

    IDataView confusionMatrix = experiment.GetOutput(evaluatorOutput.ConfusionMatrix);
    if (confusionMatrix == null)
    {
        throw environment.Except($"Could not find ConfusionMatrix in the results returned in {nameof(ClassificationEvaluator)} Evaluate.");
    }

    // Exactly one metric set is expected for a single evaluation run.
    var metric = ClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix);
    if (metric.Count != 1)
    {
        throw environment.Except($"Exactly one metric set was expected but found {metric.Count} metrics");
    }
    return metric[0];
}
/// <summary>
/// Adds the wrapped transform entry point (and its optional sub-trainer) to the experiment
/// and returns the transformed data paired with the resulting transform model.
/// </summary>
public DataAndModel<ITransformModel> Add(Experiment experiment)
{
    // If the entry point can host a predictor and a sub-trainer is attached, add the
    // sub-trainer to the experiment and wire its predictor model into the featurizer input.
    if (_entryPointObj is CommonInputs.IFeaturizerInput featurizerInput && _subTrainerObj != null)
    {
        featurizerInput.PredictorModel = experiment.Add(_subTrainerObj).PredictorModel;
    }

    var entryPointOutput = experiment.Add(_entryPointObj);
    return new DataAndModel<ITransformModel>(entryPointOutput.OutputData, entryPointOutput.Model);
}
/// <summary>
/// Builds and trains a house-price regression model over the CSV data at
/// <paramref name="dataPath"/> and returns the trained scoring transform.
/// </summary>
private static ITransformModel CreateKcHousePricePredictorModel(string dataPath)
{
    // Column schema for the KC house data file (comma separated, with header row).
    var dataSchema = "col=Id:TX:0 col=Date:TX:1 col=Label:R4:2 col=Bedrooms:R4:3 col=Bathrooms:R4:4 col=SqftLiving:R4:5 col=SqftLot:R4:6 col=Floors:R4:7 col=Waterfront:R4:8 col=View:R4:9 col=Condition:R4:10 col=Grade:R4:11 col=SqftAbove:R4:12 col=SqftBasement:R4:13 col=YearBuilt:R4:14 col=YearRenovated:R4:15 col=Zipcode:R4:16 col=Lat:R4:17 col=Long:R4:18 col=SqftLiving15:R4:19 col=SqftLot15:R4:20 header+ sep=,";

    Experiment experiment = s_environment.CreateExperiment();

    // Load the raw text data.
    var importData = new Data.TextLoader { CustomSchema = dataSchema };
    Data.TextLoader.Output imported = experiment.Add(importData);

    // Gather the numeric columns into a single vector column.
    var numericalConcatenate = new Transforms.ColumnConcatenator { Data = imported.Data };
    numericalConcatenate.AddColumn("NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15");
    Transforms.ColumnConcatenator.Output numericalConcatenated = experiment.Add(numericalConcatenate);

    // Gather the categorical columns into a single vector column.
    var categoryConcatenate = new Transforms.ColumnConcatenator { Data = numericalConcatenated.OutputData };
    categoryConcatenate.AddColumn("CategoryFeatures", "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode");
    Transforms.ColumnConcatenator.Output categoryConcatenated = experiment.Add(categoryConcatenate);

    // One-hot encode the categorical vector.
    var categorize = new Transforms.CategoricalOneHotVectorizer { Data = categoryConcatenated.OutputData };
    categorize.AddColumn("CategoryFeatures");
    Transforms.CategoricalOneHotVectorizer.Output categorized = experiment.Add(categorize);

    // Combine the numeric and encoded categorical vectors into the final Features column.
    var featuresConcatenate = new Transforms.ColumnConcatenator { Data = categorized.OutputData };
    featuresConcatenate.AddColumn("Features", "NumericalFeatures", "CategoryFeatures");
    Transforms.ColumnConcatenator.Output featuresConcatenated = experiment.Add(featuresConcatenate);

    // Train an SDCA regressor on the assembled features.
    var learner = new Trainers.StochasticDualCoordinateAscentRegressor
    {
        TrainingData = featuresConcatenated.OutputData,
        NumThreads = 1
    };
    Trainers.StochasticDualCoordinateAscentRegressor.Output learnerOutput = experiment.Add(learner);

    // Fold the transform chain and the predictor into a single combined model,
    // then wrap that model in a scorer.
    var combineModels = new Transforms.ManyHeterogeneousModelCombiner
    {
        TransformModels = new ArrayVar<ITransformModel>(numericalConcatenated.Model, categoryConcatenated.Model, categorized.Model, featuresConcatenated.Model),
        PredictorModel = learnerOutput.PredictorModel
    };
    Transforms.ManyHeterogeneousModelCombiner.Output combinedModels = experiment.Add(combineModels);

    var scorer = new Transforms.Scorer { PredictorModel = combinedModels.PredictorModel };
    var scorerOutput = experiment.Add(scorer);

    // Bind the input file, run the graph, and extract the trained scoring transform.
    experiment.Compile();
    experiment.SetInput(importData.InputFile, new SimpleFileHandle(s_environment, dataPath, false, false));
    experiment.Run();

    return experiment.GetOutput(scorerOutput.ScoringTransform);
}
/// <summary>
/// Wraps this pipeline as the subgraph of a TrainTestEvaluator macro node so it can be
/// trained on <paramref name="trainData"/> and evaluated on <paramref name="testData"/>.
/// </summary>
public Legacy.Models.TrainTestEvaluator.Output AddAsTrainTest(Var <IDataView> trainData, Var <IDataView> testData, MacroUtils.TrainerKinds trainerKind, Experiment experiment = null, bool includeTrainingMetrics = false)
{
    if (experiment == null)
    {
        experiment = _env.CreateExperiment();
    }

    var graphDef = ToEntryPointGraph(experiment);

    // The macro's data input is the first node of the subgraph; the subgraph's final
    // model variable becomes the macro's predictor-model output.
    var firstInput = new Var<IDataView> { VarName = graphDef.GetSubgraphFirstNodeDataVarName(_env) };

    // TrainTestMacro
    var trainTestInput = new Legacy.Models.TrainTestEvaluator
    {
        Nodes = graphDef.Graph,
        TransformModel = null,
        Inputs = { Data = firstInput },
        Outputs = { PredictorModel = graphDef.ModelOutput },
        TrainingData = trainData,
        TestingData = testData,
        Kind = MacroUtils.TrainerKindApiValue<Legacy.Models.MacroUtilsTrainerKinds>(trainerKind),
        PipelineId = UniqueId.ToString("N"),
        IncludeTrainingMetrics = includeTrainingMetrics
    };
    return experiment.Add(trainTestInput);
}
public void PipelineSweeperNoTransforms()
{
    // Set up inputs for experiment.
    string pathData = GetDataPath("adult.train");
    string pathDataTest = GetDataPath("adult.test");
    const int numOfSampleRows = 1000;
    const string schema = "sep=, col=Features:R4:0,2,4,10-12 col=Label:R4:14 header=+";

    var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false);
#pragma warning disable 0618
    var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows);
    var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false);
    var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows);
#pragma warning restore 0618

    const int batchSize = 5;
    const int numIterations = 20;
    const int numTransformLevels = 2;
    SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.Auc);

    // Using the simple, uniform random sampling (with replacement) engine.
    PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(Env);

    // Create search object.
    var amls = new AutoInference.AutoMlMlState(Env, metric, autoMlEngine, new IterationTerminator(numIterations),
        MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer, datasetTrain, datasetTest);

    // Infer search space.
    amls.InferSearchSpace(numTransformLevels);

    // Create macro object.
    var pipelineSweepInput = new Microsoft.ML.Legacy.Models.PipelineSweeper()
    {
        BatchSize = batchSize,
    };

    var exp = new Experiment(Env);
    var output = exp.Add(pipelineSweepInput);
    exp.Compile();
    exp.SetInput(pipelineSweepInput.TrainingData, datasetTrain);
    exp.SetInput(pipelineSweepInput.TestingData, datasetTest);
    exp.SetInput(pipelineSweepInput.State, amls);
    exp.SetInput(pipelineSweepInput.CandidateOutputs, new IDataView[0]);
    exp.Run();

    // Make sure you get back an AutoMlState, that it ran for the expected number of
    // iterations, and that the best pipeline reached at least minimal performance
    // (the assertion below requires AUC above 0.8 on this dataset).
    AutoInference.AutoMlMlState amlsOut = (AutoInference.AutoMlMlState)exp.GetOutput(output.State);
    Assert.NotNull(amlsOut);
    // xUnit convention: expected value first, actual value second.
    Assert.Equal(numIterations, amlsOut.GetAllEvaluatedPipelines().Length);
    Assert.True(amlsOut.GetBestPipeline().PerformanceSummary.MetricValue > 0.8);
}
/// <summary>
/// Adds this in-memory collection as the root data node of the experiment graph.
/// </summary>
public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
{
    // A data source must be the first node in the pipeline.
    Contracts.Assert(previousStep == null);

    var dataViewRef = new Data.DataViewReference();
    _dataViewEntryPoint = dataViewRef;
    return new CollectionDataSourcePipelineStep(experiment.Add(dataViewRef).Data);
}
/// <summary>
/// Computes the quality metrics for the PredictionModel using the specified data set.
/// </summary>
/// <param name="model">
/// The trained PredictionModel to be evaluated.
/// </param>
/// <param name="testData">
/// The test data that will be predicted and used to evaluate the model.
/// </param>
/// <returns>
/// A RegressionMetrics instance that describes how well the model performed against the test data.
/// </returns>
public RegressionMetrics Evaluate(PredictionModel model, ILearningPipelineLoader testData)
{
    using (var environment = new TlcEnvironment())
    {
        environment.CheckValue(model, nameof(model));
        environment.CheckValue(testData, nameof(testData));

        Experiment experiment = environment.CreateExperiment();

        // The loader forms the root of the experiment graph and must expose a data output.
        ILearningPipelineStep loaderStep = testData.ApplyStep(previousStep: null, experiment);
        var dataStep = loaderStep as ILearningPipelineDataStep;
        if (dataStep == null)
        {
            throw environment.Except($"The {nameof(ILearningPipelineLoader)} did not return a {nameof(ILearningPipelineDataStep)} from ApplyStep.");
        }

        // Score the test data with the trained model, then feed the scored data into this evaluator.
        var scorer = new DatasetTransformScorer { Data = dataStep.Data, };
        DatasetTransformScorer.Output scorerOutput = experiment.Add(scorer);
        Data = scorerOutput.ScoredData;
        Output evaluatorOutput = experiment.Add(this);

        experiment.Compile();
        experiment.SetInput(scorer.TransformModel, model.PredictorModel);
        testData.SetInput(environment, experiment);
        experiment.Run();

        IDataView overallMetrics = experiment.GetOutput(evaluatorOutput.OverallMetrics);
        if (overallMetrics == null)
        {
            throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(RegressionEvaluator)} Evaluate.");
        }
        return RegressionMetrics.FromOverallMetrics(environment, overallMetrics);
    }
}
/// <summary>
/// <see href="https://onnx.ai/">ONNX</see> is an intermediate representation format
/// for machine learning models. It is used to make models portable such that you can
/// train a model using a toolkit and run it in another toolkit's runtime, for example,
/// you can create a model using ML.NET, export it to an ONNX-ML model file,
/// then load and run that ONNX-ML model in Windows ML, on a UWP Windows 10 app.
///
/// This API converts an ML.NET model to ONNX-ML format by inspecting the transform pipeline
/// from the end, checking for components that know how to save themselves as ONNX.
/// The first item in the transform pipeline that does not know how to save itself
/// as ONNX, is considered the "input" to the ONNX pipeline. (Ideally this would be the
/// original loader itself, but this may not be possible if the user used unsavable
/// transforms in defining the pipe.) All the columns in the source that are a type the
/// ONNX knows how to deal with will be tracked. Intermediate transformations of the
/// data appearing as new columns will appear in the output block of the ONNX, with names
/// derived from the corresponding column names. The ONNX JSON will be serialized to a
/// path defined through the Json option.
///
/// This API supports the following arguments:
/// <see cref="Onnx"/> indicates the file to write the ONNX protocol buffer file to. This is optional.
/// <see cref="Json"/> indicates the file to write the JSON representation of the ONNX model. This is optional.
/// <see cref="Name"/> indicates the name property in the ONNX model. If left unspecified, it will
/// be the extension-less name of the file specified in the onnx indicates the protocol buffer file
/// to write the ONNX representation to.
/// <see cref="Domain"/> indicates the domain name of the model. ONNX uses reverse domain name space indicators.
/// For example com.microsoft.cognitiveservices. This is a required field.
/// <see cref="InputsToDrop"/> is a string array of input column names to omit from the input mapping.
/// A common scenario might be to drop the label column, for instance, since it may not be practically
/// useful for the pipeline. Note that any columns depending on these naturally cannot be saved.
/// <see cref="OutputsToDrop"/> is similar, except for the output schema. Note that the pipeline handler
/// is currently not intelligent enough to drop intermediate calculations that produce this value: this will
/// merely omit that value from the actual output.
///
/// Transforms that can be exported to ONNX
/// 1. Concat
/// 2. KeyToVector
/// 3. NAReplace
/// 4. Normalize
/// 5. Term
/// 6. Categorical
///
/// Learners that can be exported to ONNX
/// 1. FastTree
/// 2. LightGBM
/// 3. Logistic Regression
///
/// See <a href="https://github.com/dotnet/machinelearning/blob/master/test/Microsoft.ML.Tests/OnnxTests.cs"/>
/// for an example on how to train a model and then convert that model to ONNX.
/// </summary>
/// <param name="model">Model that needs to be converted to ONNX format.</param>
public void Convert(PredictionModel model)
{
    using (var environment = new TlcEnvironment())
    {
        environment.CheckValue(model, nameof(model));

        // Build a one-node experiment around this converter entry point, bind the
        // trained model to its Model input, and run it to produce the ONNX output
        // files configured on this instance.
        Experiment experiment = environment.CreateExperiment();
        experiment.Add(this);
        experiment.Compile();
        experiment.SetInput(Model, model.PredictorModel);
        experiment.Run();
    }
}
/// <summary>
/// Adds a OneVersusAll macro node to the experiment, using the wrapped trainer as the
/// per-class subgraph, and returns the resulting pipeline step.
/// </summary>
public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
{
    using (var env = new TlcEnvironment())
    {
        // Build the trainer into a standalone subgraph for the OVA macro.
        var subgraph = env.CreateExperiment();
        subgraph.Add(_trainer);
        var ova = new OneVersusAll();
        if (previousStep != null)
        {
            // Only a data-producing step can feed the OVA trainer.
            if (!(previousStep is ILearningPipelineDataStep dataStep))
            {
                throw new InvalidOperationException($"{ nameof(OneVersusAll)} only supports an { nameof(ILearningPipelineDataStep)} as an input.");
            }
            _data = dataStep.Data;
            ova.TrainingData = dataStep.Data;
            ova.UseProbabilities = _useProbabilities;
            // NOTE(review): Nodes, TrainingData and UseProbabilities are only assigned
            // when a previous step exists — with a null previousStep the OVA node is
            // added unconfigured. Confirm whether that path is ever exercised.
            ova.Nodes = subgraph;
        }
        // NOTE(review): the TlcEnvironment that created 'subgraph' is disposed on exit
        // while 'ova.Nodes' may still reference it — verify the subgraph does not depend
        // on the disposed environment after this method returns.
        Output output = experiment.Add(ova);
        return(new OvaPipelineStep(output));
    }
}
/// <summary>
/// Entry-point macro that expands a training subgraph into a complete train/test graph:
/// the subgraph is rewired to read the macro's training data and emit its predictor model,
/// a scorer is attached for the test data (and optionally the training data), and evaluator
/// nodes of the requested kind are added to produce the metric outputs.
/// </summary>
/// <param name="env">Host environment used for node validation and error reporting.</param>
/// <param name="input">Macro arguments: the subgraph nodes, data/model bindings and options.</param>
/// <param name="node">The entry-point node that represents this macro invocation.</param>
/// <returns>The expanded list of entry-point nodes forming the train/test graph.</returns>
public static CommonOutputs.MacroOutput<Output> TrainTest(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    // Create default pipeline ID if one not given.
    input.PipelineId = input.PipelineId ?? Guid.NewGuid().ToString("N");

    // Parse the subgraph.
    var subGraphRunContext = new RunContext(env);
    var subGraphNodes = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes, node.Catalog);

    // Change the subgraph to use the training data as input.
    var varName = input.Inputs.Data.VarName;
    // Optional transform model handed in from the outer graph (e.g. by a cross-validation macro).
    VariableBinding transformModelVarName = null;
    if (input.TransformModel != null)
    {
        transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
    }
    if (!subGraphRunContext.TryGetVariable(varName, out var dataVariable))
    {
        throw env.Except($"Invalid variable name '{varName}'.");
    }
    var trainingVar = node.GetInputVariable(nameof(input.TrainingData));
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.RenameInputVariable(dataVariable.Name, trainingVar);
    }
    subGraphRunContext.RemoveVariable(dataVariable);

    // Change the subgraph to use the model variable as output.
    varName = input.Outputs.Model.VarName;
    if (!subGraphRunContext.TryGetVariable(varName, out dataVariable))
    {
        throw env.Except($"Invalid variable name '{varName}'.");
    }
    string outputVarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.RenameOutputVariable(dataVariable.Name, outputVarName);
    }
    subGraphRunContext.RemoveVariable(dataVariable);

    // Move the variables from the subcontext to the main context.
    node.Context.AddContextVariables(subGraphRunContext);

    // Change all the subgraph nodes to use the main context.
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.SetContext(node.Context);
    }

    // Testing using test data set
    var testingVar = node.GetInputVariable(nameof(input.TestingData));
    var exp = new Experiment(env);

    // Combine the predictor model with any potential transform model passed from the outer graph.
    if (transformModelVarName != null && transformModelVarName.VariableName != null)
    {
        var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner
        {
            TransformModel = { VarName = transformModelVarName.VariableName },
            PredictorModel = { VarName = outputVarName }
        };
        var modelCombineOutput = exp.Add(modelCombine);
        outputVarName = modelCombineOutput.PredictorModel.VarName;
    }

    // Add the scoring node for testing.
    var scoreNode = new ML.Transforms.DatasetScorer
    {
        Data = { VarName = testingVar.ToJson() },
        PredictorModel = { VarName = outputVarName }
    };
    var scoreNodeOutput = exp.Add(scoreNode);
    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

    // Do not double-add previous nodes.
    exp.Reset();

    // REVIEW: we need to extract the proper label column name here to pass to the evaluators.
    // This is where you would add code to do it.
    var settings = new MacroUtils.EvaluatorSettings
    {
        LabelColumn = DefaultColumnNames.Label
    };

    string outVariableName;
    if (input.IncludeTrainingMetrics)
    {
        // Add the scoring node for training.
        var scoreNodeTraining = new ML.Transforms.DatasetScorer
        {
            Data = { VarName = trainingVar.ToJson() },
            PredictorModel = { VarName = outputVarName }
        };
        var scoreNodeTrainingOutput = exp.Add(scoreNodeTraining);
        subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

        // Do not double-add previous nodes.
        exp.Reset();

        // Add the evaluator node for training.
        var evalInputOutputTraining = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings);
        var evalNodeTraining = evalInputOutputTraining.Item1;
        var evalOutputTraining = evalInputOutputTraining.Item2;
        evalNodeTraining.Data.VarName = scoreNodeTrainingOutput.ScoredData.VarName;

        // Redirect the evaluator outputs to the macro's declared training outputs, when mapped.
        if (node.OutputMap.TryGetValue(nameof(Output.TrainingWarnings), out outVariableName))
        {
            evalOutputTraining.Warnings.VarName = outVariableName;
        }
        if (node.OutputMap.TryGetValue(nameof(Output.TrainingOverallMetrics), out outVariableName))
        {
            evalOutputTraining.OverallMetrics.VarName = outVariableName;
        }
        if (node.OutputMap.TryGetValue(nameof(Output.TrainingPerInstanceMetrics), out outVariableName))
        {
            evalOutputTraining.PerInstanceMetrics.VarName = outVariableName;
        }
        // A confusion matrix is only produced by classification evaluators.
        if (node.OutputMap.TryGetValue(nameof(Output.TrainingConfusionMatrix), out outVariableName) && evalOutputTraining is CommonOutputs.IClassificationEvaluatorOutput eoTraining)
        {
            eoTraining.ConfusionMatrix.VarName = outVariableName;
        }
        exp.Add(evalNodeTraining, evalOutputTraining);
        subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
    }

    // Do not double-add previous nodes.
    exp.Reset();

    // Add the evaluator node for testing.
    var evalInputOutput = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings);
    var evalNode = evalInputOutput.Item1;
    var evalOutput = evalInputOutput.Item2;
    evalNode.Data.VarName = scoreNodeOutput.ScoredData.VarName;

    // Redirect the evaluator outputs to the macro's declared outputs, when mapped.
    if (node.OutputMap.TryGetValue(nameof(Output.Warnings), out outVariableName))
    {
        evalOutput.Warnings.VarName = outVariableName;
    }
    if (node.OutputMap.TryGetValue(nameof(Output.OverallMetrics), out outVariableName))
    {
        evalOutput.OverallMetrics.VarName = outVariableName;
    }
    if (node.OutputMap.TryGetValue(nameof(Output.PerInstanceMetrics), out outVariableName))
    {
        evalOutput.PerInstanceMetrics.VarName = outVariableName;
    }
    if (node.OutputMap.TryGetValue(nameof(Output.ConfusionMatrix), out outVariableName) && evalOutput is CommonOutputs.IClassificationEvaluatorOutput eo)
    {
        eo.ConfusionMatrix.VarName = outVariableName;
    }
    exp.Add(evalNode, evalOutput);
    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

    // Marks as an atomic unit that can be run in
    // a distributed fashion.
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.StageId = input.PipelineId;
    }
    return(new CommonOutputs.MacroOutput<Output>() { Nodes = subGraphNodes });
}
/// <summary>
/// Entry-point macro that expands into a full cross-validation graph: the input data is
/// split into folds, a TrainTestEvaluator macro node is instantiated per fold over a
/// renamed copy of the subgraph, the per-fold models and metric data views are gathered
/// into arrays, and a results-combiner node produces the final aggregated outputs.
/// </summary>
/// <param name="env">Host environment used for validation and error reporting.</param>
/// <param name="input">Macro arguments: the subgraph, fold count, column names and options.</param>
/// <param name="node">The entry-point node that represents this macro invocation.</param>
/// <returns>The expanded list of entry-point nodes forming the cross-validation graph.</returns>
public static CommonOutputs.MacroOutput<Output> CrossValidate(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    env.CheckValue(input, nameof(input));

    // This will be the final resulting list of nodes that is returned from the macro.
    var subGraphNodes = new List<EntryPointNode>();

    // The input transform model, optionally passed in from the outer graph.
    VariableBinding transformModelVarName = null;
    if (input.TransformModel != null)
    {
        transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
    }

    // Split the input data into folds.
    var exp = new Experiment(env);
    var cvSplit = new Models.CrossValidatorDatasetSplitter();
    cvSplit.Data.VarName = node.GetInputVariable("Data").ToJson();
    cvSplit.NumFolds = input.NumFolds;
    cvSplit.StratificationColumn = input.StratificationColumn;
    var cvSplitOutput = exp.Add(cvSplit);
    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

    // Per-fold output variables, filled in by the loop below.
    var predModelVars = new Var<IPredictorModel>[input.NumFolds];
    var transformModelVars = new Var<ITransformModel>[input.NumFolds];
    // NOTE(review): inputTransformModelVars appears to never be read or written — candidate for removal.
    var inputTransformModelVars = new Var<IPredictorModel>[input.NumFolds];
    var warningsVars = new Var<IDataView>[input.NumFolds];
    var overallMetricsVars = new Var<IDataView>[input.NumFolds];
    var instanceMetricsVars = new Var<IDataView>[input.NumFolds];
    var confusionMatrixVars = new Var<IDataView>[input.NumFolds];

    // Instantiate the subgraph for each fold.
    for (int k = 0; k < input.NumFolds; k++)
    {
        // Parse the nodes in input.Nodes into a temporary run context.
        var context = new RunContext(env);
        var graph = EntryPointNode.ValidateNodes(env, context, input.Nodes, node.Catalog);

        // Rename all the variables such that they don't conflict with the ones in the outer run context.
        var mapping = new Dictionary<string, string>();
        foreach (var entryPointNode in graph)
        {
            entryPointNode.RenameAllVariables(mapping);
        }

        // Instantiate a TrainTest entry point for this fold.
        var args = new TrainTestMacro.Arguments
        {
            Nodes = new JArray(graph.Select(n => n.ToJson()).ToArray()),
            TransformModel = null,
            LabelColumn = input.LabelColumn,
            GroupColumn = input.GroupColumn,
            WeightColumn = input.WeightColumn
        };
        if (transformModelVarName != null)
        {
            args.TransformModel = new Var<ITransformModel> { VarName = transformModelVarName.VariableName };
        }
        args.Inputs.Data = new Var<IDataView>
        {
            VarName = mapping[input.Inputs.Data.VarName]
        };
        // Forward the requested model outputs through the per-fold renaming map, when present.
        if (input.Outputs.PredictorModel != null && mapping.ContainsKey(input.Outputs.PredictorModel.VarName))
        {
            args.Outputs.PredictorModel = new Var<IPredictorModel>
            {
                VarName = mapping[input.Outputs.PredictorModel.VarName]
            };
        }
        else
        {
            args.Outputs.PredictorModel = null;
        }
        if (input.Outputs.TransformModel != null && mapping.ContainsKey(input.Outputs.TransformModel.VarName))
        {
            args.Outputs.TransformModel = new Var<ITransformModel>
            {
                VarName = mapping[input.Outputs.TransformModel.VarName]
            };
        }
        else
        {
            args.Outputs.TransformModel = null;
        }

        // Set train/test trainer kind to match.
        args.Kind = input.Kind;

        // Set the input bindings for the TrainTest entry point.
        var inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
        var inputMap = new Dictionary<ParameterBinding, VariableBinding>();
        var trainingData = new SimpleParameterBinding(nameof(args.TrainingData));
        inputBindingMap.Add(nameof(args.TrainingData), new List<ParameterBinding> { trainingData });
        inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k));
        var testingData = new SimpleParameterBinding(nameof(args.TestingData));
        inputBindingMap.Add(nameof(args.TestingData), new List<ParameterBinding> { testingData });
        inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k));

        var outputMap = new Dictionary<string, string>();
        var transformModelVar = new Var<ITransformModel>();
        var predModelVar = new Var<IPredictorModel>();
        if (input.Outputs.PredictorModel == null)
        {
            // No predictor model requested: surface a transform model per fold, combined
            // with the outer transform model when one was supplied.
            outputMap.Add(nameof(TrainTestMacro.Output.TransformModel), transformModelVar.VarName);
            transformModelVars[k] = transformModelVar;
            ML.Transforms.ModelCombiner.Output modelCombineOutput = null;
            if (transformModelVarName != null && transformModelVarName.VariableName != null)
            {
                var modelCombine = new ML.Transforms.ModelCombiner
                {
                    Models = new ArrayVar<ITransformModel>(
                        new Var<ITransformModel>[] {
                            new Var<ITransformModel> { VarName = transformModelVarName.VariableName },
                            transformModelVar
                        })
                };
                exp.Reset();
                modelCombineOutput = exp.Add(modelCombine);
                subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                transformModelVars[k] = modelCombineOutput.OutputModel;
            }
        }
        else
        {
            // Predictor model requested: surface it per fold, combined with the outer
            // transform model when one was supplied.
            outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName);
            predModelVars[k] = predModelVar;
            ML.Transforms.TwoHeterogeneousModelCombiner.Output modelCombineOutput = null;
            if (transformModelVarName != null && transformModelVarName.VariableName != null)
            {
                var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner
                {
                    TransformModel = { VarName = transformModelVarName.VariableName },
                    PredictorModel = predModelVar
                };
                exp.Reset();
                modelCombineOutput = exp.Add(modelCombine);
                subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                predModelVars[k] = modelCombineOutput.PredictorModel;
            }
        }

        // Map the per-fold metric outputs of the TrainTest node into fresh variables.
        var warningVar = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName);
        warningsVars[k] = warningVar;
        var overallMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName);
        overallMetricsVars[k] = overallMetric;
        var instanceMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
        instanceMetricsVars[k] = instanceMetric;
        var confusionMatrix = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
        confusionMatrixVars[k] = confusionMatrix;

        const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator";
        subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Catalog, node.Context, inputBindingMap, inputMap, outputMap));
    }

    exp.Reset();

    // Convert predictors from all folds into an array of predictors.
    if (input.Outputs.PredictorModel == null)
    {
        var outModels = new ML.Data.TransformModelArrayConverter
        {
            TransformModel = new ArrayVar<ITransformModel>(transformModelVars)
        };
        var outModelsOutput = new ML.Data.TransformModelArrayConverter.Output();
        outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.TransformModel));
        exp.Add(outModels, outModelsOutput);
    }
    else
    {
        var outModels = new ML.Data.PredictorModelArrayConverter
        {
            Model = new ArrayVar<IPredictorModel>(predModelVars)
        };
        var outModelsOutput = new ML.Data.PredictorModelArrayConverter.Output();
        outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
        exp.Add(outModels, outModelsOutput);
    }

    // Convert warnings data views from all folds into an array of data views.
    var warnings = new ML.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(warningsVars)
    };
    var warningsOutput = new ML.Data.IDataViewArrayConverter.Output();
    exp.Add(warnings, warningsOutput);

    // Convert overall metrics data views from all folds into an array of data views.
    var overallMetrics = new ML.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(overallMetricsVars)
    };
    var overallMetricsOutput = new ML.Data.IDataViewArrayConverter.Output();
    exp.Add(overallMetrics, overallMetricsOutput);

    // Convert per instance data views from all folds into an array of data views.
    var instanceMetrics = new ML.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(instanceMetricsVars)
    };
    var instanceMetricsOutput = new ML.Data.IDataViewArrayConverter.Output();
    exp.Add(instanceMetrics, instanceMetricsOutput);

    ML.Data.IDataViewArrayConverter.Output confusionMatricesOutput = null;
    if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer || input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer)
    {
        // Convert confusion matrix data views from all folds into an array of data views.
        var confusionMatrices = new ML.Data.IDataViewArrayConverter
        {
            Data = new ArrayVar<IDataView>(confusionMatrixVars)
        };
        confusionMatricesOutput = new ML.Data.IDataViewArrayConverter.Output();
        exp.Add(confusionMatrices, confusionMatricesOutput);
    }

    // Aggregate the per-fold metrics into the macro's final outputs.
    var combineArgs = new CombineMetricsInput();
    combineArgs.Kind = input.Kind;
    combineArgs.LabelColumn = input.LabelColumn;
    combineArgs.WeightColumn = input.WeightColumn;
    combineArgs.GroupColumn = input.GroupColumn;

    // Set the input bindings for the CombineMetrics entry point.
    var combineInputBindingMap = new Dictionary<string, List<ParameterBinding>>();
    var combineInputMap = new Dictionary<ParameterBinding, VariableBinding>();
    var overallArray = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics));
    combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List<ParameterBinding> { overallArray });
    combineInputMap.Add(overallArray, new SimpleVariableBinding(overallMetricsOutput.OutputData.VarName));
    var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics));
    combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List<ParameterBinding> { combinePerInstArray });
    combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceMetricsOutput.OutputData.VarName));
    if (confusionMatricesOutput != null)
    {
        var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix));
        combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List<ParameterBinding> { combineConfArray });
        combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatricesOutput.OutputData.VarName));
    }

    // Map the combiner outputs to the macro's declared output variables.
    var combineOutputMap = new Dictionary<string, string>();
    var combineWarningVar = new Var<IDataView>();
    combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
    combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName);
    var combineOverallMetric = new Var<IDataView>();
    combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
    combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName);
    var combineInstanceMetric = new Var<IDataView>();
    combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
    combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName);
    if (confusionMatricesOutput != null)
    {
        var combineConfusionMatrix = new Var<IDataView>();
        combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
        combineOutputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), combineConfusionMatrix.VarName);
    }

    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
    subGraphNodes.Add(EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner", combineArgs, node.Catalog, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap));
    return(new CommonOutputs.MacroOutput<Output>() { Nodes = subGraphNodes });
}
/// <summary>
/// Registers the wrapped entry point with the given <paramref name="experiment"/>
/// and pairs the entry point's training-data variable with the predictor-model
/// variable produced by the add.
/// </summary>
/// <param name="experiment">The experiment graph to add the entry point to.</param>
/// <returns>The training data reference together with the output predictor model.</returns>
public DataAndModel<IPredictorModel> Add(Experiment experiment)
{
    // Adding the entry point yields its output bindings; expose the
    // predictor model alongside the data the entry point consumes.
    return new DataAndModel<IPredictorModel>(
        _entryPointObj.TrainingData,
        experiment.Add(_entryPointObj).PredictorModel);
}
/// <summary>
/// Macro that expands into a full binary-classification cross-validation graph:
/// splits the input data into folds, instantiates one TrainTestBinary subgraph
/// per fold, and combines the per-fold models, warnings, and metric views into
/// array outputs on this node.
/// </summary>
/// <param name="env">Host environment used to create experiments and validate nodes.</param>
/// <param name="input">Macro arguments: fold count, stratification column, and the training subgraph (<c>input.Nodes</c>).</param>
/// <param name="node">The entry-point node being expanded; supplies input/output variable names.</param>
/// <returns>The full set of expanded entry-point nodes.</returns>
public static CommonOutputs.MacroOutput<Output> CrossValidateBinary(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    // This will be the final resulting list of nodes that is returned from the macro.
    var subGraphNodes = new List<EntryPointNode>();

    // Split the input data into folds.
    var exp = new Experiment(env);
    var cvSplit = new Legacy.Models.CrossValidatorDatasetSplitter();
    cvSplit.Data.VarName = node.GetInputVariable("Data").ToJson();
    cvSplit.NumFolds = input.NumFolds;
    cvSplit.StratificationColumn = input.StratificationColumn;
    var cvSplitOutput = exp.Add(cvSplit);
    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

    // Per-fold output variables, filled in by the loop below and combined after it.
    var predModelVars = new Var<PredictorModel>[input.NumFolds];
    var warningsVars = new Var<IDataView>[input.NumFolds];
    var overallMetricsVars = new Var<IDataView>[input.NumFolds];
    var instanceMetricsVars = new Var<IDataView>[input.NumFolds];
    var confusionMatrixVars = new Var<IDataView>[input.NumFolds];

    // Instantiate the subgraph for each fold.
    for (int k = 0; k < input.NumFolds; k++)
    {
        // Parse the nodes in input.Nodes into a temporary run context.
        // A fresh context per fold keeps each fold's variables independent.
        var context = new RunContext(env);
        var graph = EntryPointNode.ValidateNodes(env, context, input.Nodes);

        // Rename all the variables such that they don't conflict with the ones in the outer run context.
        var mapping = new Dictionary<string, string>();
        foreach (var entryPointNode in graph)
        {
            entryPointNode.RenameAllVariables(mapping);
        }

        // Instantiate a TrainTest entry point for this fold, wrapping the
        // (renamed) user subgraph as its inner training graph.
        var args = new TrainTestBinaryMacro.Arguments
        {
            Nodes = new JArray(graph.Select(n => n.ToJson()).ToArray())
        };
        args.Inputs.Data = new Var<IDataView>
        {
            VarName = mapping[input.Inputs.Data.VarName]
        };
        args.Outputs.Model = new Var<PredictorModel>
        {
            VarName = mapping[input.Outputs.Model.VarName]
        };

        // Set the input bindings for the TrainTest entry point: fold k of the
        // CV split feeds the training and testing inputs.
        var inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
        var inputMap = new Dictionary<ParameterBinding, VariableBinding>();
        var trainingData = new SimpleParameterBinding(nameof(args.TrainingData));
        inputBindingMap.Add(nameof(args.TrainingData), new List<ParameterBinding> { trainingData });
        inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k));
        var testingData = new SimpleParameterBinding(nameof(args.TestingData));
        inputBindingMap.Add(nameof(args.TestingData), new List<ParameterBinding> { testingData });
        inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k));

        // Capture each TrainTest output in a fresh variable so it can be
        // combined across folds after the loop.
        var outputMap = new Dictionary<string, string>();
        var predModelVar = new Var<PredictorModel>();
        outputMap.Add(nameof(TrainTestBinaryMacro.Output.PredictorModel), predModelVar.VarName);
        predModelVars[k] = predModelVar;
        var warningVar = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestBinaryMacro.Output.Warnings), warningVar.VarName);
        warningsVars[k] = warningVar;
        var overallMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestBinaryMacro.Output.OverallMetrics), overallMetric.VarName);
        overallMetricsVars[k] = overallMetric;
        var instanceMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestBinaryMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
        instanceMetricsVars[k] = instanceMetric;
        var confusionMatrix = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestBinaryMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
        confusionMatrixVars[k] = confusionMatrix;

        subGraphNodes.Add(EntryPointNode.Create(env, "Models.TrainTestBinaryEvaluator", args, node.Context, inputBindingMap, inputMap, outputMap));
    }

    // Reuse the experiment for the combiner nodes; the split nodes above were
    // already validated into subGraphNodes.
    exp.Reset();

    // Convert the per-fold variables into arrays bound to this macro's outputs.
    var outModels = new Legacy.Data.PredictorModelArrayConverter
    {
        Model = new ArrayVar<PredictorModel>(predModelVars)
    };
    var outModelsOutput = new Legacy.Data.PredictorModelArrayConverter.Output();
    outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
    exp.Add(outModels, outModelsOutput);

    var warnings = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(warningsVars)
    };
    var warningsOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    warningsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
    exp.Add(warnings, warningsOutput);

    var overallMetrics = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(overallMetricsVars)
    };
    var overallMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    overallMetricsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
    exp.Add(overallMetrics, overallMetricsOutput);

    var instanceMetrics = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(instanceMetricsVars)
    };
    var instanceMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    instanceMetricsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
    exp.Add(instanceMetrics, instanceMetricsOutput);

    var confusionMatrices = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(confusionMatrixVars)
    };
    var confusionMatricesOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    confusionMatricesOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
    exp.Add(confusionMatrices, confusionMatricesOutput);

    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

    return (new CommonOutputs.MacroOutput<Output>() { Nodes = subGraphNodes });
}
/// <summary>
/// Macro implementing one step of the AutoML pipeline sweep. On each expansion it
/// harvests metrics from the previous batch of candidate pipelines, asks the AutoML
/// state for the next batch, emits a train/test subgraph per candidate, and then
/// re-emits itself (recursively) so the sweep continues until no candidates remain,
/// at which point a SweepResultExtractor node produces the final outputs.
/// </summary>
/// <param name="env">Host environment for checks and experiment creation.</param>
/// <param name="input">Sweep arguments: AutoML state (or arguments to create one), batch size, train/test data.</param>
/// <param name="node">The macro node being expanded; supplies variable names and the graph context.</param>
/// <returns>The nodes produced by this expansion step.</returns>
public static CommonOutputs.MacroOutput<Output> PipelineSweep(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    env.Check(input.StateArguments != null || input.State is AutoInference.AutoMlMlState,
        "Must have a valid AutoML State, or pass arguments to create one.");
    env.Check(input.BatchSize > 0, "Batch size must be > 0.");

    // Get the user-defined column roles (if any)
    var dataRoles = GetDataRoles(env, input);

    // If no current state, create object and set data.
    // This runs only on the first expansion; later recursive expansions carry the state through.
    if (input.State == null)
    {
        input.State = input.StateArguments?.CreateComponent(env);

        if (input.State is AutoInference.AutoMlMlState inState)
        {
            inState.SetTrainTestData(input.TrainingData, input.TestingData);
        }
        else
        {
            throw env.Except($"Incompatible type. Expecting type {typeof(AutoInference.AutoMlMlState)}, received type {input.State?.GetType()}.");
        }

        // Register the freshly created state as an input variable on the node/context.
        var result = node.AddNewVariable("State", input.State);
        node.Context.AddInputVariable(result.Item2, typeof(IMlState));
    }
    var autoMlState = (AutoInference.AutoMlMlState)input.State;

    // The indicators are just so the macro knows those pipelines need to
    // be run before performing next expansion. If we add them as inputs
    // to the next iteration, the next iteration cannot run until they have
    // their values set. Thus, indicators are needed.
    var pipelineIndicators = new List<Var<IDataView>>();
    var expNodes = new List<EntryPointNode>();

    // Keep versions of the training and testing var names
    var training = new Var<IDataView> { VarName = node.GetInputVariable("TrainingData").VariableName };
    var testing = new Var<IDataView> { VarName = node.GetInputVariable("TestingData").VariableName };
    var amlsVarObj = new Var<IMlState>()
    {
        VarName = node.GetInputVariable(nameof(input.State)).VariableName
    };

    // Make sure search space is defined. If not, infer,
    // with default number of transform levels.
    if (!autoMlState.IsSearchSpaceDefined())
    {
        autoMlState.InferSearchSpace(numTransformLevels: 1, dataRoles);
    }

    // Extract performance summaries and assign to previous candidate pipelines.
    // Metrics are looked up by per-pipeline variable names generated on the previous iteration.
    foreach (var pipeline in autoMlState.BatchCandidates)
    {
        if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId), out var v) &&
            node.Context.TryGetVariable(AutoMlUtils.GenerateOverallTrainingMetricVarName(pipeline.UniqueId), out var v2))
        {
            pipeline.PerformanceSummary = AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name, (IDataView)v2.Value);
            autoMlState.AddEvaluated(pipeline);
        }
    }

    // NOTE(review): the out values may be null if the map lacks these keys — presumably
    // the output map is always populated by the surrounding infrastructure; verify.
    node.OutputMap.TryGetValue("Results", out string outDvName);
    var outDvVar = new Var<IDataView>() { VarName = outDvName };
    node.OutputMap.TryGetValue("State", out string outStateName);
    var outStateVar = new Var<IMlState>() { VarName = outStateName };

    // Get next set of candidates.
    var candidatePipelines = autoMlState.GetNextCandidates(input.BatchSize);

    // Check if termination condition was met, i.e. no more candidates were returned.
    // If so, end expansion and add a node to extract the sweep result.
    if (candidatePipelines == null || candidatePipelines.Length == 0)
    {
        // Add a node to extract the sweep result.
        var resultSubgraph = new Experiment(env);
        var resultNode = new Microsoft.ML.Legacy.Models.SweepResultExtractor() { State = amlsVarObj };
        var resultOutput = new Legacy.Models.SweepResultExtractor.Output() { State = outStateVar, Results = outDvVar };
        resultSubgraph.Add(resultNode, resultOutput);
        var resultSubgraphNodes = EntryPointNode.ValidateNodes(env, node.Context, resultSubgraph.GetNodes());
        expNodes.AddRange(resultSubgraphNodes);
        return (new CommonOutputs.MacroOutput<Output>() { Nodes = expNodes });
    }

    // Prep all returned candidates
    foreach (var p in candidatePipelines)
    {
        // Add train test experiments to current graph for candidate pipeline
        var subgraph = new Experiment(env);
        var trainTestOutput = p.AddAsTrainTest(training, testing, autoMlState.TrainerKind, subgraph, true);

        // Change variable name to reference pipeline ID in output map, context and entrypoint output.
        var uniqueName = ExperimentUtils.GenerateOverallMetricVarName(p.UniqueId);
        var uniqueNameTraining = AutoMlUtils.GenerateOverallTrainingMetricVarName(p.UniqueId);
        // Only the last node of the candidate subgraph is taken here — TODO confirm
        // the earlier nodes of the subgraph are added elsewhere (e.g. by AddAsTrainTest).
        var sgNode = EntryPointNode.ValidateNodes(env, node.Context,
            new JArray(subgraph.GetNodes().Last())).Last();
        sgNode.RenameOutputVariable(trainTestOutput.OverallMetrics.VarName, uniqueName, cascadeChanges: true);
        sgNode.RenameOutputVariable(trainTestOutput.TrainingOverallMetrics.VarName, uniqueNameTraining, cascadeChanges: true);
        trainTestOutput.OverallMetrics.VarName = uniqueName;
        trainTestOutput.TrainingOverallMetrics.VarName = uniqueNameTraining;
        expNodes.Add(sgNode);

        // Store indicators, to pass to next iteration of macro.
        pipelineIndicators.Add(trainTestOutput.OverallMetrics);
    }

    // Add recursive macro node: the sweep continues by re-invoking this macro with
    // the candidate outputs as gating inputs.
    var macroSubgraph = new Experiment(env);
    var macroNode = new Legacy.Models.PipelineSweeper()
    {
        BatchSize = input.BatchSize,
        CandidateOutputs = new ArrayVar<IDataView>(pipelineIndicators.ToArray()),
        TrainingData = training,
        TestingData = testing,
        State = amlsVarObj
    };
    var output = new Legacy.Models.PipelineSweeper.Output() { Results = outDvVar, State = outStateVar };
    macroSubgraph.Add(macroNode, output);

    var subgraphNodes = EntryPointNode.ValidateNodes(env, node.Context, macroSubgraph.GetNodes());
    expNodes.AddRange(subgraphNodes);

    return (new CommonOutputs.MacroOutput<Output>() { Nodes = expNodes });
}
/// <summary>
/// Builds and trains a house-price regression model from the KC housing CSV at
/// <paramref name="dataPath"/>: loads the columns, concatenates numerical and
/// categorical features, one-hot encodes the categorical group, trains an SDCA
/// regressor, combines all transforms with the predictor into one model, and
/// returns the scoring transform produced by running the experiment.
/// </summary>
/// <param name="dataPath">Path to the comma-separated, headered KC housing data file.</param>
/// <returns>The compiled scoring transform model.</returns>
private static ITransformModel CreateKcHousePricePredictorModel(string dataPath)
{
    Experiment experiment = s_environment.CreateExperiment();

    // Load the CSV; column indices follow the KC housing file layout.
    var importData = new Legacy.Data.TextLoader(dataPath)
    {
        Arguments = new TextLoaderArguments
        {
            Separator = new[] { ',' },
            HasHeader = true,
            Column = new[]
            {
                new TextLoaderColumn() { Name = "Id", Source = new[] { new TextLoaderRange(0) }, Type = Legacy.Data.DataKind.Text },
                new TextLoaderColumn() { Name = "Date", Source = new[] { new TextLoaderRange(1) }, Type = Legacy.Data.DataKind.Text },
                new TextLoaderColumn() { Name = "Label", Source = new[] { new TextLoaderRange(2) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "Bedrooms", Source = new[] { new TextLoaderRange(3) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "Bathrooms", Source = new[] { new TextLoaderRange(4) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "SqftLiving", Source = new[] { new TextLoaderRange(5) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "SqftLot", Source = new[] { new TextLoaderRange(6) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "Floors", Source = new[] { new TextLoaderRange(7) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "Waterfront", Source = new[] { new TextLoaderRange(8) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "View", Source = new[] { new TextLoaderRange(9) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "Condition", Source = new[] { new TextLoaderRange(10) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "Grade", Source = new[] { new TextLoaderRange(11) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "SqftAbove", Source = new[] { new TextLoaderRange(12) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "SqftBasement", Source = new[] { new TextLoaderRange(13) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "YearBuilt", Source = new[] { new TextLoaderRange(14) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "YearRenovated", Source = new[] { new TextLoaderRange(15) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "Zipcode", Source = new[] { new TextLoaderRange(16) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "Lat", Source = new[] { new TextLoaderRange(17) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "Long", Source = new[] { new TextLoaderRange(18) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "SqftLiving15", Source = new[] { new TextLoaderRange(19) }, Type = Legacy.Data.DataKind.Num },
                new TextLoaderColumn() { Name = "SqftLot15", Source = new[] { new TextLoaderRange(20) }, Type = Legacy.Data.DataKind.Num },
            }
        }
    };
    Legacy.Data.TextLoader.Output imported = experiment.Add(importData);

    // Group the continuous columns into one vector.
    var numericalConcatenate = new Legacy.Transforms.ColumnConcatenator();
    numericalConcatenate.Data = imported.Data;
    numericalConcatenate.AddColumn("NumericalFeatures",
        "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15");
    Legacy.Transforms.ColumnConcatenator.Output numericalConcatenated = experiment.Add(numericalConcatenate);

    // Group the discrete/categorical columns into one vector.
    var categoryConcatenate = new Legacy.Transforms.ColumnConcatenator();
    categoryConcatenate.Data = numericalConcatenated.OutputData;
    categoryConcatenate.AddColumn("CategoryFeatures",
        "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode");
    Legacy.Transforms.ColumnConcatenator.Output categoryConcatenated = experiment.Add(categoryConcatenate);

    // One-hot encode the categorical vector.
    var categorize = new Legacy.Transforms.CategoricalOneHotVectorizer();
    categorize.AddColumn("CategoryFeatures");
    categorize.Data = categoryConcatenated.OutputData;
    Legacy.Transforms.CategoricalOneHotVectorizer.Output categorized = experiment.Add(categorize);

    // Build the final "Features" column from both groups.
    var featuresConcatenate = new Legacy.Transforms.ColumnConcatenator();
    featuresConcatenate.Data = categorized.OutputData;
    featuresConcatenate.AddColumn("Features", "NumericalFeatures", "CategoryFeatures");
    Legacy.Transforms.ColumnConcatenator.Output featuresConcatenated = experiment.Add(featuresConcatenate);

    // Train the regressor (single-threaded, presumably for reproducibility — TODO confirm).
    var learner = new Legacy.Trainers.StochasticDualCoordinateAscentRegressor();
    learner.TrainingData = featuresConcatenated.OutputData;
    learner.NumThreads = 1;
    Legacy.Trainers.StochasticDualCoordinateAscentRegressor.Output learnerOutput = experiment.Add(learner);

    // Fold the feature transforms and the trained predictor into a single model.
    var combineModels = new Legacy.Transforms.ManyHeterogeneousModelCombiner();
    combineModels.TransformModels = new ArrayVar<ITransformModel>(
        numericalConcatenated.Model, categoryConcatenated.Model, categorized.Model, featuresConcatenated.Model);
    combineModels.PredictorModel = learnerOutput.PredictorModel;
    Legacy.Transforms.ManyHeterogeneousModelCombiner.Output combinedModels = experiment.Add(combineModels);

    var scorer = new Legacy.Transforms.Scorer
    {
        PredictorModel = combinedModels.PredictorModel
    };
    var scorerOutput = experiment.Add(scorer);

    // Compile and run the whole graph against the file, then return the scoring transform.
    experiment.Compile();
    experiment.SetInput(importData.InputFile, new SimpleFileHandle(s_environment, dataPath, false, false));
    experiment.Run();

    return experiment.GetOutput(scorerOutput.ScoringTransform);
}
/// <summary>
/// Macro that expands a user-supplied training subgraph into a full binary
/// train/test pipeline: rebinds the subgraph's data input to this node's
/// training data and its model output to this node's predictor-model output,
/// then appends a scoring node over the testing data and a binary-classification
/// evaluator whose outputs map back onto this node's outputs.
/// </summary>
/// <param name="env">Host environment for validation and experiment creation.</param>
/// <param name="input">Macro arguments: the subgraph (<c>input.Nodes</c>) plus its declared data input and model output variables.</param>
/// <param name="node">The entry-point node being expanded; supplies input/output variable names and the graph context.</param>
/// <returns>All nodes of the expanded train/test graph, tagged with a common stage id.</returns>
public static CommonOutputs.MacroOutput<Output> TrainTestBinary(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    // Parse the subgraph.
    var subGraphRunContext = new RunContext(env);
    var subGraphNodes = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes);

    // Change the subgraph to use the training data as input.
    var varName = input.Inputs.Data.VarName;
    EntryPointVariable variable;
    if (!subGraphRunContext.TryGetVariable(varName, out variable))
    {
        throw env.Except($"Invalid variable name '{varName}'.");
    }
    var trainingVar = node.GetInputVariable("TrainingData");
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.RenameInputVariable(variable.Name, trainingVar);
    }
    // The placeholder variable is no longer referenced after the rename.
    subGraphRunContext.RemoveVariable(variable);

    // Change the subgraph to use the model variable as output.
    varName = input.Outputs.Model.VarName;
    if (!subGraphRunContext.TryGetVariable(varName, out variable))
    {
        throw env.Except($"Invalid variable name '{varName}'.");
    }
    string outputVarName = node.GetOutputVariableName("PredictorModel");
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.RenameOutputVariable(variable.Name, outputVarName);
    }
    subGraphRunContext.RemoveVariable(variable);

    // Move the variables from the subcontext to the main context.
    node.Context.AddContextVariables(subGraphRunContext);

    // Change all the subgraph nodes to use the main context.
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.SetContext(node.Context);
    }

    // Add the scoring node: score the testing data with the trained model.
    var testingVar = node.GetInputVariable("TestingData");
    var exp = new Experiment(env);
    var scoreNode = new Legacy.Transforms.DatasetScorer();
    scoreNode.Data.VarName = testingVar.ToJson();
    scoreNode.PredictorModel.VarName = outputVarName;
    var scoreNodeOutput = exp.Add(scoreNode);
    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

    // Add the evaluator node over the scored data; each evaluator output is
    // wired to the corresponding macro output only if the caller requested it.
    exp.Reset();
    var evalNode = new Legacy.Models.BinaryClassificationEvaluator();
    evalNode.Data.VarName = scoreNodeOutput.ScoredData.VarName;
    var evalOutput = new Legacy.Models.BinaryClassificationEvaluator.Output();
    string outVariableName;
    if (node.OutputMap.TryGetValue("Warnings", out outVariableName))
    {
        evalOutput.Warnings.VarName = outVariableName;
    }
    if (node.OutputMap.TryGetValue("OverallMetrics", out outVariableName))
    {
        evalOutput.OverallMetrics.VarName = outVariableName;
    }
    if (node.OutputMap.TryGetValue("PerInstanceMetrics", out outVariableName))
    {
        evalOutput.PerInstanceMetrics.VarName = outVariableName;
    }
    if (node.OutputMap.TryGetValue("ConfusionMatrix", out outVariableName))
    {
        evalOutput.ConfusionMatrix.VarName = outVariableName;
    }
    exp.Add(evalNode, evalOutput);
    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

    // Tag every node of this expansion with a shared stage id.
    var stageId = Guid.NewGuid().ToString("N");
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.StageId = stageId;
    }

    return (new CommonOutputs.MacroOutput<Output>() { Nodes = subGraphNodes });
}
/// <summary>
/// Builds the one-vs-rest training subgraph for class <paramref name="k"/>:
/// remaps the label column to a binary indicator for class k, clones the
/// user-supplied training subgraph with fold-unique variable names, rewires its
/// training-data input to the remapped data, and collects the subgraph's
/// predictor-model output variable.
/// </summary>
/// <param name="env">Host environment used for validation and error reporting.</param>
/// <param name="k">Zero-based class index this binary learner targets.</param>
/// <param name="label">Name of the label column to remap.</param>
/// <param name="input">Macro arguments containing the training subgraph (<c>input.Nodes</c>) and training data.</param>
/// <param name="node">The macro node being expanded; supplies variable names and the main graph context.</param>
/// <returns>The nodes for this class's subgraph paired with the variable holding its predictor model.</returns>
/// <exception cref="Exception">Thrown via <c>env.Except</c> when the input graph does not output a predictor model.</exception>
private static Tuple<List<EntryPointNode>, Var<IPredictorModel>> ProcessClass(IHostEnvironment env, int k, string label, Arguments input, EntryPointNode node)
{
    var macroNodes = new List<EntryPointNode>();

    // Convert label into T,F based on k.
    var remapper = new Legacy.Transforms.LabelIndicator
    {
        ClassIndex = k,
        Column = new[]
        {
            new Legacy.Transforms.LabelIndicatorTransformColumn
            {
                ClassIndex = k,
                Name = label,
                Source = label
            }
        },
        Data = { VarName = node.GetInputVariable(nameof(input.TrainingData)).ToJson() }
    };
    var exp = new Experiment(env);
    var remapperOutNode = exp.Add(remapper);
    var subNodes = EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes());
    macroNodes.AddRange(subNodes);

    // Parse the nodes in input.Nodes into a temporary run context.
    var subGraphRunContext = new RunContext(env);
    var subGraphNodes = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes);

    // Rename all the variables such that they don't conflict with the ones in the outer run context.
    var mapping = new Dictionary<string, string>();
    bool foundOutput = false;
    Var<IPredictorModel> predModelVar = null;
    foreach (var entryPointNode in subGraphNodes)
    {
        // Rename variables in input/output maps, and in subgraph context.
        entryPointNode.RenameAllVariables(mapping);
        foreach (var kvp in mapping)
        {
            subGraphRunContext.RenameContextVariable(kvp.Key, kvp.Value);
        }

        // Grab a hold of output model from this subgraph.
        if (entryPointNode.GetOutputVariableName("PredictorModel") is string mvn)
        {
            predModelVar = new Var<IPredictorModel> { VarName = mvn };
            foundOutput = true;
        }

        // Connect label remapper output to wherever training data was expected within the input graph.
        if (entryPointNode.GetInputVariable(nameof(input.TrainingData)) is VariableBinding vb)
        {
            vb.Rename(remapperOutNode.OutputData.VarName);
        }

        // Change node to use the main context.
        entryPointNode.SetContext(node.Context);
    }

    // Move the variables from the subcontext to the main context.
    node.Context.AddContextVariables(subGraphRunContext);

    // Make sure we found the output variable for this model.
    // Fix: throw via env.Except for consistency with the other macros in this
    // file, instead of a bare System.Exception.
    if (!foundOutput)
    {
        throw env.Except("Invalid input graph. Does not output predictor model.");
    }

    // Add training subgraph to our context.
    macroNodes.AddRange(subGraphNodes);
    return new Tuple<List<EntryPointNode>, Var<IPredictorModel>>(macroNodes, predModelVar);
}
/// <summary>
/// Macro that expands into a one-versus-all multiclass training graph: one
/// binary training subgraph per class (via <c>ProcessClass</c>), followed by an
/// OVA model combiner that merges the per-class binary predictor models into a
/// single multiclass predictor model bound to this node's output.
/// </summary>
/// <param name="env">Host environment for checks, validation, and experiment creation.</param>
/// <param name="input">Macro arguments: the per-class training subgraph, training data, and combiner options.</param>
/// <param name="node">The macro node being expanded; supplies variable names and the graph context.</param>
/// <returns>All nodes of the expanded OVA graph.</returns>
public static CommonOutputs.MacroOutput<Output> OneVersusAll(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(input, nameof(input));
    env.Assert(input.Nodes.Count > 0);

    var numClasses = GetNumberOfClasses(env, input, out var label);
    var predModelVars = new Var<IPredictorModel>[numClasses];

    // This will be the final resulting list of nodes that is returned from the macro.
    var macroNodes = new List<EntryPointNode>();

    // Instantiate the subgraph for each label value.
    for (int k = 0; k < numClasses; k++)
    {
        var result = ProcessClass(env, k, label, input, node);
        predModelVars[k] = result.Item2;
        macroNodes.AddRange(result.Item1);
    }

    // Use OVA model combiner to combine these models into one.
    // Takes in array of models that are binary predictor models and
    // produces single multiclass predictor model.
    var macroExperiment = new Experiment(env);
    var combinerNode = new Legacy.Models.OvaModelCombiner
    {
        ModelArray = new ArrayVar<IPredictorModel>(predModelVars),
        TrainingData = new Var<IDataView>
        {
            VarName = node.GetInputVariable(nameof(input.TrainingData)).VariableName
        },
        Caching = (Legacy.Models.CachingOptions)input.Caching,
        FeatureColumn = input.FeatureColumn,
        NormalizeFeatures = (Legacy.Models.NormalizeOption)input.NormalizeFeatures,
        LabelColumn = input.LabelColumn,
        UseProbabilities = input.UseProbabilities
    };

    // Get output model variable.
    // Fix: throw via env.Except for consistency with the other error paths in
    // this file, instead of a bare System.Exception.
    if (!node.OutputMap.TryGetValue(nameof(Output.PredictorModel), out var outVariableName))
    {
        throw env.Except("Cannot find OVA model output.");
    }

    // Map macro's output back to OVA combiner (so OVA combiner will set the value on our output variable).
    var combinerOutput = new Legacy.Models.OvaModelCombiner.Output
    {
        PredictorModel = new Var<IPredictorModel> { VarName = outVariableName }
    };

    // Add to experiment (must be done AFTER we assign variable name to output).
    macroExperiment.Add(combinerNode, combinerOutput);

    // Add nodes to main experiment.
    var nodes = macroExperiment.GetNodes();
    var expNodes = EntryPointNode.ValidateNodes(env, node.Context, nodes);
    macroNodes.AddRange(expNodes);

    return new CommonOutputs.MacroOutput<Output>() { Nodes = macroNodes };
}