/// <summary> /// Trains the model using the ML components in the pipeline. /// </summary> public ExtendedPredictionModel Train(IHostEnvironment environment = null) { if (environment == null) { using (var env = new ConsoleEnvironment(seed: _seed, conc: _conc)) return(Train(env)); } Experiment experiment = environment.CreateExperiment(); Legacy.ILearningPipelineStep step = null; List <Legacy.ILearningPipelineLoader> loaders = new List <Legacy.ILearningPipelineLoader>(); List <Var <ITransformModel> > transformModels = new List <Var <ITransformModel> >(); Var <ITransformModel> lastTransformModel = null; foreach (Legacy.ILearningPipelineItem currentItem in this) { if (currentItem is Legacy.ILearningPipelineLoader loader) { loaders.Add(loader); } step = currentItem.ApplyStep(step, experiment); if (step is Legacy.ILearningPipelineDataStep dataStep && dataStep.Model != null) { transformModels.Add(dataStep.Model); }
/// <summary>
/// Constructs an entrypoint graph from the current pipeline.
/// </summary>
/// <param name="experiment">Optional experiment to build the graph into; when null a fresh
/// experiment is created from the environment.</param>
/// <returns>A graph definition wrapping the experiment, the final model variable (the combined
/// transform + predictor model when transforms are present, otherwise the learner's model), and
/// the last data output in the chain.</returns>
public AutoInference.EntryPointGraphDef ToEntryPointGraph(Experiment experiment = null)
{
    _env.CheckValue(Learner.PipelineNode, nameof(Learner.PipelineNode));
    var subGraph = experiment ?? _env.CreateExperiment();

    // Data variable threaded through the transform chain; starts as the graph's input node.
    Var<IDataView> lastOutput = new Var<IDataView>();

    // Chain transforms, skipping any without entrypoint support (null PipelineNode).
    // FIX: the original called Transforms.ToList() before Where(), allocating a pointless
    // intermediate copy; filtering the array directly is equivalent for this single enumeration.
    var transformsModels = new List<Var<ITransformModel>>();
    var viableTransforms = Transforms.Where(transform => transform.PipelineNode != null);
    foreach (var transform in viableTransforms)
    {
        transform.PipelineNode.SetInputData(lastOutput);
        var transformDataAndModel = transform.PipelineNode.Add(subGraph);
        transformsModels.Add(transformDataAndModel.Model);
        lastOutput = transformDataAndModel.OutData;
    }

    // Add the learner on top of the transformed data.
    Learner.PipelineNode?.SetInputData(lastOutput);
    var learnerDataAndModel = Learner.PipelineNode?.Add(subGraph);

    // Create a single model for featurizing and scoring data, if transforms are present.
    if (Transforms.Length > 0)
    {
        var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
        {
            TransformModels = new ArrayVar<ITransformModel>(transformsModels.ToArray()),
            PredictorModel = learnerDataAndModel?.Model
        };
        var modelCombineOutput = subGraph.Add(modelCombine);
        return new AutoInference.EntryPointGraphDef(subGraph, modelCombineOutput.PredictorModel, lastOutput);
    }

    // No transforms present, so just return the predictor's model.
    return new AutoInference.EntryPointGraphDef(subGraph, learnerDataAndModel?.Model, lastOutput);
}
/// <summary>
/// Builds an entrypoint graph for this recipe: all transforms chained in order, followed by
/// the first learner when one is available. Throws if any transform lacks entrypoint support.
/// </summary>
public AutoInference.EntryPointGraphDef ToEntryPointGraph(IHostEnvironment env)
{
    // All transforms must have associated PipelineNode objects.
    var unsupportedTransform = Transforms
        .Where(transform => transform.PipelineNode == null)
        .Cast<TransformInference.SuggestedTransform?>()
        .FirstOrDefault();
    if (unsupportedTransform != null)
        throw env.ExceptNotSupp($"All transforms in recipe must have entrypoint support. {unsupportedTransform} is not yet supported.");

    var subGraph = env.CreateExperiment();
    Var<IDataView> lastOutput = new Var<IDataView>();

    // Chain the transforms, collecting each emitted model and forwarding the data output.
    var transformsModels = new List<Var<ITransformModel>>();
    foreach (var transform in Transforms)
    {
        transform.PipelineNode.SetInputData(lastOutput);
        var addResult = transform.PipelineNode.Add(subGraph);
        transformsModels.Add(addResult.Model);
        lastOutput = addResult.OutData;
    }

    // No learner available: just return the transforms-only graph object.
    if (Learners.Length == 0 || Learners[0].PipelineNode == null)
        return new AutoInference.EntryPointGraphDef(subGraph, null, lastOutput);

    // Add the learner.
    var learner = Learners[0];
    learner.PipelineNode.SetInputData(lastOutput);
    var learnerAddResult = learner.PipelineNode.Add(subGraph);

    if (Transforms.Length == 0)
    {
        // No transforms present, so just return the predictor's model.
        return new AutoInference.EntryPointGraphDef(subGraph, learnerAddResult.Model, lastOutput);
    }

    // Transforms are present: roll them and the predictor into one combined scoring model.
    var modelCombine = new ML.Legacy.Transforms.ManyHeterogeneousModelCombiner
    {
        TransformModels = new ArrayVar<ITransformModel>(transformsModels.ToArray()),
        PredictorModel = learnerAddResult.Model
    };
    var modelCombineOutput = subGraph.Add(modelCombine);
    return new AutoInference.EntryPointGraphDef(subGraph, modelCombineOutput.PredictorModel, lastOutput);
}
/// <summary>
/// Builds, compiles, and runs an experiment that trains a KC house-price regression model
/// (SDCA regressor over concatenated numeric + one-hot-encoded category features) from
/// <paramref name="dataPath"/>, a comma-separated file with a header row.
/// </summary>
/// <param name="dataPath">Path to the training data file.</param>
/// <returns>The combined featurization + scoring transform model produced by the experiment.</returns>
private static ITransformModel CreateKcHousePricePredictorModel(string dataPath)
{
    // Local helper to remove the repetition of 21 near-identical column initializers.
    TextLoaderColumn Column(string name, int sourceIndex, Legacy.Data.DataKind kind)
        => new TextLoaderColumn
        {
            Name = name,
            Source = new[] { new TextLoaderRange(sourceIndex) },
            Type = kind
        };

    Experiment experiment = s_environment.CreateExperiment();

    var importData = new Legacy.Data.TextLoader(dataPath)
    {
        Arguments = new TextLoaderArguments
        {
            Separator = new[] { ',' },
            HasHeader = true,
            Column = new[]
            {
                Column("Id", 0, Legacy.Data.DataKind.Text),
                Column("Date", 1, Legacy.Data.DataKind.Text),
                Column("Label", 2, Legacy.Data.DataKind.Num),
                Column("Bedrooms", 3, Legacy.Data.DataKind.Num),
                Column("Bathrooms", 4, Legacy.Data.DataKind.Num),
                Column("SqftLiving", 5, Legacy.Data.DataKind.Num),
                Column("SqftLot", 6, Legacy.Data.DataKind.Num),
                Column("Floors", 7, Legacy.Data.DataKind.Num),
                Column("Waterfront", 8, Legacy.Data.DataKind.Num),
                Column("View", 9, Legacy.Data.DataKind.Num),
                Column("Condition", 10, Legacy.Data.DataKind.Num),
                Column("Grade", 11, Legacy.Data.DataKind.Num),
                Column("SqftAbove", 12, Legacy.Data.DataKind.Num),
                Column("SqftBasement", 13, Legacy.Data.DataKind.Num),
                Column("YearBuilt", 14, Legacy.Data.DataKind.Num),
                Column("YearRenovated", 15, Legacy.Data.DataKind.Num),
                Column("Zipcode", 16, Legacy.Data.DataKind.Num),
                Column("Lat", 17, Legacy.Data.DataKind.Num),
                Column("Long", 18, Legacy.Data.DataKind.Num),
                Column("SqftLiving15", 19, Legacy.Data.DataKind.Num),
                Column("SqftLot15", 20, Legacy.Data.DataKind.Num),
            }
        }
    };
    Legacy.Data.TextLoader.Output imported = experiment.Add(importData);

    // Group the raw numeric measurements into a single vector column.
    var numericalConcatenate = new Legacy.Transforms.ColumnConcatenator { Data = imported.Data };
    numericalConcatenate.AddColumn("NumericalFeatures",
        "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15");
    Legacy.Transforms.ColumnConcatenator.Output numericalConcatenated = experiment.Add(numericalConcatenate);

    // Group the category-like columns, then one-hot encode them.
    var categoryConcatenate = new Legacy.Transforms.ColumnConcatenator { Data = numericalConcatenated.OutputData };
    categoryConcatenate.AddColumn("CategoryFeatures",
        "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade",
        "YearBuilt", "YearRenovated", "Zipcode");
    Legacy.Transforms.ColumnConcatenator.Output categoryConcatenated = experiment.Add(categoryConcatenate);

    var categorize = new Legacy.Transforms.CategoricalOneHotVectorizer { Data = categoryConcatenated.OutputData };
    categorize.AddColumn("CategoryFeatures");
    Legacy.Transforms.CategoricalOneHotVectorizer.Output categorized = experiment.Add(categorize);

    // Final "Features" vector fed to the trainer.
    var featuresConcatenate = new Legacy.Transforms.ColumnConcatenator { Data = categorized.OutputData };
    featuresConcatenate.AddColumn("Features", "NumericalFeatures", "CategoryFeatures");
    Legacy.Transforms.ColumnConcatenator.Output featuresConcatenated = experiment.Add(featuresConcatenate);

    var learner = new Legacy.Trainers.StochasticDualCoordinateAscentRegressor
    {
        TrainingData = featuresConcatenated.OutputData,
        NumThreads = 1 // single-threaded, matching the original setting
    };
    Legacy.Trainers.StochasticDualCoordinateAscentRegressor.Output learnerOutput = experiment.Add(learner);

    // Fold the featurization transforms and the predictor into one combined scoring model.
    var combineModels = new Legacy.Transforms.ManyHeterogeneousModelCombiner
    {
        TransformModels = new ArrayVar<ITransformModel>(
            numericalConcatenated.Model, categoryConcatenated.Model, categorized.Model, featuresConcatenated.Model),
        PredictorModel = learnerOutput.PredictorModel
    };
    Legacy.Transforms.ManyHeterogeneousModelCombiner.Output combinedModels = experiment.Add(combineModels);

    var scorer = new Legacy.Transforms.Scorer { PredictorModel = combinedModels.PredictorModel };
    var scorerOutput = experiment.Add(scorer);

    experiment.Compile();
    experiment.SetInput(importData.InputFile, new SimpleFileHandle(s_environment, dataPath, false, false));
    experiment.Run();
    return experiment.GetOutput(scorerOutput.ScoringTransform);
}