예제 #1
0
        /// <summary>
        /// Trains the model using the ML components in the pipeline.
        /// </summary>
        public ExtendedPredictionModel Train(IHostEnvironment environment = null)
        {
            if (environment == null)
            {
                using (var env = new ConsoleEnvironment(seed: _seed, conc: _conc))
                    return(Train(env));
            }

            Experiment experiment = environment.CreateExperiment();

            Legacy.ILearningPipelineStep          step            = null;
            List <Legacy.ILearningPipelineLoader> loaders         = new List <Legacy.ILearningPipelineLoader>();
            List <Var <ITransformModel> >         transformModels = new List <Var <ITransformModel> >();
            Var <ITransformModel> lastTransformModel = null;

            foreach (Legacy.ILearningPipelineItem currentItem in this)
            {
                if (currentItem is Legacy.ILearningPipelineLoader loader)
                {
                    loaders.Add(loader);
                }

                step = currentItem.ApplyStep(step, experiment);
                if (step is Legacy.ILearningPipelineDataStep dataStep && dataStep.Model != null)
                {
                    transformModels.Add(dataStep.Model);
                }
예제 #2
0
        /// <summary>
        /// Constructs an entrypoint graph from the current pipeline.
        /// </summary>
        /// <param name="experiment">
        /// Experiment the pipeline nodes are added to. When <c>null</c>, a new
        /// experiment is created from the pipeline's environment.
        /// </param>
        /// <returns>
        /// A graph definition holding the experiment, the resulting predictor model
        /// variable (combined with the transform models when at least one transform
        /// was chained), and the data variable produced by the last chained transform.
        /// </returns>
        public AutoInference.EntryPointGraphDef ToEntryPointGraph(Experiment experiment = null)
        {
            _env.CheckValue(Learner.PipelineNode, nameof(Learner.PipelineNode));
            var subGraph = experiment ?? _env.CreateExperiment();

            // Placeholder variable that feeds the first node in the chain.
            Var<IDataView> lastOutput = new Var<IDataView>();

            // Chain transforms. Only transforms that expose a PipelineNode can be
            // added to the graph; the others are skipped.
            var transformsModels = new List<Var<ITransformModel>>();
            var viableTransforms = Transforms.Where(candidate => candidate.PipelineNode != null);

            foreach (var transform in viableTransforms)
            {
                transform.PipelineNode.SetInputData(lastOutput);
                var transformResult = transform.PipelineNode.Add(subGraph);
                transformsModels.Add(transformResult.Model);
                lastOutput = transformResult.OutData;
            }

            // Add learner, fed by the output of the last chained transform.
            Learner.PipelineNode?.SetInputData(lastOutput);
            var learnerResult = Learner.PipelineNode?.Add(subGraph);

            // Create a single model for featurizing and scoring data, but only when
            // at least one transform was actually chained. Checking the collected
            // models (rather than Transforms.Length) avoids handing the combiner an
            // empty model array when none of the transforms has a PipelineNode.
            if (transformsModels.Count > 0)
            {
                var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
                {
                    TransformModels = new ArrayVar<ITransformModel>(transformsModels.ToArray()),
                    PredictorModel = learnerResult?.Model
                };
                var modelCombineOutput = subGraph.Add(modelCombine);

                return new AutoInference.EntryPointGraphDef(subGraph, modelCombineOutput.PredictorModel, lastOutput);
            }

            // No transforms were chained, so just return the predictor's model.
            return new AutoInference.EntryPointGraphDef(subGraph, learnerResult?.Model, lastOutput);
        }
            /// <summary>
            /// Builds an entrypoint graph that chains every transform of this recipe and,
            /// when the first learner has a pipeline node, that learner as well.
            /// </summary>
            /// <param name="env">
            /// Environment used to create the experiment and to build the
            /// not-supported exception.
            /// </param>
            /// <returns>
            /// A graph definition holding the experiment, the model variable (the combined
            /// model when transforms exist, the learner's model when they do not, or
            /// <c>null</c> when no usable learner is present), and the last data output.
            /// </returns>
            public AutoInference.EntryPointGraphDef ToEntryPointGraph(IHostEnvironment env)
            {
                // Every transform needs a PipelineNode; pick out the first one without it.
                var missingNode = Transforms
                    .Where(candidate => candidate.PipelineNode == null)
                    .Cast<TransformInference.SuggestedTransform?>()
                    .FirstOrDefault();

                if (missingNode != null)
                {
                    throw env.ExceptNotSupp($"All transforms in recipe must have entrypoint support. {missingNode} is not yet supported.");
                }

                var graph = env.CreateExperiment();
                var models = new List<Var<ITransformModel>>();

                // Feed each transform with the output of the previous one.
                Var<IDataView> currentData = new Var<IDataView>();
                foreach (var item in Transforms)
                {
                    item.PipelineNode.SetInputData(currentData);
                    var added = item.PipelineNode.Add(graph);
                    models.Add(added.Model);
                    currentData = added.OutData;
                }

                // Without a usable learner the graph carries the transforms only.
                if (Learners.Length == 0 || Learners[0].PipelineNode == null)
                {
                    return new AutoInference.EntryPointGraphDef(graph, null, currentData);
                }

                // Attach the first learner to the end of the transform chain.
                var firstLearner = Learners[0];
                firstLearner.PipelineNode.SetInputData(currentData);
                var trained = firstLearner.PipelineNode.Add(graph);

                // No transforms present: the predictor's own model is the result.
                if (Transforms.Length == 0)
                {
                    return new AutoInference.EntryPointGraphDef(graph, trained.Model, currentData);
                }

                // Transforms present: fold them and the predictor into a single model
                // for featurizing and scoring data.
                var combiner = new ML.Legacy.Transforms.ManyHeterogeneousModelCombiner
                {
                    TransformModels = new ArrayVar<ITransformModel>(models.ToArray()),
                    PredictorModel = trained.Model
                };
                var combined = graph.Add(combiner);

                return new AutoInference.EntryPointGraphDef(graph, combined.PredictorModel, currentData);
            }
예제 #4
0
        /// <summary>
        /// Builds and runs an experiment that loads the house-price file, assembles a
        /// "Features" column, trains an SDCA regressor on it and returns the scoring
        /// transform of the combined model.
        /// </summary>
        /// <param name="dataPath">
        /// Path of the comma-separated data file (with a header row) to train on.
        /// </param>
        /// <returns>The scoring transform produced by the experiment's scorer node.</returns>
        private static ITransformModel CreateKcHousePricePredictorModel(string dataPath)
        {
            Experiment experiment = s_environment.CreateExperiment();

            // Column names in file order: a name's position in this table is the
            // index of the column it is read from.
            string[] columnNames =
            {
                "Id", "Date", "Label", "Bedrooms", "Bathrooms", "SqftLiving", "SqftLot",
                "Floors", "Waterfront", "View", "Condition", "Grade", "SqftAbove",
                "SqftBasement", "YearBuilt", "YearRenovated", "Zipcode", "Lat", "Long",
                "SqftLiving15", "SqftLot15"
            };

            // The first two columns ("Id" and "Date") are loaded as text, the rest as numbers.
            const int textColumnCount = 2;

            var columns = new TextLoaderColumn[columnNames.Length];
            for (int index = 0; index < columnNames.Length; index++)
            {
                columns[index] = new TextLoaderColumn()
                {
                    Name   = columnNames[index],
                    Source = new [] { new TextLoaderRange(index) },
                    Type   = index < textColumnCount ? Legacy.Data.DataKind.Text : Legacy.Data.DataKind.Num
                };
            }

            var loader = new Legacy.Data.TextLoader(dataPath)
            {
                Arguments = new TextLoaderArguments
                {
                    Separator = new[] { ',' },
                    HasHeader = true,
                    Column    = columns
                }
            };
            var loaded = experiment.Add(loader);

            // Gather the columns treated as plain numeric features into one column.
            var numericConcat = new Legacy.Transforms.ColumnConcatenator
            {
                Data = loaded.Data
            };
            numericConcat.AddColumn("NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15");
            var numericOutput = experiment.Add(numericConcat);

            // Gather the columns that will be one-hot encoded into one column.
            var categoryConcat = new Legacy.Transforms.ColumnConcatenator
            {
                Data = numericOutput.OutputData
            };
            categoryConcat.AddColumn("CategoryFeatures", "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode");
            var categoryOutput = experiment.Add(categoryConcat);

            // One-hot encode the category column.
            var oneHot = new Legacy.Transforms.CategoricalOneHotVectorizer();
            oneHot.AddColumn("CategoryFeatures");
            oneHot.Data = categoryOutput.OutputData;
            var oneHotOutput = experiment.Add(oneHot);

            // Join both feature groups into the final "Features" column.
            var featureConcat = new Legacy.Transforms.ColumnConcatenator
            {
                Data = oneHotOutput.OutputData
            };
            featureConcat.AddColumn("Features", "NumericalFeatures", "CategoryFeatures");
            var featureOutput = experiment.Add(featureConcat);

            // NOTE(review): NumThreads is pinned to 1, presumably for reproducible
            // results - confirm.
            var trainer = new Legacy.Trainers.StochasticDualCoordinateAscentRegressor
            {
                TrainingData = featureOutput.OutputData,
                NumThreads   = 1
            };
            var trainerOutput = experiment.Add(trainer);

            // Fold the four transform models and the predictor into a single model.
            var combiner = new Legacy.Transforms.ManyHeterogeneousModelCombiner
            {
                TransformModels = new ArrayVar<ITransformModel>(numericOutput.Model, categoryOutput.Model, oneHotOutput.Model, featureOutput.Model),
                PredictorModel  = trainerOutput.PredictorModel
            };
            var combinerOutput = experiment.Add(combiner);

            var scorer = new Legacy.Transforms.Scorer();
            scorer.PredictorModel = combinerOutput.PredictorModel;
            var scored = experiment.Add(scorer);

            // Bind the loader's input to the data file, then execute the graph.
            experiment.Compile();
            experiment.SetInput(loader.InputFile, new SimpleFileHandle(s_environment, dataPath, false, false));
            experiment.Run();

            return experiment.GetOutput(scored.ScoringTransform);
        }