protected override IEnumerable<SuggestedRecipe> ApplyCore(Type predictorType,
                                                                       TransformInference.SuggestedTransform[] transforms)
            {
                // Suggest an SDCA-based learner matching the requested prediction task:
                // multi-class SDCA when a multi-class trainer signature is asked for,
                // binary (linear classification) SDCA otherwise.
                var learner = new SuggestedRecipe.SuggestedLearner();
                bool isMultiClass = predictorType == typeof(SignatureMultiClassClassifierTrainer);

                if (isMultiClass)
                {
                    learner.LoadableClassInfo =
                        ComponentCatalog.GetLoadableClassInfo<SignatureTrainer>(Learners.SdcaMultiClassTrainer.LoadNameValue);
                    // NOTE(review): no PipelineNode is attached in the multi-class branch,
                    // unlike the binary one below — confirm this asymmetry is intentional.
                }
                else
                {
                    learner.LoadableClassInfo =
                        ComponentCatalog.GetLoadableClassInfo<SignatureTrainer>(Learners.LinearClassificationTrainer.LoadNameValue);
                    learner.PipelineNode = new TrainerPipelineNode(
                        new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier());
                }

                // No extra settings; the recipe carries the incoming transforms unchanged.
                learner.Settings = "";
                yield return new SuggestedRecipe(ToString(), transforms, new[] { learner });
            }
        /// <summary>
        /// Exercises the OVA (one-versus-all) macro end to end: builds a sub-graph with a
        /// binary SDCA learner, wraps it in <c>OneVersusAll</c> over the iris dataset,
        /// scores the training data, and checks the resulting macro accuracy.
        /// </summary>
        [Fact] // was missing — without it xUnit never discovers/runs this test
        public void TestOvaMacro()
        {
            var dataPath = GetDataPath(@"iris.txt");
            var env      = new MLContext(42); // fixed seed for reproducible metrics

            // Sub-graph holding the binary learner that OVA replicates once per class.
            var subGraph     = env.CreateExperiment();
            var learnerInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier {
                NumThreads = 1 // single-threaded so results are deterministic
            };
            subGraph.Add(learnerInput); // node output is consumed via the sub-graph itself

            // Main pipeline: load iris, run OVA, score, evaluate multi-class metrics.
            var experiment  = env.CreateExperiment();
            var importInput = new Legacy.Data.TextLoader(dataPath);

            importInput.Arguments.Column = new TextLoaderColumn[]
            {
                new TextLoaderColumn {
                    Name = "Label", Source = new[] { new TextLoaderRange(0) }
                },
                new TextLoaderColumn {
                    Name = "Features", Source = new[] { new TextLoaderRange(1, 4) }
                }
            };
            var importOutput = experiment.Add(importInput);
            var oneVersusAll = new Legacy.Models.OneVersusAll
            {
                TrainingData     = importOutput.Data,
                Nodes            = subGraph,
                UseProbabilities = true,
            };
            var ovaOutput  = experiment.Add(oneVersusAll);
            var scoreInput = new Legacy.Transforms.DatasetScorer
            {
                Data           = importOutput.Data,
                PredictorModel = ovaOutput.PredictorModel
            };
            var scoreOutput = experiment.Add(scoreInput);
            var evalInput   = new Legacy.Models.ClassificationEvaluator
            {
                Data = scoreOutput.ScoredData
            };
            var evalOutput = experiment.Add(evalInput);

            experiment.Compile();
            experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
            experiment.Run();

            // Overall metrics must contain exactly one row with the expected macro accuracy.
            var data   = experiment.GetOutput(evalOutput.OverallMetrics);
            var schema = data.Schema;
            var b      = schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int accCol);

            Assert.True(b);
            using (var cursor = data.GetRowCursor(col => col == accCol))
            {
                var getter = cursor.GetGetter<double>(accCol);
                b = cursor.MoveNext();
                Assert.True(b);
                double acc = 0;
                getter(ref acc);
                Assert.Equal(0.96, acc, 2); // 2-decimal precision absorbs float noise
                b = cursor.MoveNext();
                Assert.False(b); // exactly one summary row expected
            }
        }
        /// <summary>
        /// Exercises the cross-validation macro with a stratification column: a no-op
        /// transform plus binary SDCA learner are combined into one model inside the
        /// sub-graph, the CV macro folds on "Strat", and the test checks that the
        /// reported Average / Standard Deviation / per-fold AUC rows are consistent.
        /// </summary>
        [Fact] // was missing — without it xUnit never discovers/runs this test
        public void TestCrossValidationMacroWithStratification()
        {
            var dataPath = GetDataPath(@"breast-cancer.txt");
            var env      = new MLContext(42); // fixed seed for reproducible metrics
            var subGraph = env.CreateExperiment();

            // No-op transform gives the CV macro a transform model to combine.
            var nop       = new Legacy.Transforms.NoOperation();
            var nopOutput = subGraph.Add(nop);

            var learnerInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
            {
                TrainingData = nopOutput.OutputData,
                NumThreads   = 1 // single-threaded so results are deterministic
            };
            var learnerOutput = subGraph.Add(learnerInput);

            // Fuse the (no-op) transform and the predictor into one predictor model,
            // which is what the CV macro expects as the sub-graph's output.
            var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
            {
                TransformModels = new ArrayVar <TransformModel>(nopOutput.Model),
                PredictorModel  = learnerOutput.PredictorModel
            };
            var modelCombineOutput = subGraph.Add(modelCombine);

            var experiment  = env.CreateExperiment();
            var importInput = new Legacy.Data.TextLoader(dataPath);

            importInput.Arguments.Column = new Legacy.Data.TextLoaderColumn[]
            {
                new Legacy.Data.TextLoaderColumn {
                    Name = "Label", Source = new[] { new Legacy.Data.TextLoaderRange(0) }
                },
                new Legacy.Data.TextLoaderColumn {
                    Name = "Strat", Source = new[] { new Legacy.Data.TextLoaderRange(1) }
                },
                new Legacy.Data.TextLoaderColumn {
                    Name = "Features", Source = new[] { new Legacy.Data.TextLoaderRange(2, 9) }
                }
            };
            var importOutput = experiment.Add(importInput);

            var crossValidate = new Legacy.Models.CrossValidator
            {
                Data                 = importOutput.Data,
                Nodes                = subGraph,
                TransformModel       = null,
                StratificationColumn = "Strat" // folds are split by this column
            };

            // Wire the macro's per-fold input/output to the sub-graph's endpoints.
            crossValidate.Inputs.Data            = nop.Data;
            crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel;
            var crossValidateOutput = experiment.Add(crossValidate);

            experiment.Compile();
            experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
            experiment.Run();
            var data = experiment.GetOutput(crossValidateOutput.OverallMetrics);

            var schema = data.Schema;
            var b      = schema.TryGetColumnIndex("AUC", out int metricCol);

            Assert.True(b);
            b = schema.TryGetColumnIndex("Fold Index", out int foldCol);
            Assert.True(b);
            using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol))
            {
                var getter                 = cursor.GetGetter<double>(metricCol);
                var foldGetter             = cursor.GetGetter<ReadOnlyMemory<char>>(foldCol);
                ReadOnlyMemory<char> fold  = default;

                // Get the average.
                b = cursor.MoveNext();
                Assert.True(b);
                double avg = 0;
                getter(ref avg);
                foldGetter(ref fold);
                Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold));

                // Get the standard deviation.
                b = cursor.MoveNext();
                Assert.True(b);
                double stdev = 0;
                getter(ref stdev);
                foldGetter(ref fold);
                Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold));
                Assert.Equal(0.00488, stdev, 5);

                // Each per-fold AUC follows; their mean must equal the reported average.
                double sum = 0;
                double val = 0;
                for (int f = 0; f < 2; f++)
                {
                    b = cursor.MoveNext();
                    Assert.True(b);
                    getter(ref val);
                    foldGetter(ref fold);
                    sum += val;
                    Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold));
                }
                // Exact equality is intentional: the evaluator computes the average
                // from the same fold values, so the doubles match bit-for-bit.
                Assert.Equal(avg, sum / 2);
                b = cursor.MoveNext();
                Assert.False(b); // no rows beyond average + stdev + 2 folds
            }
        }
        /// <summary>
        /// Smoke-tests a simple flat (non-macro) experiment: load the tiny adult dataset,
        /// one-hot encode the categorical column, concatenate features, train binary SDCA
        /// with a hinge loss, score the training data, and check the evaluated AUC.
        /// </summary>
        [Fact] // was missing — without it xUnit never discovers/runs this test
        public void TestSimpleTrainExperiment()
        {
            var dataPath   = GetDataPath("adult.tiny.with-schema.txt");
            var env        = new MLContext();
            var experiment = env.CreateExperiment();

            // The dataset ships with a schema, so the loader needs no column config here.
            var importInput  = new Legacy.Data.TextLoader(dataPath);
            var importOutput = experiment.Add(importInput);

            var catInput = new Legacy.Transforms.CategoricalOneHotVectorizer
            {
                Data = importOutput.Data
            };

            catInput.AddColumn("Categories");
            var catOutput = experiment.Add(catInput);

            var concatInput = new Legacy.Transforms.ColumnConcatenator
            {
                Data = catOutput.OutputData
            };

            // "Features" = encoded categories + raw numeric features.
            concatInput.AddColumn("Features", "Categories", "NumericFeatures");
            var concatOutput = experiment.Add(concatInput);

            var sdcaInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
            {
                TrainingData = concatOutput.OutputData,
                LossFunction = new HingeLossSDCAClassificationLossFunction()
                {
                    Margin = 1.1f
                },
                NumThreads = 1,     // single-threaded and unshuffled for determinism
                Shuffle    = false
            };
            var sdcaOutput = experiment.Add(sdcaInput);

            var scoreInput = new Legacy.Transforms.DatasetScorer
            {
                Data           = concatOutput.OutputData,
                PredictorModel = sdcaOutput.PredictorModel
            };
            var scoreOutput = experiment.Add(scoreInput);

            var evalInput = new Legacy.Models.BinaryClassificationEvaluator
            {
                Data = scoreOutput.ScoredData
            };
            var evalOutput = experiment.Add(evalInput);

            experiment.Compile();
            experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
            experiment.Run();
            var data = experiment.GetOutput(evalOutput.OverallMetrics);

            // Overall metrics must contain exactly one row with the expected AUC.
            var schema = data.Schema;
            var b      = schema.TryGetColumnIndex("AUC", out int aucCol);

            Assert.True(b);
            using (var cursor = data.GetRowCursor(col => col == aucCol))
            {
                var getter = cursor.GetGetter<double>(aucCol);
                b = cursor.MoveNext();
                Assert.True(b);
                double auc = 0;
                getter(ref auc);
                Assert.Equal(0.93, auc, 2); // 2-decimal precision absorbs float noise
                b = cursor.MoveNext();
                Assert.False(b); // exactly one summary row expected
            }
        }
        // Example #5
        /// <summary>
        /// Exercises the train/test macro: the sub-graph one-hot encodes categories,
        /// concatenates features, trains binary SDCA with a hinge loss, and fuses the
        /// transforms with the predictor; the macro trains and tests on the same tiny
        /// adult dataset and the test checks the resulting AUC.
        /// </summary>
        [Fact] // was missing — without it xUnit never discovers/runs this test
        public void TestTrainTestMacro()
        {
            var dataPath = GetDataPath("adult.tiny.with-schema.txt");

            using (var env = new ConsoleEnvironment())
            {
                var subGraph = env.CreateExperiment();

                // Data is left unset; the macro wires it up via Inputs.Data below.
                var catInput = new Legacy.Transforms.CategoricalOneHotVectorizer();
                catInput.AddColumn("Categories");
                var catOutput = subGraph.Add(catInput);

                var concatInput = new Legacy.Transforms.ColumnConcatenator
                {
                    Data = catOutput.OutputData
                };
                // "Features" = encoded categories + raw numeric features.
                concatInput.AddColumn("Features", "Categories", "NumericFeatures");
                var concatOutput = subGraph.Add(concatInput);

                var sdcaInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
                {
                    TrainingData = concatOutput.OutputData,
                    LossFunction = new HingeLossSDCAClassificationLossFunction()
                    {
                        Margin = 1.1f
                    },
                    NumThreads = 1,     // single-threaded and unshuffled for determinism
                    Shuffle    = false
                };
                var sdcaOutput = subGraph.Add(sdcaInput);

                // Fuse both transforms and the predictor into a single model so the
                // macro can apply the identical pipeline to the test data.
                var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
                {
                    TransformModels = new ArrayVar <ITransformModel>(catOutput.Model, concatOutput.Model),
                    PredictorModel  = sdcaOutput.PredictorModel
                };
                var modelCombineOutput = subGraph.Add(modelCombine);

                var experiment = env.CreateExperiment();

                var importInput  = new Legacy.Data.TextLoader(dataPath);
                var importOutput = experiment.Add(importInput);

                // Train and test on the same data — this is a plumbing smoke test,
                // not a generalization measurement.
                var trainTestInput = new Legacy.Models.TrainTestBinaryEvaluator
                {
                    TrainingData = importOutput.Data,
                    TestingData  = importOutput.Data,
                    Nodes        = subGraph
                };
                trainTestInput.Inputs.Data   = catInput.Data;
                trainTestInput.Outputs.Model = modelCombineOutput.PredictorModel;
                var trainTestOutput = experiment.Add(trainTestInput);

                experiment.Compile();
                experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
                experiment.Run();
                var data = experiment.GetOutput(trainTestOutput.OverallMetrics);

                // Overall metrics must contain exactly one row with the expected AUC.
                var schema = data.Schema;
                var b      = schema.TryGetColumnIndex("AUC", out int aucCol);
                Assert.True(b);
                using (var cursor = data.GetRowCursor(col => col == aucCol))
                {
                    var getter = cursor.GetGetter<double>(aucCol);
                    b = cursor.MoveNext();
                    Assert.True(b);
                    double auc = 0;
                    getter(ref auc);
                    Assert.Equal(0.93, auc, 2); // 2-decimal precision absorbs float noise
                    b = cursor.MoveNext();
                    Assert.False(b); // exactly one summary row expected
                }
            }
        }