예제 #1
0
        public void TestSimpleExperiment()
        {
            var dataPath = GetDataPath("adult.tiny.with-schema.txt");

            using (var env = new ConsoleEnvironment())
            {
                var experiment = env.CreateExperiment();

                // Load the schema-annotated adult data set.
                var loader = new Legacy.Data.TextLoader(dataPath);
                var loaderOutput = experiment.Add(loader);

                // Min-max normalize the numeric feature column.
                var normalizer = new Legacy.Transforms.MinMaxNormalizer { Data = loaderOutput.Data };
                normalizer.AddColumn("NumericFeatures");
                var normalizerOutput = experiment.Add(normalizer);

                experiment.Compile();
                experiment.SetInput(loader.InputFile, new SimpleFileHandle(env, dataPath, false, false));
                experiment.Run();

                // The output schema should contain exactly these columns, in order.
                var data = experiment.GetOutput(normalizerOutput.OutputData);
                var schema = data.Schema;
                var expectedNames = new[] { "Label", "Workclass", "Categories", "NumericFeatures", "NumericFeatures" };
                Assert.Equal(expectedNames.Length, schema.ColumnCount);
                for (int col = 0; col < schema.ColumnCount; col++)
                {
                    Assert.Equal(expectedNames[col], schema.GetColumnName(col));
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Performs train-test on a pipeline.
        /// </summary>
        /// <typeparam name="TInput">Class type that represents input schema.</typeparam>
        /// <typeparam name="TOutput">Class type that represents prediction schema.</typeparam>
        /// <param name="pipeline">Machine learning pipeline that contains <see cref="ILearningPipelineLoader"/>,
        /// transforms and at least one trainer.</param>
        /// <param name="testData"><see cref="ILearningPipelineLoader"/> that represents the test dataset.</param>
        /// <returns>Metrics and predictor model.</returns>
        public TrainTestEvaluatorOutput <TInput, TOutput> TrainTestEvaluate <TInput, TOutput>(LearningPipeline pipeline, ILearningPipelineLoader testData)
            where TInput : class
            where TOutput : class, new()
        {
            using (var environment = new ConsoleEnvironment())
            {
                Experiment                     subGraph              = environment.CreateExperiment();
                ILearningPipelineStep          step                  = null;
                List <ILearningPipelineLoader> loaders               = new List <ILearningPipelineLoader>();
                List <Var <ITransformModel> >  transformModels       = new List <Var <ITransformModel> >();
                Var <ITransformModel>          lastTransformModel    = null;
                Var <IDataView>                firstPipelineDataStep = null;
                Var <IPredictorModel>          firstModel            = null;
                ILearningPipelineItem          firstTransform        = null;
                foreach (ILearningPipelineItem currentItem in pipeline)
                {
                    if (currentItem is ILearningPipelineLoader loader)
                    {
                        loaders.Add(loader);
                        continue;
                    }

                    step = currentItem.ApplyStep(step, subGraph);

                    if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null)
                    {
                        transformModels.Add(dataStep.Model);
                        if (firstPipelineDataStep == null)
                        {
                            firstPipelineDataStep = dataStep.Data;
                            firstTransform        = currentItem;
                        }
                    }
예제 #3
0
        /// <summary>
        /// Verifies that the text loader honors quoted fields: commas and special characters
        /// inside quotes stay in one column, and unquoted text is read as-is.
        /// </summary>
        public void CanSuccessfullyRetrieveQuotedData()
        {
            string dataPath = GetDataPath("QuotingData.csv");
            var loader = new Legacy.Data.TextLoader(dataPath).CreateFrom<QuoteInput>(useHeader: true, separator: ',', allowQuotedStrings: true, supportSparse: false);

            using (var environment = new ConsoleEnvironment())
            {
                Experiment experiment = environment.CreateExperiment();
                Legacy.ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as Legacy.ILearningPipelineDataStep;

                // Fail with a clear assertion (instead of a NullReferenceException below)
                // if the loader did not produce a data step.
                Assert.NotNull(output);

                experiment.Compile();
                loader.SetInput(environment, experiment);
                experiment.Run();

                IDataView data = experiment.GetOutput(output.Data);
                Assert.NotNull(data);

                using (var cursor = data.GetRowCursor(a => true))
                {
                    var idGetter = cursor.GetGetter<float>(0);
                    var textGetter = cursor.GetGetter<ReadOnlyMemory<char>>(1);

                    // Advances the cursor one row and checks both columns.
                    void AssertNextRow(float expectedId, string expectedText)
                    {
                        Assert.True(cursor.MoveNext());

                        float id = 0;
                        idGetter(ref id);
                        Assert.Equal(expectedId, id);

                        ReadOnlyMemory<char> text = default;
                        textGetter(ref text);
                        Assert.Equal(expectedText, text.ToString());
                    }

                    AssertNextRow(1, "This text contains comma, within quotes.");
                    AssertNextRow(2, "This text contains extra punctuations and special characters.;*<>?!@#$%^&*()_+=-{}|[]:;'");
                    AssertNextRow(3, "This text has no quotes");

                    // Exactly three data rows in the file.
                    Assert.False(cursor.MoveNext());
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Computes the quality metrics for the PredictionModel using the specified data set.
        /// </summary>
        /// <param name="model">
        /// The trained PredictionModel to be evaluated. Must not be null.
        /// </param>
        /// <param name="testData">
        /// The test data that will be predicted and used to evaluate the model. Must not be null.
        /// </param>
        /// <returns>
        /// A BinaryClassificationMetrics instance that describes how well the model performed against the test data.
        /// </returns>
        public BinaryClassificationMetrics Evaluate(PredictionModel model, ILearningPipelineLoader testData)
        {
            using (var environment = new ConsoleEnvironment())
            {
                environment.CheckValue(model, nameof(model));
                environment.CheckValue(testData, nameof(testData));

                Experiment experiment = environment.CreateExperiment();

                // The loader contributes the test data as the first node of the experiment graph.
                ILearningPipelineStep testDataStep = testData.ApplyStep(previousStep: null, experiment);
                if (!(testDataStep is ILearningPipelineDataStep testDataOutput))
                {
                    throw environment.Except($"The {nameof(ILearningPipelineLoader)} did not return a {nameof(ILearningPipelineDataStep)} from ApplyStep.");
                }

                // Score the test data with the trained model's transform/predictor.
                var datasetScorer = new DatasetTransformScorer
                {
                    Data = testDataOutput.Data
                };
                DatasetTransformScorer.Output scoreOutput = experiment.Add(datasetScorer);

                // This evaluator itself is an experiment node: point it at the scored data
                // before adding it to the graph.
                Data = scoreOutput.ScoredData;
                Output evaluteOutput = experiment.Add(this);

                experiment.Compile();

                // Bind the runtime inputs: the trained model and the loader's file handle.
                experiment.SetInput(datasetScorer.TransformModel, model.PredictorModel);
                testData.SetInput(environment, experiment);

                experiment.Run();

                IDataView overallMetrics = experiment.GetOutput(evaluteOutput.OverallMetrics);
                if (overallMetrics == null)
                {
                    throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(BinaryClassificationEvaluator)} Evaluate.");
                }

                IDataView confusionMatrix = experiment.GetOutput(evaluteOutput.ConfusionMatrix);
                if (confusionMatrix == null)
                {
                    throw environment.Except($"Could not find ConfusionMatrix in the results returned in {nameof(BinaryClassificationEvaluator)} Evaluate.");
                }

                var metric = BinaryClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix);

                // A single test set should produce exactly one metric set.
                if (metric.Count != 1)
                {
                    throw environment.Except($"Exactly one metric set was expected but found {metric.Count} metrics");
                }

                return(metric[0]);
            }
        }
예제 #5
0
        /// <summary>
        /// <a href="https://onnx.ai/">ONNX</a> is an intermediate representation format
        /// for machine learning models.
        /// </summary>
        /// <remarks>
        /// <para>It is used to make models portable such that you can
        /// train a model using a toolkit and run it in another toolkit's runtime, for example,
        /// you can create a model using ML.NET, export it to an ONNX-ML model file,
        /// then load and run that ONNX-ML model in Windows ML, on a UWP Windows 10 app.</para>
        ///
        /// <para>This API converts an ML.NET model to ONNX-ML format by inspecting the transform pipeline
        /// from the end, checking for components that know how to save themselves as ONNX.
        /// The first item in the transform pipeline that does not know how to save itself
        /// as ONNX, is considered the "input" to the ONNX pipeline. (Ideally this would be the
        /// original loader itself, but this may not be possible if the user used unsavable
        /// transforms in defining the pipe.) All the columns in the source that are a type the
        /// ONNX knows how to deal with will be tracked. Intermediate transformations of the
        /// data appearing as new columns will appear in the output block of the ONNX, with names
        /// derived from the corresponding column names. The ONNX JSON will be serialized to a
        /// path defined through the Json option.</para>
        ///
        /// <para>This API supports the following arguments:</para>
        /// <list type="bullet">
        /// <item><description><see cref="Onnx"/> indicates the file to write the ONNX protocol buffer file to. This is required.</description></item>
        /// <item><description><see cref="Json"/> indicates the file to write the JSON representation of the ONNX model. This is optional.</description></item>
        /// <item><description><see cref="Name"/> indicates the name property in the ONNX model. If left unspecified, it will
        /// be the extension-less name of the file specified in the <see cref="Onnx"/> option, i.e. the protocol buffer file
        /// to write the ONNX representation to.</description></item>
        /// <item><description><see cref="Domain"/> indicates the domain name of the model. ONNX uses reverse domain name space indicators.
        /// For example com.microsoft.cognitiveservices. This is a required field.</description></item>
        /// <item><description><see cref="InputsToDrop"/> is a string array of input column names to omit from the input mapping.
        /// A common scenario might be to drop the label column, for instance, since it may not be practically
        /// useful for the pipeline. Note that any columns depending on these naturally cannot be saved.</description></item>
        /// <item><description><see cref="OutputsToDrop"/> is similar, except for the output schema. Note that the pipeline handler
        /// is currently not intelligent enough to drop intermediate calculations that produce this value: this will
        /// merely omit that value from the actual output.</description></item>
        /// </list>
        ///
        /// <para>Transforms that can be exported to ONNX</para>
        /// <list type="number">
        /// <item><description>Concat</description></item>
        /// <item><description>KeyToVector</description></item>
        /// <item><description>NAReplace</description></item>
        /// <item><description>Normalize</description></item>
        /// <item><description>Term</description></item>
        /// <item><description>Categorical</description></item>
        /// </list>
        ///
        /// <para>Learners that can be exported to ONNX</para>
        /// <list type="number">
        /// <item><description>FastTree</description></item>
        /// <item><description>LightGBM</description></item>
        /// <item><description>Logistic Regression</description></item>
        /// </list>
        ///
        /// <para>See <a href="https://github.com/dotnet/machinelearning/blob/master/test/Microsoft.ML.Tests/OnnxTests.cs">OnnxTests.cs</a>
        /// for an example on how to train a model and then convert that model to ONNX.</para>
        /// </remarks>
        /// <param name="model">Model that needs to be converted to ONNX format. Must not be null.</param>
        public void Convert(PredictionModel model)
        {
            using (var environment = new ConsoleEnvironment())
            {
                environment.CheckValue(model, nameof(model));

                // This converter is itself an experiment node; running the one-node
                // experiment performs the conversion as a side effect (file output).
                Experiment experiment = environment.CreateExperiment();
                experiment.Add(this);
                experiment.Compile();
                experiment.SetInput(Model, model.PredictorModel);
                experiment.Run();
            }
        }
예제 #6
0
        /// <summary>
        /// Verifies that applying a text loader step to an experiment yields a data step
        /// with a bound data variable and no model.
        /// </summary>
        public void CanSuccessfullyApplyATransform()
        {
            var loader = new Legacy.Data.TextLoader("fakeFile.txt").CreateFrom<Input>();

            using (var environment = new ConsoleEnvironment())
            {
                Experiment experiment = environment.CreateExperiment();
                Legacy.ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as Legacy.ILearningPipelineDataStep;

                // Guard the as-cast so a wrong step type fails the test with an assertion
                // instead of a NullReferenceException on the next line.
                Assert.NotNull(output);
                Assert.NotNull(output.Data);
                Assert.NotNull(output.Data.VarName);
                Assert.Null(output.Model);
            }
        }
예제 #7
0
        public void CanSuccessfullyTrimSpaces()
        {
            string dataPath = GetDataPath("TrimData.csv");
            var loader = new Legacy.Data.TextLoader(dataPath).CreateFrom<QuoteInput>(useHeader: true, separator: ',', allowQuotedStrings: false, supportSparse: false, trimWhitespace: true);

            using (var environment = new ConsoleEnvironment())
            {
                Experiment experiment = environment.CreateExperiment();
                Legacy.ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as Legacy.ILearningPipelineDataStep;

                experiment.Compile();
                loader.SetInput(environment, experiment);
                experiment.Run();

                IDataView data = experiment.GetOutput(output.Data);
                Assert.NotNull(data);

                using (var cursor = data.GetRowCursor(a => true))
                {
                    var idGetter = cursor.GetGetter<float>(0);
                    var textGetter = cursor.GetGetter<ReadOnlyMemory<char>>(1);

                    // Advances the cursor one row and checks both columns; trailing
                    // whitespace is expected to have been trimmed by the loader.
                    void AssertNextRow(float expectedId, string expectedText)
                    {
                        Assert.True(cursor.MoveNext());

                        float id = 0;
                        idGetter(ref id);
                        Assert.Equal(expectedId, id);

                        ReadOnlyMemory<char> text = default;
                        textGetter(ref text);
                        Assert.Equal(expectedText, text.ToString());
                    }

                    AssertNextRow(1, "There is a space at the end");
                    AssertNextRow(2, "There is no space at the end");
                    Assert.False(cursor.MoveNext());
                }
            }
        }
예제 #8
0
        public void CanSuccessfullyApplyATransform()
        {
            // An in-memory collection with a single row serves as the data source.
            var collection = CollectionDataSource.Create(new List<Input>()
            {
                new Input { Number1 = 1, String1 = "1" }
            });

            using (var environment = new ConsoleEnvironment())
            {
                Experiment experiment = environment.CreateExperiment();
                var step = (Legacy.ILearningPipelineDataStep)collection.ApplyStep(null, experiment);

                // A pure data source contributes data but no model.
                Assert.NotNull(step.Data);
                Assert.NotNull(step.Data.VarName);
                Assert.Null(step.Model);
            }
        }
예제 #9
0
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // TensorFlow is 64-bit only
        public void TestTensorFlowEntryPoint()
        {
            var dataPath = GetDataPath("Train-Tiny-28x28.txt");

            using (var env = new ConsoleEnvironment(42))
            {
                var experiment = env.CreateExperiment();

                // Column 0 is the label; columns 1-784 feed the TensorFlow "Placeholder" input.
                var loader = new Legacy.Data.TextLoader(dataPath);
                loader.Arguments.Column = new TextLoaderColumn[]
                {
                    new TextLoaderColumn { Name = "Label", Source = new[] { new TextLoaderRange(0) } },
                    new TextLoaderColumn { Name = "Placeholder", Source = new[] { new TextLoaderRange(1, 784) } }
                };
                var loaderOutput = experiment.Add(loader);

                // Score the data through a frozen TensorFlow MNIST model.
                var tfScorer = new Legacy.Transforms.TensorFlowScorer
                {
                    Data = loaderOutput.Data,
                    ModelLocation = "mnist_model/frozen_saved_model.pb",
                    InputColumns = new[] { "Placeholder" },
                    OutputColumns = new[] { "Softmax" },
                };
                var tfScorerOutput = experiment.Add(tfScorer);

                experiment.Compile();
                experiment.SetInput(loader.InputFile, new SimpleFileHandle(env, dataPath, false, false));
                experiment.Run();

                // Expect the two loaded columns plus the appended 10-class "Softmax" vector.
                var data = experiment.GetOutput(tfScorerOutput.OutputData);
                var schema = data.Schema;
                Assert.Equal(3, schema.ColumnCount);
                Assert.Equal("Softmax", schema.GetColumnName(2));
                Assert.Equal(10, schema.GetColumnType(2).VectorSize);
            }
        }
예제 #10
0
            /// <summary>
            /// Adds a One-Versus-All node to <paramref name="experiment"/>, wiring the wrapped
            /// trainer (<c>_trainer</c>) into the OVA subgraph and connecting the previous
            /// pipeline step's data as the training data.
            /// </summary>
            /// <param name="previousStep">The preceding pipeline step; must be an
            /// <see cref="ILearningPipelineDataStep"/> when non-null.</param>
            /// <param name="experiment">The experiment the OVA node is added to.</param>
            /// <returns>A pipeline step wrapping the OVA node's output.</returns>
            public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
            {
                // NOTE(review): the subgraph is built from a locally created environment that is
                // disposed when this method returns, while the subgraph itself is handed to the
                // OVA node — presumably the experiment only needs the graph structure; verify.
                using (var env = new ConsoleEnvironment())
                {
                    var subgraph = env.CreateExperiment();
                    subgraph.Add(_trainer);
                    var ova = new OneVersusAll();
                    if (previousStep != null)
                    {
                        if (!(previousStep is ILearningPipelineDataStep dataStep))
                        {
                            throw new InvalidOperationException($"{ nameof(OneVersusAll)} only supports an { nameof(ILearningPipelineDataStep)} as an input.");
                        }

                        // NOTE(review): TrainingData, UseProbabilities, and Nodes are only set when
                        // previousStep is non-null — a null previousStep yields an unwired OVA node.
                        // Looks like callers always supply a data step; confirm.
                        _data                = dataStep.Data;
                        ova.TrainingData     = dataStep.Data;
                        ova.UseProbabilities = _useProbabilities;
                        ova.Nodes            = subgraph;
                    }
                    Output output = experiment.Add(ova);
                    return(new OvaPipelineStep(output));
                }
            }
예제 #11
0
        /// <summary>
        /// Train the model using the ML components in the pipeline.
        /// </summary>
        /// <typeparam name="TInput">Type of data instances the model will be trained on. It's a custom type defined by the user according to the structure of data.
        /// <para/>
        /// Please see https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet/get-started/windows for more details on input type.
        /// </typeparam>
        /// <typeparam name="TOutput">Ouput type. The prediction will be return based on this type.
        /// Please see https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet/get-started/windows for more details on output type.
        /// </typeparam>
        /// <returns>PredictionModel object. This is the model object used for prediction on new instances. </returns>
        public PredictionModel <TInput, TOutput> Train <TInput, TOutput>()
            where TInput : class
            where TOutput : class, new()
        {
            using (var environment = new ConsoleEnvironment(seed: _seed, conc: _conc))
            {
                Experiment                     experiment         = environment.CreateExperiment();
                ILearningPipelineStep          step               = null;
                List <ILearningPipelineLoader> loaders            = new List <ILearningPipelineLoader>();
                List <Var <ITransformModel> >  transformModels    = new List <Var <ITransformModel> >();
                Var <ITransformModel>          lastTransformModel = null;

                foreach (ILearningPipelineItem currentItem in this)
                {
                    if (currentItem is ILearningPipelineLoader loader)
                    {
                        loaders.Add(loader);
                    }

                    step = currentItem.ApplyStep(step, experiment);
                    if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null)
                    {
                        transformModels.Add(dataStep.Model);
                    }
예제 #12
0
        /// <summary>
        /// Cross-validates an SDCA binary classifier on breast-cancer data using a
        /// stratification column, then checks the OverallMetrics output: an average row,
        /// a standard-deviation row, and one row per fold whose AUC values average to
        /// the reported overall average.
        /// </summary>
        public void TestCrossValidationMacroWithStratification()
        {
            var dataPath = GetDataPath(@"breast-cancer.txt");

            using (var env = new ConsoleEnvironment(42))
            {
                // Subgraph executed once per fold: no-op transform -> SDCA trainer -> combined model.
                var subGraph = env.CreateExperiment();

                var nop       = new Legacy.Transforms.NoOperation();
                var nopOutput = subGraph.Add(nop);

                var learnerInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
                {
                    TrainingData = nopOutput.OutputData,
                    NumThreads   = 1
                };
                var learnerOutput = subGraph.Add(learnerInput);

                var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
                {
                    TransformModels = new ArrayVar <ITransformModel>(nopOutput.Model),
                    PredictorModel  = learnerOutput.PredictorModel
                };
                var modelCombineOutput = subGraph.Add(modelCombine);

                // Outer experiment: load the data with label, stratification, and feature columns.
                var experiment  = env.CreateExperiment();
                var importInput = new Legacy.Data.TextLoader(dataPath);
                importInput.Arguments.Column = new Legacy.Data.TextLoaderColumn[]
                {
                    new Legacy.Data.TextLoaderColumn {
                        Name = "Label", Source = new[] { new Legacy.Data.TextLoaderRange(0) }
                    },
                    new Legacy.Data.TextLoaderColumn {
                        Name = "Strat", Source = new[] { new Legacy.Data.TextLoaderRange(1) }
                    },
                    new Legacy.Data.TextLoaderColumn {
                        Name = "Features", Source = new[] { new Legacy.Data.TextLoaderRange(2, 9) }
                    }
                };
                var importOutput = experiment.Add(importInput);

                // Cross-validation macro, stratified on the "Strat" column; its inputs/outputs
                // are bound to the subgraph's entry and exit nodes.
                var crossValidate = new Legacy.Models.CrossValidator
                {
                    Data                 = importOutput.Data,
                    Nodes                = subGraph,
                    TransformModel       = null,
                    StratificationColumn = "Strat"
                };
                crossValidate.Inputs.Data            = nop.Data;
                crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel;
                var crossValidateOutput = experiment.Add(crossValidate);
                experiment.Compile();
                experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
                experiment.Run();
                var data = experiment.GetOutput(crossValidateOutput.OverallMetrics);

                var schema = data.Schema;
                var b      = schema.TryGetColumnIndex("AUC", out int metricCol);
                Assert.True(b);
                b = schema.TryGetColumnIndex("Fold Index", out int foldCol);
                Assert.True(b);
                using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol))
                {
                    var getter                 = cursor.GetGetter <double>(metricCol);
                    var foldGetter             = cursor.GetGetter <ReadOnlyMemory <char> >(foldCol);
                    ReadOnlyMemory <char> fold = default;

                    // Get the average.
                    b = cursor.MoveNext();
                    Assert.True(b);
                    double avg = 0;
                    getter(ref avg);
                    foldGetter(ref fold);
                    Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold));

                    // Get the standard deviation.
                    b = cursor.MoveNext();
                    Assert.True(b);
                    double stdev = 0;
                    getter(ref stdev);
                    foldGetter(ref fold);
                    Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold));
                    Assert.Equal(0.00485, stdev, 5);

                    // The per-fold AUCs (two folds) must average to the reported "Average" row.
                    double sum = 0;
                    double val = 0;
                    for (int f = 0; f < 2; f++)
                    {
                        b = cursor.MoveNext();
                        Assert.True(b);
                        getter(ref val);
                        foldGetter(ref fold);
                        sum += val;
                        Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold));
                    }
                    Assert.Equal(avg, sum / 2);
                    b = cursor.MoveNext();
                    Assert.False(b);
                }
            }
        }
예제 #13
0
        public void TestSimpleTrainExperiment()
        {
            var dataPath = GetDataPath("adult.tiny.with-schema.txt");

            using (var env = new ConsoleEnvironment())
            {
                var experiment = env.CreateExperiment();

                // Load the data set.
                var loader = new Legacy.Data.TextLoader(dataPath);
                var loaderOutput = experiment.Add(loader);

                // One-hot encode the categorical column.
                var oneHot = new Legacy.Transforms.CategoricalOneHotVectorizer { Data = loaderOutput.Data };
                oneHot.AddColumn("Categories");
                var oneHotOutput = experiment.Add(oneHot);

                // Concatenate encoded and numeric columns into a single features vector.
                var concat = new Legacy.Transforms.ColumnConcatenator { Data = oneHotOutput.OutputData };
                concat.AddColumn("Features", "Categories", "NumericFeatures");
                var concatOutput = experiment.Add(concat);

                // Train a single-threaded, unshuffled SDCA binary classifier with hinge loss.
                var trainer = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
                {
                    TrainingData = concatOutput.OutputData,
                    LossFunction = new HingeLossSDCAClassificationLossFunction() { Margin = 1.1f },
                    NumThreads = 1,
                    Shuffle = false
                };
                var trainerOutput = experiment.Add(trainer);

                // Score the training data with the learned model, then evaluate the scores.
                var scorer = new Legacy.Transforms.DatasetScorer
                {
                    Data = concatOutput.OutputData,
                    PredictorModel = trainerOutput.PredictorModel
                };
                var scorerOutput = experiment.Add(scorer);

                var evaluator = new Legacy.Models.BinaryClassificationEvaluator { Data = scorerOutput.ScoredData };
                var evaluatorOutput = experiment.Add(evaluator);

                experiment.Compile();
                experiment.SetInput(loader.InputFile, new SimpleFileHandle(env, dataPath, false, false));
                experiment.Run();

                // The overall metrics should contain a single row whose AUC is ~0.93.
                var data = experiment.GetOutput(evaluatorOutput.OverallMetrics);
                var schema = data.Schema;
                Assert.True(schema.TryGetColumnIndex("AUC", out int aucCol));
                using (var cursor = data.GetRowCursor(col => col == aucCol))
                {
                    var aucGetter = cursor.GetGetter<double>(aucCol);
                    Assert.True(cursor.MoveNext());
                    double auc = 0;
                    aucGetter(ref auc);
                    Assert.Equal(0.93, auc, 2);
                    Assert.False(cursor.MoveNext());
                }
            }
        }
예제 #14
0
        /// <summary>
        /// Cross-validates a multi-class logistic regression and checks that the macro's
        /// Warnings output reports both unseen class values and variable-length columns.
        /// Stratifying on a key built from the label makes folds with unseen labels likely.
        /// </summary>
        public void TestCrossValidationMacroMultiClassWithWarnings()
        {
            var dataPath = GetDataPath(@"Train-Tiny-28x28.txt");

            using (var env = new ConsoleEnvironment(42))
            {
                // Subgraph executed once per fold: no-op transform -> logistic regression.
                var subGraph = env.CreateExperiment();

                var nop       = new Legacy.Transforms.NoOperation();
                var nopOutput = subGraph.Add(nop);

                var learnerInput = new Legacy.Trainers.LogisticRegressionClassifier
                {
                    TrainingData = nopOutput.OutputData,
                    NumThreads   = 1
                };
                var learnerOutput = subGraph.Add(learnerInput);

                var experiment   = env.CreateExperiment();
                var importInput  = new Legacy.Data.TextLoader(dataPath);
                var importOutput = experiment.Add(importInput);

                // Keep only rows with label values 0 through 5.
                var filter = new Legacy.Transforms.RowRangeFilter();
                filter.Data   = importOutput.Data;
                filter.Column = "Label";
                filter.Min    = 0;
                filter.Max    = 5;
                var filterOutput = experiment.Add(filter);

                // Build a "Strat" key column from the label to stratify the folds on.
                var term = new Legacy.Transforms.TextToKeyConverter();
                term.Column = new[]
                {
                    new Legacy.Transforms.TermTransformColumn()
                    {
                        Source = "Label", Name = "Strat", Sort = Legacy.Transforms.TermTransformSortOrder.Value
                    }
                };
                term.Data = filterOutput.OutputData;
                var termOutput = experiment.Add(term);

                // Cross-validation macro bound to the subgraph's entry and exit nodes.
                var crossValidate = new Legacy.Models.CrossValidator
                {
                    Data                 = termOutput.OutputData,
                    Nodes                = subGraph,
                    Kind                 = Legacy.Models.MacroUtilsTrainerKinds.SignatureMultiClassClassifierTrainer,
                    TransformModel       = null,
                    StratificationColumn = "Strat"
                };
                crossValidate.Inputs.Data            = nop.Data;
                crossValidate.Outputs.PredictorModel = learnerOutput.PredictorModel;
                var crossValidateOutput = experiment.Add(crossValidate);

                experiment.Compile();
                importInput.SetInput(env, experiment);
                experiment.Run();
                var warnings = experiment.GetOutput(crossValidateOutput.Warnings);

                // Expect exactly two warnings: unseen class values, then variable-length columns.
                var schema = warnings.Schema;
                var b      = schema.TryGetColumnIndex("WarningText", out int warningCol);
                Assert.True(b);
                using (var cursor = warnings.GetRowCursor(col => col == warningCol))
                {
                    var getter = cursor.GetGetter <ReadOnlyMemory <char> >(warningCol);

                    b = cursor.MoveNext();
                    Assert.True(b);
                    var warning = default(ReadOnlyMemory <char>);
                    getter(ref warning);
                    Assert.Contains("test instances with class values not seen in the training set.", warning.ToString());
                    b = cursor.MoveNext();
                    Assert.True(b);
                    getter(ref warning);
                    Assert.Contains("Detected columns of variable length: SortedScores, SortedClasses", warning.ToString());
                    b = cursor.MoveNext();
                    Assert.False(b);
                }
            }
        }
예제 #15
0
        /// <summary>
        /// Exercises the CrossValidator macro on a multi-class task: a per-fold sub-graph
        /// (no-op transform feeding an SDCA multi-class learner, combined into one model)
        /// is cross-validated over two folds, then the overall metrics, the confusion
        /// matrix, and the (expected-empty) warnings output are validated.
        /// </summary>
        public void TestCrossValidationMacroWithMultiClass()
        {
            var dataPath = GetDataPath(@"Train-Tiny-28x28.txt");

            // Seeded environment (42) so fold splits and learner results are reproducible.
            using (var env = new ConsoleEnvironment(42))
            {
                // Sub-graph the macro executes once per fold.
                var subGraph = env.CreateExperiment();

                var nop       = new Legacy.Transforms.NoOperation();
                var nopOutput = subGraph.Add(nop);

                var learnerInput = new Legacy.Trainers.StochasticDualCoordinateAscentClassifier
                {
                    TrainingData = nopOutput.OutputData,
                    NumThreads   = 1
                };
                var learnerOutput = subGraph.Add(learnerInput);

                // Fold the (no-op) transform model and the predictor into a single model.
                var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
                {
                    TransformModels = new ArrayVar <ITransformModel>(nopOutput.Model),
                    PredictorModel  = learnerOutput.PredictorModel
                };
                var modelCombineOutput = subGraph.Add(modelCombine);

                var experiment   = env.CreateExperiment();
                var importInput  = new Legacy.Data.TextLoader(dataPath);
                var importOutput = experiment.Add(importInput);

                var crossValidate = new Legacy.Models.CrossValidator
                {
                    Data           = importOutput.Data,
                    Nodes          = subGraph,
                    Kind           = Legacy.Models.MacroUtilsTrainerKinds.SignatureMultiClassClassifierTrainer,
                    TransformModel = null
                };
                // Wire the macro's per-fold input/output to the sub-graph's endpoints.
                crossValidate.Inputs.Data            = nop.Data;
                crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel;
                var crossValidateOutput = experiment.Add(crossValidate);

                experiment.Compile();
                importInput.SetInput(env, experiment);
                experiment.Run();
                var data = experiment.GetOutput(crossValidateOutput.OverallMetrics);

                var schema = data.Schema;
                var b      = schema.TryGetColumnIndex("Accuracy(micro-avg)", out int metricCol);
                Assert.True(b);
                b = schema.TryGetColumnIndex("Fold Index", out int foldCol);
                Assert.True(b);
                // Expected row order: Average, Standard Deviation, then one row per fold.
                using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol))
                {
                    var getter                 = cursor.GetGetter <double>(metricCol);
                    var foldGetter             = cursor.GetGetter <ReadOnlyMemory <char> >(foldCol);
                    ReadOnlyMemory <char> fold = default;

                    // Get the average.
                    b = cursor.MoveNext();
                    Assert.True(b);
                    double avg = 0;
                    getter(ref avg);
                    foldGetter(ref fold);
                    Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold));

                    // Get the standard deviation.
                    b = cursor.MoveNext();
                    Assert.True(b);
                    double stdev = 0;
                    getter(ref stdev);
                    foldGetter(ref fold);
                    Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold));
                    Assert.Equal(0.025, stdev, 3);

                    // The two per-fold metrics must average to the reported overall average.
                    double sum = 0;
                    double val = 0;
                    for (int f = 0; f < 2; f++)
                    {
                        b = cursor.MoveNext();
                        Assert.True(b);
                        getter(ref val);
                        foldGetter(ref fold);
                        sum += val;
                        Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold));
                    }
                    Assert.Equal(avg, sum / 2);
                    b = cursor.MoveNext();
                    Assert.False(b);
                }

                // Confusion matrix: the Count column must carry 10 slot names ("0".."9"),
                // and the output must contain exactly 10 rows per fold.
                var confusion = experiment.GetOutput(crossValidateOutput.ConfusionMatrix);
                schema = confusion.Schema;
                b      = schema.TryGetColumnIndex("Count", out int countCol);
                Assert.True(b);
                b = schema.TryGetColumnIndex("Fold Index", out foldCol);
                Assert.True(b);
                var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, countCol);
                Assert.True(type != null && type.ItemType.IsText && type.VectorSize == 10);
                var slotNames = default(VBuffer <ReadOnlyMemory <char> >);
                schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countCol, ref slotNames);
                Assert.True(slotNames.Values.Select((s, i) => ReadOnlyMemoryUtils.EqualsStr(i.ToString(), s)).All(x => x));
                using (var curs = confusion.GetRowCursor(col => true))
                {
                    var countGetter = curs.GetGetter <VBuffer <double> >(countCol);
                    var foldGetter  = curs.GetGetter <ReadOnlyMemory <char> >(foldCol);
                    var confCount   = default(VBuffer <double>);
                    var foldIndex   = default(ReadOnlyMemory <char>);
                    int rowCount    = 0;
                    var foldCur     = "Fold 0";
                    while (curs.MoveNext())
                    {
                        countGetter(ref confCount);
                        foldGetter(ref foldIndex);
                        rowCount++;
                        Assert.True(ReadOnlyMemoryUtils.EqualsStr(foldCur, foldIndex));
                        // After 10 rows the cursor should move on to the second fold.
                        if (rowCount == 10)
                        {
                            rowCount = 0;
                            foldCur  = "Fold 1";
                        }
                    }
                    Assert.Equal(0, rowCount);
                }

                // No warnings are expected for this run.
                var warnings = experiment.GetOutput(crossValidateOutput.Warnings);
                using (var cursor = warnings.GetRowCursor(col => true))
                    Assert.False(cursor.MoveNext());
            }
        }
예제 #16
0
        /// <summary>
        /// Exercises the CrossValidator macro on a regression task with a weight column:
        /// the per-fold sub-graph synthesizes a random "Weight1" column and trains a
        /// weighted Poisson regressor; the overall metrics are then checked in both their
        /// weighted and unweighted variants (row order: Average, Standard Deviation, then
        /// per-fold rows, each appearing once unweighted and once weighted).
        /// </summary>
        public void TestCrossValidationMacro()
        {
            var dataPath = GetDataPath(TestDatasets.generatedRegressionDatasetmacro.trainFilename);

            // Seeded environment (42) so fold splits and results are reproducible.
            using (var env = new ConsoleEnvironment(42))
            {
                // Sub-graph the macro executes once per fold.
                var subGraph = env.CreateExperiment();

                var nop       = new Legacy.Transforms.NoOperation();
                var nopOutput = subGraph.Add(nop);

                // Synthesize a per-row random weight so the macro reports weighted metrics too.
                var generate = new Legacy.Transforms.RandomNumberGenerator();
                generate.Column = new[] { new Legacy.Transforms.GenerateNumberTransformColumn()
                                          {
                                              Name = "Weight1"
                                          } };
                generate.Data = nopOutput.OutputData;
                var generateOutput = subGraph.Add(generate);

                var learnerInput = new Legacy.Trainers.PoissonRegressor
                {
                    TrainingData = generateOutput.OutputData,
                    NumThreads   = 1,
                    WeightColumn = "Weight1"
                };
                var learnerOutput = subGraph.Add(learnerInput);

                // Fold the transform models and the predictor into a single model.
                var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
                {
                    TransformModels = new ArrayVar <ITransformModel>(nopOutput.Model, generateOutput.Model),
                    PredictorModel  = learnerOutput.PredictorModel
                };
                var modelCombineOutput = subGraph.Add(modelCombine);

                var experiment  = env.CreateExperiment();
                var importInput = new Legacy.Data.TextLoader(dataPath)
                {
                    Arguments = new Legacy.Data.TextLoaderArguments
                    {
                        Separator = new[] { ';' },
                        HasHeader = true,
                        Column    = new[]
                        {
                            new TextLoaderColumn()
                            {
                                Name   = "Label",
                                Source = new [] { new TextLoaderRange(11) },
                                Type   = Legacy.Data.DataKind.Num
                            },

                            new TextLoaderColumn()
                            {
                                Name   = "Features",
                                Source = new [] { new TextLoaderRange(0, 10) },
                                Type   = Legacy.Data.DataKind.Num
                            }
                        }
                    }
                };
                var importOutput = experiment.Add(importInput);

                var crossValidate = new Legacy.Models.CrossValidator
                {
                    Data           = importOutput.Data,
                    Nodes          = subGraph,
                    Kind           = Legacy.Models.MacroUtilsTrainerKinds.SignatureRegressorTrainer,
                    TransformModel = null,
                    WeightColumn   = "Weight1"
                };
                // Wire the macro's per-fold input/output to the sub-graph's endpoints.
                crossValidate.Inputs.Data            = nop.Data;
                crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel;
                var crossValidateOutput = experiment.Add(crossValidate);

                experiment.Compile();
                importInput.SetInput(env, experiment);
                experiment.Run();
                var data = experiment.GetOutput(crossValidateOutput.OverallMetrics);

                var schema = data.Schema;
                var b      = schema.TryGetColumnIndex("L1(avg)", out int metricCol);
                Assert.True(b);
                b = schema.TryGetColumnIndex("Fold Index", out int foldCol);
                Assert.True(b);
                b = schema.TryGetColumnIndex("IsWeighted", out int isWeightedCol);
                // BUGFIX: this lookup's result was previously not asserted, unlike the two
                // above; a missing "IsWeighted" column would have surfaced as an obscure
                // getter failure below instead of a clear assertion.
                Assert.True(b);
                using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol || col == isWeightedCol))
                {
                    var getter                 = cursor.GetGetter <double>(metricCol);
                    var foldGetter             = cursor.GetGetter <ReadOnlyMemory <char> >(foldCol);
                    ReadOnlyMemory <char> fold = default;
                    var    isWeightedGetter    = cursor.GetGetter <bool>(isWeightedCol);
                    bool   isWeighted          = default;
                    double avg                 = 0;
                    double weightedAvg         = 0;
                    // First pass w == 0 reads the unweighted rows, w == 1 the weighted ones.
                    for (int w = 0; w < 2; w++)
                    {
                        // Get the average.
                        b = cursor.MoveNext();
                        Assert.True(b);
                        if (w == 1)
                        {
                            getter(ref weightedAvg);
                        }
                        else
                        {
                            getter(ref avg);
                        }
                        foldGetter(ref fold);
                        Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold));
                        isWeightedGetter(ref isWeighted);
                        Assert.True(isWeighted == (w == 1));

                        // Get the standard deviation.
                        b = cursor.MoveNext();
                        Assert.True(b);
                        double stdev = 0;
                        getter(ref stdev);
                        foldGetter(ref fold);
                        Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold));
                        if (w == 1)
                        {
                            Assert.Equal(1.585, stdev, 3);
                        }
                        else
                        {
                            Assert.Equal(1.39, stdev, 2);
                        }
                        isWeightedGetter(ref isWeighted);
                        Assert.True(isWeighted == (w == 1));
                    }
                    // Each fold contributes an unweighted and a weighted row; both families
                    // must average to the corresponding overall value read above.
                    double sum         = 0;
                    double weightedSum = 0;
                    for (int f = 0; f < 2; f++)
                    {
                        for (int w = 0; w < 2; w++)
                        {
                            b = cursor.MoveNext();
                            Assert.True(b);
                            double val = 0;
                            getter(ref val);
                            foldGetter(ref fold);
                            if (w == 1)
                            {
                                weightedSum += val;
                            }
                            else
                            {
                                sum += val;
                            }
                            Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold));
                            isWeightedGetter(ref isWeighted);
                            Assert.True(isWeighted == (w == 1));
                        }
                    }
                    Assert.Equal(weightedAvg, weightedSum / 2);
                    Assert.Equal(avg, sum / 2);
                    b = cursor.MoveNext();
                    Assert.False(b);
                }
            }
        }
예제 #17
0
        /// <summary>
        /// Runs the binary cross-validation macro over a pipeline of categorical one-hot
        /// encoding, column concatenation and logistic regression, and validates the
        /// AUC reported in the overall metrics of the first output.
        /// </summary>
        public void TestCrossValidationBinaryMacro()
        {
            var dataPath = GetDataPath("adult.tiny.with-schema.txt");

            using (var env = new ConsoleEnvironment())
            {
                // Per-fold training sub-graph: encode, concatenate, train.
                var foldGraph = env.CreateExperiment();

                var oneHot = new Legacy.Transforms.CategoricalOneHotVectorizer();
                oneHot.AddColumn("Categories");
                var oneHotStep = foldGraph.Add(oneHot);

                var concat = new Legacy.Transforms.ColumnConcatenator
                {
                    Data = oneHotStep.OutputData
                };
                concat.AddColumn("Features", "Categories", "NumericFeatures");
                var concatStep = foldGraph.Add(concat);

                var logReg = new Legacy.Trainers.LogisticRegressionBinaryClassifier
                {
                    TrainingData = concatStep.OutputData,
                    NumThreads   = 1
                };
                var logRegStep = foldGraph.Add(logReg);

                // Fold the transform models and the predictor into a single model.
                var combiner = new Legacy.Transforms.ManyHeterogeneousModelCombiner
                {
                    TransformModels = new ArrayVar<ITransformModel>(oneHotStep.Model, concatStep.Model),
                    PredictorModel  = logRegStep.PredictorModel
                };
                var combinerStep = foldGraph.Add(combiner);

                var experiment = env.CreateExperiment();

                var loader     = new Legacy.Data.TextLoader(dataPath);
                var loaderStep = experiment.Add(loader);

                var binaryCv = new Legacy.Models.BinaryCrossValidator
                {
                    Data  = loaderStep.Data,
                    Nodes = foldGraph
                };
                // Wire the macro's per-fold input/output to the sub-graph's endpoints.
                binaryCv.Inputs.Data   = oneHot.Data;
                binaryCv.Outputs.Model = combinerStep.PredictorModel;
                var binaryCvStep = experiment.Add(binaryCv);

                experiment.Compile();
                loader.SetInput(env, experiment);
                experiment.Run();
                var metrics = experiment.GetOutput(binaryCvStep.OverallMetrics[0]);

                Assert.True(metrics.Schema.TryGetColumnIndex("AUC", out int aucCol));
                using (var cursor = metrics.GetRowCursor(col => col == aucCol))
                {
                    var aucGetter = cursor.GetGetter<double>(aucCol);
                    Assert.True(cursor.MoveNext());
                    double auc = 0;
                    aucGetter(ref auc);
                    Assert.Equal(0.87, auc, 1);
                    // Exactly one metrics row is expected.
                    Assert.False(cursor.MoveNext());
                }
            }
        }
예제 #18
0
        /// <summary>
        /// Loads a sparse text file through the legacy TextLoader and verifies that all
        /// three rows materialize with the expected dense values (unspecified sparse
        /// entries read back as 0).
        /// </summary>
        public void CanSuccessfullyRetrieveSparseData()
        {
            string dataPath = GetDataPath("SparseData.txt");
            var    loader   = new Legacy.Data.TextLoader(dataPath).CreateFrom <SparseInput>(useHeader: true, allowQuotedStrings: false, supportSparse: true);

            using (var environment = new ConsoleEnvironment())
            {
                Experiment experiment = environment.CreateExperiment();
                Legacy.ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as Legacy.ILearningPipelineDataStep;

                experiment.Compile();
                loader.SetInput(environment, experiment);
                experiment.Run();

                IDataView data = experiment.GetOutput(output.Data);
                Assert.NotNull(data);

                using (var cursor = data.GetRowCursor((a => true)))
                {
                    var getters = new ValueGetter <float>[] {
                        cursor.GetGetter <float>(0),
                        cursor.GetGetter <float>(1),
                        cursor.GetGetter <float>(2),
                        cursor.GetGetter <float>(3),
                        cursor.GetGetter <float>(4)
                    };

                    // Advances the cursor and asserts the next row equals 'targets'.
                    // (Replaces three verbatim copies of the same advance-and-check loop.)
                    void AssertNextRow(float[] targets)
                    {
                        Assert.True(cursor.MoveNext());
                        for (int i = 0; i < getters.Length; i++)
                        {
                            float value = 0;
                            getters[i](ref value);
                            Assert.Equal(targets[i], value);
                        }
                    }

                    AssertNextRow(new float[] { 1, 2, 3, 4, 5 });
                    AssertNextRow(new float[] { 0, 0, 0, 4, 5 });
                    AssertNextRow(new float[] { 0, 2, 0, 0, 0 });

                    Assert.False(cursor.MoveNext());
                }
            }
        }
예제 #19
0
        /// <summary>
        /// Builds a collection data source from three in-memory <c>Input</c> rows and
        /// verifies the pipeline yields each row back in order, with both the numeric
        /// and the text column intact.
        /// </summary>
        public void CanSuccessfullyEnumerated()
        {
            var collection = CollectionDataSource.Create(new List <Input>()
            {
                new Input {
                    Number1 = 1, String1 = "1"
                },
                new Input {
                    Number1 = 2, String1 = "2"
                },
                new Input {
                    Number1 = 3, String1 = "3"
                }
            });

            using (var environment = new ConsoleEnvironment())
            {
                Experiment experiment = environment.CreateExperiment();
                Legacy.ILearningPipelineDataStep output = collection.ApplyStep(null, experiment) as Legacy.ILearningPipelineDataStep;

                experiment.Compile();
                collection.SetInput(environment, experiment);
                experiment.Run();

                IDataView data = experiment.GetOutput(output.Data);
                Assert.NotNull(data);

                using (var cursor = data.GetRowCursor((a => true)))
                {
                    var IDGetter   = cursor.GetGetter <float>(0);
                    var TextGetter = cursor.GetGetter <ReadOnlyMemory <char> >(1);

                    // Each source row carries Number1 == i and String1 == i.ToString();
                    // loop over the expected values instead of repeating the read block
                    // three times verbatim.
                    for (int i = 1; i <= 3; i++)
                    {
                        Assert.True(cursor.MoveNext());

                        float id = 0;
                        IDGetter(ref id);
                        Assert.Equal(i, id);

                        ReadOnlyMemory <char> text = default;
                        TextGetter(ref text);
                        Assert.Equal(i.ToString(), text.ToString());
                    }

                    Assert.False(cursor.MoveNext());
                }
            }
        }
예제 #20
0
        /// <summary>
        /// Exercises the CrossValidator macro on a ranking task whose label and group-id
        /// columns use non-default names ("Label1", "GroupId1") produced inside the
        /// per-fold sub-graph, and whose name column is "Workclass". Checks the NDCG
        /// vector in the overall metrics and samples the per-instance output.
        /// </summary>
        public void TestCrossValidationMacroWithNonDefaultNames()
        {
            string dataPath = GetDataPath(@"adult.tiny.with-schema.txt");

            // Seeded environment (42) so fold splits and results are reproducible.
            using (var env = new ConsoleEnvironment(42))
            {
                // Sub-graph the macro executes once per fold: derive the non-default
                // label/group columns, then train a ranker on them.
                var subGraph = env.CreateExperiment();

                var textToKey = new Legacy.Transforms.TextToKeyConverter();
                textToKey.Column = new[] { new Legacy.Transforms.TermTransformColumn()
                                           {
                                               Name = "Label1", Source = "Label"
                                           } };
                var textToKeyOutput = subGraph.Add(textToKey);

                var hash = new Legacy.Transforms.HashConverter();
                hash.Column = new[] { new Legacy.Transforms.HashJoinTransformColumn()
                                      {
                                          Name = "GroupId1", Source = "Workclass"
                                      } };
                hash.Data = textToKeyOutput.OutputData;
                var hashOutput = subGraph.Add(hash);

                var learnerInput = new Legacy.Trainers.FastTreeRanker
                {
                    TrainingData  = hashOutput.OutputData,
                    NumThreads    = 1,
                    LabelColumn   = "Label1",
                    GroupIdColumn = "GroupId1"
                };
                var learnerOutput = subGraph.Add(learnerInput);

                // Fold the transform models and the predictor into a single model.
                var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
                {
                    TransformModels = new ArrayVar <ITransformModel>(textToKeyOutput.Model, hashOutput.Model),
                    PredictorModel  = learnerOutput.PredictorModel
                };
                var modelCombineOutput = subGraph.Add(modelCombine);

                var experiment  = env.CreateExperiment();
                var importInput = new Legacy.Data.TextLoader(dataPath);
                importInput.Arguments.HasHeader = true;
                importInput.Arguments.Column    = new TextLoaderColumn[]
                {
                    new TextLoaderColumn {
                        Name = "Label", Source = new[] { new TextLoaderRange(0) }
                    },
                    new TextLoaderColumn {
                        Name = "Workclass", Source = new[] { new TextLoaderRange(1) }, Type = Legacy.Data.DataKind.Text
                    },
                    new TextLoaderColumn {
                        Name = "Features", Source = new[] { new TextLoaderRange(9, 14) }
                    }
                };
                var importOutput = experiment.Add(importInput);

                var crossValidate = new Legacy.Models.CrossValidator
                {
                    Data           = importOutput.Data,
                    Nodes          = subGraph,
                    TransformModel = null,
                    LabelColumn    = "Label1",
                    GroupColumn    = "GroupId1",
                    NameColumn     = "Workclass",
                    Kind           = Legacy.Models.MacroUtilsTrainerKinds.SignatureRankerTrainer
                };
                // Wire the macro's per-fold input/output to the sub-graph's endpoints.
                crossValidate.Inputs.Data            = textToKey.Data;
                crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel;
                var crossValidateOutput = experiment.Add(crossValidate);
                experiment.Compile();
                experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
                experiment.Run();
                var data = experiment.GetOutput(crossValidateOutput.OverallMetrics);

                var schema = data.Schema;
                var b      = schema.TryGetColumnIndex("NDCG", out int metricCol);
                Assert.True(b);
                b = schema.TryGetColumnIndex("Fold Index", out int foldCol);
                Assert.True(b);
                // Expected row order: Average, Standard Deviation, then one row per fold.
                // NDCG is a vector-valued metric (one value per truncation level).
                using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol))
                {
                    var getter                 = cursor.GetGetter <VBuffer <double> >(metricCol);
                    var foldGetter             = cursor.GetGetter <ReadOnlyMemory <char> >(foldCol);
                    ReadOnlyMemory <char> fold = default;

                    // Get the average.
                    b = cursor.MoveNext();
                    Assert.True(b);
                    var avg = default(VBuffer <double>);
                    getter(ref avg);
                    foldGetter(ref fold);
                    Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold));

                    // Get the standard deviation.
                    b = cursor.MoveNext();
                    Assert.True(b);
                    var stdev = default(VBuffer <double>);
                    getter(ref stdev);
                    foldGetter(ref fold);
                    Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold));
                    Assert.Equal(2.462, stdev.Values[0], 3);
                    Assert.Equal(2.763, stdev.Values[1], 3);
                    Assert.Equal(3.273, stdev.Values[2], 3);

                    // Accumulate the per-fold NDCG vectors; their mean must equal the
                    // reported overall average, slot by slot.
                    var sumBldr = new BufferBuilder <double>(R8Adder.Instance);
                    sumBldr.Reset(avg.Length, true);
                    var val = default(VBuffer <double>);
                    for (int f = 0; f < 2; f++)
                    {
                        b = cursor.MoveNext();
                        Assert.True(b);
                        getter(ref val);
                        foldGetter(ref fold);
                        sumBldr.AddFeatures(0, ref val);
                        Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold));
                    }
                    var sum = default(VBuffer <double>);
                    sumBldr.GetResult(ref sum);
                    for (int i = 0; i < avg.Length; i++)
                    {
                        Assert.Equal(avg.Values[i], sum.Values[i] / 2);
                    }
                    b = cursor.MoveNext();
                    Assert.False(b);
                }

                // Per-instance output: the "Instance" column should carry the Workclass
                // name values; sample the first few rows only.
                data = experiment.GetOutput(crossValidateOutput.PerInstanceMetrics);
                Assert.True(data.Schema.TryGetColumnIndex("Instance", out int nameCol));
                using (var cursor = data.GetRowCursor(col => col == nameCol))
                {
                    var getter = cursor.GetGetter <ReadOnlyMemory <char> >(nameCol);
                    while (cursor.MoveNext())
                    {
                        ReadOnlyMemory <char> name = default;
                        getter(ref name);
                        Assert.Subset(new HashSet <string>()
                        {
                            "Private", "?", "Federal-gov"
                        }, new HashSet <string>()
                        {
                            name.ToString()
                        });
                        if (cursor.Position > 4)
                        {
                            break;
                        }
                    }
                }
            }
        }
예제 #21
0
        /// <summary>
        /// Trains a one-versus-all multi-class model from an uncalibrated binary learner
        /// (averaged perceptron) on the iris dataset, scores the training data and checks
        /// the macro-averaged accuracy reported by the classification evaluator.
        /// </summary>
        public void TestOvaMacroWithUncalibratedLearner()
        {
            var dataPath = GetDataPath(@"iris.txt");

            using (var env = new ConsoleEnvironment(42))
            {
                // Sub-graph for OVA: one averaged perceptron trained per class.
                var binaryGraph = env.CreateExperiment();
                var perceptron  = new Legacy.Trainers.AveragedPerceptronBinaryClassifier {
                    Shuffle = false
                };
                binaryGraph.Add(perceptron);

                // Main pipeline: load, run the OVA macro, score, evaluate.
                var experiment = env.CreateExperiment();
                var loader     = new Legacy.Data.TextLoader(dataPath);
                loader.Arguments.Column = new TextLoaderColumn[]
                {
                    new TextLoaderColumn {
                        Name = "Label", Source = new[] { new TextLoaderRange(0) }
                    },
                    new TextLoaderColumn {
                        Name = "Features", Source = new[] { new TextLoaderRange(1, 4) }
                    }
                };
                var loaderStep = experiment.Add(loader);

                var ova = new Legacy.Models.OneVersusAll
                {
                    TrainingData     = loaderStep.Data,
                    Nodes            = binaryGraph,
                    UseProbabilities = true,
                };
                var ovaStep = experiment.Add(ova);

                var scorer = new Legacy.Transforms.DatasetScorer
                {
                    Data           = loaderStep.Data,
                    PredictorModel = ovaStep.PredictorModel
                };
                var scorerStep = experiment.Add(scorer);

                var evaluator = new Legacy.Models.ClassificationEvaluator
                {
                    Data = scorerStep.ScoredData
                };
                var evaluatorStep = experiment.Add(evaluator);

                experiment.Compile();
                experiment.SetInput(loader.InputFile, new SimpleFileHandle(env, dataPath, false, false));
                experiment.Run();

                var metrics = experiment.GetOutput(evaluatorStep.OverallMetrics);
                Assert.True(metrics.Schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int accCol));
                using (var cursor = metrics.GetRowCursor(col => col == accCol))
                {
                    var accGetter = cursor.GetGetter <double>(accCol);
                    Assert.True(cursor.MoveNext());
                    double acc = 0;
                    accGetter(ref acc);
                    Assert.Equal(0.71, acc, 2);
                    // Exactly one metrics row is expected.
                    Assert.False(cursor.MoveNext());
                }
            }
        }
예제 #22
0
        /// <summary>
        /// Builds, trains, and returns a scoring model for the KC house-price dataset.
        /// The pipeline loads the CSV, concatenates numeric and categorical columns,
        /// one-hot encodes the categorical vector, trains an SDCA regressor, and bundles
        /// all transforms plus the trained predictor into a single scoring transform.
        /// </summary>
        /// <param name="dataPath">Path to the comma-separated KC house-price data file (with header row).</param>
        /// <returns>The combined transform model usable for scoring new rows.</returns>
        private static ITransformModel CreateKcHousePricePredictorModel(string dataPath)
        {
            // Local helper: declares one input column read from a single zero-based
            // source index in the CSV file. Collapses 21 copy-pasted initializers.
            TextLoaderColumn Column(string name, int sourceIndex, Legacy.Data.DataKind type) =>
                new TextLoaderColumn
                {
                    Name   = name,
                    Source = new[] { new TextLoaderRange(sourceIndex) },
                    Type   = type,
                };

            Experiment experiment = s_environment.CreateExperiment();

            var importData = new Legacy.Data.TextLoader(dataPath)
            {
                Arguments = new TextLoaderArguments
                {
                    Separator = new[] { ',' },
                    HasHeader = true,
                    Column    = new[]
                    {
                        Column("Id",            0,  Legacy.Data.DataKind.Text),
                        Column("Date",          1,  Legacy.Data.DataKind.Text),
                        Column("Label",         2,  Legacy.Data.DataKind.Num),
                        Column("Bedrooms",      3,  Legacy.Data.DataKind.Num),
                        Column("Bathrooms",     4,  Legacy.Data.DataKind.Num),
                        Column("SqftLiving",    5,  Legacy.Data.DataKind.Num),
                        Column("SqftLot",       6,  Legacy.Data.DataKind.Num),
                        Column("Floors",        7,  Legacy.Data.DataKind.Num),
                        Column("Waterfront",    8,  Legacy.Data.DataKind.Num),
                        Column("View",          9,  Legacy.Data.DataKind.Num),
                        Column("Condition",     10, Legacy.Data.DataKind.Num),
                        Column("Grade",         11, Legacy.Data.DataKind.Num),
                        Column("SqftAbove",     12, Legacy.Data.DataKind.Num),
                        Column("SqftBasement",  13, Legacy.Data.DataKind.Num),
                        Column("YearBuilt",     14, Legacy.Data.DataKind.Num),
                        Column("YearRenovated", 15, Legacy.Data.DataKind.Num),
                        Column("Zipcode",       16, Legacy.Data.DataKind.Num),
                        Column("Lat",           17, Legacy.Data.DataKind.Num),
                        Column("Long",          18, Legacy.Data.DataKind.Num),
                        Column("SqftLiving15",  19, Legacy.Data.DataKind.Num),
                        Column("SqftLot15",     20, Legacy.Data.DataKind.Num),
                    }
                }
            };

            Legacy.Data.TextLoader.Output imported = experiment.Add(importData);

            // Concatenate the continuous-valued columns into a single numeric vector.
            var numericalConcatenate = new Legacy.Transforms.ColumnConcatenator();
            numericalConcatenate.Data = imported.Data;
            numericalConcatenate.AddColumn("NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15");
            Legacy.Transforms.ColumnConcatenator.Output numericalConcatenated = experiment.Add(numericalConcatenate);

            // Concatenate the discrete-valued columns so they can be one-hot encoded together.
            var categoryConcatenate = new Legacy.Transforms.ColumnConcatenator();
            categoryConcatenate.Data = numericalConcatenated.OutputData;
            categoryConcatenate.AddColumn("CategoryFeatures", "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode");
            Legacy.Transforms.ColumnConcatenator.Output categoryConcatenated = experiment.Add(categoryConcatenate);

            // One-hot encode the categorical vector.
            var categorize = new Legacy.Transforms.CategoricalOneHotVectorizer();
            categorize.AddColumn("CategoryFeatures");
            categorize.Data = categoryConcatenated.OutputData;
            Legacy.Transforms.CategoricalOneHotVectorizer.Output categorized = experiment.Add(categorize);

            // Final feature vector = numeric features + encoded categorical features.
            var featuresConcatenate = new Legacy.Transforms.ColumnConcatenator();
            featuresConcatenate.Data = categorized.OutputData;
            featuresConcatenate.AddColumn("Features", "NumericalFeatures", "CategoryFeatures");
            Legacy.Transforms.ColumnConcatenator.Output featuresConcatenated = experiment.Add(featuresConcatenate);

            // Train a linear regressor; single-threaded for deterministic results.
            var learner = new Legacy.Trainers.StochasticDualCoordinateAscentRegressor();
            learner.TrainingData = featuresConcatenated.OutputData;
            learner.NumThreads   = 1;
            Legacy.Trainers.StochasticDualCoordinateAscentRegressor.Output learnerOutput = experiment.Add(learner);

            // Bundle every data transform together with the trained predictor...
            var combineModels = new Legacy.Transforms.ManyHeterogeneousModelCombiner();
            combineModels.TransformModels = new ArrayVar<ITransformModel>(numericalConcatenated.Model, categoryConcatenated.Model, categorized.Model, featuresConcatenated.Model);
            combineModels.PredictorModel  = learnerOutput.PredictorModel;
            Legacy.Transforms.ManyHeterogeneousModelCombiner.Output combinedModels = experiment.Add(combineModels);

            // ...and wrap the combined model in a scorer so it can be used for prediction.
            var scorer = new Legacy.Transforms.Scorer
            {
                PredictorModel = combinedModels.PredictorModel
            };
            var scorerOutput = experiment.Add(scorer);

            experiment.Compile();
            experiment.SetInput(importData.InputFile, new SimpleFileHandle(s_environment, dataPath, false, false));
            experiment.Run();

            return experiment.GetOutput(scorerOutput.ScoringTransform);
        }