Exemple #1
0
 /// <summary>
 /// Round-trips two rows through a data view - one with all fields null, one with all
 /// fields set to the MinValue of their type - and asserts that every field of every
 /// row read back compares equal to null.
 /// </summary>
 public void ConversionMinValueToNullBehavior()
 {
     using (var env = new TlcEnvironment())
     {
         var data = new List<ConversionLossMinValueClass>
         {
             new ConversionLossMinValueClass()
             {
                 fSByte = null, fInt = null, fLong = null, fShort = null
             },
             new ConversionLossMinValueClass()
             {
                 fSByte = sbyte.MinValue, fInt = int.MinValue, fLong = long.MinValue, fShort = short.MinValue
             }
         };

         // A single pass is enough: the previous per-field reflection loop never used its
         // loop variable and only repeated the identical check.
         var dataView = ComponentCreation.CreateDataView(env, data);
         int rowCount = 0;

         // foreach disposes the underlying enumerator for us.
         foreach (var row in dataView.AsEnumerable<ConversionLossMinValueClass>(env, false))
         {
             Assert.True(row.fInt == null, "fInt should read back as null.");
             Assert.True(row.fLong == null, "fLong should read back as null.");
             Assert.True(row.fSByte == null, "fSByte should read back as null.");
             Assert.True(row.fShort == null, "fShort should read back as null.");
             rowCount++;
         }

         // Guard against a vacuous pass when no rows come back at all.
         Assert.Equal(data.Count, rowCount);
     }
 }
Exemple #2
0
        /// <summary>
        /// Performs train-test on a pipeline.
        /// </summary>
        /// <typeparam name="TInput">Class type that represents input schema.</typeparam>
        /// <typeparam name="TOutput">Class type that represents prediction schema.</typeparam>
        /// <param name="pipeline">Machine learning pipeline that contains <see cref="ILearningPipelineLoader"/>,
        /// transforms and at least one trainer.</param>
        /// <param name="testData"><see cref="ILearningPipelineLoader"/> that represents the test dataset.</param>
        /// <returns>Metrics and predictor model.</returns>
        public TrainTestEvaluatorOutput <TInput, TOutput> TrainTestEvaluate <TInput, TOutput>(LearningPipeline pipeline, ILearningPipelineLoader testData)
            where TInput : class
            where TOutput : class, new()
        {
            using (var environment = new TlcEnvironment())
            {
                Experiment                     subGraph              = environment.CreateExperiment();
                ILearningPipelineStep          step                  = null;
                List <ILearningPipelineLoader> loaders               = new List <ILearningPipelineLoader>();
                List <Var <ITransformModel> >  transformModels       = new List <Var <ITransformModel> >();
                Var <ITransformModel>          lastTransformModel    = null;
                Var <IDataView>                firstPipelineDataStep = null;
                Var <IPredictorModel>          firstModel            = null;
                ILearningPipelineItem          firstTransform        = null;
                foreach (ILearningPipelineItem currentItem in pipeline)
                {
                    if (currentItem is ILearningPipelineLoader loader)
                    {
                        loaders.Add(loader);
                        continue;
                    }

                    step = currentItem.ApplyStep(step, subGraph);

                    if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null)
                    {
                        transformModels.Add(dataStep.Model);
                        if (firstPipelineDataStep == null)
                        {
                            firstPipelineDataStep = dataStep.Data;
                            firstTransform        = currentItem;
                        }
                    }
Exemple #3
0
        /// <summary>
        /// Called at the beginning of a test - it dumps the usage of the Arguments class(es).
        /// </summary>
        /// <param name="wrt">Writer that receives the "Usage:" header and the usage text.</param>
        /// <param name="defaults">Arguments instance; its runtime type and the instance itself
        /// are handed to <c>CmdParser.ArgumentsUsage</c>.</param>
        private static void Init(IndentingTextWriter wrt, object defaults)
        {
            // The environment is only needed to render the usage text, so dispose it as soon
            // as that is done, consistent with how the other tests scope TlcEnvironment.
            using (var env = new TlcEnvironment(seed: 42))
            {
                wrt.WriteLine("Usage:");
                wrt.WriteLine(CmdParser.ArgumentsUsage(env, defaults.GetType(), defaults, false, 200));
            }
        }
Exemple #4
0
        /// <summary>
        /// Clones two suggested learners, assigns different sweep-parameter values to each
        /// original and its clone, and checks that the values stay independent.
        /// </summary>
        public void TestPipelineNodeCloning()
        {
            using (var env = new TlcEnvironment())
            {
                var logisticOriginal = RecipeInference
                    .AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer)
                    .Where(candidate => candidate.PipelineNode != null)
                    .First(candidate => candidate.LearnerName.Contains("LogisticRegression"));

                var sdcaOriginal = RecipeInference
                    .AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer)
                    .Where(candidate => candidate.PipelineNode != null)
                    .First(candidate => candidate.LearnerName.Contains("StochasticDualCoordinateAscent"));

                // Take the clones first, then give original and clone distinct raw values.
                var logisticClone = logisticOriginal.Clone();
                logisticOriginal.PipelineNode.SweepParams[0].RawValue = 1.2f;
                logisticClone.PipelineNode.SweepParams[0].RawValue    = 3.5f;

                var sdcaClone = sdcaOriginal.Clone();
                sdcaOriginal.PipelineNode.SweepParams[0].RawValue = 3;
                sdcaClone.PipelineNode.SweepParams[0].RawValue    = 0;

                // Push the new values into the entry point objects ...
                env.Check(logisticOriginal.PipelineNode.UpdateProperties());
                env.Check(logisticClone.PipelineNode.UpdateProperties());
                env.Check(sdcaOriginal.PipelineNode.UpdateProperties());
                env.Check(sdcaClone.PipelineNode.UpdateProperties());

                // ... and verify that the entry point state agrees with the parameter values.
                env.Check(logisticOriginal.PipelineNode.CheckEntryPointStateMatchesParamValues());
                env.Check(logisticClone.PipelineNode.CheckEntryPointStateMatchesParamValues());
                env.Check(sdcaOriginal.PipelineNode.CheckEntryPointStateMatchesParamValues());
                env.Check(sdcaClone.PipelineNode.CheckEntryPointStateMatchesParamValues());

                // Writing to one object of a pair must not have overwritten the other.
                env.Check(!logisticOriginal.PipelineNode.SweepParams[0].RawValue.Equals(
                              logisticClone.PipelineNode.SweepParams[0].RawValue));
                env.Check(!sdcaClone.PipelineNode.SweepParams[0].RawValue.Equals(
                              sdcaOriginal.PipelineNode.SweepParams[0].RawValue));
            }
        }
        /// <summary>
        /// Fits an Iris multi-class pipeline, builds a prediction engine from the scoring-scope
        /// model, and checks the predicted label against the input label for the first 20 rows.
        /// </summary>
        void New_DecomposableTrainAndPredict()
        {
            var irisPath = GetDataPath(IrisDataPath);

            using (var env = new TlcEnvironment())
            {
                var trainingView = new MyTextLoader(env, MakeIrisTextLoaderArgs())
                    .FitAndRead(new MultiFileSource(irisPath));

                // Concatenate features, map the label to a key (train/test scope),
                // train SDCA, then map the predicted key back to its value.
                var estimatorChain = new MyConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
                    .Append(new MyTermTransform(env, "Label"), TransformerScope.TrainTest)
                    .Append(new SdcaMultiClassTrainer(
                                env,
                                new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 },
                                "Features",
                                "Label"))
                    .Append(new MyKeyToValueTransform(env, "PredictedLabel"));

                var scoringModel     = estimatorChain.Fit(trainingView).GetModelFor(TransformerScope.Scoring);
                var predictionEngine = new MyPredictionEngine<IrisDataNoLabel, IrisPrediction>(env, scoringModel);

                var evaluationLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));
                var evaluationRows   = evaluationLoader.AsEnumerable<IrisData>(env, false);
                foreach (var row in evaluationRows.Take(20))
                {
                    var predicted = predictionEngine.Predict(row);
                    Assert.True(predicted.PredictedLabel == row.Label);
                }
            }
        }
Exemple #6
0
 /// <summary>
 /// Runs a <c>showschema</c> command line through <c>Maml.Main</c> and expects it to
 /// return 0.
 /// </summary>
 void TestCommandLine()
 {
     using (var env = new TlcEnvironment())
     {
         var exitCode = Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0} xf=Term{col=B:A} in=f:\2.txt" });

         // xUnit's Assert.Equal takes (expected, actual); with the arguments in this order
         // a failure message reports the expected and actual values correctly.
         Assert.Equal(0, exitCode);
     }
 }
Exemple #7
0
        /// <summary>
        /// Trains a sentiment pipeline, then calls a single prediction engine for every test
        /// row from <c>Parallel.ForEach</c>, taking a lock on the engine around each call.
        /// </summary>
        void New_MultithreadedPrediction()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var reader = new TextLoader(env, MakeSentimentTextLoaderArgs());
                var data   = reader.Read(new MultiFileSource(dataPath));

                // Pipeline.
                var pipeline = new MyTextTransform(env, MakeSentimentTextTransformArgs())
                               .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments {
                    NumThreads = 1
                }, "Features", "Label"));

                // Train.
                var model = pipeline.Fit(data);

                // Create prediction engine and test predictions.
                var engine = new MyPredictionEngine<SentimentData, SentimentPrediction>(env, model);

                // Read the test rows, reusing the path resolved above instead of resolving it again.
                var testData = reader.Read(new MultiFileSource(testDataPath))
                               .AsEnumerable<SentimentData>(env, false);

                Parallel.ForEach(testData, input =>
                {
                    // Every worker shares the same engine, so calls are serialized on it.
                    lock (engine)
                    {
                        engine.Predict(input);
                    }
                });
            }
        }
Exemple #8
0
        /// <summary>
        /// Infers pipelines on the adult training data, restricts the search to two named
        /// learners, and checks that the resulting search space contains exactly those.
        /// </summary>
        public void TestLearnerConstrainingByName()
        {
            string trainingPath    = GetDataPath("adult.train");
            int    sampleRowCount  = 1000;
            int    batchSize       = 1;
            int    iterationCount  = 1;
            int    transformLevels = 2;
            var    learnersToKeep  = new[] { "LogisticRegressionBinaryClassifier", "FastTreeBinaryClassifier" };

            // NOTE(review): the calls below use the Env member rather than this local
            // environment - confirm that is intended.
            using (var env = new TlcEnvironment())
            {
                SupportedMetric aucMetric = PipelineSweeperSupportedMetrics.GetSupportedMetric(
                    PipelineSweeperSupportedMetrics.Metrics.Auc);

                // Uniform random sampling (with replacement) as the optimizer.
                PipelineOptimizerBase optimizer = new UniformRandomEngine(Env);

                // Initial experiment.
                var inferenceState = AutoInference.InferPipelines(
                    Env, optimizer, trainingPath, "", out var _,
                    transformLevels, batchSize, aucMetric, out var _, sampleRowCount,
                    new IterationTerminator(iterationCount), MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);

                // Restrict to logistic regression and FastTree.
                inferenceState.KeepSelectedLearners(learnersToKeep);
                var searchSpace = inferenceState.GetSearchSpace();

                // Only the retained learners may remain.
                Assert.Equal(learnersToKeep.Length, searchSpace.Item2.Length);
                Assert.True(searchSpace.Item2.All(learner => learnersToKeep.Contains(learner.LearnerName)));
            }
        }
Exemple #9
0
        /// <summary>
        /// Builds a sentiment text pipeline, wraps the training data in a cache when the
        /// trainer asks for it, lets <c>NormalizeTransform.CreateIfNeeded</c> adjust the role
        /// mapping, and trains a linear classifier.
        /// </summary>
        public void AutoNormalizationAndCaching()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Loader followed by the text transform.
                var textLoader   = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
                var featurized   = TextTransform.Create(env, MakeSentimentTextTransformArgs(false), textLoader);

                // Trainer configuration.
                var trainerArgs = new LinearClassificationTrainer.Arguments
                {
                    NumThreads           = 1,
                    ConvergenceTolerance = 1f
                };
                var trainer = new LinearClassificationTrainer(env, trainerArgs);

                // Cache only when the trainer reports that it wants caching.
                IDataView trainingView;
                if (trainer.Info.WantCaching)
                {
                    trainingView = new CacheDataView(env, featurized, prefetch: null);
                }
                else
                {
                    trainingView = featurized;
                }
                var roleMapped = new RoleMappedData(trainingView, label: "Label", feature: "Features");

                // Normalization is added by the helper when required; it may replace roleMapped.
                NormalizeTransform.CreateIfNeeded(env, ref roleMapped, trainer);
                var predictor = trainer.Train(new Runtime.TrainContext(roleMapped));
            }
        }
Exemple #10
0
        /// <summary>
        /// Round-trips <c>ClassWithInheritedProperties</c> rows through a data view and checks
        /// that the rows read back match the originals, both in content and in count.
        /// </summary>
        public void ClassWithInheritedPropertiesConversion()
        {
            var data = new List<ClassWithInheritedProperties>()
            {
                new ClassWithInheritedProperties()
                {
                    IntProp = 1, StringProp = "lala", LongProp = 17, ByteProp = 3
                },
                new ClassWithInheritedProperties()
                {
                    IntProp = -1, StringProp = "", LongProp = 2, ByteProp = 4
                },
                new ClassWithInheritedProperties()
                {
                    IntProp = 0, StringProp = null, LongProp = 18, ByteProp = 5
                }
            };

            using (var env = new TlcEnvironment())
            {
                var dataView = ComponentCreation.CreateDataView(env, data);

                // Walk the round-tripped rows by position. The previous two-enumerator loop
                // advanced the round-tripped enumerator once more in its final check, so a
                // result with exactly one extra row went unnoticed. foreach also disposes
                // the enumerator.
                int index = 0;
                foreach (var roundTripped in dataView.AsEnumerable<ClassWithInheritedProperties>(env, false))
                {
                    Assert.True(index < data.Count, "The data view produced more rows than were put in.");
                    Assert.True(CompareThroughReflection(roundTripped, data[index]));
                    index++;
                }

                // Catches the opposite case: fewer rows came back than were put in.
                Assert.Equal(data.Count, index);
            }
        }
Exemple #11
0
        /// <summary>
        /// Infers pipelines on the housing data, extends the terminator by one iteration,
        /// infers again, and asserts that every evaluated pipeline has a metric value greater
        /// than or equal to that of the returned best pipeline.
        /// </summary>
        public void TestRegressionPipelineWithMinimizingMetric()
        {
            string housingPath     = GetDataPath("../Housing (regression)/housing.txt");
            int    sampleRowCount  = 100;
            int    batchSize       = 5;
            int    iterationCount  = 10;
            int    transformLevels = 1;

            // NOTE(review): the calls below use the Env member rather than this local
            // environment - confirm that is intended.
            using (var env = new TlcEnvironment())
            {
                SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(
                    PipelineSweeperSupportedMetrics.Metrics.AccuracyMicro);

                // Uniform random sampling (with replacement) as the optimizer.
                PipelineOptimizerBase optimizer = new UniformRandomEngine(Env);

                // First round of experiments.
                var inferenceState = AutoInference.InferPipelines(
                    Env, optimizer, housingPath, "", out var _, transformLevels, batchSize,
                    metric, out var bestPipeline, sampleRowCount, new IterationTerminator(iterationCount),
                    MacroUtils.TrainerKinds.SignatureRegressorTrainer);

                // Permit one additional iteration.
                inferenceState.UpdateTerminator(new IterationTerminator(iterationCount + 1));

                // Second round; its result replaces the best pipeline from the first round.
                bestPipeline = inferenceState.InferPipelines(transformLevels, batchSize, sampleRowCount);

                // The best pipeline must exist and must not be beaten (lower value) by any other.
                Assert.NotNull(bestPipeline);
                Assert.True(inferenceState.GetAllEvaluatedPipelines().All(
                                evaluated => evaluated.PerformanceSummary.MetricValue >= bestPipeline.PerformanceSummary.MetricValue));
            }
        }
Exemple #12
0
        /// <summary>
        /// Round-trips <c>ClassWithPrivateFieldsAndProperties</c> rows through a data view and
        /// checks that the rows read back match the originals in content and count, and that
        /// <c>UnusedPropertyWithPrivateSetter</c> equals 100 on every row read back.
        /// </summary>
        public void ClassWithPrivateFieldsAndPropertiesConversion()
        {
            var data = new List<ClassWithPrivateFieldsAndProperties>()
            {
                new ClassWithPrivateFieldsAndProperties()
                {
                    StringProp = "lala"
                },
                new ClassWithPrivateFieldsAndProperties()
                {
                    StringProp = "baba"
                }
            };

            using (var env = new TlcEnvironment())
            {
                var dataView = ComponentCreation.CreateDataView(env, data);

                // Walk the round-tripped rows by position. The previous two-enumerator loop
                // advanced the round-tripped enumerator once more in its final check, so a
                // result with exactly one extra row went unnoticed. foreach also disposes
                // the enumerator.
                int index = 0;
                foreach (var roundTripped in dataView.AsEnumerable<ClassWithPrivateFieldsAndProperties>(env, false))
                {
                    Assert.True(index < data.Count, "The data view produced more rows than were put in.");
                    Assert.True(CompareThroughReflection(roundTripped, data[index]));
                    Assert.True(roundTripped.UnusedPropertyWithPrivateSetter == 100);
                    index++;
                }

                // Catches the opposite case: fewer rows came back than were put in.
                Assert.Equal(data.Count, index);
            }
        }
Exemple #13
0
        /// <summary>
        /// Round-trips <c>ClassWithMixOfFieldsAndProperties</c> rows through a data view and
        /// checks that the rows read back match the originals, both in content and in count.
        /// </summary>
        public void ClassWithMixOfFieldsAndPropertiesConversion()
        {
            var data = new List<ClassWithMixOfFieldsAndProperties>()
            {
                new ClassWithMixOfFieldsAndProperties()
                {
                    IntProp = 1, fString = "lala"
                },
                new ClassWithMixOfFieldsAndProperties()
                {
                    IntProp = -1, fString = ""
                },
                new ClassWithMixOfFieldsAndProperties()
                {
                    IntProp = 0, fString = null
                }
            };

            using (var env = new TlcEnvironment())
            {
                var dataView = ComponentCreation.CreateDataView(env, data);

                // Walk the round-tripped rows by position. The previous two-enumerator loop
                // advanced the round-tripped enumerator once more in its final check, so a
                // result with exactly one extra row went unnoticed. foreach also disposes
                // the enumerator.
                int index = 0;
                foreach (var roundTripped in dataView.AsEnumerable<ClassWithMixOfFieldsAndProperties>(env, false))
                {
                    Assert.True(index < data.Count, "The data view produced more rows than were put in.");
                    Assert.True(CompareThroughReflection(roundTripped, data[index]));
                    index++;
                }

                // Catches the opposite case: fewer rows came back than were put in.
                Assert.Equal(data.Count, index);
            }
        }
Exemple #14
0
 /// <summary>
 /// Round-trips two rows through a data view - one with all properties null, one with
 /// all properties set to the MinValue of their type - and asserts that every property
 /// of every row read back compares equal to null.
 /// </summary>
 public void ConversionMinValueToNullBehaviorProperties()
 {
     using (var env = new TlcEnvironment())
     {
         var data = new List<ConversionLossMinValueClassProperties>
         {
             new ConversionLossMinValueClassProperties()
             {
                 SByteProp = null, IntProp = null, LongProp = null, ShortProp = null
             },
             new ConversionLossMinValueClassProperties()
             {
                 SByteProp = sbyte.MinValue, IntProp = int.MinValue, LongProp = long.MinValue, ShortProp = short.MinValue
             }
         };

         // The checks used to sit inside a loop over GetFields(), which never used its loop
         // variable. NOTE(review): judging by the member names this type exposes properties;
         // if it has no public fields that loop body never ran and nothing was asserted.
         // Running the check once, unconditionally, removes that dependency.
         var dataView = ComponentCreation.CreateDataView(env, data);
         int rowCount = 0;

         // foreach disposes the underlying enumerator for us.
         foreach (var row in dataView.AsEnumerable<ConversionLossMinValueClassProperties>(env, false))
         {
             Assert.True(row.IntProp == null, "IntProp should read back as null.");
             Assert.True(row.LongProp == null, "LongProp should read back as null.");
             Assert.True(row.SByteProp == null, "SByteProp should read back as null.");
             Assert.True(row.ShortProp == null, "ShortProp should read back as null.");
             rowCount++;
         }

         // Guard against a vacuous pass when no rows come back at all.
         Assert.Equal(data.Count, rowCount);
     }
 }
Exemple #15
0
        /// <summary>
        /// Saves the transformed sentiment training data to the binary file "i.idv", loads it
        /// back with <c>BinaryLoader</c>, trains on the reloaded data and deletes the file.
        /// </summary>
        void New_FileBasedSavingOfData()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline.
                var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                               .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

                var trainData = pipeline.Fit(new MultiFileSource(dataPath)).Read(new MultiFileSource(dataPath));

                // Dispose the write stream explicitly so it is closed before the file is
                // read back below; previously the stream was created inline and never disposed.
                using (var file = env.CreateOutputFile("i.idv"))
                using (var stream = file.CreateWriteStream())
                {
                    trainData.SaveAsBinary(env, stream);
                }

                var trainer = new MySdca(env, new LinearClassificationTrainer.Arguments {
                    NumThreads = 1
                }, "Features", "Label");
                var loadedTrainData = new BinaryLoader(env, new BinaryLoader.Arguments(), new MultiFileSource("i.idv"));

                // Train.
                var model = trainer.Train(loadedTrainData);
                DeleteOutputPath("i.idv");
            }
        }
Exemple #16
0
        /// <summary>
        /// Trains a FastTree binary classifier on the sentiment data, passing a validation set
        /// that went through the same transforms as the training set.
        /// </summary>
        public void TrainWithValidationSet()
        {
            var trainPath      = GetDataPath(SentimentDataPath);
            var validationPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Training pipeline: loader followed by the text transform.
                var trainLoader     = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(trainPath));
                var transformedData = TextTransform.Create(env, MakeSentimentTextTransformArgs(), trainLoader);

                // The validation set needs the same transformations. There is no easy way to
                // point an existing loader at different data, so the choices are a second
                // loader or a save/reload of the loader through a model file. A second loader
                // is not always possible, but it is here.
                var validationLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(validationPath));
                var validationData   = ApplyTransformUtils.ApplyAllTransformsToData(env, transformedData, validationLoader);

                // Both datasets are cached before training.
                var trainCache      = new CacheDataView(env, transformedData, prefetch: null);
                var validationCache = new CacheDataView(env, validationData, prefetch: null);

                // Trainer with three trees.
                var trainerArgs = new FastTreeBinaryClassificationTrainer.Arguments { NumTrees = 3 };
                var trainer     = new FastTreeBinaryClassificationTrainer(env, trainerArgs);

                var trainRoles      = new RoleMappedData(trainCache, label: "Label", feature: "Features");
                var validationRoles = new RoleMappedData(validationCache, label: "Label", feature: "Features");
                trainer.Train(new Runtime.TrainContext(trainRoles, validationRoles));
            }
        }
Exemple #17
0
        /// <summary>
        /// Fits an image-processing estimator chain, saves the model to a temp file, loads it
        /// back, and compares the columns of the first (image loader) transform before and
        /// after the round trip.
        /// </summary>
        public void TestEstimatorSaveLoad()
        {
            using (var env = new TlcEnvironment())
            {
                var dataFile    = GetDataPath("images/images.tsv");
                var imageFolder = Path.GetDirectoryName(dataFile);
                var data        = env.CreateLoader("Text{col=ImagePath:TX:0 col=Name:TX:1}", new MultiFileSource(dataFile));

                var pipe = new ImageLoaderEstimator(env, imageFolder, ("ImagePath", "ImageReal"))
                           .Append(new ImageResizerEstimator(env, "ImageReal", "ImageReal", 100, 100))
                           .Append(new ImagePixelExtractorEstimator(env, "ImageReal", "ImagePixels"))
                           .Append(new ImageGrayscaleEstimator(env, ("ImageReal", "ImageGray")));

                pipe.GetOutputSchema(Core.Data.SchemaShape.Create(data.Schema));
                var model = pipe.Fit(data);

                using (var file = env.CreateTempFile())
                {
                    using (var fs = file.CreateWriteStream())
                        model.SaveTo(env, fs);

                    // Dispose the read stream too (it used to be created inline and leaked).
                    // The loaded model is only used while the stream is still open.
                    using (var readStream = file.OpenReadStream())
                    {
                        var model2 = TransformerChain.LoadFrom(env, readStream);

                        var newCols = ((ImageLoaderTransform)model2.First()).Columns;
                        var oldCols = ((ImageLoaderTransform)model.First()).Columns;

                        // Zip stops at the shorter sequence, so compare the counts first;
                        // otherwise a missing or extra column would go unnoticed.
                        Assert.Equal(oldCols.Count(), newCols.Count());
                        Assert.True(newCols
                                    .Zip(oldCols, (x, y) => x == y)
                                    .All(x => x));
                    }
                }
            }
            Done();
        }
Exemple #18
0
        /// <summary>
        /// Runs a two-fold binary cross-validation of a text-transform plus linear-classifier
        /// pipeline over the sentiment data.
        /// </summary>
        void New_CrossValidation()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var sentimentLoader = new TextLoader(env, MakeSentimentTextLoaderArgs());
                var sentimentView   = sentimentLoader.Read(new MultiFileSource(dataPath));

                // Featurize the text, then train a single-threaded linear classifier.
                var estimatorChain = new TextTransform(env, "SentimentText", "Features")
                    .Append(new LinearClassificationTrainer(
                                env,
                                new LinearClassificationTrainer.Arguments { NumThreads = 1, ConvergenceTolerance = 1f },
                                "Features",
                                "Label"));

                var crossValidator = new MyCrossValidation.BinaryCrossValidator(env) { NumFolds = 2 };

                var cvResult = crossValidator.CrossValidate(sentimentView, estimatorChain);
            }
        }
 /// <summary>
 /// Runs a <c>showschema</c> command line (Term followed by KeyToVector) through
 /// <c>Maml.Main</c> and expects it to return 0.
 /// </summary>
 public void TestCommandLine()
 {
     using (var env = new TlcEnvironment())
     {
         var exitCode = Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0} xf=Term{col=B:A} xf=KeyToVector{col=C:B col={name=D source=B bag+}} in=f:\2.txt" });

         // xUnit's Assert.Equal takes (expected, actual); with the arguments in this order
         // a failure message reports the expected and actual values correctly.
         Assert.Equal(0, exitCode);
     }
 }
        /// <summary>
        /// Trains an SDCA model on the sentiment data, then trains an averaged perceptron on
        /// the same data, passing the first model's inner model as the second argument to Train.
        /// </summary>
        public void New_TrainWithInitialPredictor()
        {
            var sentimentPath = GetDataPath(SentimentDataPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Loader plus text transform.
                var readerEstimator = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                    .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

                // Fit the reader, then read the training set with it.
                var fittedReader = readerEstimator.Fit(new MultiFileSource(sentimentPath));
                var trainingView = fittedReader.Read(new MultiFileSource(sentimentPath));

                // First predictor: single-threaded SDCA.
                var sdcaTrainer = new MySdca(
                    env,
                    new LinearClassificationTrainer.Arguments { NumThreads = 1 },
                    "Features",
                    "Label");
                var sdcaModel = sdcaTrainer.Fit(trainingView);

                // Second predictor on the same data, handed the first predictor's inner model.
                var perceptronTrainer = new MyAveragedPerceptron(env, new AveragedPerceptronTrainer.Arguments(), "Features", "Label");
                var finalModel        = perceptronTrainer.Train(trainingView, sdcaModel.InnerModel);
            }
        }
        /// <summary>
        /// Trains a sentiment pipeline and, for the first five test rows, checks that the
        /// predicted sentiment equals the input sentiment and that the score is above 1 for
        /// positive rows and below -1 for negative rows.
        /// </summary>
        public void New_SimpleTrainAndPredict()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline.
                var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                               .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()))
                               .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments {
                    NumThreads = 1
                }, "Features", "Label"));

                // Train.
                var model = pipeline.Fit(new MultiFileSource(dataPath));

                // Create prediction engine and test predictions.
                var engine = new MyPredictionEngine<SentimentData, SentimentPrediction>(env, model.Transformer);

                // Read the test rows, reusing the path resolved above instead of resolving it again.
                var testData = model.Reader.Read(new MultiFileSource(testDataPath))
                               .AsEnumerable<SentimentData>(env, false);
                foreach (var input in testData.Take(5))
                {
                    var prediction = engine.Predict(input);

                    // Verify that predictions match and scores are separated from zero.
                    Assert.Equal(input.Sentiment, prediction.Sentiment);
                    Assert.True((input.Sentiment && prediction.Score > 1) ||
                                (!input.Sentiment && prediction.Score < -1));
                }
            }
        }
        /// <summary>
        /// Runs <c>InferSchemaCommand</c> on each listed dataset, writing the result to a
        /// numbered output file, and compares each output through <c>CheckEquality</c>.
        /// </summary>
        public void InferSchemaCommandTest()
        {
            var inputFiles = new[]
            {
                GetDataPath(Path.Combine("..", "data", "wikipedia-detox-250-line-data.tsv"))
            };

            // NOTE(review): the command below is built with the Env member rather than this
            // local environment - confirm that is intended.
            using (var env = new TlcEnvironment())
            {
                var host = env.Register("InferSchemaCommandTest", seed: 0, verbose: false);
                using (var ch = host.Start("InferSchemaCommandTest"))
                {
                    for (int index = 0; index < inputFiles.Length; index++)
                    {
                        // Output files are numbered with two digits: 00, 01, ...
                        string resultDir  = Path.Combine("..", "Common", "Inference");
                        string resultName = $"dataset-infer-schema-result-{index:00}.txt";
                        string resultPath = GetOutputPath(resultDir, resultName);

                        var commandArgs = new InferSchemaCommand.Arguments()
                        {
                            DataFile   = inputFiles[index],
                            OutputFile = resultPath,
                        };

                        new InferSchemaCommand(Env, commandArgs).Run();

                        CheckEquality(Path.Combine("..", "Common", "Inference"), resultName);
                    }
                }
            }
            Done();
        }
Exemple #23
0
        /// <summary>
        /// Trains a sentiment classifier and evaluates it on the test set, then builds a second
        /// prediction transformer over the same model with a probability threshold of 0.01 and
        /// evaluates that one too.
        /// </summary>
        public void New_ReconfigurablePrediction()
        {
            var trainPath = GetDataPath(SentimentDataPath);
            var testPath  = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var fittedReader = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                    .Fit(new MultiFileSource(trainPath));

                var trainView = fittedReader.Read(new MultiFileSource(trainPath));
                var testView  = fittedReader.Read(new MultiFileSource(testPath));

                // Fitted text featurization.
                var featurizer = new MyTextTransform(env, MakeSentimentTextTransformArgs())
                    .Fit(trainView);

                var trainer = new LinearClassificationTrainer(
                    env,
                    new LinearClassificationTrainer.Arguments { NumThreads = 1 },
                    "Features",
                    "Label");
                var featurizedTrain = featurizer.Transform(trainView);
                var trainedModel    = trainer.Fit(featurizedTrain);

                // Evaluation with the model as trained.
                var scoredTest = trainedModel.Transform(featurizer.Transform(testView));
                var metrics    = new MyBinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments())
                    .Evaluate(scoredTest, "Label", "Probability");

                // Same underlying model, re-wrapped with a 0.01 threshold on the probability column.
                var rethresholdedModel = new BinaryPredictionTransformer<IPredictorProducing<float>>(
                    env,
                    trainedModel.Model,
                    featurizedTrain.Schema,
                    trainedModel.FeatureColumn,
                    threshold: 0.01f,
                    thresholdColumn: DefaultColumnNames.Probability);
                var rescoredTest = rethresholdedModel.Transform(featurizer.Transform(testView));
                var newMetrics   = new MyBinaryClassifierEvaluator(
                        env,
                        new BinaryClassifierEvaluator.Arguments { Threshold = 0.01f, UseRawScoreThreshold = false })
                    .Evaluate(rescoredTest, "Label", "Probability");
            }
        }
Exemple #24
0
        /// <summary>
        /// Saves the transformed sentiment data to the binary file "i.idv" with
        /// <c>BinarySaver</c>, loads it back with <c>BinaryLoader</c>, trains a linear
        /// classifier on the reloaded data and deletes the file.
        /// </summary>
        void FileBasedSavingOfData()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Loader followed by the text transform.
                var textLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
                var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(), textLoader);

                // Write the transformed view out in binary form.
                var binarySaver = new BinarySaver(env, new BinarySaver.Arguments());
                using (var channel = env.Start("SaveData"))
                {
                    using (var outputFile = env.CreateOutputFile("i.idv"))
                    {
                        DataSaverUtils.SaveDataView(channel, binarySaver, featurized, outputFile);
                    }
                }

                // Read the file back and train on it.
                var reloaded    = new BinaryLoader(env, new BinaryLoader.Arguments(), new MultiFileSource("i.idv"));
                var roleMapped  = new RoleMappedData(reloaded, label: "Label", feature: "Features");
                var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
                var trainer     = new LinearClassificationTrainer(env, trainerArgs);
                var predictor   = trainer.Train(new Runtime.TrainContext(roleMapped));

                DeleteOutputPath("i.idv");
            }
        }
Exemple #25
0
        /// <summary>
        /// Builds an experiment made of a text loader followed by a min-max normalizer on
        /// "NumericFeatures", runs it, and checks the column count and column names of the
        /// normalizer's output schema.
        /// </summary>
        public void TestSimpleExperiment()
        {
            var inputPath = GetDataPath(@"adult.tiny.with-schema.txt");

            using (var env = new TlcEnvironment())
            {
                var graph = env.CreateExperiment();

                // First node: import the text file.
                var loaderNode   = new ML.Data.TextLoader(inputPath);
                var loaderResult = graph.Add(loaderNode);

                // Second node: normalize the "NumericFeatures" column of the imported data.
                var normalizerNode = new ML.Transforms.MinMaxNormalizer();
                normalizerNode.Data = loaderResult.Data;
                normalizerNode.AddColumn("NumericFeatures");
                var normalizerResult = graph.Add(normalizerNode);

                graph.Compile();
                graph.SetInput(loaderNode.InputFile, new SimpleFileHandle(env, inputPath, false, false));
                graph.Run();

                var outputSchema = graph.GetOutput(normalizerResult.OutputData).Schema;
                Assert.Equal(5, outputSchema.ColumnCount);

                // Column names are compared position by position.
                string[] expectedNames = { "Label", "Workclass", "Categories", "NumericFeatures", "NumericFeatures" };
                int column = 0;
                while (column < outputSchema.ColumnCount)
                {
                    Assert.Equal(expectedNames[column], outputSchema.GetColumnName(column));
                    column++;
                }
            }
        }
Exemple #26
0
        /// <summary>
        /// Fits a pipeline (text loader, text transform, linear classification trainer) on the
        /// training file, reads the test file through the fitted model, and evaluates the result
        /// with a binary classifier evaluator.
        /// </summary>
        public void New_Evaluation()
        {
            var trainPath = GetDataPath(SentimentDataPath);
            var testPath  = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Loader followed by the text transform.
                var featurization = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                                    .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

                // Trainer configured with NumThreads = 1, appended as the last pipeline step.
                var trainerArgs = new LinearClassificationTrainer.Arguments();
                trainerArgs.NumThreads = 1;
                var pipeline = featurization.Append(new LinearClassificationTrainer(env, trainerArgs, "Features", "Label"));

                // Train.
                var model = pipeline.Fit(new MultiFileSource(trainPath));

                // Evaluate on the test set.
                var testSource = new MultiFileSource(testPath);
                var dataEval   = model.Read(testSource);
                var evalArgs   = new BinaryClassifierEvaluator.Arguments();
                var evaluator  = new MyBinaryClassifierEvaluator(env, evalArgs);
                var metrics    = evaluator.Evaluate(dataEval);
            }
        }
Exemple #27
0
        /// <summary>
        /// Trains a model using SAR.
        /// </summary>
        /// <param name="settings">The training settings</param>
        /// <param name="usageEvents">The usage events to use for training</param>
        /// <param name="catalogItems">The catalog items to use for training; required only when
        /// <c>settings.EnableColdItemPlacement</c> is set</param>
        /// <param name="uniqueUsersCount">The number of users in the user id index file. Must not be negative.</param>
        /// <param name="uniqueUsageItemsCount">The number of usage items in the item id index file. Must not be negative.</param>
        /// <param name="cancellationToken">A cancellation token</param>
        /// <returns>The model produced by <c>TrainModel</c> for the given inputs.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="settings"/> or <paramref name="usageEvents"/> is null, or
        /// <paramref name="catalogItems"/> is null while cold item placement is enabled.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="uniqueUsersCount"/> or <paramref name="uniqueUsageItemsCount"/> is negative.
        /// </exception>
        /// <exception cref="OperationCanceledException">
        /// <paramref name="cancellationToken"/> was already cancelled when the method was called.
        /// </exception>
        public IPredictorModel Train(ITrainingSettings settings,
                                     IList <SarUsageEvent> usageEvents,
                                     IList <SarCatalogItem> catalogItems,
                                     int uniqueUsersCount,
                                     int uniqueUsageItemsCount,
                                     CancellationToken cancellationToken)
        {
            if (settings == null)
            {
                throw new ArgumentNullException(nameof(settings));
            }

            if (usageEvents == null)
            {
                throw new ArgumentNullException(nameof(usageEvents));
            }

            // Catalog items are only needed when cold item placement is requested.
            if (settings.EnableColdItemPlacement && catalogItems == null)
            {
                throw new ArgumentNullException(nameof(catalogItems));
            }

            if (uniqueUsersCount < 0)
            {
                // Zero passes this check, so the message says "non-negative" rather than "positive".
                var exception = new ArgumentException($"{nameof(uniqueUsersCount)} must be a non-negative integer");
                _tracer.TraceWarning(exception.ToString());
                throw exception;
            }

            if (uniqueUsageItemsCount < 0)
            {
                // Zero passes this check, so the message says "non-negative" rather than "positive".
                var exception = new ArgumentException($"{nameof(uniqueUsageItemsCount)} must be a non-negative integer");
                _tracer.TraceWarning(exception.ToString());
                throw exception;
            }

            cancellationToken.ThrowIfCancellationRequested();

            using (TlcEnvironment environment = new TlcEnvironment(verbose: true))
            {
                _detectedFeatureWeights = null;
                try
                {
                    environment.AddListener <ChannelMessage>(ChannelMessageListener);
                    IHost environmentHost = environment.Register("SarHost");

                    // bind the cancellation token to SAR cancellation
                    using (cancellationToken.Register(() => { environmentHost.StopExecution(); }))
                    {
                        _tracer.TraceInformation("Starting training model using SAR");
                        return TrainModel(environmentHost, settings, usageEvents, catalogItems, uniqueUsersCount,
                                          uniqueUsageItemsCount);
                    }
                }
                finally
                {
                    // Always detach the listener, whether training succeeded, failed or was cancelled.
                    environment.RemoveListener <ChannelMessage>(ChannelMessageListener);
                }
            }
        }
        /// <summary>
        /// Creates a debug proxy that holds its own copy of <paramref name="pipeline"/>, in which
        /// every loader item is immediately followed by a <see cref="RowTakeFilter"/> whose count
        /// is <c>MaxLoaderRows</c>.
        /// </summary>
        /// <param name="pipeline">The pipeline whose items are copied.</param>
        /// <exception cref="ArgumentNullException"><paramref name="pipeline"/> is null.</exception>
        public LearningPipelineDebugProxy(LearningPipeline pipeline)
        {
            if (pipeline == null)
            {
                throw new ArgumentNullException(nameof(pipeline));
            }

            _pipeline = new LearningPipeline();

            // A concurrency factor of 1 means no other threads have to run while in the debugger.
            _environment = new TlcEnvironment(conc: 1);

            foreach (ILearningPipelineItem step in pipeline)
            {
                _pipeline.Add(step);

                if (!(step is ILearningPipelineLoader))
                {
                    continue;
                }

                // Follow each loader with a take filter so it returns in a reasonable
                // amount of time.
                var rowCap = new RowTakeFilter();
                rowCap.Count = MaxLoaderRows;
                _pipeline.Add(rowCap);
            }
        }
Exemple #29
0
        /// <summary>
        /// Trains a linear classifier on the text-transformed sentiment data, then trains an
        /// averaged perceptron on the same data using the first model as the initial predictor.
        /// </summary>
        public void New_TrainWithInitialPredictor()
        {
            var trainPath = GetDataPath(SentimentDataPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var reader  = new TextLoader(env, MakeSentimentTextLoaderArgs());
                var rawData = reader.Read(new MultiFileSource(trainPath));

                // Fit the text transform and produce the training set from it.
                var textTransform = new TextTransform(env, "SentimentText", "Features");
                var featurized    = textTransform.FitAndTransform(rawData);

                // First predictor: linear classifier with NumThreads = 1.
                var linearArgs = new LinearClassificationTrainer.Arguments();
                linearArgs.NumThreads = 1;
                var linearTrainer = new LinearClassificationTrainer(env, linearArgs, "Features", "Label");
                var firstModel    = linearTrainer.Fit(featurized);

                // Second predictor: trained on the same data, seeded with the first model.
                var perceptronTrainer = new AveragedPerceptronTrainer(env, new AveragedPerceptronTrainer.Arguments());
                var roleMapped        = new RoleMappedData(featurized, label: "Label", feature: "Features");
                var seededContext     = new TrainContext(roleMapped, initialPredictor: firstModel.Model);
                var finalModel        = perceptronTrainer.Train(seededContext);
            }
        }
Exemple #30
0
 /// <summary>
 /// For each public field of <see cref="ConversionNotSupportedMinValueClass"/>, sets that field
 /// to its type's <c>MinValue</c> (when the type exposes such a field), builds a data view from
 /// the instance, and verifies that advancing the enumerator throws.
 /// </summary>
 public void ConversionExceptionsBehavior()
 {
     using (var env = new TlcEnvironment())
     {
         var data = new ConversionNotSupportedMinValueClass[1];
         foreach (var field in typeof(ConversionNotSupportedMinValueClass).GetFields())
         {
             // Use a fresh instance per field so only the field under test holds MinValue.
             data[0] = new ConversionNotSupportedMinValueClass();
             FieldInfo minValueField = field.FieldType.GetField("MinValue");
             if (minValueField != null)
             {
                 field.SetValue(data[0], minValueField.GetValue(null));
             }
             var dataView   = ComponentCreation.CreateDataView(env, data);
             var enumerator = dataView.AsEnumerable <ConversionNotSupportedMinValueClass>(env, false).GetEnumerator();

             // Record the exception in a flag and assert after the try/catch. Asserting inside
             // the try would let the bare catch swallow the assertion failure itself, so the
             // test could never fail.
             bool threw = false;
             try
             {
                 enumerator.MoveNext();
             }
             catch
             {
                 threw = true;
             }
             Assert.True(threw, $"Expected an exception when reading field '{field.Name}'.");
         }
     }
 }