public void New_SimpleTrainAndPredict()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline.
                var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                               .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()))
                               .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments {
                    NumThreads = 1
                }, "Features", "Label"));

                // Train.
                var model = pipeline.Fit(new MultiFileSource(dataPath));

                // Create prediction engine and test predictions.
                var engine = new MyPredictionEngine <SentimentData, SentimentPrediction>(env, model.Transformer);

                // Take a couple examples out of the test data and run predictions on top.
                var testData = model.Reader.Read(new MultiFileSource(GetDataPath(SentimentTestPath)))
                               .AsEnumerable <SentimentData>(env, false);
                foreach (var input in testData.Take(5))
                {
                    var prediction = engine.Predict(input);
                    // Verify that predictions match and scores are separated from zero.
                    Assert.Equal(input.Sentiment, prediction.Sentiment);
                    Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1);
                }
            }
        }
Esempio n. 2
0
        public void New_ReconfigurablePrediction()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var dataReader = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                                 .Fit(new MultiFileSource(dataPath));

                var data     = dataReader.Read(new MultiFileSource(dataPath));
                var testData = dataReader.Read(new MultiFileSource(testDataPath));

                // Pipeline.
                var pipeline = new MyTextTransform(env, MakeSentimentTextTransformArgs())
                               .Fit(data);

                var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments {
                    NumThreads = 1
                }, "Features", "Label");
                var trainData = pipeline.Transform(data);
                var model     = trainer.Fit(trainData);

                var scoredTest = model.Transform(pipeline.Transform(testData));
                var metrics    = new MyBinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments()).Evaluate(scoredTest, "Label", "Probability");

                var newModel      = new BinaryPredictionTransformer <IPredictorProducing <float> >(env, model.Model, trainData.Schema, model.FeatureColumn, threshold: 0.01f, thresholdColumn: DefaultColumnNames.Probability);
                var newScoredTest = newModel.Transform(pipeline.Transform(testData));
                var newMetrics    = new MyBinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments {
                    Threshold = 0.01f, UseRawScoreThreshold = false
                }).Evaluate(newScoredTest, "Label", "Probability");
            }
        }
Esempio n. 3
0
        public void New_TrainWithInitialPredictor()
        {
            var dataPath = GetDataPath(SentimentDataPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline.
                var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                               .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

                // Train the pipeline, prepare train set.
                var reader    = pipeline.Fit(new MultiFileSource(dataPath));
                var trainData = reader.Read(new MultiFileSource(dataPath));


                // Train the first predictor.
                var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
                {
                    NumThreads = 1
                }, "Features", "Label");
                var firstModel = trainer.Fit(trainData);

                // Train the second predictor on the same data.
                var secondTrainer = new MyAveragedPerceptron(env, new AveragedPerceptronTrainer.Arguments(), "Features", "Label");
                var finalModel    = secondTrainer.Train(trainData, firstModel.Model);
            }
        }
Esempio n. 4
0
        public void New_Evaluation()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline.
                var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                               .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()))
                               .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments {
                    NumThreads = 1
                }, "Features", "Label"));

                // Train.
                var model = pipeline.Fit(new MultiFileSource(dataPath));

                // Evaluate on the test set.
                var dataEval  = model.Read(new MultiFileSource(testDataPath));
                var evaluator = new MyBinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments()
                {
                });
                var metrics = evaluator.Evaluate(dataEval);
            }
        }
Esempio n. 5
0
        void New_FileBasedSavingOfData()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline.
                var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                               .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

                var trainData = pipeline.Fit(new MultiFileSource(dataPath)).Read(new MultiFileSource(dataPath));

                using (var file = env.CreateOutputFile("i.idv"))
                    trainData.SaveAsBinary(env, file.CreateWriteStream());

                var trainer = new MySdca(env, new LinearClassificationTrainer.Arguments {
                    NumThreads = 1
                }, "Features", "Label");
                var loadedTrainData = new BinaryLoader(env, new BinaryLoader.Arguments(), new MultiFileSource("i.idv"));

                // Train.
                var model = trainer.Train(loadedTrainData);
                DeleteOutputPath("i.idv");
            }
        }
Esempio n. 6
0
        void New_MultithreadedPrediction()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline.
                var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                               .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()))
                               .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments {
                    NumThreads = 1
                }, "Features", "Label"));

                // Train.
                var model = pipeline.Fit(new MultiFileSource(dataPath));

                // Create prediction engine and test predictions.
                var engine = new MyPredictionEngine <SentimentData, SentimentPrediction>(env, model.Transformer);

                // Take a couple examples out of the test data and run predictions on top.
                var testData = model.Reader.Read(new MultiFileSource(GetDataPath(SentimentTestPath)))
                               .AsEnumerable <SentimentData>(env, false);

                Parallel.ForEach(testData, (input) =>
                {
                    lock (engine)
                    {
                        var prediction = engine.Predict(input);
                    }
                });
            }
        }
        void New_DecomposableTrainAndPredict()
        {
            var dataPath = GetDataPath(IrisDataPath);

            using (var env = new TlcEnvironment())
            {
                var data = new MyTextLoader(env, MakeIrisTextLoaderArgs())
                           .FitAndRead(new MultiFileSource(dataPath));

                var pipeline = new MyConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
                               .Append(new MyTermTransform(env, "Label"), TransformerScope.TrainTest)
                               .Append(new MySdcaMulticlass(env, new SdcaMultiClassTrainer.Arguments {
                    MaxIterations = 100, Shuffle = true, NumThreads = 1
                }, "Features", "Label"))
                               .Append(new MyKeyToValueTransform(env, "PredictedLabel"));

                var model  = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring);
                var engine = new MyPredictionEngine <IrisDataNoLabel, IrisPrediction>(env, model);

                var testLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath));
                var testData   = testLoader.AsEnumerable <IrisData>(env, false);
                foreach (var input in testData.Take(20))
                {
                    var prediction = engine.Predict(input);
                    Assert.True(prediction.PredictedLabel == input.Label);
                }
            }
        }
Esempio n. 8
0
        void New_CrossValidation()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var data = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                           .FitAndRead(new MultiFileSource(dataPath));
                // Pipeline.
                var pipeline = new MyTextTransform(env, MakeSentimentTextTransformArgs())
                               .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
                {
                    NumThreads           = 1,
                    ConvergenceTolerance = 1f
                }, "Features", "Label"));

                var cv = new MyCrossValidation.BinaryCrossValidator(env)
                {
                    NumFolds = 2
                };

                var cvResult = cv.CrossValidate(data, pipeline);
            }
        }
Esempio n. 9
0
        public void New_Metacomponents()
        {
            var dataPath = GetDataPath(IrisDataPath);

            using (var env = new TlcEnvironment())
            {
                var data = new MyTextLoader(env, MakeIrisTextLoaderArgs())
                           .FitAndRead(new MultiFileSource(dataPath));

                var sdcaTrainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments {
                    MaxIterations = 100, Shuffle = true, NumThreads = 1
                }, "Features", "Label");
                var pipeline = new MyConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
                               .Append(new MyTermTransform(env, "Label"), TransformerScope.TrainTest)
                               .Append(new MyOva(env, sdcaTrainer))
                               .Append(new MyKeyToValueTransform(env, "PredictedLabel"));

                var model = pipeline.Fit(data);
            }
        }
Esempio n. 10
0
        public void New_TrainWithValidationSet()
        {
            var dataPath           = GetDataPath(SentimentDataPath);
            var validationDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline.
                var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                               .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

                // Train the pipeline, prepare train and validation set.
                var reader    = pipeline.Fit(new MultiFileSource(dataPath));
                var trainData = reader.Read(new MultiFileSource(dataPath));
                var validData = reader.Read(new MultiFileSource(validationDataPath));

                // Train model with validation set.
                var trainer = new MySdca(env, new Runtime.Learners.LinearClassificationTrainer.Arguments(), "Features", "Label");
                var model   = trainer.Train(trainData, validData);
            }
        }
Esempio n. 11
0
        void New_Visibility()
        {
            var dataPath = GetDataPath(SentimentDataPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                               .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));
                var data = pipeline.FitAndRead(new MultiFileSource(dataPath));
                // In order to find out available column names, you can go through schema and check
                // column names and appropriate type for getter.
                for (int i = 0; i < data.Schema.ColumnCount; i++)
                {
                    var columnName = data.Schema.GetColumnName(i);
                    var columnType = data.Schema.GetColumnType(i).RawType;
                }

                using (var cursor = data.GetRowCursor(x => true))
                {
                    Assert.True(cursor.Schema.TryGetColumnIndex("SentimentText", out int textColumn));
                    Assert.True(cursor.Schema.TryGetColumnIndex("Features_TransformedText", out int transformedTextColumn));
                    Assert.True(cursor.Schema.TryGetColumnIndex("Features", out int featureColumn));

                    var              originalTextGettter    = cursor.GetGetter <DvText>(textColumn);
                    var              transformedTextGettter = cursor.GetGetter <VBuffer <DvText> >(transformedTextColumn);
                    var              featureGettter         = cursor.GetGetter <VBuffer <float> >(featureColumn);
                    DvText           text            = default;
                    VBuffer <DvText> transformedText = default;
                    VBuffer <float>  features        = default;
                    while (cursor.MoveNext())
                    {
                        originalTextGettter(ref text);
                        transformedTextGettter(ref transformedText);
                        featureGettter(ref features);
                    }
                }
            }
        }
        public void New_IntrospectiveTraining()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var data = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                           .FitAndRead(new MultiFileSource(dataPath));

                var pipeline = new MyTextTransform(env, MakeSentimentTextTransformArgs())
                               .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments {
                    NumThreads = 1
                }, "Features", "Label"));

                // Train.
                var model = pipeline.Fit(data);

                // Get feature weights.
                VBuffer <float> weights = default;
                model.LastTransformer.Model.GetFeatureWeights(ref weights);
            }
        }