/// <summary>
/// Scenario: train a pipeline (text loader, text transform, linear classification trainer)
/// on the sentiment training file, then predict the first five examples of the sentiment
/// test file. For each example the predicted sentiment must equal the input sentiment, and
/// the score must be above 1 when the sentiment is true or below -1 when it is false.
/// </summary>
public void New_SimpleTrainAndPredict()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline.
        var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
            .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()))
            .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label"));

        // Train.
        var model = pipeline.Fit(new MultiFileSource(dataPath));

        // Create prediction engine and test predictions.
        var engine = new MyPredictionEngine<SentimentData, SentimentPrediction>(env, model.Transformer);

        // Take a couple examples out of the test data and run predictions on top.
        // The test path resolved above is reused rather than being resolved a second time.
        var testData = model.Reader.Read(new MultiFileSource(testDataPath))
            .AsEnumerable<SentimentData>(env, false);

        foreach (var input in testData.Take(5))
        {
            var prediction = engine.Predict(input);

            // Verify that predictions match and scores are separated from zero.
            Assert.Equal(input.Sentiment, prediction.Sentiment);
            Assert.True((input.Sentiment && prediction.Score > 1) || (!input.Sentiment && prediction.Score < -1));
        }
    }
}
/// <summary>
/// Scenario: evaluate a trained binary classifier, then wrap the same trained predictor in a
/// new <c>BinaryPredictionTransformer</c> that thresholds the probability column at 0.01 and
/// evaluate again with a matching evaluator threshold.
/// </summary>
public void New_ReconfigurablePrediction()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Fit the loader once and use it to read both the training and the test file.
        var loader = new MyTextLoader(env, MakeSentimentTextLoaderArgs());
        var dataReader = loader.Fit(new MultiFileSource(dataPath));
        var data = dataReader.Read(new MultiFileSource(dataPath));
        var testData = dataReader.Read(new MultiFileSource(testDataPath));

        // Fit the text transform on the training data.
        var textTransform = new MyTextTransform(env, MakeSentimentTextTransformArgs());
        var featurizer = textTransform.Fit(data);

        // Train the classifier on the transformed training data.
        var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
        var trainer = new LinearClassificationTrainer(env, trainerArgs, "Features", "Label");
        var trainData = featurizer.Transform(data);
        var model = trainer.Fit(trainData);

        // Score and evaluate the test set with the model as trained.
        var scoredTest = model.Transform(featurizer.Transform(testData));
        var evaluator = new MyBinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments());
        var metrics = evaluator.Evaluate(scoredTest, "Label", "Probability");

        // Build a new prediction transformer around the same predictor, with a different threshold
        // applied to the probability column.
        var newModel = new BinaryPredictionTransformer<IPredictorProducing<float>>(
            env,
            model.Model,
            trainData.Schema,
            model.FeatureColumn,
            threshold: 0.01f,
            thresholdColumn: DefaultColumnNames.Probability);

        // Score and evaluate again, configuring the evaluator with the same threshold.
        var newScoredTest = newModel.Transform(featurizer.Transform(testData));
        var newEvaluatorArgs = new BinaryClassifierEvaluator.Arguments { Threshold = 0.01f, UseRawScoreThreshold = false };
        var newEvaluator = new MyBinaryClassifierEvaluator(env, newEvaluatorArgs);
        var newMetrics = newEvaluator.Evaluate(newScoredTest, "Label", "Probability");
    }
}
/// <summary>
/// Scenario: train a linear classifier on the featurized sentiment data, then train an
/// averaged perceptron on the same data, handing the first model's predictor to its
/// <c>Train</c> call (presumably as the initial predictor, as the scenario name suggests).
/// </summary>
public void New_TrainWithInitialPredictor()
{
    var dataPath = GetDataPath(SentimentDataPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Loader followed by the text transform.
        var loader = new MyTextLoader(env, MakeSentimentTextLoaderArgs());
        var pipeline = loader.Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

        // Fit the pipeline and read the training set through it.
        var reader = pipeline.Fit(new MultiFileSource(dataPath));
        var trainData = reader.Read(new MultiFileSource(dataPath));

        // First predictor.
        var firstTrainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
        var trainer = new LinearClassificationTrainer(env, firstTrainerArgs, "Features", "Label");
        var firstModel = trainer.Fit(trainData);

        // Second predictor, trained on the same data and given the first model's predictor.
        var secondTrainer = new MyAveragedPerceptron(env, new AveragedPerceptronTrainer.Arguments(), "Features", "Label");
        var finalModel = secondTrainer.Train(trainData, firstModel.Model);
    }
}
/// <summary>
/// Scenario: train a loader + text transform + linear classification trainer pipeline on the
/// sentiment training file, read the test file through the trained model, and pass the result
/// to a binary classifier evaluator.
/// </summary>
public void New_Evaluation()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Assemble the pipeline one stage at a time.
        var loader = new MyTextLoader(env, MakeSentimentTextLoaderArgs());
        var featurized = loader.Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));
        var pipeline = featurized.Append(
            new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label"));

        // Train on the training file.
        var model = pipeline.Fit(new MultiFileSource(dataPath));

        // Read the test file through the trained model and evaluate it.
        var dataEval = model.Read(new MultiFileSource(testDataPath));
        var evaluatorArgs = new BinaryClassifierEvaluator.Arguments();
        var evaluator = new MyBinaryClassifierEvaluator(env, evaluatorArgs);
        var metrics = evaluator.Evaluate(dataEval);
    }
}
/// <summary>
/// Scenario: featurize the sentiment training data, save it in binary form to "i.idv",
/// load it back with a <c>BinaryLoader</c>, train on the loaded data, and delete the file.
/// </summary>
void New_FileBasedSavingOfData()
{
    var dataPath = GetDataPath(SentimentDataPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline.
        var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
            .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

        var trainData = pipeline.Fit(new MultiFileSource(dataPath)).Read(new MultiFileSource(dataPath));

        // Save the featurized data. The write stream is disposed explicitly, before the file
        // handle, rather than relying on the handle to close it on our behalf.
        using (var file = env.CreateOutputFile("i.idv"))
        using (var stream = file.CreateWriteStream())
        {
            trainData.SaveAsBinary(env, stream);
        }

        var trainer = new MySdca(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label");
        var loadedTrainData = new BinaryLoader(env, new BinaryLoader.Arguments(), new MultiFileSource("i.idv"));

        // Train.
        var model = trainer.Train(loadedTrainData);

        // Remove the intermediate file once training is done.
        DeleteOutputPath("i.idv");
    }
}
/// <summary>
/// Scenario: train a sentiment pipeline, create a single prediction engine, and call it from
/// <c>Parallel.ForEach</c> over the test examples. Every call to <c>Predict</c> is made while
/// holding a lock on the engine, so predictions are serialized.
/// </summary>
void New_MultithreadedPrediction()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline.
        var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
            .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()))
            .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label"));

        // Train.
        var model = pipeline.Fit(new MultiFileSource(dataPath));

        // Create prediction engine and test predictions.
        var engine = new MyPredictionEngine<SentimentData, SentimentPrediction>(env, model.Transformer);

        // Take a couple examples out of the test data and run predictions on top.
        // The test path resolved above is reused rather than being resolved a second time.
        var testData = model.Reader.Read(new MultiFileSource(testDataPath))
            .AsEnumerable<SentimentData>(env, false);

        Parallel.ForEach(testData, (input) =>
        {
            // NOTE(review): the lock suggests the engine is not safe for concurrent Predict calls - confirm.
            lock (engine)
            {
                var prediction = engine.Predict(input);
            }
        });
    }
}
/// <summary>
/// Scenario: fit a multiclass pipeline on the iris data (concatenate the four measurements
/// into "Features", apply a term transform to "Label" with <c>TransformerScope.TrainTest</c>,
/// train, then apply a key-to-value transform to "PredictedLabel"). Extract the model for
/// <c>TransformerScope.Scoring</c> and check its predictions for the first 20 rows of the
/// same file against the labels.
/// </summary>
void New_DecomposableTrainAndPredict()
{
    var dataPath = GetDataPath(IrisDataPath);

    using (var env = new TlcEnvironment())
    {
        var loader = new MyTextLoader(env, MakeIrisTextLoaderArgs());
        var data = loader.FitAndRead(new MultiFileSource(dataPath));

        // Build the pipeline stage by stage; each stage is constructed right before it is appended.
        var concat = new MyConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
        var withLabelTerms = concat.Append(new MyTermTransform(env, "Label"), TransformerScope.TrainTest);
        var withTrainer = withLabelTerms.Append(
            new MySdcaMulticlass(env, new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 }, "Features", "Label"));
        var pipeline = withTrainer.Append(new MyKeyToValueTransform(env, "PredictedLabel"));

        // Fit, then keep only the part of the model that applies to the scoring scope.
        var fitted = pipeline.Fit(data);
        var model = fitted.GetModelFor(TransformerScope.Scoring);
        var engine = new MyPredictionEngine<IrisDataNoLabel, IrisPrediction>(env, model);

        // Read the same file again as strongly typed rows to compare predictions against labels.
        var testLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath));
        var testData = testLoader.AsEnumerable<IrisData>(env, false);
        var firstRows = testData.Take(20);

        foreach (var row in firstRows)
        {
            var prediction = engine.Predict(row);
            Assert.True(prediction.PredictedLabel == row.Label);
        }
    }
}
/// <summary>
/// Scenario: run 2-fold cross-validation of a text transform + linear classification trainer
/// pipeline over the sentiment training data.
/// </summary>
void New_CrossValidation()
{
    var dataPath = GetDataPath(SentimentDataPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        var data = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
            .FitAndRead(new MultiFileSource(dataPath));

        // Pipeline.
        var pipeline = new MyTextTransform(env, MakeSentimentTextTransformArgs())
            .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1, ConvergenceTolerance = 1f }, "Features", "Label"));

        // Cross-validate the whole pipeline using two folds.
        var cv = new MyCrossValidation.BinaryCrossValidator(env)
        {
            NumFolds = 2
        };

        var cvResult = cv.CrossValidate(data, pipeline);
    }
}
/// <summary>
/// Scenario: wrap a linear classification trainer in <c>MyOva</c> and fit it on the iris data
/// as part of a pipeline that concatenates the four measurements into "Features", applies a
/// term transform to "Label", and applies a key-to-value transform to "PredictedLabel".
/// </summary>
public void New_Metacomponents()
{
    var dataPath = GetDataPath(IrisDataPath);

    using (var env = new TlcEnvironment())
    {
        var loader = new MyTextLoader(env, MakeIrisTextLoaderArgs());
        var data = loader.FitAndRead(new MultiFileSource(dataPath));

        // The trainer that MyOva wraps.
        var sdcaArgs = new LinearClassificationTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 };
        var sdcaTrainer = new LinearClassificationTrainer(env, sdcaArgs, "Features", "Label");

        // Build the pipeline stage by stage; each stage is constructed right before it is appended.
        var concat = new MyConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
        var withLabelTerms = concat.Append(new MyTermTransform(env, "Label"), TransformerScope.TrainTest);
        var withOva = withLabelTerms.Append(new MyOva(env, sdcaTrainer));
        var pipeline = withOva.Append(new MyKeyToValueTransform(env, "PredictedLabel"));

        var model = pipeline.Fit(data);
    }
}
/// <summary>
/// Scenario: fit a loader + text transform pipeline on the sentiment training file, read both
/// the training file and the sentiment test file through it, and pass both data sets to the
/// trainer's <c>Train</c> call (the second one serving as the validation set).
/// </summary>
public void New_TrainWithValidationSet()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var validationDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Loader followed by the text transform.
        var loader = new MyTextLoader(env, MakeSentimentTextLoaderArgs());
        var pipeline = loader.Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

        // Fit on the training file, then read the train and validation sets through the fitted reader.
        var reader = pipeline.Fit(new MultiFileSource(dataPath));
        var trainData = reader.Read(new MultiFileSource(dataPath));
        var validData = reader.Read(new MultiFileSource(validationDataPath));

        // Train with both the training and the validation data.
        var trainerArgs = new Runtime.Learners.LinearClassificationTrainer.Arguments();
        var trainer = new MySdca(env, trainerArgs, "Features", "Label");
        var model = trainer.Train(trainData, validData);
    }
}
/// <summary>
/// Scenario: inspect the output of a loader + text transform pipeline. Walks the schema to
/// read every column's name and raw type, then opens a cursor over all columns and reads the
/// "SentimentText", "Features_TransformedText" and "Features" values of every row.
/// </summary>
void New_Visibility()
{
    var dataPath = GetDataPath(SentimentDataPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        var loader = new MyTextLoader(env, MakeSentimentTextLoaderArgs());
        var pipeline = loader.Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));
        var data = pipeline.FitAndRead(new MultiFileSource(dataPath));

        // The schema exposes the available column names together with the type
        // a getter for each column has to be requested with.
        for (int col = 0; col < data.Schema.ColumnCount; col++)
        {
            var columnName = data.Schema.GetColumnName(col);
            var columnType = data.Schema.GetColumnType(col).RawType;
        }

        // Request every column from the cursor.
        using (var cursor = data.GetRowCursor(c => true))
        {
            // Resolve the three columns of interest; each lookup has to succeed.
            bool hasText = cursor.Schema.TryGetColumnIndex("SentimentText", out int textColumn);
            Assert.True(hasText);
            bool hasTransformedText = cursor.Schema.TryGetColumnIndex("Features_TransformedText", out int transformedTextColumn);
            Assert.True(hasTransformedText);
            bool hasFeatures = cursor.Schema.TryGetColumnIndex("Features", out int featureColumn);
            Assert.True(hasFeatures);

            var getOriginalText = cursor.GetGetter<DvText>(textColumn);
            var getTransformedText = cursor.GetGetter<VBuffer<DvText>>(transformedTextColumn);
            var getFeatures = cursor.GetGetter<VBuffer<float>>(featureColumn);

            // Buffers that the getters fill in for each row.
            DvText text = default;
            VBuffer<DvText> transformedText = default;
            VBuffer<float> features = default;

            while (cursor.MoveNext())
            {
                getOriginalText(ref text);
                getTransformedText(ref transformedText);
                getFeatures(ref features);
            }
        }
    }
}
/// <summary>
/// Scenario: train a text transform + linear classification trainer pipeline on the sentiment
/// training data, then read the feature weights from the predictor held by the last
/// transformer of the fitted model.
/// </summary>
public void New_IntrospectiveTraining()
{
    var dataPath = GetDataPath(SentimentDataPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        var data = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
            .FitAndRead(new MultiFileSource(dataPath));

        var pipeline = new MyTextTransform(env, MakeSentimentTextTransformArgs())
            .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label"));

        // Train.
        var model = pipeline.Fit(data);

        // Get feature weights.
        VBuffer<float> weights = default;
        model.LastTransformer.Model.GetFeatureWeights(ref weights);
    }
}