public void TrainAndPredictIrisModelUsingDirectInstantiationTest() { string dataPath = GetDataPath("iris.txt"); string testDataPath = dataPath; using (var env = new TlcEnvironment(seed: 1, conc: 1)) { // Pipeline var loader = new TextLoader(env, new TextLoader.Arguments() { HasHeader = false, Column = new[] { new TextLoader.Column("Label", DataKind.R4, 0), new TextLoader.Column("SepalLength", DataKind.R4, 1), new TextLoader.Column("SepalWidth", DataKind.R4, 2), new TextLoader.Column("PetalLength", DataKind.R4, 3), new TextLoader.Column("PetalWidth", DataKind.R4, 4) } }, new MultiFileSource(dataPath)); IDataTransform trans = new ConcatTransform(env, loader, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"); // Normalizer is not automatically added though the trainer has 'NormalizeFeatures' On/Auto trans = NormalizeTransform.CreateMinMaxNormalizer(env, trans, "Features"); // Train var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments() { NumThreads = 1 }); // Explicity adding CacheDataView since caching is not working though trainer has 'Caching' On/Auto var cached = new CacheDataView(env, trans, prefetch: null); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); var pred = trainer.Train(trainRoles); // Get scorer and evaluate the predictions from test data IDataScorerTransform testDataScorer = GetScorer(env, trans, pred, testDataPath); var metrics = Evaluate(env, testDataScorer); CompareMatrics(metrics); // Create prediction engine and test predictions var model = env.CreatePredictionEngine <IrisData, IrisPrediction>(testDataScorer); ComparePredictions(model); // Get feature importance i.e. weight vector var summary = ((MulticlassLogisticRegressionPredictor)pred).GetSummaryInKeyValuePairs(trainRoles.Schema); Assert.Equal(7.757864, Convert.ToDouble(summary[0].Value), 5); } }
public void TrainSaveModelAndPredict() { var dataPath = GetDataPath(SentimentDataPath); var testDataPath = GetDataPath(SentimentTestPath); using (var env = new TlcEnvironment(seed: 1, conc: 1)) { // Pipeline var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath)); var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader); // Train var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }); var cached = new CacheDataView(env, trans, prefetch: null); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); PredictionEngine <SentimentData, SentimentPrediction> model; using (var file = env.CreateTempFile()) { // Save model. var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); using (var ch = env.Start("saving")) TrainUtils.SaveModel(env, ch, file, predictor, scoreRoles); // Load model. using (var fs = file.OpenReadStream()) model = env.CreatePredictionEngine <SentimentData, SentimentPrediction>(fs); } // Take a couple examples out of the test data and run predictions on top. var testLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(GetDataPath(SentimentTestPath))); var testData = testLoader.AsEnumerable <SentimentData>(env, false); foreach (var input in testData.Take(5)) { var prediction = model.Predict(input); // Verify that predictions match and scores are separated from zero. Assert.Equal(input.Sentiment, prediction.Sentiment); Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1); } } }
void Extensibility() { var dataPath = GetDataPath(IrisDataPath); using (var env = new TlcEnvironment()) { var loader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); Action <IrisData, IrisData> action = (i, j) => { j.Label = i.Label; j.PetalLength = i.SepalLength > 3 ? i.PetalLength : i.SepalLength; j.PetalWidth = i.PetalWidth; j.SepalLength = i.SepalLength; j.SepalWidth = i.SepalWidth; }; var lambda = LambdaTransform.CreateMap(env, loader, action); var term = TermTransform.Create(env, lambda, "Label"); var concat = new ConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") .Transform(term); var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 }); IDataView trainData = trainer.Info.WantCaching ? (IDataView) new CacheDataView(env, concat, prefetch: null) : concat; var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features"); // Auto-normalization. NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(concat, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); var keyToValue = new KeyToValueTransform(env, "PredictedLabel").Transform(scorer); var model = env.CreatePredictionEngine <IrisData, IrisPrediction>(keyToValue); var testLoader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); var testData = testLoader.AsEnumerable <IrisData>(env, false); foreach (var input in testData.Take(20)) { var prediction = model.Predict(input); Assert.True(prediction.PredictedLabel == input.Label); } } }
public void Evaluation() { var dataPath = GetDataPath(SentimentDataPath); var testDataPath = GetDataPath(SentimentTestPath); using (var env = new TlcEnvironment(seed: 1, conc: 1)) { // Pipeline var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath)); var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader); // Train var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }); var cached = new CacheDataView(env, trans, prefetch: null); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); // Create prediction engine and test predictions. var model = env.CreatePredictionEngine <SentimentData, SentimentPrediction>(scorer); // Take a couple examples out of the test data and run predictions on top. var testLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(GetDataPath(SentimentTestPath))); var testData = testLoader.AsEnumerable <SentimentData>(env, false); var dataEval = new RoleMappedData(scorer, label: "Label", feature: "Features", opt: true); var evaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments() { }); var metricsDict = evaluator.Evaluate(dataEval); var metrics = BinaryClassificationMetrics.FromMetrics(env, metricsDict["OverallMetrics"], metricsDict["ConfusionMatrix"])[0]; } }
void MultithreadedPrediction() { var dataPath = GetDataPath(SentimentDataPath); var testDataPath = GetDataPath(SentimentTestPath); using (var env = new TlcEnvironment(seed: 1, conc: 1)) { // Pipeline var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath)); var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader); // Train var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }); var cached = new CacheDataView(env, trans, prefetch: null); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); // Create prediction engine and test predictions. var model = env.CreatePredictionEngine <SentimentData, SentimentPrediction>(scorer); // Take a couple examples out of the test data and run predictions on top. var testLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(GetDataPath(SentimentTestPath))); var testData = testLoader.AsEnumerable <SentimentData>(env, false); Parallel.ForEach(testData, (input) => { lock (model) { var prediction = model.Predict(input); } }); } }
public void SimpleTrainAndPredict() { var dataPath = GetDataPath(SentimentDataPath); var testDataPath = GetDataPath(SentimentTestPath); using (var env = new TlcEnvironment(seed: 1, conc: 1)) { // Pipeline var loader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath)); var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader); // Train var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }); var cached = new CacheDataView(env, trans, prefetch: null); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); // Create prediction engine and test predictions. var model = env.CreatePredictionEngine <SentimentData, SentimentPrediction>(scorer); // Take a couple examples out of the test data and run predictions on top. var testLoader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(GetDataPath(SentimentTestPath))); var testData = testLoader.AsEnumerable <SentimentData>(env, false); foreach (var input in testData.Take(5)) { var prediction = model.Predict(input); // Verify that predictions match and scores are separated from zero. Assert.Equal(input.Sentiment, prediction.Sentiment); Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1); } } }
void Metacomponents() { var dataPath = GetDataPath(IrisDataPath); using (var env = new TlcEnvironment()) { var loader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); var term = new TermTransform(env, loader, "Label"); var concat = new ConcatTransform(env, term, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"); var trainer = new Ova(env, new Ova.Arguments { PredictorType = new SimpleComponentFactory <ITrainer <IPredictorProducing <float> > > ( (e) => new FastTreeBinaryClassificationTrainer(e, new FastTreeBinaryClassificationTrainer.Arguments()) ) }); IDataView trainData = trainer.Info.WantCaching ? (IDataView) new CacheDataView(env, concat, prefetch: null) : concat; var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features"); // Auto-normalization. NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(concat, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); var keyToValue = new KeyToValueTransform(env, scorer, "PredictedLabel"); var model = env.CreatePredictionEngine <IrisData, IrisPrediction>(keyToValue); var testLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); var testData = testLoader.AsEnumerable <IrisData>(env, false); foreach (var input in testData.Take(20)) { var prediction = model.Predict(input); Assert.True(prediction.PredictedLabel == input.Label); } } }
void DecomposableTrainAndPredict() { var dataPath = GetDataPath(IrisDataPath); using (var env = new TlcEnvironment()) { var loader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); var term = TermTransform.Create(env, loader, "Label"); var concat = new ConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth").Transform(term); var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 }); IDataView trainData = trainer.Info.WantCaching ? (IDataView) new CacheDataView(env, concat, prefetch: null) : concat; var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features"); // Auto-normalization. NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(concat, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); // Cut out term transform from pipeline. var newScorer = ApplyTransformUtils.ApplyAllTransformsToData(env, scorer, loader, term); var keyToValue = new KeyToValueTransform(env, "PredictedLabel").Transform(newScorer); var model = env.CreatePredictionEngine <IrisDataNoLabel, IrisPrediction>(keyToValue); var testLoader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); var testData = testLoader.AsEnumerable <IrisDataNoLabel>(env, false); foreach (var input in testData.Take(20)) { var prediction = model.Predict(input); Assert.True(prediction.PredictedLabel == "Iris-setosa"); } } }
public void TensorFlowTransformMNISTConvTest() { var model_location = "mnist_model/frozen_saved_model.pb"; using (var env = new TlcEnvironment(seed: 1, conc: 1)) { var dataPath = GetDataPath("Train-Tiny-28x28.txt"); var testDataPath = GetDataPath("MNIST.Test.tiny.txt"); // Pipeline var loader = TextLoader.ReadFile(env, new TextLoader.Arguments() { Separator = "tab", HasHeader = true, Column = new[] { new TextLoader.Column() { Name = "Label", Source = new [] { new TextLoader.Range() { Min = 0, Max = 0 } }, Type = DataKind.Num }, new TextLoader.Column() { Name = "Placeholder", Source = new [] { new TextLoader.Range() { Min = 1, Max = 784 } }, Type = DataKind.Num } } }, new MultiFileSource(dataPath)); IDataView trans = TensorFlowTransform.Create(env, loader, model_location, "Softmax", "Placeholder"); trans = new ConcatTransform(env, trans, "reshape_input", "Placeholder"); trans = TensorFlowTransform.Create(env, trans, model_location, "dense/Relu", "reshape_input"); trans = new ConcatTransform(env, trans, "Features", "Softmax", "dense/Relu"); var trainer = new LightGbmMulticlassTrainer(env, new LightGbmArguments()); var cached = new CacheDataView(env, trans, prefetch: null); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); var pred = trainer.Train(trainRoles); // Get scorer and evaluate the predictions from test data IDataScorerTransform testDataScorer = GetScorer(env, trans, pred, testDataPath); var metrics = Evaluate(env, testDataScorer); Assert.Equal(0.99, metrics.AccuracyMicro, 2); Assert.Equal(0.99, metrics.AccuracyMicro, 2); // Create prediction engine and test predictions var model = env.CreatePredictionEngine <MNISTData, MNISTPrediction>(testDataScorer); var sample1 = new MNISTData() { Placeholder = new float[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 18, 18, 18, 126, 136, 175, 26, 166, 255, 247, 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253, 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253, 253, 253, 253, 253, 253, 251, 93, 82, 82, 56, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 219, 253, 253, 253, 253, 253, 198, 182, 247, 241, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 156, 107, 253, 253, 205, 11, 0, 43, 154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 1, 154, 253, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139, 253, 190, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 190, 253, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 241, 225, 160, 108, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 240, 253, 253, 119, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 186, 253, 253, 150, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 93, 252, 253, 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 253, 249, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 130, 183, 253, 253, 207, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 148, 229, 253, 253, 253, 250, 182, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 114, 221, 253, 253, 253, 253, 201, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 66, 213, 253, 253, 253, 253, 198, 81, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 171, 219, 253, 253, 253, 253, 195, 80, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 172, 226, 253, 253, 253, 253, 244, 133, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 253, 253, 253, 212, 135, 132, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; var prediction = model.Predict(sample1); float max = -1; int maxIndex = -1; for (int i = 0; i < prediction.PredictedLabels.Length; i++) { if (prediction.PredictedLabels[i] > max) { max = prediction.PredictedLabels[i]; maxIndex = i; } } Assert.Equal(5, maxIndex); } }