// Builds the Iris pipeline as a chain of estimator-style components (concat -> term -> SDCA
// multiclass trainer -> key-to-value), fits it, takes the model for TransformerScope.Scoring and
// asserts that the predicted label equals the input label for the first 20 rows of the same file.
void New_DecomposableTrainAndPredict()
{
    var irisPath = GetDataPath(IrisDataPath);

    using (var env = new TlcEnvironment())
    {
        // Load the training data.
        var reader = new MyTextLoader(env, MakeIrisTextLoaderArgs());
        var trainingData = reader.FitAndRead(new MultiFileSource(irisPath));

        // Assemble the pipeline one stage at a time.
        var featurizer = new MyConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");

        // The term transform on "Label" is appended with the TrainTest scope; presumably this keeps
        // it out of the scoring-only model requested below -- confirm against TransformerScope.
        var withLabelKeys = featurizer.Append(new MyTermTransform(env, "Label"), TransformerScope.TrainTest);

        var sdcaArgs = new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 };
        var withTrainer = withLabelKeys.Append(new SdcaMultiClassTrainer(env, sdcaArgs, "Features", "Label"));

        // Map the predicted key back to its original value.
        var pipeline = withTrainer.Append(new MyKeyToValueTransform(env, "PredictedLabel"));

        // Fit, then keep only the part of the model that applies at scoring time.
        var fitted = pipeline.Fit(trainingData);
        var scoringModel = fitted.GetModelFor(TransformerScope.Scoring);
        var engine = new MyPredictionEngine<IrisDataNoLabel, IrisPrediction>(env, scoringModel);

        // Re-read the same file as typed rows and check the first 20 predictions.
        var testLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));
        var testRows = testLoader.AsEnumerable<IrisData>(env, false);
        foreach (var row in testRows.Take(20))
        {
            var predicted = engine.Predict(row);
            Assert.True(predicted.PredictedLabel == row.Label);
        }
    }
}
// Trains a linear classifier on the sentiment data, saves the model to a temporary file, loads it
// back as a prediction engine and verifies the predictions for the first five test examples.
public void TrainSaveModelAndPredict()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
        var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

        // Train
        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
        {
            NumThreads = 1
        });

        var cached = new CacheDataView(env, trans, prefetch: null);
        var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        PredictionEngine<SentimentData, SentimentPrediction> model;
        using (var file = env.CreateTempFile())
        {
            // Save model. The roles are built over the uncached transform rather than the cached view.
            var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
            using (var ch = env.Start("saving"))
            {
                TrainUtils.SaveModel(env, ch, file, predictor, scoreRoles);
            }

            // Load model. The engine is created while the temp file still exists.
            using (var fs = file.OpenReadStream())
            {
                model = env.CreatePredictionEngine<SentimentData, SentimentPrediction>(fs);
            }
        }

        // Take a couple examples out of the test data and run predictions on top.
        // Reuses testDataPath instead of resolving the same path a second time.
        var testLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(testDataPath));
        var testData = testLoader.AsEnumerable<SentimentData>(env, false);
        foreach (var input in testData.Take(5))
        {
            var prediction = model.Predict(input);

            // Verify that predictions match and scores are separated from zero.
            Assert.Equal(input.Sentiment, prediction.Sentiment);
            Assert.True((input.Sentiment && prediction.Score > 1) || (!input.Sentiment && prediction.Score < -1));
        }
    }
}
// Inserts a user-defined row mapping (LambdaTransform) in front of the Iris training pipeline,
// trains an SDCA multiclass predictor on the result and asserts that the predicted label equals
// the input label for the first 20 rows of the same file.
void Extensibility()
{
    var irisPath = GetDataPath(IrisDataPath);

    using (var env = new TlcEnvironment())
    {
        var loader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));

        // Custom mapping: copies every field, except that PetalLength is replaced by SepalLength
        // whenever SepalLength is not greater than 3.
        Action<IrisData, IrisData> mapRow = (src, dst) =>
        {
            dst.Label = src.Label;
            if (src.SepalLength > 3)
            {
                dst.PetalLength = src.PetalLength;
            }
            else
            {
                dst.PetalLength = src.SepalLength;
            }
            dst.PetalWidth = src.PetalWidth;
            dst.SepalLength = src.SepalLength;
            dst.SepalWidth = src.SepalWidth;
        };
        var mapped = LambdaTransform.CreateMap(env, loader, mapRow);

        // Featurization: term transform on the label, then concatenate the four measurements.
        var labelTerm = new TermTransform(env, mapped, "Label");
        var features = new ConcatTransform(env, labelTerm, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");

        var sdcaArgs = new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 };
        var trainer = new SdcaMultiClassTrainer(env, sdcaArgs);

        // Wrap the data in a cache only when the trainer asks for it.
        IDataView trainView = features;
        if (trainer.Info.WantCaching)
        {
            trainView = new CacheDataView(env, features, prefetch: null);
        }
        var trainRoles = new RoleMappedData(trainView, label: "Label", feature: "Features");

        // Auto-normalization. trainRoles is passed by ref, so it may be replaced here.
        NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer);
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        // Score on the uncached pipeline and map the predicted key back to its value.
        var scoreRoles = new RoleMappedData(features, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);
        var labelBack = new KeyToValueTransform(env, scorer, "PredictedLabel");
        var engine = env.CreatePredictionEngine<IrisData, IrisPrediction>(labelBack);

        // Re-read the same file as typed rows and check the first 20 predictions.
        var testLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));
        var testRows = testLoader.AsEnumerable<IrisData>(env, false);
        foreach (var row in testRows.Take(20))
        {
            var predicted = engine.Predict(row);
            Assert.True(predicted.PredictedLabel == row.Label);
        }
    }
}
// Trains a linear classifier on the sentiment data, builds a scorer and runs the binary
// classification evaluator over the scored data to obtain overall metrics.
public void Evaluation()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
        var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

        // Train
        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
        {
            NumThreads = 1
        });

        var cached = new CacheDataView(env, trans, prefetch: null);
        var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

        // Create a prediction engine over the scorer.
        // NOTE(review): the engine and the test loader/enumerable below are constructed but never
        // consumed in this method; they are kept so that their construction is still exercised.
        var model = env.CreatePredictionEngine<SentimentData, SentimentPrediction>(scorer);

        // Reuses testDataPath instead of resolving the same path a second time.
        var testLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(testDataPath));
        var testData = testLoader.AsEnumerable<SentimentData>(env, false);

        // Evaluate.
        // NOTE(review): the scorer was created from roles over `trans`, i.e. the training-file
        // pipeline, so the metrics are presumably computed on the training data rather than on the
        // test file -- confirm this is intended.
        var dataEval = new RoleMappedData(scorer, label: "Label", feature: "Features", opt: true);

        var evaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments() { });
        var metricsDict = evaluator.Evaluate(dataEval);

        // Converts the "OverallMetrics" and "ConfusionMatrix" outputs and takes the first result.
        // No assertions are made on the values.
        var metrics = BinaryClassificationMetrics.FromMetrics(env, metricsDict["OverallMetrics"], metricsDict["ConfusionMatrix"])[0];
    }
}
// Trains a linear classifier on the sentiment data, then calls the resulting prediction engine
// from a Parallel.ForEach loop over the test examples, serializing the calls with a lock.
void MultithreadedPrediction()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
        var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

        // Train
        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
        {
            NumThreads = 1
        });

        var cached = new CacheDataView(env, trans, prefetch: null);
        var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

        // Create prediction engine.
        var model = env.CreatePredictionEngine<SentimentData, SentimentPrediction>(scorer);

        // Run predictions over the test data from multiple threads.
        // Reuses testDataPath instead of resolving the same path a second time.
        var testLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(testDataPath));
        var testData = testLoader.AsEnumerable<SentimentData>(env, false);

        Parallel.ForEach(testData, (input) =>
        {
            // Only one thread calls Predict at a time; presumably the engine is not safe for
            // concurrent use -- confirm. The result is not checked; the scenario only verifies
            // that the loop completes without throwing.
            lock (model)
            {
                var prediction = model.Predict(input);
            }
        });
    }
}
// Trains a linear classifier on the sentiment data, creates a prediction engine directly from the
// scorer and verifies the predictions for the first five test examples.
public void SimpleTrainAndPredict()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
        var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

        // Train
        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
        {
            NumThreads = 1
        });

        var cached = new CacheDataView(env, trans, prefetch: null);
        var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        // The scorer is built over the uncached transform, using the schema of the training roles.
        var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

        // Create prediction engine and test predictions.
        var model = env.CreatePredictionEngine<SentimentData, SentimentPrediction>(scorer);

        // Take a couple examples out of the test data and run predictions on top.
        // Reuses testDataPath instead of resolving the same path a second time.
        var testLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(testDataPath));
        var testData = testLoader.AsEnumerable<SentimentData>(env, false);
        foreach (var input in testData.Take(5))
        {
            var prediction = model.Predict(input);

            // Verify that predictions match and scores are separated from zero.
            Assert.Equal(input.Sentiment, prediction.Sentiment);
            Assert.True((input.Sentiment && prediction.Score > 1) || (!input.Sentiment && prediction.Score < -1));
        }
    }
}
// Trains an Ova meta-trainer whose inner predictor is produced by a FastTree binary classification
// trainer factory, then asserts that the predicted label equals the input label for the first 20
// rows of the Iris file.
void Metacomponents()
{
    var irisPath = GetDataPath(IrisDataPath);

    using (var env = new TlcEnvironment())
    {
        // Featurization: term transform on the label, then concatenate the four measurements.
        var loader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));
        var labelTerm = new TermTransform(env, loader, "Label");
        var features = new ConcatTransform(env, labelTerm, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");

        // The meta-trainer receives a factory for the inner binary trainer rather than an instance.
        var ovaArgs = new Ova.Arguments
        {
            PredictorType = new SimpleComponentFactory<ITrainer<IPredictorProducing<float>>>(
                (host) => new FastTreeBinaryClassificationTrainer(host, new FastTreeBinaryClassificationTrainer.Arguments()))
        };
        var trainer = new Ova(env, ovaArgs);

        // Wrap the data in a cache only when the trainer asks for it.
        IDataView trainView = features;
        if (trainer.Info.WantCaching)
        {
            trainView = new CacheDataView(env, features, prefetch: null);
        }
        var trainRoles = new RoleMappedData(trainView, label: "Label", feature: "Features");

        // Auto-normalization. trainRoles is passed by ref, so it may be replaced here.
        NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer);
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        // Score on the uncached pipeline and map the predicted key back to its value.
        var scoreRoles = new RoleMappedData(features, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);
        var labelBack = new KeyToValueTransform(env, scorer, "PredictedLabel");
        var engine = env.CreatePredictionEngine<IrisData, IrisPrediction>(labelBack);

        // Re-read the same file as typed rows and check the first 20 predictions.
        var testLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));
        var testRows = testLoader.AsEnumerable<IrisData>(env, false);
        foreach (var row in testRows.Take(20))
        {
            var predicted = engine.Predict(row);
            Assert.True(predicted.PredictedLabel == row.Label);
        }
    }
}
// Trains an SDCA multiclass predictor on the Iris data, re-applies the scoring pipeline to the
// loader via ApplyTransformUtils so that inputs without a label can be scored, and asserts that
// the first 20 rows are all predicted as "Iris-setosa".
void DecomposableTrainAndPredict()
{
    var irisPath = GetDataPath(IrisDataPath);

    using (var env = new TlcEnvironment())
    {
        // Featurization: term transform on the label, then concatenate the four measurements.
        var loader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));
        var labelTerm = new TermTransform(env, loader, "Label");
        var features = new ConcatTransform(env, labelTerm, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");

        var sdcaArgs = new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 };
        var trainer = new SdcaMultiClassTrainer(env, sdcaArgs);

        // Wrap the data in a cache only when the trainer asks for it.
        IDataView trainView = features;
        if (trainer.Info.WantCaching)
        {
            trainView = new CacheDataView(env, features, prefetch: null);
        }
        var trainRoles = new RoleMappedData(trainView, label: "Label", feature: "Features");

        // Auto-normalization. trainRoles is passed by ref, so it may be replaced here.
        NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer);
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        var scoreRoles = new RoleMappedData(features, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

        // Cut out term transform from pipeline.
        // NOTE(review): presumably this re-applies the transforms that sit after `labelTerm` directly
        // on top of `loader` -- confirm against ApplyTransformUtils.ApplyAllTransformsToData.
        var scorerWithoutTerm = ApplyTransformUtils.ApplyAllTransformsToData(env, scorer, loader, labelTerm);
        var labelBack = new KeyToValueTransform(env, scorerWithoutTerm, "PredictedLabel");
        var engine = env.CreatePredictionEngine<IrisDataNoLabel, IrisPrediction>(labelBack);

        // Read the same file as label-free rows; the expected class is hard-coded for the first 20.
        var testLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));
        var testRows = testLoader.AsEnumerable<IrisDataNoLabel>(env, false);
        foreach (var row in testRows.Take(20))
        {
            var predicted = engine.Predict(row);
            Assert.True(predicted.PredictedLabel == "Iris-setosa");
        }
    }
}