public virtual void CalculateMetrics(FeatureSubsetModel <IPredictorProducing <TOutput> > model, ISubsetSelector subsetSelector, Subset subset, Batch batch, bool needMetrics) { if (!needMetrics || model == null || model.Metrics != null) { return; } using (var ch = Host.Start("Calculate metrics")) { RoleMappedData testData = subsetSelector.GetTestData(subset, batch); // Because the training and test datasets are drawn from the same base dataset, the test data role mappings // are the same as for the train data. IDataScorerTransform scorePipe = ScoreUtils.GetScorer(model.Predictor, testData, Host, testData.Schema); // REVIEW: Should we somehow allow the user to customize the evaluator? // By what mechanism should we allow that? var evalComp = GetEvaluatorSubComponent(); RoleMappedData scoredTestData = RoleMappedData.Create(scorePipe, GetColumnRoles(testData.Schema, scorePipe.Schema)); IEvaluator evaluator = evalComp.CreateInstance(Host); // REVIEW: with the new evaluators, metrics of individual models are no longer // printed to the Console. Consider adding an option on the combiner to print them. // REVIEW: Consider adding an option to the combiner to save a data view // containing all the results of the individual models. var metricsDict = evaluator.Evaluate(scoredTestData); if (!metricsDict.TryGetValue(MetricKinds.OverallMetrics, out IDataView metricsView)) { throw Host.Except("Evaluator did not produce any overall metrics"); } // REVIEW: We're assuming that the metrics of interest are always doubles here. var metrics = EvaluateUtils.GetMetrics(metricsView, getVectorMetrics: false); model.Metrics = metrics.ToArray(); ch.Done(); } }
/// <summary> /// Implements a different behaviour when the predictor inherits from /// IPredictorScorer (the predictor knows the scorer to use). /// </summary> public static IDataScorerTransform CreateDefaultScorer(IHostEnvironment env, RoleMappedData roles, IPredictor ipredictor, RoleMappedSchema trainSchema = null) { IDataScorerTransform scorer; env.CheckValue(ipredictor, "IPredictor"); var iter = roles.Schema.GetColumnRoleNames().Where(c => c.Key.Value != RoleMappedSchema.ColumnRole.Feature.Value && c.Key.Value != RoleMappedSchema.ColumnRole.Group.Value); if (ipredictor.PredictionKind == PredictionKind.MultiClassClassification && ipredictor is IValueMapperDist) { // There is an issue with the code creating the default scorer. It expects to find a Float // as the output of DistType (from by IValueMapperDist) var newPred = new WrappedPredictorWithNoDistInterface(ipredictor); scorer = ScoreUtils.GetScorer(null, newPred, roles.Data, roles.Schema.Feature.Name, roles.Schema.Group == null ? null : roles.Schema.Group.Name, iter, env, trainSchema); } else { scorer = ScoreUtils.GetScorer(null, ipredictor, roles.Data, roles.Schema.Feature == null ? null : roles.Schema.Feature.Name, roles.Schema.Group == null ? null : roles.Schema.Group.Name, iter, env, trainSchema); } return(scorer); }
void DecomposableTrainAndPredict() { using (var env = new LocalEnvironment() .AddStandardComponents()) // ScoreUtils.GetScorer requires scorers to be registered in the ComponentCatalog { var loader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(GetDataPath(TestDatasets.irisData.trainFilename))); var term = TermTransform.Create(env, loader, "Label"); var concat = new ConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth").Transform(term); var trainer = new SdcaMultiClassTrainer(env, "Features", "Label", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; }); IDataView trainData = trainer.Info.WantCaching ? (IDataView) new CacheDataView(env, concat, prefetch: null) : concat; var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features"); // Auto-normalization. NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(concat, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); // Cut out term transform from pipeline. var newScorer = ApplyTransformUtils.ApplyAllTransformsToData(env, scorer, loader, term); var keyToValue = new KeyToValueTransform(env, "PredictedLabel").Transform(newScorer); var model = env.CreatePredictionEngine <IrisDataNoLabel, IrisPrediction>(keyToValue); var testData = loader.AsEnumerable <IrisDataNoLabel>(env, false); foreach (var input in testData.Take(20)) { var prediction = model.Predict(input); Assert.True(prediction.PredictedLabel == "Iris-setosa"); } } }
public void TestI_Q_KMeansInnerAPIWithDataFrame() { var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name; var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName); var iris = FileHelper.GetTestFile("iris.txt"); using (var env = new ConsoleEnvironment(conc: 1)) { ComponentHelper.AddStandardComponents(env); var df = Scikit.ML.DataManipulation.DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 }); var conc = env.CreateTransform("Concat{col=Feature:Sepal_length,Sepal_width}", df); var roleMap = env.CreateExamples(conc, "Feature", label: "Label"); var trainer = CreateTrainer(env, "km"); IPredictor model; using (var ch = env.Start("test")) model = TrainUtils.Train(env, ch, roleMap, trainer, null, 0); using (var ch = env.Start("Save")) using (var fs = File.Create(outModelFilePath)) TrainUtils.SaveModel(env, ch, fs, model, roleMap); Predictor pred; using (var fs = File.OpenRead(outModelFilePath)) pred = env.LoadPredictorOrNull(fs); #pragma warning disable CS0618 var scorer = ScoreUtils.GetScorer(pred.GetPredictorObject() as IPredictor, roleMap, env, null); #pragma warning restore CS0618 var dfout = Scikit.ML.DataManipulation.DataFrameIO.ReadView(scorer); Assert.AreEqual(dfout.Shape, new Tuple <int, int>(150, 13)); } }
private IDataScorerTransform _TrainSentiment() { bool normalize = true; var args = new TextLoader.Arguments() { Separator = "tab", HasHeader = true, Column = new[] { new TextLoader.Column("Label", DataKind.BL, 0), new TextLoader.Column("SentimentText", DataKind.Text, 1) } }; var args2 = new TextFeaturizingEstimator.Arguments() { Column = new TextFeaturizingEstimator.Column { Name = "Features", Source = new[] { "SentimentText" } }, KeepDiacritics = false, KeepPunctuations = false, TextCase = TextNormalizingEstimator.CaseNormalizationMode.Lower, OutputTokens = true, UsePredefinedStopWordRemover = true, VectorNormalizer = normalize ? TextFeaturizingEstimator.TextNormKind.L2 : TextFeaturizingEstimator.TextNormKind.None, CharFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments() { NgramLength = 3, AllLengths = false }, WordFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments() { NgramLength = 2, AllLengths = true }, }; var trainFilename = FileHelper.GetTestFile("wikipedia-detox-250-line-data.tsv"); using (var env = EnvHelper.NewTestEnvironment(seed: 1, conc: 1)) { // Pipeline var loader = new TextLoader(env, args).Read(new MultiFileSource(trainFilename)); var trans = TextFeaturizingEstimator.Create(env, args2, loader); // Train var trainer = new SdcaBinaryTrainer(env, new SdcaBinaryTrainer.Arguments { NumThreads = 1 }); var cached = new CacheDataView(env, trans, prefetch: null); var predictor = trainer.Fit(cached); var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); return(ScoreUtils.GetScorer(predictor.Model, scoreRoles, env, trainRoles.Schema)); } }
private static IDataScorerTransform _TrainSentiment() { bool normalize = true; var args = new TextLoader.Options() { Separators = new[] { '\t' }, HasHeader = true, Columns = new[] { new TextLoader.Column("Label", DataKind.Boolean, 0), new TextLoader.Column("SentimentText", DataKind.String, 1) } }; var args2 = new TextFeaturizingEstimator.Options() { KeepDiacritics = false, KeepPunctuations = false, CaseMode = TextNormalizingEstimator.CaseMode.Lower, OutputTokensColumnName = "tokens", Norm = normalize ? TextFeaturizingEstimator.NormFunction.L2 : TextFeaturizingEstimator.NormFunction.None, CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 3, UseAllLengths = false }, WordFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 2, UseAllLengths = true }, }; var trainFilename = FileHelper.GetTestFile("wikipedia-detox-250-line-data.tsv"); /*using (*/ var env = EnvHelper.NewTestEnvironment(seed: 1, conc: 1); { // Pipeline var loader = new TextLoader(env, args).Load(new MultiFileSource(trainFilename)); var trans = TextFeaturizingEstimator.Create(env, args2, loader); // Train var trainer = new SdcaLogisticRegressionBinaryTrainer(env, new SdcaLogisticRegressionBinaryTrainer.Options { LabelColumnName = "Label", FeatureColumnName = "Features" }); var cached = new Microsoft.ML.Data.CacheDataView(env, trans, prefetch: null); var predictor = trainer.Fit(cached); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); return(ScoreUtils.GetScorer(predictor.Model, scoreRoles, env, trainRoles.Schema)); } }
/// <summary> /// Creates a default data scorer appropriate to the predictor's prediction kind. /// </summary> /// <param name="env">The host environment to use.</param> /// <param name="data">The data to score.</param> /// <param name="predictor">The predictor to score.</param> /// <param name="trainSchema">The training data schema from which the scorer can optionally extract /// additional information, e.g., label names. If this is <c>null</c>, no information will be /// extracted.</param> /// <returns>The scored data.</returns> public static IDataScorerTransform CreateDefaultScorer(this IHostEnvironment env, RoleMappedData data, Predictor predictor, RoleMappedSchema trainSchema = null) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(data, nameof(data)); env.CheckValue(predictor, nameof(predictor)); env.CheckValueOrNull(trainSchema); return(ScoreUtils.GetScorer(predictor.Pred, data, env, trainSchema)); }
void ReconfigurablePrediction() { var dataPath = GetDataPath(SentimentDataPath); var testDataPath = GetDataPath(SentimentTestPath); using (var env = new TlcEnvironment(seed: 1, conc: 1)) { // Pipeline var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath)); var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader); // Train var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }); var cached = new CacheDataView(env, trans, prefetch: null); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); IPredictor predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); using (var ch = env.Start("Calibrator training")) { predictor = CalibratorUtils.TrainCalibrator(env, ch, new PlattCalibratorTrainer(env), int.MaxValue, predictor, trainRoles); } var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); var dataEval = new RoleMappedData(scorer, label: "Label", feature: "Features", opt: true); var evaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments() { }); var metricsDict = evaluator.Evaluate(dataEval); var metrics = BinaryClassificationMetrics.FromMetrics(env, metricsDict["OverallMetrics"], metricsDict["ConfusionMatrix"])[0]; var bindable = ScoreUtils.GetSchemaBindableMapper(env, predictor, null); var mapper = bindable.Bind(env, trainRoles.Schema); var newScorer = new BinaryClassifierScorer(env, new BinaryClassifierScorer.Arguments { Threshold = 0.01f, ThresholdColumn = DefaultColumnNames.Probability }, scoreRoles.Data, mapper, trainRoles.Schema); dataEval = new RoleMappedData(newScorer, label: "Label", feature: "Features", opt: true); var newEvaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments() { Threshold = 0.01f, UseRawScoreThreshold = false }); metricsDict = newEvaluator.Evaluate(dataEval); var newMetrics = BinaryClassificationMetrics.FromMetrics(env, metricsDict["OverallMetrics"], metricsDict["ConfusionMatrix"])[0]; } }
void Extensibility() { var dataPath = GetDataPath(IrisDataPath); using (var env = new LocalEnvironment()) { var loader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); Action <IrisData, IrisData> action = (i, j) => { j.Label = i.Label; j.PetalLength = i.SepalLength > 3 ? i.PetalLength : i.SepalLength; j.PetalWidth = i.PetalWidth; j.SepalLength = i.SepalLength; j.SepalWidth = i.SepalWidth; }; var lambda = LambdaTransform.CreateMap(env, loader, action); var term = TermTransform.Create(env, lambda, "Label"); var concat = new ConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") .Transform(term); var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 }); IDataView trainData = trainer.Info.WantCaching ? (IDataView) new CacheDataView(env, concat, prefetch: null) : concat; var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features"); // Auto-normalization. NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(concat, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); var keyToValue = new KeyToValueTransform(env, "PredictedLabel").Transform(scorer); var model = env.CreatePredictionEngine <IrisData, IrisPrediction>(keyToValue); var testLoader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); var testData = testLoader.AsEnumerable <IrisData>(env, false); foreach (var input in testData.Take(20)) { var prediction = model.Predict(input); Assert.True(prediction.PredictedLabel == input.Label); } } }
private IDataScorerTransform GetScorer(IHostEnvironment env, IDataView transforms, IPredictor pred, string testDataPath = null) { using (var ch = env.Start("Saving model")) using (var memoryStream = new MemoryStream()) { var trainRoles = new RoleMappedData(transforms, label: "Label", feature: "Features"); // Model cannot be saved with CacheDataView TrainUtils.SaveModel(env, ch, memoryStream, pred, trainRoles); memoryStream.Position = 0; using (var rep = RepositoryReader.Open(memoryStream, ch)) { IDataLoader testPipe = ModelFileUtils.LoadLoader(env, rep, new MultiFileSource(testDataPath), true); RoleMappedData testRoles = new RoleMappedData(testPipe, label: "Label", feature: "Features"); return(ScoreUtils.GetScorer(pred, testRoles, env, testRoles.Schema)); } } }
void MultithreadedPrediction() { var dataPath = GetDataPath(SentimentDataPath); var testDataPath = GetDataPath(SentimentTestPath); using (var env = new LocalEnvironment(seed: 1, conc: 1)) { // Pipeline var loader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath)); var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader); // Train var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }); var cached = new CacheDataView(env, trans, prefetch: null); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); // Create prediction engine and test predictions. var model = env.CreatePredictionEngine <SentimentData, SentimentPrediction>(scorer); // Take a couple examples out of the test data and run predictions on top. var testLoader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(GetDataPath(SentimentTestPath))); var testData = testLoader.AsEnumerable <SentimentData>(env, false); Parallel.ForEach(testData, (input) => { lock (model) { var prediction = model.Predict(input); } }); } }
public void Evaluation() { var dataPath = GetDataPath(SentimentDataPath); var testDataPath = GetDataPath(SentimentTestPath); using (var env = new TlcEnvironment(seed: 1, conc: 1)) { // Pipeline var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath)); var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader); // Train var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }); var cached = new CacheDataView(env, trans, prefetch: null); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); // Create prediction engine and test predictions. var model = env.CreatePredictionEngine <SentimentData, SentimentPrediction>(scorer); // Take a couple examples out of the test data and run predictions on top. var testLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(GetDataPath(SentimentTestPath))); var testData = testLoader.AsEnumerable <SentimentData>(env, false); var dataEval = new RoleMappedData(scorer, label: "Label", feature: "Features", opt: true); var evaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments() { }); var metricsDict = evaluator.Evaluate(dataEval); var metrics = BinaryClassificationMetrics.FromMetrics(env, metricsDict["OverallMetrics"], metricsDict["ConfusionMatrix"])[0]; } }
public void SimpleTrainAndPredict() { var dataPath = GetDataPath(SentimentDataPath); var testDataPath = GetDataPath(SentimentTestPath); using (var env = new TlcEnvironment(seed: 1, conc: 1)) { // Pipeline var loader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath)); var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader); // Train var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }); var cached = new CacheDataView(env, trans, prefetch: null); var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features"); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); // Create prediction engine and test predictions. var model = env.CreatePredictionEngine <SentimentData, SentimentPrediction>(scorer); // Take a couple examples out of the test data and run predictions on top. var testLoader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(GetDataPath(SentimentTestPath))); var testData = testLoader.AsEnumerable <SentimentData>(env, false); foreach (var input in testData.Take(5)) { var prediction = model.Predict(input); // Verify that predictions match and scores are separated from zero. Assert.Equal(input.Sentiment, prediction.Sentiment); Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1); } } }
private static IDataTransform Create(IHostEnvironment env, Arguments args, ITrainer trainer, IDataView input, IComponentFactory <IPredictor, ISchemaBindableMapper> mapperFactory) { Contracts.AssertValue(env, nameof(env)); env.AssertValue(args, nameof(args)); env.AssertValue(trainer, nameof(trainer)); env.AssertValue(input, nameof(input)); var host = env.Register("TrainAndScoreTransform"); using (var ch = host.Start("Train")) { ch.Trace("Constructing trainer"); var customCols = TrainUtils.CheckAndGenerateCustomColumns(env, args.CustomColumn); string feat; string group; var data = CreateDataFromArgs(ch, input, args, out feat, out group); var predictor = TrainUtils.Train(host, ch, data, trainer, null, args.Calibrator, args.MaxCalibrationExamples, null); return(ScoreUtils.GetScorer(args.Scorer, predictor, input, feat, group, customCols, env, data.Schema, mapperFactory)); } }
void Metacomponents() { var dataPath = GetDataPath(IrisDataPath); using (var env = new TlcEnvironment()) { var loader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); var term = new TermTransform(env, loader, "Label"); var concat = new ConcatTransform(env, term, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"); var trainer = new Ova(env, new Ova.Arguments { PredictorType = new SimpleComponentFactory <ITrainer <IPredictorProducing <float> > > ( (e) => new FastTreeBinaryClassificationTrainer(e, new FastTreeBinaryClassificationTrainer.Arguments()) ) }); IDataView trainData = trainer.Info.WantCaching ? (IDataView) new CacheDataView(env, concat, prefetch: null) : concat; var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features"); // Auto-normalization. NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(concat, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); var keyToValue = new KeyToValueTransform(env, scorer, "PredictedLabel"); var model = env.CreatePredictionEngine <IrisData, IrisPrediction>(keyToValue); var testLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); var testData = testLoader.AsEnumerable <IrisData>(env, false); foreach (var input in testData.Take(20)) { var prediction = model.Predict(input); Assert.True(prediction.PredictedLabel == input.Label); } } }
void DecomposableTrainAndPredict() { var dataPath = GetDataPath(IrisDataPath); using (var env = new TlcEnvironment()) { var loader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); var term = new TermTransform(env, loader, "Label"); var concat = new ConcatTransform(env, term, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"); var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments { MaxIterations = 100, Shuffle = true, NumThreads = 1 }); IDataView trainData = trainer.Info.WantCaching ? (IDataView) new CacheDataView(env, concat, prefetch: null) : concat; var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features"); // Auto-normalization. NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer); var predictor = trainer.Train(new Runtime.TrainContext(trainRoles)); var scoreRoles = new RoleMappedData(concat, label: "Label", feature: "Features"); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema); // Cut out term transform from pipeline. var newScorer = ApplyTransformUtils.ApplyAllTransformsToData(env, scorer, loader, term); var keyToValue = new KeyToValueTransform(env, newScorer, "PredictedLabel"); var model = env.CreatePredictionEngine <IrisDataNoLabel, IrisPrediction>(keyToValue); var testLoader = new TextLoader(env, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath)); var testData = testLoader.AsEnumerable <IrisDataNoLabel>(env, false); foreach (var input in testData.Take(20)) { var prediction = model.Predict(input); Assert.True(prediction.PredictedLabel == "Iris-setosa"); } } }
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) { Contracts.CheckValue(env, nameof(env)); env.CheckUserArg(!string.IsNullOrWhiteSpace(args.InputModelFile), nameof(args.InputModelFile), "The input model file is required."); IPredictor predictor; RoleMappedSchema trainSchema = null; using (var file = env.OpenInputFile(args.InputModelFile)) using (var strm = file.OpenReadStream()) using (var rep = RepositoryReader.Open(strm, env)) { ModelLoadContext.LoadModel <IPredictor, SignatureLoadModel>(env, out predictor, rep, ModelFileUtils.DirPredictor); trainSchema = ModelFileUtils.LoadRoleMappedSchemaOrNull(env, rep); } string feat = TrainUtils.MatchNameOrDefaultOrNull(env, input.Schema, nameof(args.FeatureColumn), args.FeatureColumn, DefaultColumnNames.Features); string group = TrainUtils.MatchNameOrDefaultOrNull(env, input.Schema, nameof(args.GroupColumn), args.GroupColumn, DefaultColumnNames.GroupId); var customCols = TrainUtils.CheckAndGenerateCustomColumns(env, args.CustomColumn); return(ScoreUtils.GetScorer(args.Scorer, predictor, input, feat, group, customCols, env, trainSchema)); }
public void TestXGBoostMultiClassification() { var methodName = string.Format("{0}", System.Reflection.MethodBase.GetCurrentMethod().Name); var dataFilePath = FileHelper.GetTestFile("iris.txt"); var outData = FileHelper.GetOutputFile("outData1.txt", methodName); var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName); var env = EnvHelper.NewTestEnvironment(); var loader = env.CreateLoader("Text{col=Label:R4:0 col=Features:R4:1-4 header=+}", new MultiFileSource(dataFilePath)); var roles = env.CreateExamples(loader, "Features", "Label"); var trainer = EnvHelper.CreateTrainer <XGBoostMulticlassTrainer>(env, "exgbmc{iter=10}"); IDataTransform pred = null; using (var ch = env.Start("Train")) { var model = trainer.Train(new TrainContext(roles)); pred = ScoreUtils.GetScorer(model, roles, env, roles.Schema); } var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName); EnvHelper.SavePredictions(env, pred, outputDataFilePath); Assert.IsTrue(File.Exists(outputDataFilePath)); var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName); EnvHelper.SaveModel(env, pred, outModelFilePath); Assert.IsTrue(File.Exists(outModelFilePath)); var d1 = File.ReadAllText(outputDataFilePath); Assert.IsTrue(d1.Length > 0); }
private void Run(IChannel ch) { IDataLoader loader; IPredictor rawPred; RoleMappedSchema trainSchema; if (string.IsNullOrEmpty(Args.InputModelFile)) { loader = CreateLoader(); rawPred = null; trainSchema = null; Host.CheckUserArg(Args.LoadPredictor != true, nameof(Args.LoadPredictor), "Cannot be set to true unless " + nameof(Args.InputModelFile) + " is also specifified."); } else { LoadModelObjects(ch, _loadPredictor, out rawPred, true, out trainSchema, out loader); } // Get the transform chain. IDataView source; IDataView end; LinkedList <ITransformCanSavePfa> transforms; GetPipe(ch, loader, out source, out end, out transforms); Host.Assert(transforms.Count == 0 || transforms.Last.Value == end); // If we have a predictor, try to get the scorer for it. if (rawPred != null) { RoleMappedData data; if (trainSchema != null) { data = new RoleMappedData(end, trainSchema.GetColumnRoleNames()); } else { // We had a predictor, but no roles stored in the model. Just suppose // default column names are OK, if present. data = new RoleMappedData(end, DefaultColumnNames.Label, DefaultColumnNames.Features, DefaultColumnNames.GroupId, DefaultColumnNames.Weight, DefaultColumnNames.Name, opt: true); } var scorePipe = ScoreUtils.GetScorer(rawPred, data, Host, trainSchema); var scorePfa = scorePipe as ITransformCanSavePfa; if (scorePfa?.CanSavePfa == true) { Host.Assert(scorePipe.Source == end); end = scorePipe; transforms.AddLast(scorePfa); } else { Contracts.CheckUserArg(_loadPredictor != true, nameof(Arguments.LoadPredictor), "We were explicitly told to load the predictor but we do not know how to save it as PFA."); ch.Warning("We do not know how to save the predictor as PFA. Ignoring."); } } else { Contracts.CheckUserArg(_loadPredictor != true, nameof(Arguments.LoadPredictor), "We were explicitly told to load the predictor but one was not present."); } var ctx = new BoundPfaContext(Host, source.Schema, _inputsToDrop, allowSet: _allowSet); foreach (var trans in transforms) { Host.Assert(trans.CanSavePfa); trans.SaveAsPfa(ctx); } var toExport = new List <string>(); for (int i = 0; i < end.Schema.ColumnCount; ++i) { if (end.Schema.IsHidden(i)) { continue; } var name = end.Schema.GetColumnName(i); if (_outputsToDrop.Contains(name)) { continue; } if (!ctx.IsInput(name) || _keepInput) { toExport.Add(name); } } JObject pfaDoc = ctx.Finalize(end.Schema, toExport.ToArray()); if (_name != null) { pfaDoc["name"] = _name; } if (_outputModelPath == null) { ch.Info(MessageSensitivity.Schema, pfaDoc.ToString(_formatting)); } else { using (var file = Host.CreateOutputFile(_outputModelPath)) using (var stream = file.CreateWriteStream()) using (var writer = new StreamWriter(stream)) writer.Write(pfaDoc.ToString(_formatting)); } if (!string.IsNullOrWhiteSpace(Args.OutputModelFile)) { ch.Trace("Saving the data pipe"); // Should probably include "end"? SaveLoader(loader, Args.OutputModelFile); } }
protected ScorerWrapper <TModel> MakeScorerBasic(TModel predictor, RoleMappedData data) { var scorer = ScoreUtils.GetScorer(predictor, data, _env, data.Schema); return((TTransformer)(new ScorerWrapper <TModel>(_env, scorer, predictor, data.Schema.Feature.Name))); }
private void Run(IChannel ch) { ILegacyDataLoader loader = null; IPredictor rawPred = null; IDataView view; RoleMappedSchema trainSchema = null; if (_model == null && _predictiveModel == null) { if (string.IsNullOrEmpty(ImplOptions.InputModelFile)) { loader = CreateLoader(); rawPred = null; trainSchema = null; Host.CheckUserArg(ImplOptions.LoadPredictor != true, nameof(ImplOptions.LoadPredictor), "Cannot be set to true unless " + nameof(ImplOptions.InputModelFile) + " is also specified."); } else { LoadModelObjects(ch, _loadPredictor, out rawPred, true, out trainSchema, out loader); } view = loader; } else if (_model != null) { view = _model.Apply(Host, new EmptyDataView(Host, _model.InputSchema)); } else { view = _predictiveModel.TransformModel.Apply(Host, new EmptyDataView(Host, _predictiveModel.TransformModel.InputSchema)); rawPred = _predictiveModel.Predictor; trainSchema = _predictiveModel.GetTrainingSchema(Host); } // Create the ONNX context for storing global information var assembly = System.Reflection.Assembly.GetExecutingAssembly(); var versionInfo = System.Diagnostics.FileVersionInfo.GetVersionInfo(assembly.Location); var ctx = new OnnxContextImpl(Host, _name, ProducerName, versionInfo.FileVersion, ModelVersion, _domain, ImplOptions.OnnxVersion); // Get the transform chain. IDataView source; IDataView end; LinkedList <ITransformCanSaveOnnx> transforms; GetPipe(ctx, ch, view, out source, out end, out transforms); Host.Assert(transforms.Count == 0 || transforms.Last.Value == end); // If we have a predictor, try to get the scorer for it. if (rawPred != null) { RoleMappedData data; if (trainSchema != null) { data = new RoleMappedData(end, trainSchema.GetColumnRoleNames()); } else { // We had a predictor, but no roles stored in the model. Just suppose // default column names are OK, if present. data = new RoleMappedData(end, DefaultColumnNames.Label, DefaultColumnNames.Features, DefaultColumnNames.GroupId, DefaultColumnNames.Weight, DefaultColumnNames.Name, opt: true); } var scorePipe = ScoreUtils.GetScorer(rawPred, data, Host, trainSchema); var scoreOnnx = scorePipe as ITransformCanSaveOnnx; if (scoreOnnx?.CanSaveOnnx(ctx) == true) { Host.Assert(scorePipe.Source == end); end = scorePipe; transforms.AddLast(scoreOnnx); if (rawPred.PredictionKind == PredictionKind.BinaryClassification || rawPred.PredictionKind == PredictionKind.MulticlassClassification) { // Check if the PredictedLabel Column is a KeyDataViewType and has KeyValue Annotations. // If it does, add a KeyToValueMappingTransformer, to enable NimbusML to get the values back // when using an ONNX model, as described in https://github.com/dotnet/machinelearning/pull/4841 var predictedLabelColumn = scorePipe.Schema.GetColumnOrNull(DefaultColumnNames.PredictedLabel); if (predictedLabelColumn.HasValue && HasKeyValues(predictedLabelColumn.Value)) { var outputData = new KeyToValueMappingTransformer(Host, DefaultColumnNames.PredictedLabel).Transform(scorePipe); end = outputData; transforms.AddLast(outputData as ITransformCanSaveOnnx); } } } else { Contracts.CheckUserArg(_loadPredictor != true, nameof(Arguments.LoadPredictor), "We were explicitly told to load the predictor but we do not know how to save it as ONNX."); ch.Warning("We do not know how to save the predictor as ONNX. Ignoring."); } } else { Contracts.CheckUserArg(_loadPredictor != true, nameof(Arguments.LoadPredictor), "We were explicitly told to load the predictor but one was not present."); } // Convert back to values the KeyDataViewType "pass-through" columns // (i.e those that remained untouched by the model). This is done to enable NimbusML to get these values // as described in https://github.com/dotnet/machinelearning/pull/4841 var passThroughColumnNames = GetPassThroughKeyDataViewTypeColumnsNames(source, end); foreach (var name in passThroughColumnNames) { var outputData = new KeyToValueMappingTransformer(Host, name).Transform(end); end = outputData; transforms.AddLast(end as ITransformCanSaveOnnx); } var model = ConvertTransformListToOnnxModel(ctx, ch, source, end, transforms, _inputsToDrop, _outputsToDrop); using (var file = Host.CreateOutputFile(_outputModelPath)) using (var stream = file.CreateWriteStream()) model.WriteTo(stream); if (_outputJsonModelPath != null) { using (var file = Host.CreateOutputFile(_outputJsonModelPath)) using (var stream = file.CreateWriteStream()) using (var writer = new StreamWriter(stream)) { var parsedJson = JsonConvert.DeserializeObject(model.ToString()); writer.Write(JsonConvert.SerializeObject(parsedJson, Formatting.Indented)); } } if (!string.IsNullOrWhiteSpace(ImplOptions.OutputModelFile)) { Contracts.Assert(loader != null); ch.Trace("Saving the data pipe"); // Should probably include "end"? SaveLoader(loader, ImplOptions.OutputModelFile); } }
void CrossValidation() { var dataPath = GetDataPath(SentimentDataPath); var testDataPath = GetDataPath(SentimentTestPath); int numFolds = 5; using (var env = new TlcEnvironment(seed: 1, conc: 1)) { // Pipeline. var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath)); var text = TextTransform.Create(env, MakeSentimentTextTransformArgs(false), loader); IDataView trans = new GenerateNumberTransform(env, text, "StratificationColumn"); // Train. var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1, ConvergenceTolerance = 1f }); var metrics = new List <BinaryClassificationMetrics>(); for (int fold = 0; fold < numFolds; fold++) { IDataView trainPipe = new RangeFilter(env, new RangeFilter.Arguments() { Column = "StratificationColumn", Min = (Double)fold / numFolds, Max = (Double)(fold + 1) / numFolds, Complement = true }, trans); trainPipe = new OpaqueDataView(trainPipe); var trainData = new RoleMappedData(trainPipe, label: "Label", feature: "Features"); // Auto-normalization. NormalizeTransform.CreateIfNeeded(env, ref trainData, trainer); var preCachedData = trainData; // Auto-caching. if (trainer.Info.WantCaching) { var prefetch = trainData.Schema.GetColumnRoles().Select(kc => kc.Value.Index).ToArray(); var cacheView = new CacheDataView(env, trainData.Data, prefetch); // Because the prefetching worked, we know that these are valid columns. trainData = new RoleMappedData(cacheView, trainData.Schema.GetColumnRoleNames()); } var predictor = trainer.Train(new Runtime.TrainContext(trainData)); IDataView testPipe = new RangeFilter(env, new RangeFilter.Arguments() { Column = "StratificationColumn", Min = (Double)fold / numFolds, Max = (Double)(fold + 1) / numFolds, Complement = false }, trans); testPipe = new OpaqueDataView(testPipe); var pipe = ApplyTransformUtils.ApplyAllTransformsToData(env, preCachedData.Data, testPipe, trainPipe); var testRoles = new RoleMappedData(pipe, trainData.Schema.GetColumnRoleNames()); IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, testRoles, env, testRoles.Schema); BinaryClassifierMamlEvaluator eval = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments() { }); var dataEval = new RoleMappedData(scorer, testRoles.Schema.GetColumnRoleNames(), opt: true); var dict = eval.Evaluate(dataEval); var foldMetrics = BinaryClassificationMetrics.FromMetrics(env, dict["OverallMetrics"], dict["ConfusionMatrix"]); metrics.Add(foldMetrics.Single()); } } }
/// <summary> /// Creates a scorer to compute the predictions of a trainer. /// </summary> /// <param name="predictor">predictor</param> /// <param name="data">data</param> /// <param name="env">host</param> /// <param name="trainSchema">training schema</param> /// <returns>scorer</returns> public IDataScorerTransform GetScorer(IPredictor predictor, RoleMappedData data, IHostEnvironment env, RoleMappedSchema trainSchema = null) { return(ScoreUtils.GetScorer(predictor, data, env, trainSchema)); }
private void Run(IChannel ch) { IDataLoader loader = null; IPredictor rawPred = null; IDataView view; RoleMappedSchema trainSchema = null; if (_model == null) { if (string.IsNullOrEmpty(Args.InputModelFile)) { loader = CreateLoader(); rawPred = null; trainSchema = null; Host.CheckUserArg(Args.LoadPredictor != true, nameof(Args.LoadPredictor), "Cannot be set to true unless " + nameof(Args.InputModelFile) + " is also specifified."); } else { LoadModelObjects(ch, _loadPredictor, out rawPred, true, out trainSchema, out loader); } view = loader; } else { view = _model.Apply(Host, new EmptyDataView(Host, _model.InputSchema)); } // Get the transform chain. IDataView source; IDataView end; LinkedList <ITransformCanSaveOnnx> transforms; GetPipe(ch, view, out source, out end, out transforms); Host.Assert(transforms.Count == 0 || transforms.Last.Value == end); var assembly = System.Reflection.Assembly.GetExecutingAssembly(); var versionInfo = System.Diagnostics.FileVersionInfo.GetVersionInfo(assembly.Location); var ctx = new OnnxContextImpl(Host, _name, ProducerName, versionInfo.FileVersion, ModelVersion, _domain); // If we have a predictor, try to get the scorer for it. if (rawPred != null) { RoleMappedData data; if (trainSchema != null) { data = RoleMappedData.Create(end, trainSchema.GetColumnRoleNames()); } else { // We had a predictor, but no roles stored in the model. Just suppose // default column names are OK, if present. data = TrainUtils.CreateExamplesOpt(end, DefaultColumnNames.Label, DefaultColumnNames.Features, DefaultColumnNames.GroupId, DefaultColumnNames.Weight, DefaultColumnNames.Name); } var scorePipe = ScoreUtils.GetScorer(rawPred, data, Host, trainSchema); var scoreOnnx = scorePipe as ITransformCanSaveOnnx; if (scoreOnnx?.CanSaveOnnx == true) { Host.Assert(scorePipe.Source == end); end = scorePipe; transforms.AddLast(scoreOnnx); } else { Contracts.CheckUserArg(_loadPredictor != true, nameof(Arguments.LoadPredictor), "We were explicitly told to load the predictor but we do not know how to save it as ONNX."); ch.Warning("We do not know how to save the predictor as ONNX. Ignoring."); } } else { Contracts.CheckUserArg(_loadPredictor != true, nameof(Arguments.LoadPredictor), "We were explicitly told to load the predictor but one was not present."); } HashSet <string> inputColumns = new HashSet <string>(); //Create graph inputs. for (int i = 0; i < source.Schema.ColumnCount; i++) { string colName = source.Schema.GetColumnName(i); if (_inputsToDrop.Contains(colName)) { continue; } ctx.AddInputVariable(source.Schema.GetColumnType(i), colName); inputColumns.Add(colName); } //Create graph nodes, outputs and intermediate values. foreach (var trans in transforms) { Host.Assert(trans.CanSaveOnnx); trans.SaveAsOnnx(ctx); } //Add graph outputs. for (int i = 0; i < end.Schema.ColumnCount; ++i) { if (end.Schema.IsHidden(i)) { continue; } var idataviewColumnName = end.Schema.GetColumnName(i);; if (_outputsToDrop.Contains(idataviewColumnName) || _inputsToDrop.Contains(idataviewColumnName)) { continue; } var variableName = ctx.TryGetVariableName(idataviewColumnName); if (variableName != null) { ctx.AddOutputVariable(end.Schema.GetColumnType(i), variableName); } } var model = ctx.MakeModel(); if (_outputModelPath != null) { using (var file = Host.CreateOutputFile(_outputModelPath)) using (var stream = file.CreateWriteStream()) model.WriteTo(stream); } if (_outputJsonModelPath != null) { using (var file = Host.CreateOutputFile(_outputJsonModelPath)) using (var stream = file.CreateWriteStream()) using (var writer = new StreamWriter(stream)) { var parsedJson = JsonConvert.DeserializeObject(model.ToString()); writer.Write(JsonConvert.SerializeObject(parsedJson, Formatting.Indented)); } } if (!string.IsNullOrWhiteSpace(Args.OutputModelFile)) { Contracts.Assert(loader != null); ch.Trace("Saving the data pipe"); // Should probably include "end"? SaveLoader(loader, Args.OutputModelFile); } }
private void Run(IChannel ch) { ILegacyDataLoader loader = null; IPredictor rawPred = null; IDataView view; RoleMappedSchema trainSchema = null; if (_model == null) { if (string.IsNullOrEmpty(ImplOptions.InputModelFile)) { loader = CreateLoader(); rawPred = null; trainSchema = null; Host.CheckUserArg(ImplOptions.LoadPredictor != true, nameof(ImplOptions.LoadPredictor), "Cannot be set to true unless " + nameof(ImplOptions.InputModelFile) + " is also specifified."); } else { LoadModelObjects(ch, _loadPredictor, out rawPred, true, out trainSchema, out loader); } view = loader; } else { view = _model.Apply(Host, new EmptyDataView(Host, _model.InputSchema)); } // Create the ONNX context for storing global information var assembly = System.Reflection.Assembly.GetExecutingAssembly(); var versionInfo = System.Diagnostics.FileVersionInfo.GetVersionInfo(assembly.Location); var ctx = new OnnxContextImpl(Host, _name, ProducerName, versionInfo.FileVersion, ModelVersion, _domain, ImplOptions.OnnxVersion); // Get the transform chain. IDataView source; IDataView end; LinkedList <ITransformCanSaveOnnx> transforms; GetPipe(ctx, ch, view, out source, out end, out transforms); Host.Assert(transforms.Count == 0 || transforms.Last.Value == end); // If we have a predictor, try to get the scorer for it. if (rawPred != null) { RoleMappedData data; if (trainSchema != null) { data = new RoleMappedData(end, trainSchema.GetColumnRoleNames()); } else { // We had a predictor, but no roles stored in the model. Just suppose // default column names are OK, if present. data = new RoleMappedData(end, DefaultColumnNames.Label, DefaultColumnNames.Features, DefaultColumnNames.GroupId, DefaultColumnNames.Weight, DefaultColumnNames.Name, opt: true); } var scorePipe = ScoreUtils.GetScorer(rawPred, data, Host, trainSchema); var scoreOnnx = scorePipe as ITransformCanSaveOnnx; if (scoreOnnx?.CanSaveOnnx(ctx) == true) { Host.Assert(scorePipe.Source == end); end = scorePipe; transforms.AddLast(scoreOnnx); } else { Contracts.CheckUserArg(_loadPredictor != true, nameof(Arguments.LoadPredictor), "We were explicitly told to load the predictor but we do not know how to save it as ONNX."); ch.Warning("We do not know how to save the predictor as ONNX. Ignoring."); } } else { Contracts.CheckUserArg(_loadPredictor != true, nameof(Arguments.LoadPredictor), "We were explicitly told to load the predictor but one was not present."); } var model = ConvertTransformListToOnnxModel(ctx, ch, source, end, transforms, _inputsToDrop, _outputsToDrop); using (var file = Host.CreateOutputFile(_outputModelPath)) using (var stream = file.CreateWriteStream()) model.WriteTo(stream); if (_outputJsonModelPath != null) { using (var file = Host.CreateOutputFile(_outputJsonModelPath)) using (var stream = file.CreateWriteStream()) using (var writer = new StreamWriter(stream)) { var parsedJson = JsonConvert.DeserializeObject(model.ToString()); writer.Write(JsonConvert.SerializeObject(parsedJson, Formatting.Indented)); } } if (!string.IsNullOrWhiteSpace(ImplOptions.OutputModelFile)) { Contracts.Assert(loader != null); ch.Trace("Saving the data pipe"); // Should probably include "end"? SaveLoader(loader, ImplOptions.OutputModelFile); } }