/// <summary>
/// Runs the iris data through a custom lambda mapping, a term transform on "Label" and a
/// feature concatenation, trains a multiclass SDCA predictor on the result, and asserts that
/// the first 20 rows re-read from the same training file are predicted with their own label.
/// </summary>
void Extensibility()
{
    using (var env = new LocalEnvironment())
    {
        var trainLoaderArgs = MakeIrisTextLoaderArgs();
        var trainFiles = new MultiFileSource(GetDataPath(TestDatasets.irisData.trainFilename));
        var loader = TextLoader.ReadFile(env, trainLoaderArgs, trainFiles);

        // Custom row mapping: every field is copied through unchanged except PetalLength,
        // which is kept only when SepalLength > 3 and otherwise replaced by SepalLength.
        Action<IrisData, IrisData> action = (src, dst) =>
        {
            dst.Label = src.Label;
            if (src.SepalLength > 3)
            {
                dst.PetalLength = src.PetalLength;
            }
            else
            {
                dst.PetalLength = src.SepalLength;
            }
            dst.PetalWidth = src.PetalWidth;
            dst.SepalLength = src.SepalLength;
            dst.SepalWidth = src.SepalWidth;
        };

        var mapped = LambdaTransform.CreateMap(env, loader, action);
        var term = TermTransform.Create(env, mapped, "Label");
        var concatenator = new ConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
        var concat = concatenator.Transform(term);

        var trainerArgs = new SdcaMultiClassTrainer.Arguments
        {
            MaxIterations = 100,
            Shuffle = true,
            NumThreads = 1
        };
        var trainer = new SdcaMultiClassTrainer(env, trainerArgs);

        // Wrap the data in a cache only when the trainer asks for it.
        IDataView trainData = concat;
        if (trainer.Info.WantCaching)
        {
            trainData = new CacheDataView(env, concat, prefetch: null);
        }
        var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features");

        // Auto-normalization.
        NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer);
        var trainContext = new Runtime.TrainContext(trainRoles);
        var predictor = trainer.Train(trainContext);

        // Score on the uncached data, using the schema of the training roles.
        var scoreRoles = new RoleMappedData(concat, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);
        var keyToValueMapper = new KeyToValueTransform(env, "PredictedLabel");
        var keyToValue = keyToValueMapper.Transform(scorer);
        var model = env.CreatePredictionEngine<IrisData, IrisPrediction>(keyToValue);

        var testLoaderArgs = MakeIrisTextLoaderArgs();
        var testFiles = new MultiFileSource(GetDataPath(TestDatasets.irisData.trainFilename));
        var testLoader = TextLoader.ReadFile(env, testLoaderArgs, testFiles);
        var testData = testLoader.AsEnumerable<IrisData>(env, false);
        foreach (var row in testData.Take(20))
        {
            var prediction = model.Predict(row);
            Assert.True(prediction.PredictedLabel == row.Label);
        }
    }
}
/// <summary>
/// Runs the binary, regression and multiclass SDCA trainers through <c>TestEstimatorCore</c>
/// on "breast-cancer.txt", each with a convergence tolerance of 1e-2.
/// </summary>
public void SdcaWorkout()
{
    const float tolerance = 1e-2f;

    var dataPath = GetDataPath("breast-cancer.txt");

    // Column 0 is loaded as the label, columns 1..10 as the feature vector.
    var reader = TextLoader.CreateReader(
        Env,
        c => (Label: c.LoadFloat(0), Features: c.LoadFloat(1, 10)));
    var data = reader.Read(dataPath);

    var binaryTrainer = new SdcaBinaryTrainer(
        Env, "Label", "Features",
        advancedSettings: settings => settings.ConvergenceTolerance = tolerance);
    TestEstimatorCore(binaryTrainer, data.AsDynamic);

    var regressionTrainer = new SdcaRegressionTrainer(
        Env, "Label", "Features",
        advancedSettings: settings => settings.ConvergenceTolerance = tolerance);
    TestEstimatorCore(regressionTrainer, data.AsDynamic);

    var mcTrainer = new SdcaMultiClassTrainer(
        Env, "Label", "Features",
        advancedSettings: settings => settings.ConvergenceTolerance = tolerance);
    TestEstimatorCore(mcTrainer, data.AsDynamic);

    Done();
}
/// <summary>
/// Appends a multiclass SDCA trainer and a key-to-value mapping of "PredictedLabel" to
/// <paramref name="pipeline"/>, fits the result on <paramref name="trainingDataView"/>,
/// runs one sample prediction, and saves the fitted model through <c>SaveModelAsFile</c>.
/// </summary>
/// <param name="trainingDataView">Data the pipeline is fitted on.</param>
/// <param name="pipeline">Estimator chain the trainer is appended to.</param>
/// <returns>
/// The extended estimator chain (not the fitted model; that is stored in <c>_trainedModel</c>,
/// and the prediction engine built from it in <c>_predEngine</c>).
/// </returns>
public static EstimatorChain<KeyToValueMappingTransformer> BuildAndTrainModel(IDataView trainingDataView, EstimatorChain<ITransformer> pipeline)
{
    var trainer = new SdcaMultiClassTrainer(_mlContext, DefaultColumnNames.Label, DefaultColumnNames.Features);
    var pipelineWithTrainer = pipeline.Append(trainer);
    var keyToValue = _mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel");
    var trainingPipeline = pipelineWithTrainer.Append(keyToValue);

    Console.WriteLine("=============== Training the model ===============");
    _trainedModel = trainingPipeline.Fit(trainingDataView);
    var finishedAt = DateTime.Now.ToString();
    Console.WriteLine($"=============== Finished Training the model Ending time: {finishedAt} ===============");

    // Smoke-check the freshly trained model with a single hand-written sample.
    Console.WriteLine("=============== Single Prediction just-trained-model ===============");
    _predEngine = _trainedModel.CreatePredictionEngine<СonformChecker, CheckerPrediction>(_mlContext);
    var sample = new СonformChecker { Name = "Электронный аукцион" };
    var prediction = _predEngine.Predict(sample);
    Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.Con} ===============");

    SaveModelAsFile(_mlContext, _trainedModel);

    return trainingPipeline;
}
/// <summary>
/// Trains a multiclass SDCA predictor on the iris data, re-applies the scoring transforms
/// to the raw loader via <c>ApplyTransformUtils.ApplyAllTransformsToData</c>, and asserts
/// that the first 20 label-free rows are all predicted as "Iris-setosa".
/// </summary>
void DecomposableTrainAndPredict()
{
    var dataPath = GetDataPath(IrisDataPath);
    using (var env = new TlcEnvironment())
    {
        var trainLoaderArgs = MakeIrisTextLoaderArgs();
        var trainFiles = new MultiFileSource(dataPath);
        var loader = TextLoader.ReadFile(env, trainLoaderArgs, trainFiles);
        var term = TermTransform.Create(env, loader, "Label");
        var concatenator = new ConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
        var concat = concatenator.Transform(term);

        var trainerArgs = new SdcaMultiClassTrainer.Arguments
        {
            MaxIterations = 100,
            Shuffle = true,
            NumThreads = 1
        };
        var trainer = new SdcaMultiClassTrainer(env, trainerArgs);

        // Wrap the data in a cache only when the trainer asks for it.
        IDataView trainData = concat;
        if (trainer.Info.WantCaching)
        {
            trainData = new CacheDataView(env, concat, prefetch: null);
        }
        var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features");

        // Auto-normalization.
        NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer);
        var trainContext = new Runtime.TrainContext(trainRoles);
        var predictor = trainer.Train(trainContext);

        var scoreRoles = new RoleMappedData(concat, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

        // Cut out term transform from pipeline.
        var newScorer = ApplyTransformUtils.ApplyAllTransformsToData(env, scorer, loader, term);
        var keyToValueMapper = new KeyToValueTransform(env, "PredictedLabel");
        var keyToValue = keyToValueMapper.Transform(newScorer);
        var model = env.CreatePredictionEngine<IrisDataNoLabel, IrisPrediction>(keyToValue);

        var testLoaderArgs = MakeIrisTextLoaderArgs();
        var testFiles = new MultiFileSource(dataPath);
        var testLoader = TextLoader.ReadFile(env, testLoaderArgs, testFiles);
        var testData = testLoader.AsEnumerable<IrisDataNoLabel>(env, false);
        foreach (var row in testData.Take(20))
        {
            var prediction = model.Predict(row);
            Assert.True(prediction.PredictedLabel == "Iris-setosa");
        }
    }
}
/// <summary>
/// Builds the sentiment pipeline (text loader, text featurizer with token output, word
/// embeddings using the Sswe pretrained model kind), fits a multiclass SDCA trainer on it
/// and passes the fitted result to <c>_consumer.Consume</c>.
/// </summary>
public void TrainSentiment()
{
    var env = new MLContext(seed: 1);

    // Pipeline
    var labelColumn = new TextLoader.Column()
    {
        Name = "Label",
        Source = new[] { new TextLoader.Range() { Min = 0, Max = 0 } },
        Type = DataKind.Num
    };
    var sentimentColumn = new TextLoader.Column()
    {
        Name = "SentimentText",
        Source = new[] { new TextLoader.Range() { Min = 1, Max = 1 } },
        Type = DataKind.Text
    };
    var arguments = new TextLoader.Arguments()
    {
        Column = new TextLoader.Column[] { labelColumn, sentimentColumn },
        HasHeader = true,
        AllowQuoting = false,
        AllowSparse = false
    };
    var loader = env.Data.ReadFromTextFile(_sentimentDataPath, arguments);

    // Char and word feature extractors are disabled; tokens are output and fed to the
    // embeddings step below through the "WordEmbeddings_TransformedText" column.
    var featurizerArgs = new TextFeaturizingEstimator.Arguments()
    {
        Column = new TextFeaturizingEstimator.Column
        {
            Name = "WordEmbeddings",
            Source = new[] { "SentimentText" }
        },
        OutputTokens = true,
        KeepPunctuations = false,
        UsePredefinedStopWordRemover = true,
        VectorNormalizer = TextFeaturizingEstimator.TextNormKind.None,
        CharFeatureExtractor = null,
        WordFeatureExtractor = null,
    };
    var text = TextFeaturizingEstimator.Create(env, featurizerArgs, loader);

    var embeddingColumn = new WordEmbeddingsExtractingTransformer.Column
    {
        Name = "Features",
        Source = "WordEmbeddings_TransformedText"
    };
    var embeddingArgs = new WordEmbeddingsExtractingTransformer.Arguments()
    {
        Column = new WordEmbeddingsExtractingTransformer.Column[1] { embeddingColumn },
        ModelKind = WordEmbeddingsExtractingTransformer.PretrainedModelKind.Sswe,
    };
    var trans = WordEmbeddingsExtractingTransformer.Create(env, embeddingArgs, text);

    // Train
    var trainer = new SdcaMultiClassTrainer(env, "Label", "Features", maxIterations: 20);
    var predicted = trainer.Fit(trans);
    _consumer.Consume(predicted);
}
/// <summary>
/// Builds the sentiment pipeline (tab-separated text loader, text featurizer with token
/// output, word embeddings using the Sswe pretrained model kind), trains a multiclass SDCA
/// trainer on it and passes the trained result to <c>_consumer.Consume</c>.
/// </summary>
public void TrainSentiment()
{
    using (var env = new ConsoleEnvironment(seed: 1))
    {
        // Pipeline
        var labelColumn = new TextLoader.Column()
        {
            Name = "Label",
            Source = new[] { new TextLoader.Range() { Min = 0, Max = 0 } },
            Type = DataKind.Num
        };
        var sentimentColumn = new TextLoader.Column()
        {
            Name = "SentimentText",
            Source = new[] { new TextLoader.Range() { Min = 1, Max = 1 } },
            Type = DataKind.Text
        };
        var loaderArgs = new TextLoader.Arguments()
        {
            AllowQuoting = false,
            AllowSparse = false,
            Separator = "tab",
            HasHeader = true,
            Column = new[] { labelColumn, sentimentColumn }
        };
        var files = new MultiFileSource(_sentimentDataPath);
        var loader = TextLoader.ReadFile(env, loaderArgs, files);

        // Char and word feature extractors are disabled; tokens are output and fed to the
        // embeddings step below through the "WordEmbeddings_TransformedText" column.
        var featurizerArgs = new TextFeaturizingEstimator.Arguments()
        {
            Column = new TextFeaturizingEstimator.Column
            {
                Name = "WordEmbeddings",
                Source = new[] { "SentimentText" }
            },
            OutputTokens = true,
            KeepPunctuations = false,
            StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(),
            VectorNormalizer = TextFeaturizingEstimator.TextNormKind.None,
            CharFeatureExtractor = null,
            WordFeatureExtractor = null,
        };
        var text = TextFeaturizingEstimator.Create(env, featurizerArgs, loader);

        var embeddingColumn = new WordEmbeddingsTransform.Column
        {
            Name = "Features",
            Source = "WordEmbeddings_TransformedText"
        };
        var embeddingArgs = new WordEmbeddingsTransform.Arguments()
        {
            Column = new WordEmbeddingsTransform.Column[1] { embeddingColumn },
            ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe,
        };
        var trans = WordEmbeddingsTransform.Create(env, embeddingArgs, text);

        // Train
        var trainer = new SdcaMultiClassTrainer(env, "Features", "Label", maxIterations: 20);
        var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
        var predicted = trainer.Train(trainRoles);
        _consumer.Consume(predicted);
    }
}
/// <summary>
/// Trains a multiclass SDCA model on "iris.txt" by instantiating each component directly,
/// then checks evaluation metrics, individual predictions and the first entry of the
/// predictor's summary. The same file is used for training and for testing.
/// </summary>
public void TrainAndPredictIrisModelUsingDirectInstantiationTest()
{
    string dataPath = GetDataPath("iris.txt");
    string testDataPath = dataPath;

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline
        var labelColumn = new TextLoader.Column()
        {
            Name = "Label",
            Source = new[] { new TextLoader.Range() { Min = 0, Max = 0 } },
            Type = DataKind.R4
        };
        var sepalLengthColumn = new TextLoader.Column()
        {
            Name = "SepalLength",
            Source = new[] { new TextLoader.Range() { Min = 1, Max = 1 } },
            Type = DataKind.R4
        };
        var sepalWidthColumn = new TextLoader.Column()
        {
            Name = "SepalWidth",
            Source = new[] { new TextLoader.Range() { Min = 2, Max = 2 } },
            Type = DataKind.R4
        };
        var petalLengthColumn = new TextLoader.Column()
        {
            Name = "PetalLength",
            Source = new[] { new TextLoader.Range() { Min = 3, Max = 3 } },
            Type = DataKind.R4
        };
        var petalWidthColumn = new TextLoader.Column()
        {
            Name = "PetalWidth",
            Source = new[] { new TextLoader.Range() { Min = 4, Max = 4 } },
            Type = DataKind.R4
        };
        var loaderArgs = new TextLoader.Arguments()
        {
            HasHeader = false,
            Column = new[]
            {
                labelColumn,
                sepalLengthColumn,
                sepalWidthColumn,
                petalLengthColumn,
                petalWidthColumn
            }
        };
        var files = new MultiFileSource(dataPath);
        var loader = new TextLoader(env, loaderArgs, files);

        IDataTransform concatenated = new ConcatTransform(env, loader, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");

        // Normalizer is not automatically added though the trainer has 'NormalizeFeatures' On/Auto
        IDataTransform trans = NormalizeTransform.CreateMinMaxNormalizer(env, concatenated, "Features");

        // Train
        var trainerArgs = new SdcaMultiClassTrainer.Arguments();
        var trainer = new SdcaMultiClassTrainer(env, trainerArgs);

        // Explicitly adding CacheDataView since caching is not working though trainer has 'Caching' On/Auto
        var cached = new CacheDataView(env, trans, prefetch: null);
        var trainRoles = TrainUtils.CreateExamples(cached, label: "Label", feature: "Features");
        trainer.Train(trainRoles);

        // Get scorer and evaluate the predictions from test data
        var pred = trainer.CreatePredictor();
        IDataScorerTransform testDataScorer = GetScorer(env, trans, pred, testDataPath);
        var metrics = Evaluate(env, testDataScorer);
        CompareMatrics(metrics);

        // Create prediction engine and test predictions
        var model = env.CreatePredictionEngine<IrisData, IrisPrediction>(testDataScorer);
        ComparePredictions(model);

        // Get feature importance i.e. weight vector
        var logisticPredictor = (MulticlassLogisticRegressionPredictor)pred;
        var summary = logisticPredictor.GetSummaryInKeyValuePairs(trainRoles.Schema);
        double firstValue = Convert.ToDouble(summary[0].Value);
        Assert.Equal(7.757867, firstValue, 5);
    }
}
/// <summary>
/// Builds the sentiment pipeline (tab-separated text loader, text transform with token
/// output, word embeddings using the Sswe pretrained model kind) and trains a multiclass
/// SDCA trainer on it with at most 20 iterations.
/// </summary>
/// <returns>The predictor returned by the trainer.</returns>
private static IPredictor TrainSentimentCore()
{
    var dataPath = s_sentimentDataPath;
    using (var env = new TlcEnvironment(seed: 1))
    {
        // Pipeline
        var labelColumn = new TextLoader.Column()
        {
            Name = "Label",
            Source = new[] { new TextLoader.Range() { Min = 0, Max = 0 } },
            Type = DataKind.Num
        };
        var sentimentColumn = new TextLoader.Column()
        {
            Name = "SentimentText",
            Source = new[] { new TextLoader.Range() { Min = 1, Max = 1 } },
            Type = DataKind.Text
        };
        var loaderArgs = new TextLoader.Arguments()
        {
            AllowQuoting = false,
            AllowSparse = false,
            Separator = "tab",
            HasHeader = true,
            Column = new[] { labelColumn, sentimentColumn }
        };
        var files = new MultiFileSource(dataPath);
        var loader = new TextLoader(env, loaderArgs, files);

        // Char and word feature extractors are disabled; tokens are output and fed to the
        // embeddings step below through the "WordEmbeddings_TransformedText" column.
        var textArgs = new TextTransform.Arguments()
        {
            Column = new TextTransform.Column
            {
                Name = "WordEmbeddings",
                Source = new[] { "SentimentText" }
            },
            KeepDiacritics = false,
            KeepPunctuations = false,
            TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower,
            OutputTokens = true,
            StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(),
            VectorNormalizer = TextTransform.TextNormKind.None,
            CharFeatureExtractor = null,
            WordFeatureExtractor = null,
        };
        var text = TextTransform.Create(env, textArgs, loader);

        var embeddingColumn = new WordEmbeddingsTransform.Column
        {
            Name = "Features",
            Source = "WordEmbeddings_TransformedText"
        };
        var embeddingArgs = new WordEmbeddingsTransform.Arguments()
        {
            Column = new WordEmbeddingsTransform.Column[1] { embeddingColumn },
            ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe,
        };
        var trans = new WordEmbeddingsTransform(env, embeddingArgs, text);

        // Train
        var trainerArgs = new SdcaMultiClassTrainer.Arguments() { MaxIterations = 20 };
        var trainer = new SdcaMultiClassTrainer(env, trainerArgs);
        var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
        return trainer.Train(trainRoles);
    }
}