static void Main()
{
    var pipeline = new LearningPipeline
    {
        new TextLoader(FileName).CreateFrom<AgeRange>(separator: ',', useHeader: true),
        new Dictionarizer("Label"),
        new TextFeaturizer("Gender", "Gender"),
        new ColumnConcatenator("Features", "Age", "Gender"),
        new StochasticDualCoordinateAscentClassifier(),
        new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" }
    };

    var model = pipeline.Train<AgeRange, AgeRangePrediction>();

    var converter = new OnnxConverter
    {
        Onnx = OnnxPath,
        Json = OnnxAsJsonPath,
        Domain = "com.elbruno"
    };
    converter.Convert(model);

    // Strip the version.
    var fileText = File.ReadAllText(OnnxAsJsonPath);
    fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
    File.WriteAllText(OnnxAsJsonPath, fileText);
}
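For reference, the AgeRange and AgeRangePrediction types consumed by pipeline.Train above are not shown in this snippet. With the legacy ML.NET API they would typically look something like the sketch below; the property names, ordinals, and types are assumptions inferred from the loader and transforms used above (Age, Gender, Label), not the original definitions.

using Microsoft.ML.Runtime.Api;

// Hypothetical input schema matching the columns referenced by the pipeline above.
public class AgeRange
{
    [Column(ordinal: "0")] public float Age;
    [Column(ordinal: "1")] public string Gender;
    [Column(ordinal: "2", name: "Label")] public string Label;
}

// Hypothetical prediction type; PredictedLabel is produced by the
// PredictedLabelColumnOriginalValueConverter in the pipeline.
public class AgeRangePrediction
{
    [ColumnName("PredictedLabel")] public string PredictedLabel;
}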
public void WordEmbeddingsTest()
{
    string dataPath = GetDataPath(@"small-sentiment-test.tsv");
    var pipeline = new Legacy.LearningPipeline(0);
    pipeline.Add(new Legacy.Data.TextLoader(dataPath)
    {
        Arguments = new TextLoaderArguments
        {
            Separator = new[] { '\t' },
            HasHeader = false,
            Column = new[]
            {
                new TextLoaderColumn()
                {
                    Name = "Cat",
                    Source = new[] { new TextLoaderRange(0, 3) },
                    Type = Legacy.Data.DataKind.TX
                },
            }
        }
    });

    var modelPath = GetDataPath(@"shortsentiment.emd");
    var embed = new WordEmbeddings() { CustomLookupTable = modelPath };
    embed.AddColumn("Cat", "Cat");
    pipeline.Add(embed);

    var model = pipeline.Train<EmbeddingsData, EmbeddingsResult>();

    var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "WordEmbeddings");
    var onnxPath = GetOutputPath(subDir, "WordEmbeddings.onnx");
    DeleteOutputPath(onnxPath);
    var onnxAsJsonPath = GetOutputPath(subDir, "WordEmbeddings.json");
    DeleteOutputPath(onnxAsJsonPath);

    OnnxConverter converter = new OnnxConverter()
    {
        Onnx = onnxPath,
        Json = onnxAsJsonPath,
        Domain = "Onnx"
    };
    converter.Convert(model);

    var fileText = File.ReadAllText(onnxAsJsonPath);
    fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
    File.WriteAllText(onnxAsJsonPath, fileText);

    CheckEquality(subDir, "WordEmbeddings.json");
    Done();
}
public void BinaryClassificationSaveModelToOnnxTest()
{
    string dataPath = GetDataPath(@"breast-cancer.txt");
    var pipeline = new LearningPipeline();
    pipeline.Add(new Data.TextLoader(dataPath)
    {
        Arguments = new TextLoaderArguments
        {
            Separator = new[] { '\t' },
            HasHeader = true,
            Column = new[]
            {
                new TextLoaderColumn()
                {
                    Name = "Label",
                    Source = new[] { new TextLoaderRange(0) },
                    Type = Data.DataKind.Num
                },
                new TextLoaderColumn()
                {
                    Name = "Features",
                    Source = new[] { new TextLoaderRange(1, 9) },
                    Type = Data.DataKind.Num
                }
            }
        }
    });
    pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2 });

    var model = pipeline.Train<BreastCancerData, BreastCancerPrediction>();

    var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer");
    var onnxPath = GetOutputPath(subDir, "SaveModelToOnnxTest.pb");
    DeleteOutputPath(onnxPath);
    var onnxAsJsonPath = GetOutputPath(subDir, "SaveModelToOnnxTest.json");
    DeleteOutputPath(onnxAsJsonPath);

    OnnxConverter converter = new OnnxConverter()
    {
        InputsToDrop = new[] { "Label" },
        OutputsToDrop = new[] { "Label", "Features" },
        Onnx = onnxPath,
        Json = onnxAsJsonPath,
        Domain = "Onnx"
    };
    converter.Convert(model);

    CheckEquality(subDir, "SaveModelToOnnxTest.json");
    Done();
}
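The BreastCancerData and BreastCancerPrediction types referenced by pipeline.Train are not part of this snippet. A minimal sketch of what they might look like follows, assuming a float label in column 0 and a 9-element float feature vector in columns 1-9 as declared in the loader above; the names and attributes are assumptions, not the original test classes.

using Microsoft.ML.Runtime.Api;

// Hypothetical input schema: Label from column 0, Features from columns 1-9.
public class BreastCancerData
{
    [Column(ordinal: "0")] public float Label;
    [Column(ordinal: "1-9"), VectorType(9)] public float[] Features;
}

// Hypothetical binary prediction type exposing the predicted label.
public class BreastCancerPrediction
{
    [ColumnName("PredictedLabel")] public bool PredictedLabel;
}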
public void KmeansTest()
{
    string dataPath = GetDataPath(@"breast-cancer.txt");
    var pipeline = new Legacy.LearningPipeline(0);
    pipeline.Add(new Legacy.Data.TextLoader(dataPath)
    {
        Arguments = new TextLoaderArguments
        {
            Separator = new[] { '\t' },
            HasHeader = true,
            Column = new[]
            {
                new TextLoaderColumn()
                {
                    Name = "Features",
                    Source = new[] { new TextLoaderRange(1, 9) },
                    Type = Legacy.Data.DataKind.R4
                },
            }
        }
    });
    pipeline.Add(new KMeansPlusPlusClusterer()
    {
        K = 2,
        MaxIterations = 1,
        NumThreads = 1,
        InitAlgorithm = KMeansPlusPlusTrainerInitAlgorithm.Random
    });

    var model = pipeline.Train<BreastNumericalColumns, BreastCancerClusterPrediction>();

    var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "Cluster", "BreastCancer");
    var onnxPath = GetOutputPath(subDir, "Kmeans.onnx");
    DeleteOutputPath(onnxPath);
    var onnxAsJsonPath = GetOutputPath(subDir, "Kmeans.json");
    DeleteOutputPath(onnxAsJsonPath);

    OnnxConverter converter = new OnnxConverter()
    {
        Onnx = onnxPath,
        Json = onnxAsJsonPath,
        Domain = "Onnx"
    };
    converter.Convert(model);

    // Strip the version.
    var fileText = File.ReadAllText(onnxAsJsonPath);
    fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
    File.WriteAllText(onnxAsJsonPath, fileText);

    CheckEquality(subDir, "Kmeans.json");
    Done();
}
public static void SaveToOnnx(PredictionModel model)
{
    // Save in ONNX format (not yet functional for multiclass as of version 0.3).
    OnnxConverter converter = new OnnxConverter()
    {
        InputsToDrop = new[] { "Label" },
        OutputsToDrop = new[] { "Label", "Features" },
        Onnx = _onnxPath,
        Domain = "Onnx"
    };
    converter.Convert(model);
}
private static void ConvertToOnnx(PredictionModel model)
{
    try
    {
        OnnxConverter converter = new OnnxConverter()
        {
            InputsToDrop = new[] { "Label" },
            OutputsToDrop = new[] { "Label", "Features" },
            Onnx = _onnxPath,
            Json = _onnxAsJsonPath,
            Domain = "com.mydomain"
        };
        converter.Convert(model);

        // Strip the version.
        var fileText = File.ReadAllText(_onnxAsJsonPath);
        fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
        File.WriteAllText(_onnxAsJsonPath, fileText);
    }
    catch (Exception e)
    {
        System.Console.WriteLine(e);
    }
}
private void Export()
{
    var onnxPath = "./SaveModelToOnnxTest.onnx";
    var onnxAsJsonPath = "./SaveModelToOnnxTest.json";

    OnnxConverter converter = new OnnxConverter()
    {
        InputsToDrop = new[] { "Label" },
        OutputsToDrop = new[] { "Label", "Features" },
        Onnx = onnxPath,
        Json = onnxAsJsonPath,
        Domain = "com.mydomain"
    };
    converter.Convert(_model);

    // Strip the version.
    var fileText = File.ReadAllText(onnxAsJsonPath);
    fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
    File.WriteAllText(onnxAsJsonPath, fileText);
}
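Once Export has written the .onnx file, the graph can be scored outside of ML.NET. The sketch below uses the Microsoft.ML.OnnxRuntime package to load and run the exported model; the input name "Features" and the 1x9 float shape are assumptions and should be checked against session.InputMetadata for the actual exported graph.

using System;
using System.Collections.Generic;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

public static class OnnxScoringSketch
{
    public static void Score()
    {
        // Load the exported graph produced by Export().
        using (var session = new InferenceSession("./SaveModelToOnnxTest.onnx"))
        {
            // Assumed input: a single "Features" vector of 9 floats; verify the real
            // input names and shapes via session.InputMetadata before relying on this.
            var features = new DenseTensor<float>(new float[9], new[] { 1, 9 });
            var inputs = new List<NamedOnnxValue>
            {
                NamedOnnxValue.CreateFromTensor("Features", features)
            };

            using (var results = session.Run(inputs))
            {
                foreach (var result in results)
                    Console.WriteLine(result.Name);
            }
        }
    }
}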
public void MultiClassificationLRSaveModelToOnnxTest()
{
    string dataPath = GetDataPath(@"breast-cancer.txt");
    var pipeline = new Legacy.LearningPipeline();
    pipeline.Add(new Legacy.Data.TextLoader(dataPath)
    {
        Arguments = new TextLoaderArguments
        {
            Separator = new[] { '\t' },
            HasHeader = true,
            Column = new[]
            {
                new TextLoaderColumn()
                {
                    Name = "Label",
                    Source = new[] { new TextLoaderRange(0) },
                    Type = Legacy.Data.DataKind.Num
                },
                new TextLoaderColumn()
                {
                    Name = "Features",
                    Source = new[] { new TextLoaderRange(1, 9) },
                    Type = Legacy.Data.DataKind.Num
                }
            }
        }
    });
    pipeline.Add(new Dictionarizer("Label"));
    pipeline.Add(new LogisticRegressionClassifier() { UseThreads = false });

    var model = pipeline.Train<BreastCancerDataAllColumns, BreastCancerMCPrediction>();

    var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "MultiClassClassification", "BreastCancer");
    var onnxPath = GetOutputPath(subDir, "MultiClassificationLRSaveModelToOnnxTest.onnx");
    DeleteOutputPath(onnxPath);
    var onnxAsJsonPath = GetOutputPath(subDir, "MultiClassificationLRSaveModelToOnnxTest.json");
    DeleteOutputPath(onnxAsJsonPath);

    OnnxConverter converter = new OnnxConverter()
    {
        InputsToDrop = new[] { "Label" },
        OutputsToDrop = new[] { "Label", "Features" },
        Onnx = onnxPath,
        Json = onnxAsJsonPath,
        Domain = "Onnx"
    };
    converter.Convert(model);

    // Strip the version.
    var fileText = File.ReadAllText(onnxAsJsonPath);
    fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
    File.WriteAllText(onnxAsJsonPath, fileText);

    CheckEquality(subDir, "MultiClassificationLRSaveModelToOnnxTest.json");
    Done();
}
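The BreastCancerDataAllColumns and BreastCancerMCPrediction types used above are likewise not shown. For a multiclass trainer such as LogisticRegressionClassifier, the prediction type would plausibly expose the per-class score vector, roughly as in the hedged sketch below; the names are assumptions rather than the original test classes.

using Microsoft.ML.Runtime.Api;

// Hypothetical multiclass prediction type: one score per class produced by the trainer.
public class BreastCancerMCPrediction
{
    [ColumnName("Score")] public float[] Scores;
}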
public void KeyToVectorWithBagTest()
{
    string dataPath = GetDataPath(@"breast-cancer.txt");
    var pipeline = new Legacy.LearningPipeline();
    pipeline.Add(new Legacy.Data.TextLoader(dataPath)
    {
        Arguments = new TextLoaderArguments
        {
            Separator = new[] { '\t' },
            HasHeader = true,
            Column = new[]
            {
                new TextLoaderColumn()
                {
                    Name = "Label",
                    Source = new[] { new TextLoaderRange(0) },
                    Type = Legacy.Data.DataKind.Num
                },
                new TextLoaderColumn()
                {
                    Name = "F1",
                    Source = new[] { new TextLoaderRange(1, 1) },
                    Type = Legacy.Data.DataKind.Num
                },
                new TextLoaderColumn()
                {
                    Name = "F2",
                    Source = new[] { new TextLoaderRange(2, 2) },
                    Type = Legacy.Data.DataKind.TX
                }
            }
        }
    });

    var vectorizer = new CategoricalOneHotVectorizer();
    var categoricalColumn = new CategoricalTransformColumn()
    {
        OutputKind = CategoricalTransformOutputKind.Bag,
        Name = "F2",
        Source = "F2"
    };
    vectorizer.Column = new CategoricalTransformColumn[1] { categoricalColumn };
    pipeline.Add(vectorizer);
    pipeline.Add(new ColumnConcatenator("Features", "F1", "F2"));
    pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 2, NumTrees = 1, MinDocumentsInLeafs = 2 });

    var model = pipeline.Train<BreastCancerData, BreastCancerPrediction>();

    var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer");
    var onnxPath = GetOutputPath(subDir, "KeyToVectorBag.onnx");
    DeleteOutputPath(onnxPath);
    var onnxAsJsonPath = GetOutputPath(subDir, "KeyToVectorBag.json");
    DeleteOutputPath(onnxAsJsonPath);

    OnnxConverter converter = new OnnxConverter()
    {
        InputsToDrop = new[] { "Label" },
        OutputsToDrop = new[] { "Label", "F1", "F2", "Features" },
        Onnx = onnxPath,
        Json = onnxAsJsonPath,
        Domain = "Onnx"
    };
    converter.Convert(model);

    // Strip the version.
    var fileText = File.ReadAllText(onnxAsJsonPath);
    fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
    File.WriteAllText(onnxAsJsonPath, fileText);

    CheckEquality(subDir, "KeyToVectorBag.json");
    Done();
}
public async Task<string> TrainAsync(string trainpath, bool writeToDisk = true)
{
    // The pipeline encapsulates data loading, data processing/featurization, and the learning algorithm.
    var pipeline = new LearningPipeline
    {
        // Load from CSV --> SubCategory, Description, Bank, Amount
        new TextLoader(trainpath).CreateFrom<BankStatementLineItem>(separator: ',', useHeader: true),

        // Converts input values (words, numbers, etc.) to indices in a dictionary.
        new Dictionarizer(("SubCategory", "Label")),

        // Convert the data columns to features. TextFeaturizer performs n-gram analysis
        // over the transaction description.
        new TextFeaturizer("Description", "Description")
        {
            TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
            WordFeatureExtractor = new NGramNgramExtractor
            {
                // Term frequency: the number of times that term t occurs in document d.
                Weighting = NgramTransformWeightingCriteria.Tf
            }
        },
        new TextFeaturizer("Bank", "Bank")
        {
            TextCase = TextNormalizerTransformCaseNormalizationMode.Lower
        },

        // Feature column built from bank and description.
        new ColumnConcatenator("Features", "Bank", "Description"),

        //********************************************************************
        // classifiers
        //********************************************************************
        //new NaiveBayesClassifier(),
        new StochasticDualCoordinateAscentClassifier { Shuffle = false, NumThreads = 1 },
        //new LightGbmClassifier(),
        //********************************************************************

        // Transforms the predicted label column back to its original values, unless it is of type bool.
        new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" }
    };

    //********************************************************************
    // training
    //********************************************************************
    Console.WriteLine("=============== Start training ===============");
    var watch = Stopwatch.StartNew();
    _model = pipeline.Train<BankStatementLineItem, PredictedLabel>();
    watch.Stop();
    Console.WriteLine("=============== End training ===============");
    Console.WriteLine($"training took {watch.ElapsedMilliseconds} milliseconds");
    Console.WriteLine("The model is saved to {0}", PredictionModelWrapper.Model1Path);
    //********************************************************************

    var converter = new OnnxConverter
    {
        Onnx = PredictionModelWrapper.Model1Path,
        Json = PredictionModelWrapper.Model1Path.Replace(".onnx", ".json"),
        Domain = "onnx"
    };
    converter.Convert(_model);

    if (writeToDisk)
    {
        await _model.WriteAsync(PredictionModelWrapper.Model1Path);

        // Strip the version.
        var fileText = File.ReadAllText(converter.Json);
        fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
        File.WriteAllText(converter.Json, fileText);
    }

    return PredictionModelWrapper.Model1Path;
}
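After TrainAsync completes, the trained model can also be used for in-process predictions before (or instead of) consuming the exported ONNX file. A minimal usage sketch follows; the property names on BankStatementLineItem and the prediction type are assumptions drawn from the column names referenced in the pipeline above.

// Hedged usage sketch: assumes BankStatementLineItem exposes Bank and Description
// properties and that the prediction type exposes the converted SubCategory.
public string PredictSubCategory(string bank, string description)
{
    var prediction = _model.Predict(new BankStatementLineItem
    {
        Bank = bank,
        Description = description
    });
    return prediction.SubCategory;
}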