예제 #1
0
파일: Program.cs 프로젝트: vadimas/events
        /// <summary>
        /// Trains a model from the comma-separated file at <c>FileName</c>, exports it through
        /// <c>OnnxConverter</c> (ONNX at <c>OnnxPath</c>, JSON at <c>OnnxAsJsonPath</c>), and
        /// rewrites the JSON file so its <c>producerVersion</c> value is a fixed placeholder.
        /// </summary>
        static void Main()
        {
            // Stages are appended in the same order the pipeline should run them.
            var pipeline = new LearningPipeline();

            pipeline.Add(new TextLoader(FileName).CreateFrom<AgeRange>(separator: ',', useHeader: true));
            pipeline.Add(new Dictionarizer("Label"));
            pipeline.Add(new TextFeaturizer("Gender", "Gender"));
            pipeline.Add(new ColumnConcatenator("Features", "Age", "Gender"));
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());

            var labelConverter = new PredictedLabelColumnOriginalValueConverter();
            labelConverter.PredictedLabelColumn = "PredictedLabel";
            pipeline.Add(labelConverter);

            var trainedModel = pipeline.Train<AgeRange, AgeRangePrediction>();

            // Export the trained model as ONNX plus a JSON rendering of it.
            var exporter = new OnnxConverter();
            exporter.Onnx   = OnnxPath;
            exporter.Json   = OnnxAsJsonPath;
            exporter.Domain = "com.elbruno";
            exporter.Convert(trainedModel);

            // Replace whatever producer version was written with a constant marker.
            const string versionPattern     = "\"producerVersion\": \"([^\"]+)\"";
            const string versionPlaceholder = "\"producerVersion\": \"##VERSION##\"";

            var json       = File.ReadAllText(OnnxAsJsonPath);
            var normalized = Regex.Replace(json, versionPattern, versionPlaceholder);

            File.WriteAllText(OnnxAsJsonPath, normalized);
        }
예제 #2
0
        /// <summary>
        /// Trains a pipeline that loads the text column "Cat" from <c>small-sentiment-test.tsv</c>
        /// and applies <c>WordEmbeddings</c> with the custom lookup table <c>shortsentiment.emd</c>,
        /// exports the model to ONNX, replaces the <c>producerVersion</c> value in the JSON output
        /// with a placeholder, and compares the JSON via <c>CheckEquality</c>.
        /// </summary>
        public void WordEmbeddingsTest()
        {
            var inputFile = GetDataPath(@"small-sentiment-test.tsv");
            var pipeline  = new Legacy.LearningPipeline(0);

            // Tab-separated input without a header row; one text column named "Cat".
            var loader = new Legacy.Data.TextLoader(inputFile);
            loader.Arguments = new TextLoaderArguments
            {
                Separator = new char[] { '\t' },
                HasHeader = false,
                Column    = new TextLoaderColumn[]
                {
                    new TextLoaderColumn
                    {
                        Name   = "Cat",
                        Source = new TextLoaderRange[] { new TextLoaderRange(0, 3) },
                        Type   = Legacy.Data.DataKind.TX
                    }
                }
            };
            pipeline.Add(loader);

            // The embedding transform reads its vectors from a custom lookup table file.
            var lookupTablePath = GetDataPath(@"shortsentiment.emd");
            var embeddings      = new WordEmbeddings();
            embeddings.CustomLookupTable = lookupTablePath;
            embeddings.AddColumn("Cat", "Cat");
            pipeline.Add(embeddings);

            var trainedModel = pipeline.Train<EmbeddingsData, EmbeddingsResult>();

            // Resolve output locations and clear any previous output for each file.
            var baselineDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "WordEmbeddings");
            var onnxFile    = GetOutputPath(baselineDir, "WordEmbeddings.onnx");
            DeleteOutputPath(onnxFile);

            var jsonFile = GetOutputPath(baselineDir, "WordEmbeddings.json");
            DeleteOutputPath(jsonFile);

            var exporter = new OnnxConverter();
            exporter.Onnx   = onnxFile;
            exporter.Json   = jsonFile;
            exporter.Domain = "Onnx";
            exporter.Convert(trainedModel);

            // Replace the producer version with a constant marker before comparing.
            const string versionPattern     = "\"producerVersion\": \"([^\"]+)\"";
            const string versionPlaceholder = "\"producerVersion\": \"##VERSION##\"";

            var json       = File.ReadAllText(jsonFile);
            var normalized = Regex.Replace(json, versionPattern, versionPlaceholder);
            File.WriteAllText(jsonFile, normalized);

            CheckEquality(baselineDir, "WordEmbeddings.json");
            Done();
        }
예제 #3
0
        /// <summary>
        /// Trains a <c>FastTreeBinaryClassifier</c> on <c>breast-cancer.txt</c>, exports the model
        /// to ONNX (dropping the "Label" input and the "Label"/"Features" outputs), and compares
        /// the JSON output via <c>CheckEquality</c>.
        /// </summary>
        public void BinaryClassificationSaveModelToOnnxTest()
        {
            var inputFile = GetDataPath(@"breast-cancer.txt");
            var pipeline  = new LearningPipeline();

            // Tab-separated input with a header row: a numeric "Label" and a numeric "Features" column.
            var loader = new Data.TextLoader(inputFile);
            loader.Arguments = new TextLoaderArguments
            {
                Separator = new char[] { '\t' },
                HasHeader = true,
                Column    = new TextLoaderColumn[]
                {
                    new TextLoaderColumn
                    {
                        Name   = "Label",
                        Source = new TextLoaderRange[] { new TextLoaderRange(0) },
                        Type   = Data.DataKind.Num
                    },
                    new TextLoaderColumn
                    {
                        Name   = "Features",
                        Source = new TextLoaderRange[] { new TextLoaderRange(1, 9) },
                        Type   = Data.DataKind.Num
                    }
                }
            };
            pipeline.Add(loader);

            // Deliberately tiny trainer settings.
            var trainer = new FastTreeBinaryClassifier();
            trainer.NumLeaves           = 5;
            trainer.NumTrees            = 5;
            trainer.MinDocumentsInLeafs = 2;
            pipeline.Add(trainer);

            var trainedModel = pipeline.Train<BreastCancerData, BreastCancerPrediction>();

            // Resolve output locations and clear any previous output for each file.
            var baselineDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer");
            var onnxFile    = GetOutputPath(baselineDir, "SaveModelToOnnxTest.pb");
            DeleteOutputPath(onnxFile);

            var jsonFile = GetOutputPath(baselineDir, "SaveModelToOnnxTest.json");
            DeleteOutputPath(jsonFile);

            var exporter = new OnnxConverter();
            exporter.InputsToDrop  = new string[] { "Label" };
            exporter.OutputsToDrop = new string[] { "Label", "Features" };
            exporter.Onnx          = onnxFile;
            exporter.Json          = jsonFile;
            exporter.Domain        = "Onnx";
            exporter.Convert(trainedModel);

            // NOTE(review): the JSON is compared exactly as written; the "producerVersion"
            // value is not replaced with a placeholder here — confirm the baseline expects that.
            CheckEquality(baselineDir, "SaveModelToOnnxTest.json");
            Done();
        }
예제 #4
0
        /// <summary>
        /// Trains a <c>KMeansPlusPlusClusterer</c> (K = 2, one iteration, one thread, random
        /// initialization) on the "Features" column of <c>breast-cancer.txt</c>, exports the model
        /// to ONNX, replaces the <c>producerVersion</c> value in the JSON output with a
        /// placeholder, and compares the JSON via <c>CheckEquality</c>.
        /// </summary>
        public void KmeansTest()
        {
            var inputFile = GetDataPath(@"breast-cancer.txt");
            var pipeline  = new Legacy.LearningPipeline(0);

            // Tab-separated input with a header row; only the "Features" column is loaded.
            var loader = new Legacy.Data.TextLoader(inputFile);
            loader.Arguments = new TextLoaderArguments
            {
                Separator = new char[] { '\t' },
                HasHeader = true,
                Column    = new TextLoaderColumn[]
                {
                    new TextLoaderColumn
                    {
                        Name   = "Features",
                        Source = new TextLoaderRange[] { new TextLoaderRange(1, 9) },
                        Type   = Legacy.Data.DataKind.R4
                    }
                }
            };
            pipeline.Add(loader);

            var clusterer = new KMeansPlusPlusClusterer();
            clusterer.K             = 2;
            clusterer.MaxIterations = 1;
            clusterer.NumThreads    = 1;
            clusterer.InitAlgorithm = KMeansPlusPlusTrainerInitAlgorithm.Random;
            pipeline.Add(clusterer);

            var trainedModel = pipeline.Train<BreastNumericalColumns, BreastCancerClusterPrediction>();

            // Resolve output locations and clear any previous output for each file.
            var baselineDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "Cluster", "BreastCancer");
            var onnxFile    = GetOutputPath(baselineDir, "Kmeans.onnx");
            DeleteOutputPath(onnxFile);

            var jsonFile = GetOutputPath(baselineDir, "Kmeans.json");
            DeleteOutputPath(jsonFile);

            var exporter = new OnnxConverter();
            exporter.Onnx   = onnxFile;
            exporter.Json   = jsonFile;
            exporter.Domain = "Onnx";
            exporter.Convert(trainedModel);

            // Replace the producer version with a constant marker before comparing.
            const string versionPattern     = "\"producerVersion\": \"([^\"]+)\"";
            const string versionPlaceholder = "\"producerVersion\": \"##VERSION##\"";

            var json       = File.ReadAllText(jsonFile);
            var normalized = Regex.Replace(json, versionPattern, versionPlaceholder);
            File.WriteAllText(jsonFile, normalized);

            CheckEquality(baselineDir, "Kmeans.json");
            Done();
        }
예제 #5
0
        /// <summary>
        /// Exports <paramref name="model"/> to the ONNX file at <c>_onnxPath</c>, dropping the
        /// "Label" input and the "Label"/"Features" outputs.
        /// </summary>
        /// <param name="model">The trained prediction model to convert.</param>
        public static void SaveToOnnx(PredictionModel model)
        {
            // Save in ONNX format (not yet functional for multiclass as of version 0.3).
            var exporter = new OnnxConverter();

            exporter.InputsToDrop  = new string[] { "Label" };
            exporter.OutputsToDrop = new string[] { "Label", "Features" };
            exporter.Onnx          = _onnxPath;
            exporter.Domain        = "Onnx";

            exporter.Convert(model);
        }
예제 #6
0
 /// <summary>
 /// Exports <paramref name="model"/> to ONNX (<c>_onnxPath</c>) with a JSON rendering
 /// (<c>_onnxAsJsonPath</c>), then replaces the <c>producerVersion</c> value in the JSON
 /// file with a placeholder. Any exception is written to the console and not rethrown.
 /// </summary>
 /// <param name="model">The trained prediction model to convert.</param>
 private static void ConvertToOnnx(PredictionModel model)
 {
     const string versionPattern     = "\"producerVersion\": \"([^\"]+)\"";
     const string versionPlaceholder = "\"producerVersion\": \"##VERSION##\"";

     try
     {
         var exporter = new OnnxConverter();
         exporter.InputsToDrop  = new string[] { "Label" };
         exporter.OutputsToDrop = new string[] { "Label", "Features" };
         exporter.Onnx          = _onnxPath;
         exporter.Json          = _onnxAsJsonPath;
         exporter.Domain        = "com.mydomain";
         exporter.Convert(model);

         // Replace the producer version with a constant marker.
         var json       = File.ReadAllText(_onnxAsJsonPath);
         var normalized = Regex.Replace(json, versionPattern, versionPlaceholder);
         File.WriteAllText(_onnxAsJsonPath, normalized);
     }
     catch (Exception ex)
     {
         // Failures are reported on the console only; the caller is not notified.
         System.Console.WriteLine(ex);
     }
 }
예제 #7
0
        /// <summary>
        /// Exports <c>_model</c> to <c>./SaveModelToOnnxTest.onnx</c> with a JSON rendering at
        /// <c>./SaveModelToOnnxTest.json</c>, then replaces the <c>producerVersion</c> value in
        /// the JSON file with a placeholder.
        /// </summary>
        private void Export()
        {
            const string onnxFile           = "./SaveModelToOnnxTest.onnx";
            const string jsonFile           = "./SaveModelToOnnxTest.json";
            const string versionPattern     = "\"producerVersion\": \"([^\"]+)\"";
            const string versionPlaceholder = "\"producerVersion\": \"##VERSION##\"";

            var exporter = new OnnxConverter();
            exporter.InputsToDrop  = new string[] { "Label" };
            exporter.OutputsToDrop = new string[] { "Label", "Features" };
            exporter.Onnx          = onnxFile;
            exporter.Json          = jsonFile;
            exporter.Domain        = "com.mydomain";
            exporter.Convert(_model);

            // Replace the producer version with a constant marker.
            var json       = File.ReadAllText(jsonFile);
            var normalized = Regex.Replace(json, versionPattern, versionPlaceholder);
            File.WriteAllText(jsonFile, normalized);
        }
예제 #8
0
        /// <summary>
        /// Trains a <c>LogisticRegressionClassifier</c> (threads disabled) on
        /// <c>breast-cancer.txt</c> after passing "Label" through <c>Dictionarizer</c>, exports
        /// the model to ONNX, replaces the <c>producerVersion</c> value in the JSON output with a
        /// placeholder, and compares the JSON via <c>CheckEquality</c>.
        /// </summary>
        public void MultiClassificationLRSaveModelToOnnxTest()
        {
            var inputFile = GetDataPath(@"breast-cancer.txt");
            var pipeline  = new Legacy.LearningPipeline();

            // Tab-separated input with a header row: a numeric "Label" and a numeric "Features" column.
            var loader = new Legacy.Data.TextLoader(inputFile);
            loader.Arguments = new TextLoaderArguments
            {
                Separator = new char[] { '\t' },
                HasHeader = true,
                Column    = new TextLoaderColumn[]
                {
                    new TextLoaderColumn
                    {
                        Name   = "Label",
                        Source = new TextLoaderRange[] { new TextLoaderRange(0) },
                        Type   = Legacy.Data.DataKind.Num
                    },
                    new TextLoaderColumn
                    {
                        Name   = "Features",
                        Source = new TextLoaderRange[] { new TextLoaderRange(1, 9) },
                        Type   = Legacy.Data.DataKind.Num
                    }
                }
            };
            pipeline.Add(loader);

            pipeline.Add(new Dictionarizer("Label"));

            var trainer = new LogisticRegressionClassifier();
            trainer.UseThreads = false;
            pipeline.Add(trainer);

            var trainedModel = pipeline.Train<BreastCancerDataAllColumns, BreastCancerMCPrediction>();

            // Resolve output locations and clear any previous output for each file.
            var baselineDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "MultiClassClassification", "BreastCancer");
            var onnxFile    = GetOutputPath(baselineDir, "MultiClassificationLRSaveModelToOnnxTest.onnx");
            DeleteOutputPath(onnxFile);

            var jsonFile = GetOutputPath(baselineDir, "MultiClassificationLRSaveModelToOnnxTest.json");
            DeleteOutputPath(jsonFile);

            var exporter = new OnnxConverter();
            exporter.InputsToDrop  = new string[] { "Label" };
            exporter.OutputsToDrop = new string[] { "Label", "Features" };
            exporter.Onnx          = onnxFile;
            exporter.Json          = jsonFile;
            exporter.Domain        = "Onnx";
            exporter.Convert(trainedModel);

            // Replace the producer version with a constant marker before comparing.
            const string versionPattern     = "\"producerVersion\": \"([^\"]+)\"";
            const string versionPlaceholder = "\"producerVersion\": \"##VERSION##\"";

            var json       = File.ReadAllText(jsonFile);
            var normalized = Regex.Replace(json, versionPattern, versionPlaceholder);
            File.WriteAllText(jsonFile, normalized);

            CheckEquality(baselineDir, "MultiClassificationLRSaveModelToOnnxTest.json");
            Done();
        }
예제 #9
0
        /// <summary>
        /// Trains a <c>FastTreeBinaryClassifier</c> on <c>breast-cancer.txt</c> where column "F2"
        /// goes through <c>CategoricalOneHotVectorizer</c> with the <c>Bag</c> output kind and is
        /// concatenated with "F1" into "Features". Exports the model to ONNX, replaces the
        /// <c>producerVersion</c> value in the JSON output with a placeholder, and compares the
        /// JSON via <c>CheckEquality</c>.
        /// </summary>
        public void KeyToVectorWithBagTest()
        {
            var inputFile = GetDataPath(@"breast-cancer.txt");
            var pipeline  = new Legacy.LearningPipeline();

            // Tab-separated input with a header row: numeric "Label" and "F1", text "F2".
            var loader = new Legacy.Data.TextLoader(inputFile);
            loader.Arguments = new TextLoaderArguments
            {
                Separator = new char[] { '\t' },
                HasHeader = true,
                Column    = new TextLoaderColumn[]
                {
                    new TextLoaderColumn
                    {
                        Name   = "Label",
                        Source = new TextLoaderRange[] { new TextLoaderRange(0) },
                        Type   = Legacy.Data.DataKind.Num
                    },
                    new TextLoaderColumn
                    {
                        Name   = "F1",
                        Source = new TextLoaderRange[] { new TextLoaderRange(1, 1) },
                        Type   = Legacy.Data.DataKind.Num
                    },
                    new TextLoaderColumn
                    {
                        Name   = "F2",
                        Source = new TextLoaderRange[] { new TextLoaderRange(2, 2) },
                        Type   = Legacy.Data.DataKind.TX
                    }
                }
            };
            pipeline.Add(loader);

            // "F2" is vectorized in place (same source and output name) using the Bag output kind.
            var oneHot   = new CategoricalOneHotVectorizer();
            var f2Column = new CategoricalTransformColumn();
            f2Column.OutputKind = CategoricalTransformOutputKind.Bag;
            f2Column.Name       = "F2";
            f2Column.Source     = "F2";
            oneHot.Column = new[] { f2Column };
            pipeline.Add(oneHot);

            pipeline.Add(new ColumnConcatenator("Features", "F1", "F2"));

            var trainer = new FastTreeBinaryClassifier();
            trainer.NumLeaves           = 2;
            trainer.NumTrees            = 1;
            trainer.MinDocumentsInLeafs = 2;
            pipeline.Add(trainer);

            var trainedModel = pipeline.Train<BreastCancerData, BreastCancerPrediction>();

            // Resolve output locations and clear any previous output for each file.
            var baselineDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer");
            var onnxFile    = GetOutputPath(baselineDir, "KeyToVectorBag.onnx");
            DeleteOutputPath(onnxFile);

            var jsonFile = GetOutputPath(baselineDir, "KeyToVectorBag.json");
            DeleteOutputPath(jsonFile);

            var exporter = new OnnxConverter();
            exporter.InputsToDrop  = new string[] { "Label" };
            exporter.OutputsToDrop = new string[] { "Label", "F1", "F2", "Features" };
            exporter.Onnx          = onnxFile;
            exporter.Json          = jsonFile;
            exporter.Domain        = "Onnx";
            exporter.Convert(trainedModel);

            // Replace the producer version with a constant marker before comparing.
            const string versionPattern     = "\"producerVersion\": \"([^\"]+)\"";
            const string versionPlaceholder = "\"producerVersion\": \"##VERSION##\"";

            var json       = File.ReadAllText(jsonFile);
            var normalized = Regex.Replace(json, versionPattern, versionPlaceholder);
            File.WriteAllText(jsonFile, normalized);

            CheckEquality(baselineDir, "KeyToVectorBag.json");
            Done();
        }
예제 #10
0
파일: Predict.cs 프로젝트: lulzzz/vita
        /// <summary>
        /// Builds and trains the bank-statement classification pipeline from the CSV at
        /// <paramref name="trainpath"/>, stores the result in <c>_model</c>, and exports it
        /// through <c>OnnxConverter</c> to <c>PredictionModelWrapper.Model1Path</c> (JSON beside
        /// it, with ".onnx" replaced by ".json" in the path).
        /// </summary>
        /// <param name="trainpath">Path of the comma-separated training file (with header).</param>
        /// <param name="writeToDisk">
        /// When <c>true</c>, additionally calls <c>_model.WriteAsync</c> and replaces the
        /// <c>producerVersion</c> value in the exported JSON with a placeholder.
        /// </param>
        /// <returns><c>PredictionModelWrapper.Model1Path</c>.</returns>
        public async Task<string> TrainAsync(string trainpath, bool writeToDisk = true)
        {
            // The pipeline encapsulates data loading, featurization, and the learning algorithm.
            var pipeline = new LearningPipeline();

            // Load from CSV --> SubCategory, Description, Bank, Amount.
            pipeline.Add(new TextLoader(trainpath).CreateFrom<BankStatementLineItem>(separator: ',', useHeader: true));

            // Converts input values (words, numbers, etc.) to an index in a dictionary.
            pipeline.Add(new Dictionarizer(("SubCategory", "Label")));

            // N-gram analysis over the transaction description, weighted by term frequency
            // (the number of times term t occurs in document d).
            var descriptionFeaturizer = new TextFeaturizer("Description", "Description");
            descriptionFeaturizer.TextCase             = TextNormalizerTransformCaseNormalizationMode.Lower;
            descriptionFeaturizer.WordFeatureExtractor = new NGramNgramExtractor
            {
                Weighting = NgramTransformWeightingCriteria.Tf
            };
            pipeline.Add(descriptionFeaturizer);

            var bankFeaturizer = new TextFeaturizer("Bank", "Bank");
            bankFeaturizer.TextCase = TextNormalizerTransformCaseNormalizationMode.Lower;
            pipeline.Add(bankFeaturizer);

            // Feature column built from bank and description.
            pipeline.Add(new ColumnConcatenator("Features", "Bank", "Description"));

            // Classifier. NaiveBayesClassifier and LightGbmClassifier were previously listed
            // here as commented-out alternatives.
            var trainer = new StochasticDualCoordinateAscentClassifier();
            trainer.Shuffle    = false;
            trainer.NumThreads = 1;
            pipeline.Add(trainer);

            // Transforms the predicted label column back to its original values,
            // unless it is of type bool.
            var labelConverter = new PredictedLabelColumnOriginalValueConverter();
            labelConverter.PredictedLabelColumn = "PredictedLabel";
            pipeline.Add(labelConverter);

            // ---- training -------------------------------------------------------------
            Console.WriteLine("=============== Start training ===============");

            var timer = Stopwatch.StartNew();
            _model = pipeline.Train<BankStatementLineItem, PredictedLabel>();
            timer.Stop();

            Console.WriteLine("=============== End training ===============");
            Console.WriteLine($"training took {timer.ElapsedMilliseconds} milliseconds");
            // NOTE(review): this message is printed before anything below writes a file,
            // and regardless of writeToDisk — confirm that is intended.
            Console.WriteLine("The model is saved to {0}", PredictionModelWrapper.Model1Path);

            // ---- ONNX export (runs whether or not writeToDisk is set) -------------------
            var exporter = new OnnxConverter();
            exporter.Onnx   = PredictionModelWrapper.Model1Path;
            exporter.Json   = PredictionModelWrapper.Model1Path.Replace(".onnx", ".json");
            exporter.Domain = "onnx";
            exporter.Convert(_model);

            if (!writeToDisk)
            {
                return PredictionModelWrapper.Model1Path;
            }

            // NOTE(review): WriteAsync targets the same path that was given to the
            // converter's Onnx property above — verify the two outputs are not meant
            // to live in separate files.
            await _model.WriteAsync(PredictionModelWrapper.Model1Path);

            // Replace the producer version in the exported JSON with a constant marker.
            const string versionPattern     = "\"producerVersion\": \"([^\"]+)\"";
            const string versionPlaceholder = "\"producerVersion\": \"##VERSION##\"";

            var json       = File.ReadAllText(exporter.Json);
            var normalized = Regex.Replace(json, versionPattern, versionPlaceholder);
            File.WriteAllText(exporter.Json, normalized);

            return PredictionModelWrapper.Model1Path;
        }