// Smoke test for OneHotEncoder: reads the cooking.stackexchange corpus, tokenizes
// every sentence text, copies the original labels onto the tokenized sentences and
// runs EncodeAll over the result. The method makes no assertions; it only checks
// that the pipeline runs to completion without throwing.
public void OneHotTest()
{
    var corpusReader = new FasttextDataReader();
    var corpusDir = Path.Combine(
        Configuration.GetValue<String>("MachineLearning:dataDir"),
        "Text Classification",
        "cooking.stackexchange");
    var readerOptions = new ReaderOptions
    {
        DataDir = corpusDir,
        FileName = "cooking.stackexchange.txt"
    };
    var corpus = corpusReader.Read(readerOptions);

    var tokenizerFactory = new TokenizerFactory(new TokenizationOptions { }, SupportedLanguage.English);
    tokenizerFactory.GetTokenizer<TreebankTokenizer>();

    var rawTexts = corpus.Select(sentence => sentence.Text).ToList();
    var tokenized = tokenizerFactory.Tokenize(rawTexts);

    // Labels are matched to tokenized sentences purely by position.
    for (var index = 0; index < tokenized.Count; ++index)
    {
        tokenized[index].Label = corpus[index].Label;
    }

    var labeled = tokenized.ToList();

    var encoder = new OneHotEncoder { Sentences = labeled };
    encoder.EncodeAll();
}
// End-to-end classification test on the cooking.stackexchange corpus: reads and
// tokenizes the sentences, shuffles them, trains a classifier on the first part of
// a 0.7 split and asserts that more than half of the remaining sentences receive
// their own label as the top-ranked class.
public void CookingTest()
{
    // Resolved once: the same directory serves as the reader's data dir, the
    // training corpus dir and the parent folder of the model file.
    var corpusDir = Path.Combine(
        Configuration.GetValue<String>("MachineLearning:dataDir"),
        "Text Classification",
        "cooking.stackexchange");

    var reader = new FasttextDataReader();
    var sentences = reader.Read(new ReaderOptions
    {
        DataDir = corpusDir,
        FileName = "cooking.stackexchange.txt"
    });

    var tokenizer = new TokenizerFactory(new TokenizationOptions { }, SupportedLanguage.English);
    tokenizer.GetTokenizer<TreebankTokenizer>();

    var newSentences = tokenizer.Tokenize(sentences.Select(x => x.Text).ToList());

    // Labels are re-attached by position.
    // NOTE(review): assumes Tokenize returns one sentence per input text, in input order - confirm.
    for (int i = 0; i < newSentences.Count; i++)
    {
        newSentences[i].Label = sentences[i].Label;
    }

    sentences = newSentences.ToList();
    sentences.Shuffle();

    var options = new ClassifyOptions
    {
        ModelFilePath = Path.Combine(corpusDir, "nb.model"),
        TrainingCorpusDir = corpusDir,
        Dimension = 100
    };
    var classifier = new ClassifierFactory<SentenceFeatureExtractor>(options, SupportedLanguage.English);

    // Item1 is used for training, Item2 for evaluation.
    var dataset = sentences.Split(0.7M);
    classifier.Train(dataset.Item1);

    int correct = 0;
    int total = 0;
    foreach (var td in dataset.Item2)
    {
        var classes = classifier.Classify(td);

        // Only the first returned class is compared against the expected label.
        if (td.Label == classes[0].Item1)
        {
            correct++;
        }

        total++;
    }

    // Without this guard an empty evaluation set makes the division below yield
    // NaN, and the accuracy assertion fails without saying why.
    Assert.IsTrue(total > 0, "The evaluation split is empty; accuracy cannot be computed.");

    var accuracy = (float)correct / total;
    Assert.IsTrue(accuracy > 0.5, $"Accuracy {accuracy} ({correct}/{total}) is not above 0.5.");
}