/// <summary>
/// Classifies the first sentence of <paramref name="doc"/> with a naive Bayes classifier
/// and writes the first entry of the classification result to that sentence's <c>Intent</c>.
/// </summary>
/// <param name="agent">Not read by this method.</param>
/// <param name="doc">Document whose first sentence is classified; <c>doc.Sentences[0].Intent</c> is overwritten.</param>
/// <param name="meta">Provides the model file name, which is combined with <c>Settings.ModelDir</c>.</param>
/// <returns>Always <c>true</c>.</returns>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
{
    var modelPath = Path.Combine(Settings.ModelDir, meta.Model);
    var options = new ClassifyOptions { ModelFilePath = modelPath };
    var classifier = new ClassifierFactory<NaiveBayesClassifier, SentenceFeatureExtractor>(
        options,
        SupportedLanguage.English);

    // Only the first sentence of the document is classified.
    var firstSentence = doc.Sentences.First();
    var input = new Sentence
    {
        Text = firstSentence.Text,
        Words = firstSentence.Tokens
    };

    var best = classifier.Classify(input).First();

    var intent = new TextClassificationResult
    {
        Classifier = "BotSharpNBayesClassifier",
        Label = best.Item1,
        Confidence = (decimal)best.Item2
    };
    doc.Sentences[0].Intent = intent;

    return true;
}
/// <summary>
/// Trains a word-feature classifier on the "Gender" corpus, then shuffles the corpus,
/// retrains on everything after the first 2000 items and counts how many of the first
/// 2000 items are labelled correctly.
/// </summary>
/// <remarks>
/// NOTE(review): the computed accuracy is neither asserted nor returned — confirm whether a check was intended.
/// NOTE(review): the classifier is first trained on the full corpus; whether the second
/// <c>Train</c> call discards that state is not visible here.
/// </remarks>
public void GenderTest()
{
    var dataDir = Configuration.GetValue<string>("MachineLearning:dataDir");
    var options = new ClassifyOptions
    {
        TrainingCorpusDir = Path.Combine(dataDir, "Gender")
    };
    var classifier = new ClassifierFactory<WordFeatureExtractor>(options, SupportedLanguage.English);
    var corpus = GetLabeledCorpus(options);

    var tokenizationOptions = new TokenizationOptions { Pattern = RegexTokenizer.WORD_PUNC };
    var tokenizer = new TokenizerFactory(tokenizationOptions, SupportedLanguage.English);
    tokenizer.GetTokenizer<RegexTokenizer>();

    // Tokenize every labelled item before the first training pass.
    foreach (var item in corpus)
    {
        item.Words = tokenizer.Tokenize(item.Text);
    }

    classifier.Train(corpus);

    // Single ad-hoc classification; its result is discarded.
    string text = "Bridget";
    var probe = new Sentence { Text = text, Words = tokenizer.Tokenize(text) };
    classifier.Classify(probe);

    corpus.Shuffle();

    // After shuffling: items from index 2000 onward train, the first 2000 are evaluated.
    var trainingData = corpus.Skip(2000).ToList();
    classifier.Train(trainingData);

    var testData = corpus.Take(2000).ToList();
    int correct = 0;
    foreach (var sample in testData)
    {
        var classes = classifier.Classify(sample);
        if (sample.Label == classes[0].Item1)
        {
            correct++;
        }
    }

    var accuracy = (float)correct / testData.Count;
}
/// <summary>
/// Reads the cooking.stackexchange data set, tokenizes and shuffles it, trains a
/// sentence-feature classifier on the first part of the split and asserts that more than
/// half of the second part is labelled correctly.
/// </summary>
public void CookingTest()
{
    var reader = new FasttextDataReader();

    var dataDir = Configuration.GetValue<string>("MachineLearning:dataDir");
    var corpusDir = Path.Combine(dataDir, "Text Classification", "cooking.stackexchange");

    var readerOptions = new ReaderOptions
    {
        DataDir = corpusDir,
        FileName = "cooking.stackexchange.txt"
    };
    var sentences = reader.Read(readerOptions);

    var tokenizer = new TokenizerFactory(new TokenizationOptions { }, SupportedLanguage.English);
    tokenizer.GetTokenizer<TreebankTokenizer>();

    // Tokenizing works on the raw texts only, so labels are copied back by position.
    var rawTexts = sentences.Select(x => x.Text).ToList();
    var newSentences = tokenizer.Tokenize(rawTexts);
    for (int idx = 0; idx < newSentences.Count; idx++)
    {
        newSentences[idx].Label = sentences[idx].Label;
    }

    sentences = newSentences.ToList();
    sentences.Shuffle();

    var options = new ClassifyOptions
    {
        ModelFilePath = Path.Combine(dataDir, "Text Classification", "cooking.stackexchange", "nb.model"),
        TrainingCorpusDir = corpusDir,
        Dimension = 100
    };
    var classifier = new ClassifierFactory<SentenceFeatureExtractor>(options, SupportedLanguage.English);

    // Item1 is used for training, Item2 for evaluation.
    var dataset = sentences.Split(0.7M);
    classifier.Train(dataset.Item1);

    int correct = 0;
    int total = 0;
    foreach (var sample in dataset.Item2)
    {
        var classes = classifier.Classify(sample);
        if (sample.Label == classes[0].Item1)
        {
            correct++;
        }

        total++;
    }

    var accuracy = (float)correct / total;
    Assert.IsTrue(accuracy > 0.5);
}
/// <summary>
/// Reads <c>train.csv</c>, tokenizes and shuffles the sentences, takes 2000 of them,
/// trains a classifier on the first part of the split and asserts that more than half
/// of the second part is labelled correctly.
/// </summary>
public void SpookyAuthorIdentification()
{
    var reader = new KaggleTextDataReader();
    var sentences = reader.Read(new ReaderOptions { FileName = "train.csv" });

    var tokenizer = new TokenizerFactory(new TokenizationOptions { }, SupportedLanguage.English);
    tokenizer.GetTokenizer<TreebankTokenizer>();

    // Tokenizing works on the raw texts only, so ids and labels are copied back by position.
    var rawTexts = sentences.Select(x => x.Text).ToList();
    var newSentences = tokenizer.Tokenize(rawTexts);
    for (int idx = 0; idx < newSentences.Count; idx++)
    {
        var source = sentences[idx];
        newSentences[idx].Id = source.Id;
        newSentences[idx].Label = source.Label;
    }

    sentences = newSentences.ToList();
    sentences.Shuffle();

    // Only the first 2000 shuffled sentences are used; Item1 trains, Item2 evaluates.
    var sample = sentences.Take(2000).ToList();
    var dataset = sample.Split(0.7M);

    var options = new ClassifyOptions
    {
        ModelDir = AppContext.BaseDirectory,
        ModelFilePath = Path.Combine(AppContext.BaseDirectory, "nb.model"),
        Dimension = 300
    };
    var classifier = new ClassifierFactory<SentenceFeatureExtractor>(options, SupportedLanguage.English);
    classifier.GetClassifer("NaiveBayesClassifier");
    classifier.Train(dataset.Item1);

    int correct = 0;
    int total = 0;
    foreach (var testItem in dataset.Item2)
    {
        var classes = classifier.Classify(testItem);
        if (testItem.Label == classes[0].Item1)
        {
            correct++;
        }

        total++;
    }

    var accuracy = (float)correct / total;
    Assert.IsTrue(accuracy > 0.5);
}
/// <summary>
/// Initializes the classifier from <paramref name="meta"/>, classifies the first sentence of
/// <paramref name="doc"/> and writes the first entry of the classification result to that
/// sentence's <c>Intent</c>.
/// </summary>
/// <param name="agent">Not read by this method.</param>
/// <param name="doc">Document whose first sentence is classified; <c>doc.Sentences[0].Intent</c> is overwritten.</param>
/// <param name="meta">Passed to <c>Init</c> before classification.</param>
/// <returns>Always <c>true</c>.</returns>
public async Task<bool> Predict(AgentBase agent, NlpDoc doc, PipeModel meta)
{
    Init(meta);

    // Only the first sentence of the document is classified.
    var firstSentence = doc.Sentences.First();
    var input = new Sentence
    {
        Text = firstSentence.Text,
        Words = firstSentence.Tokens
    };

    var best = _classifier.Classify(input).First();

    var intent = new TextClassificationResult
    {
        Classifier = "BotSharpIntentClassifier",
        Label = best.Item1,
        Confidence = best.Item2
    };
    doc.Sentences[0].Intent = intent;

    return true;
}