/// <summary>
/// Tokenizes every user utterance in the agent's corpus via the SpaCy provider's
/// "tokenizer" endpoint and rebuilds <paramref name="doc"/>.Sentences from the results.
/// </summary>
/// <returns>True only if every tokenizer call succeeded.</returns>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
    var dc = new DefaultDataContextLoader().GetDefaultDc();
    var corpus = agent.Corpus;
    bool res = true;

    doc.Sentences = new List<NlpDocSentence>();

    corpus.UserSays.ForEach(usersay =>
    {
        Console.WriteLine(usersay.Text);

        // BUGFIX: build a fresh request per utterance. The original reused one
        // RestRequest and called AddParameter in the loop, which appends a new
        // duplicate "text" parameter on every iteration, so each call after the
        // first also carried all previously-added texts.
        var request = new RestRequest("tokenizer", Method.GET);
        request.AddParameter("text", usersay.Text);
        var response = client.Execute<Result>(request);

        doc.Sentences.Add(new NlpDocSentence
        {
            Tokens = response.Data.Tokens,
            Text = usersay.Text
        });

        res = res && response.IsSuccessful;
    });

    return res;
}
/// <summary>
/// Classifies the document's first sentence with the persisted Naive Bayes model
/// and records the top-ranked intent on it.
/// </summary>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
{
    var options = new ClassifyOptions
    {
        ModelFilePath = Path.Combine(Settings.ModelDir, meta.Model)
    };
    var classifier = new ClassifierFactory<NaiveBayesClassifier, SentenceFeatureExtractor>(options, SupportedLanguage.English);

    var sentence = doc.Sentences
        .Select(s => new Sentence { Text = s.Text, Words = s.Tokens })
        .First();

    // Results are ranked; the head is the winning (label, score) pair.
    var best = classifier.Classify(sentence).First();

    doc.Sentences[0].Intent = new TextClassificationResult
    {
        Classifier = "BotSharpNBayesClassifier",
        Label = best.Item1,
        Confidence = (decimal)best.Item2
    };

    return true;
}
/// <summary>
/// Tokenizes the whole corpus in one batched JSON POST to the SpaCy provider's
/// "tokenizer" endpoint and rebuilds <paramref name="doc"/>.Sentences.
/// </summary>
/// <returns>True if the batched request succeeded.</returns>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
    // Method is set once via the constructor; the original redundantly
    // re-assigned request.Method = Method.POST afterwards.
    var request = new RestRequest("tokenizer", Method.POST);
    var corpus = agent.Corpus;

    doc.Sentences = new List<NlpDocSentence>();

    // Batch every utterance into a single request payload.
    List<string> sentencesList = corpus.UserSays.Select(usersay => usersay.Text).ToList();

    request.RequestFormat = DataFormat.Json;
    request.AddHeader("Content-Type", "application/json; charset=utf-8");
    request.AddParameter("application/json", JsonConvert.SerializeObject(new Documents(sentencesList)), ParameterType.RequestBody);

    var response = client.Execute<Result>(request);
    var tokens = response.Data.TokensList;

    // TokensList is index-aligned with the submitted sentences.
    for (int i = 0; i < sentencesList.Count; i++)
    {
        doc.Sentences.Add(new NlpDocSentence
        {
            Tokens = tokens[i],
            Text = sentencesList[i]
        });
    }

    return response.IsSuccessful;
}
/// <summary>
/// Sends the first sentence to the Wit.ai message endpoint and appends any
/// recognized datetime/location entities to the sentence's entity list.
/// </summary>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
{
    var client = new RestClient($"{Configuration.GetSection("WitAiEntityRecognizer:url").Value}");
    var request = new RestRequest(Configuration.GetSection("WitAiEntityRecognizer:resource").Value, Method.GET);
    request.AddHeader("Authorization", "Bearer " + Configuration.GetSection("WitAiEntityRecognizer:serverAccessToken").Value);
    request.AddQueryParameter("v", Configuration.GetSection("WitAiEntityRecognizer:version").Value);
    request.AddQueryParameter("q", doc.Sentences[0].Text);
    request.AddQueryParameter("verbose", "true");
    request.AddQueryParameter("autosuggest", "true");

    var result = client.Execute<WitAiResponse>(request);
    var entities = result.Data.Entities[0];

    var sentence = doc.Sentences[0];
    // ROBUSTNESS: an upstream pipe may not have initialized the entity list;
    // the original would NRE on AddRange in that case.
    if (sentence.Entities == null)
    {
        sentence.Entities = new List<NlpEntity>();
    }

    if (entities.Datetime != null)
    {
        sentence.Entities.AddRange(entities.Datetime.Select(x => Map(x)));
    }
    if (entities.Location != null)
    {
        sentence.Entities.AddRange(entities.Location.Select(x => Map(x)));
    }

    return true;
}
/// <summary>
/// Prepares SVM training features from the agent's corpus: each utterance is
/// embedded with word vectors and labeled with its intent's index.
/// </summary>
/// <returns>Always true; success is not derived from any training call.</returns>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    meta.Model = "classification-svm.model";
    string parsedTrainingDataFileName = Path.Combine(Settings.TempDir, $"classification-svm.parsed.txt");
    string modelFileName = Path.Combine(Settings.ModelDir, meta.Model);

    // Build parallel lists: for each utterance, its intent's index (as string)
    // becomes the numeric label, and its text the training sentence.
    List<string> labels = new List<string>();
    List<string> sentences = new List<string>();
    agent.Corpus.UserSays.ForEach(x =>
    {
        agent.Intents.ForEach(intent =>
        {
            if (intent.Name == x.Intent)
            {
                labels.Add(agent.Intents.IndexOf(intent).ToString());
            }
        });
        sentences.Add(x.Text);
    });

    NLP.Classify.SVMClassifier svmClassifier = new NLP.Classify.SVMClassifier();
    Args args = new Args();
    // Word-vector binary used to embed each sentence.
    args.ModelFile = Path.Combine(Configuration.GetValue<String>("BotSharpSVMClassifier:wordvec"), "wordvec_enu.bin");
    var featureSetList = svmClassifier.FeatureSetsGenerator(new VectorGenerator(args).Sentence2Vec(sentences), labels);

    /*
     * // try using spacy doc2vec (alternative embedding path, kept for reference)
     * var client = new RestClient("http://10.2.21.200:5005");
     * var request = new RestRequest("batchdoc2vec", Method.POST);
     * request.RequestFormat = DataFormat.Json;
     *
     * request.AddParameter("application/json", JsonConvert.SerializeObject(new {Sentences = sentences}), ParameterType.RequestBody);
     *
     * var response = client.Execute<Result>(request);
     * Result res = JsonConvert.DeserializeObject<Result>(response.Content);
     *
     * List<Vec> vecs = new List<Vec>();
     * foreach (List<double> cur in res.Doc2vecList)
     * {
     *     Vec vec = new Vec();
     *     vec.VecNodes = cur;
     *     vecs.Add(vec);
     * }
     * List<LabeledFeatureSet> featureSetList = svmClassifier.FeatureSetsGenerator(vecs, labels);
     */

    ClassifyOptions classifyOptions = new ClassifyOptions();
    classifyOptions.ModelFilePath = Path.Combine(Settings.ModelDir, "svm_classifier_model");
    classifyOptions.TransformFilePath = Path.Combine(Settings.ModelDir, "transform_obj_data");
    // NOTE(review): the actual training call is commented out, so this method
    // never persists a model even though meta is stamped below — confirm intent.
    // svmClassifier.Train(featureSetList, classifyOptions);

    meta.Meta = new JObject();
    meta.Meta["compiled at"] = "Aug 31, 2018";
    return (true);
}
/// <summary>
/// Runs the part-of-speech tagger over every sentence's tokens in place.
/// </summary>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    foreach (var sentence in doc.Sentences)
    {
        _tagger.Tag(new Sentence { Words = sentence.Tokens });
    }
    return true;
}
/// <summary>
/// Classifies the first sentence with the persisted SVM model: the sentence is
/// embedded via word vectors, scored per intent, and the best-scoring intent is
/// written back to the sentence.
/// </summary>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
{
    string predictFileName = Path.Combine(Settings.TempDir, "svm-predict-tempfile.txt");
    // NOTE(review): this temp file is written then deleted without anything in
    // this method reading it — likely a leftover from an external predictor path.
    File.WriteAllText(predictFileName, doc.Sentences[0].Text);

    var svmClassifier = new NLP.Classify.SVMClassifier();
    Args args = new Args();
    args.ModelFile = Path.Combine(Configuration.GetValue<String>("BotSharpSVMClassifier:wordvec"), "wordvec_enu.bin");
    var featureSet = svmClassifier.FeatureSetsGenerator(new VectorGenerator(args).SingleSentence2Vec(doc.Sentences[0].Text), "");

    ClassifyOptions classifyOptions = new ClassifyOptions();
    classifyOptions.Model = SVM.BotSharp.MachineLearning.Model.Read(Path.Combine(Settings.ModelDir, "svm_classifier_model"));
    classifyOptions.Transform = SVM.BotSharp.MachineLearning.RangeTransform.Read(Path.Combine(Settings.ModelDir, "transform_obj_data"));

    double[][] d = svmClassifier.Predict(featureSet, classifyOptions);

    // Pick the intent with the highest decision value.
    string intent = null;
    decimal confidence = 0;
    double max = Double.MinValue;
    // IDIOM: use Array.Length instead of LINQ Count() on an array.
    for (int i = 0; i < d[0].Length; i++)
    {
        if (d[0][i] > max)
        {
            max = d[0][i];
            intent = agent.Intents[i].Name;
            confidence = (decimal)d[0][i];
        }
    }

    File.Delete(predictFileName);

    doc.Sentences[0].Intent = new TextClassificationResult
    {
        Classifier = "SVMClassifier",
        Label = intent,
        Confidence = confidence
    };
    return true;
}
/// <summary>
/// Asks the SpaCy provider to load its model(s) via the "load" endpoint and
/// records the provider's response (minus the model list) into the pipe meta.
/// </summary>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
    var loadRequest = new RestRequest("load", Method.GET);

    var response = client.Execute<Result>(loadRequest);

    // Keep everything from the response except the model list, which goes into
    // meta.Model separately.
    meta.Meta = JObject.FromObject(response.Data);
    meta.Meta.Remove("models");
    meta.Model = response.Data.Models;

    return response.IsSuccessful;
}
/// <summary>
/// Trains the SVM intent classifier: intents are mapped to numeric ids,
/// sentences are embedded via word vectors, and the model is persisted to
/// "svm_classifier_model" under the model directory.
/// </summary>
/// <returns>Always true (the underlying Train call reports no status).</returns>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    meta.Model = "classification-fasttext.model";

    // Parallel label/sentence lists from the training corpus.
    // (Removed: a StringBuilder corpus, parsed/model file names, and a stray
    // empty statement that were built but never used.)
    List<string> labels = new List<string>();
    List<string> sentences = new List<string>();
    agent.Corpus.UserSays.ForEach(x =>
    {
        labels.Add(x.Intent);
        sentences.Add(x.Text);
    });

    // Map each distinct intent name to a numeric id in first-seen order.
    Dictionary<string, string> labelDic = new Dictionary<string, string>();
    foreach (string label in labels)
    {
        if (!labelDic.ContainsKey(label))
        {
            labelDic.Add(label, labelDic.Count.ToString());
        }
    }
    List<string> labelNums = labels.Select(label => labelDic[label]).ToList();

    var svmClassifier = new NLP.Classify.SVMClassifier();
    Args args = new Args();
    // Word-vector binary used to embed each sentence.
    args.WordDecoderModelFile = Path.Combine(Settings.ModelDir, "wordvec_enu.bin");
    List<LabeledFeatureSet> featureSetList = svmClassifier.FeatureSetsGenerator(new VectorGenerator(args).Sentence2Vec(sentences), labelNums);

    svmClassifier.Train(featureSetList, new ClassifyOptions(Path.Combine(Settings.ModelDir, "svm_classifier_model")));

    meta.Meta = new JObject();
    meta.Meta["compiled at"] = "Aug 31, 2018";
    return true;
}
/// <summary>
/// Tokenizes each sentence of the incoming document with the same tokenizer
/// used during training, storing the tokens back on the sentence.
/// </summary>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
{
    doc.Tokenizer = this;

    foreach (var sentence in doc.Sentences)
    {
        sentence.Tokens = _tokenizer.Tokenize(sentence.Text);
    }

    return true;
}
/// <summary>
/// Pings the SpaCy provider's "entitize" endpoint. The text parameter is
/// currently sent empty, so this effectively only checks endpoint availability.
/// </summary>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);

    var entitizeRequest = new RestRequest("entitize", Method.GET);
    entitizeRequest.AddParameter("text", "");

    var response = client.Execute<Result>(entitizeRequest);
    return response.IsSuccessful;
}
/// <summary>
/// Runs the trained CRF decoder over every sentence: each token's (text, POS)
/// pair is fed to the segmenter and the best tag sequence is converted to
/// entities, which are then merged onto the sentence.
/// </summary>
public async Task<bool> Predict(AgentBase agent, NlpDoc doc, PipeModel meta)
{
    var decoder = new CRFDecoder();
    var options = new DecoderOptions
    {
        ModelFileName = System.IO.Path.Combine(Settings.ModelDir, meta.Model)
    };

    // Load the encoded model once and build a tagger for it.
    decoder.LoadModel(options.ModelFileName);
    var tagger = decoder.CreateTagger(options.NBest, options.MaxWord);
    tagger.set_vlevel(options.ProbLevel);

    // Pre-allocate holders for the N-best segmentation results.
    var crf_out = new CRFSegOut[options.NBest];
    for (var i = 0; i < options.NBest; i++)
    {
        crf_out[i] = new CRFSegOut(options.MaxWord);
    }

    doc.Sentences.ForEach(sent =>
    {
        List<List<String>> dataset = sent.Tokens
            .Select(token => new List<String> { token.Text, token.Pos })
            .ToList();

        // Predict the tag sequence for this sentence.
        decoder.Segment(crf_out, tagger, dataset);

        var entities = new List<NlpEntity>();
        var predictedTags = crf_out[0].result_;
        for (int i = 0; i < sent.Tokens.Count; i++)
        {
            // BUGFIX: read token positions/text from the CURRENT sentence.
            // The original indexed doc.Sentences[0] here, so every sentence
            // after the first got the first sentence's tokens and text.
            entities.Add(new NlpEntity
            {
                Entity = predictedTags[i],
                Start = sent.Tokens[i].Start,
                Value = sent.Tokens[i].Text,
                Confidence = 0,
                Extrator = "BotSharpNER"
            });
        }
        sent.Entities = MergeEntity(sent.Text, entities);
    });

    return true;
}
/// <summary>
/// Initializes the tagger and POS-tags every sentence's tokens in place.
/// </summary>
public async Task<bool> Predict(AgentBase agent, NlpDoc doc, PipeModel meta)
{
    Init();

    foreach (var sentence in doc.Sentences)
    {
        _tagger.Tag(new Sentence { Words = sentence.Tokens, Text = sentence.Text });
    }

    return true;
}
/// <summary>
/// Trains a CRFsuite NER model: writes a space-separated training corpus
/// (entity token pos chunk, one token per line, blank line between sentences),
/// converts it to CRFsuite feature format, then shells out to the crfsuite
/// binary to learn the model.
/// </summary>
/// <returns>Always true; the external learner's exit status is not checked.</returns>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    var dc = new DefaultDataContextLoader().GetDefaultDc();
    var corpus = agent.Corpus;
    meta.Model = "ner-crf.model";
    List<TrainingIntentExpression<TrainingIntentExpressionPart>> userSays = corpus.UserSays;
    List<List<TrainingData>> list = new List<List<TrainingData>>();
    string rawTrainingDataFileName = Path.Join(Settings.TrainDir, "ner-crf.corpus.txt");
    string parsedTrainingDataFileName = Path.Join(Settings.TrainDir, "ner-crf.parsed.txt");
    string modelFileName = Path.Join(Settings.ModelDir, meta.Model);

    // Write the raw corpus. doc.Sentences and userSays are assumed to be
    // index-aligned (sentence i pairs with utterance i's entities).
    using (FileStream fs = new FileStream(rawTrainingDataFileName, FileMode.Create))
    {
        using (StreamWriter sw = new StreamWriter(fs))
        {
            for (int i = 0; i < doc.Sentences.Count; i++)
            {
                List<TrainingData> curLine = Merge(doc.Sentences[i].Tokens, userSays[i].Entities);
                curLine.ForEach(trainingData =>
                {
                    // One token per line: "<entity> <token> <pos> <chunk>".
                    // "\n" (not WriteLine) keeps line endings consistent for crfsuite.
                    string[] wordParams = { trainingData.Entity, trainingData.Token, trainingData.Pos, trainingData.Chunk };
                    string wordStr = string.Join(" ", wordParams);
                    sw.Write(wordStr + "\n");
                });
                list.Add(curLine);
                // Blank line separates sentences in the CRF corpus format.
                sw.Write("\n");
            }
            sw.Flush();
        }
    }

    // Feature templates come from configuration.
    var fields = Configuration.GetValue<String>($"CRFsuiteEntityRecognizer:fields");
    var uniFeatures = Configuration.GetValue<String>($"CRFsuiteEntityRecognizer:uniFeatures");
    var biFeatures = Configuration.GetValue<String>($"CRFsuiteEntityRecognizer:biFeatures");

    // Convert the raw corpus into CRFsuite's attribute format.
    new MachineLearning.CRFsuite.Ner()
    .NerStart(rawTrainingDataFileName, parsedTrainingDataFileName, fields, uniFeatures.Split(" "), biFeatures.Split(" "));

    // Run the external crfsuite learner against the parsed corpus.
    var algorithmDir = Path.Join(AppDomain.CurrentDomain.GetData("ContentRootPath").ToString(), "Algorithms");
    CmdHelper.Run(Path.Join(algorithmDir, "crfsuite"), $"learn -m {modelFileName} {parsedTrainingDataFileName}"); // --split=3 -x
    Console.WriteLine($"Saved model to {modelFileName}");

    meta.Meta = new JObject();
    meta.Meta["fields"] = fields;
    meta.Meta["uniFeatures"] = uniFeatures;
    meta.Meta["biFeatures"] = biFeatures;
    return (true);
}
/// <summary>
/// Tokenizes the whole corpus in one batched JSON POST to the NLTK provider and
/// rebuilds <paramref name="doc"/>.Sentences from the index-aligned token lists.
/// </summary>
/// <returns>True if the batched request succeeded.</returns>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    var client = new RestClient(Configuration.GetSection("NltkProvider:Url").Value);
    var request = new RestRequest("nltktokenizesentences", Method.POST);
    var dc = new DefaultDataContextLoader().GetDefaultDc();
    var corpus = agent.Corpus;

    doc.Sentences = new List<NlpDocSentence>();

    // Batch every utterance into a single request payload.
    // (Removed: a large commented-out per-utterance request loop that this
    // batched implementation superseded.)
    List<string> sentencesList = corpus.UserSays.Select(usersay => usersay.Text).ToList();

    request.RequestFormat = DataFormat.Json;
    request.AddParameter("application/json", JsonConvert.SerializeObject(new Documents(sentencesList)), ParameterType.RequestBody);

    var response = client.Execute<Result>(request);
    var tokens = response.Data.TokensList;

    for (int i = 0; i < sentencesList.Count; i++)
    {
        doc.Sentences.Add(new NlpDocSentence
        {
            Tokens = tokens[i],
            Text = sentencesList[i]
        });
    }

    return response.IsSuccessful;
}
/// <summary>
/// Builds doc.Sentences by tokenizing every user utterance in the corpus and
/// registers this pipe as the document's tokenizer.
/// </summary>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    doc.Tokenizer = this;

    doc.Sentences = agent.Corpus.UserSays
        .Select(say => new NlpDocSentence
        {
            Tokens = _tokenizer.Tokenize(say.Text),
            Text = say.Text
        })
        .ToList();

    return true;
}
/// <summary>
/// Trains a CRF NER model with the in-process CRFEncoder: writes a
/// tab-separated corpus (token, POS, entity per line, blank line between
/// sentences) and learns the model with the configured feature template.
/// </summary>
/// <returns>True if the encoder reported successful learning.</returns>
public async Task<bool> Train(AgentBase agent, NlpDoc doc, PipeModel meta)
{
    var corpus = agent.Corpus;
    meta.Model = "ner-crf.model";
    List<TrainingIntentExpression<TrainingIntentExpressionPart>> userSays = corpus.UserSays;
    List<List<TrainingData>> list = new List<List<TrainingData>>();
    string rawTrainingDataFileName = System.IO.Path.Combine(Settings.ModelDir, "ner-crf.corpus.txt");
    string modelFileName = System.IO.Path.Combine(Settings.ModelDir, meta.Model);

    // Write the training corpus. doc.Sentences and userSays are assumed to be
    // index-aligned (sentence i pairs with utterance i's entities).
    using (FileStream fs = new FileStream(rawTrainingDataFileName, FileMode.Create))
    {
        using (StreamWriter sw = new StreamWriter(fs))
        {
            for (int i = 0; i < doc.Sentences.Count; i++)
            {
                List<TrainingData> curLine = Merge(doc, doc.Sentences[i].Tokens, userSays[i].Entities);
                curLine.ForEach(trainingData =>
                {
                    // One token per line: "<token>\t<pos>\t<entity>".
                    string[] wordParams = { trainingData.Token, trainingData.Pos, trainingData.Entity };
                    string wordStr = string.Join("\t", wordParams);
                    sw.WriteLine(wordStr);
                });
                list.Add(curLine);
                // Blank line terminates each sentence in the CRF corpus format.
                sw.WriteLine();
            }
            sw.Flush();
        }
    }

    // Resolve the feature template path; "|App_Data|" is a placeholder for the
    // application's data directory.
    string contentDir = AppDomain.CurrentDomain.GetData("DataPath").ToString();
    string template = Configuration.GetValue<String>($"template");
    template = template.Replace("|App_Data|", contentDir + System.IO.Path.DirectorySeparatorChar);

    var encoder = new CRFEncoder();
    bool result = encoder.Learn(new EncoderOptions
    {
        TrainingCorpusFileName = rawTrainingDataFileName,
        TemplateFileName = template,
        ModelFileName = modelFileName,
    });
    return (result);
}
/// <summary>
/// Initializes the classifier from pipe meta and trains it on every sentence,
/// using each sentence's intent label as the target class.
/// </summary>
public async Task<bool> Train(AgentBase agent, NlpDoc doc, PipeModel meta)
{
    Init(meta);

    // Project pipeline sentences into the classifier's input shape.
    var trainingSentences = doc.Sentences
        .Select(s => new Sentence
        {
            Label = s.Intent.Label,
            Text = s.Text,
            Words = s.Tokens
        })
        .ToList();

    _classifier.Train(trainingSentences);

    Console.WriteLine($"Saved model to {Settings.ModelDir}");
    return true;
}
/// <summary>
/// Sends every corpus utterance to the SpaCy provider's "tagger" endpoint and
/// collects the returned tag sequences.
/// </summary>
/// <returns>True only if every tagger call succeeded.</returns>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
    var dc = new DefaultDataContextLoader().GetDefaultDc();
    var corpus = agent.Corpus;
    List<List<String>> tags = new List<List<String>>();
    bool res = true;

    corpus.UserSays.ForEach(usersay =>
    {
        // BUGFIX: build a fresh request per utterance. The original reused one
        // RestRequest and AddParameter appended a duplicate "text" parameter
        // each iteration, so later calls also carried all earlier texts.
        var request = new RestRequest("tagger", Method.GET);
        request.AddParameter("text", usersay.Text);
        var response = client.Execute<Result>(request);
        tags.Add(response.Data.Tags);
        res = res && response.IsSuccessful;
    });

    return res;
}
/// <summary>
/// Tokenizes the document's first sentence via the SpaCy provider's
/// "tokenizer" endpoint and stores the tokens back on the sentence.
/// </summary>
/// <returns>True if the tokenizer call succeeded.</returns>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
{
    var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
    var request = new RestRequest("tokenizer", Method.GET);
    request.AddParameter("text", doc.Sentences[0].Text);

    var response = client.Execute<Result>(request);
    doc.Sentences[0].Tokens = response.Data.TokensList[0];

    // BUGFIX: the original computed a success flag but unconditionally
    // returned true; propagate the actual HTTP outcome instead.
    return response.IsSuccessful;
}
/// <summary>
/// Classifies the first sentence with the external fastText binary
/// (predict-prob) and records the winning label and probability.
/// </summary>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
{
    string modelFileName = Path.Join(Settings.ModelDir, meta.Model);
    string predictFileName = Path.Join(Settings.PredictDir, "fasttext.txt");
    File.WriteAllText(predictFileName, doc.Sentences[0].Text);

    // CONSISTENCY/ROBUSTNESS: quote both paths (as the sibling fastText
    // predictor does) so directories containing spaces don't break the command.
    var output = Engines.Classifiers.CmdHelper.Run(Path.Join(Settings.AlgorithmDir, "fasttext"), $"predict-prob \"{modelFileName}.bin\" \"{predictFileName}\"");
    File.Delete(predictFileName);

    // fastText output format: "__label__<intent> <probability>".
    var fields = output.Split(' ');
    doc.Sentences[0].Intent = new TextClassificationResult
    {
        // CONSISTENCY: stamp the classifier name like the sibling implementation.
        Classifier = "FasttextClassifier",
        Label = fields[0].Split("__label__")[1],
        Confidence = decimal.Parse(fields[1])
    };
    return true;
}
/// <summary>
/// Trains a fastText supervised classifier: writes the corpus in
/// "__label__&lt;intent&gt; &lt;text&gt;" format and invokes the external fasttext binary.
/// </summary>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    meta.Model = "classification-fasttext.model";
    string parsedTrainingDataFileName = Path.Combine(Settings.TempDir, $"classification-fasttext.parsed.txt");
    string modelFileName = Path.Combine(Settings.ModelDir, meta.Model);

    // One training line per utterance, fastText supervised format.
    var corpus = new StringBuilder();
    foreach (var say in agent.Corpus.UserSays)
    {
        corpus.AppendLine($"__label__{say.Intent} {say.Text}");
    }
    File.WriteAllText(parsedTrainingDataFileName, corpus.ToString());

    var output = CmdHelper.Run(Path.Combine(Settings.AlgorithmDir, "fasttext"), $"supervised -input \"{parsedTrainingDataFileName}\" -output \"{modelFileName}\"", false);

    Console.WriteLine($"Saved model to {modelFileName}");
    return true;
}
/// <summary>
/// Classifies the first sentence with the external fastText binary
/// (predict-prob) and records the winning label and probability.
/// </summary>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
{
    string modelFileName = Path.Combine(Settings.ModelDir, meta.Model);
    string predictFileName = Path.Combine(Settings.TempDir, "fasttext.txt");
    File.WriteAllText(predictFileName, doc.Sentences[0].Text);

    var output = CmdHelper.Run(Path.Combine(Settings.AlgorithmDir, "fasttext"), $"predict-prob \"{modelFileName}.bin\" \"{predictFileName}\"");
    File.Delete(predictFileName);

    // fastText output format: "__label__<intent> <probability>".
    var fields = output.Split(' ');
    doc.Sentences[0].Intent = new TextClassificationResult
    {
        Classifier = "FasttextClassifier",
        Label = fields[0].Split(new string[] { "__label__" }, StringSplitOptions.None)[1],
        Confidence = decimal.Parse(fields[1])
    };
    return true;
}
/// <summary>
/// Prepares the SpaCy "featurize" client. The actual corpus featurization loop
/// is currently disabled, so this method only sets up the client and reports
/// success.
/// </summary>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
    var request = new RestRequest("featurize", Method.GET);
    List<List<decimal>> vectors = new List<List<decimal>>();
    Boolean res = true;
    var dc = new DefaultDataContextLoader().GetDefaultDc();

    // NOTE(review): the loop that posted each utterance and collected its
    // feature vector is commented out upstream; `vectors` stays empty and
    // `res` stays true until it is re-enabled.

    return res;
}
/// <summary>
/// Initializes the tokenizer, registers it on the document, and builds
/// doc.Sentences from the corpus — each sentence carrying its tokens, text,
/// and intent label.
/// </summary>
public async Task<bool> Train(AgentBase agent, NlpDoc doc, PipeModel meta)
{
    Init();
    doc.Tokenizer = this;

    doc.Sentences = agent.Corpus.UserSays
        .Select(say => new NlpDocSentence
        {
            Tokens = _tokenizer.Tokenize(say.Text),
            Text = say.Text,
            Intent = new TextClassificationResult { Label = say.Intent }
        })
        .ToList();

    return true;
}
/// <summary>
/// Classifies the document's first sentence with the configured intent
/// classifier and records the top-ranked result on it.
/// </summary>
public async Task<bool> Predict(AgentBase agent, NlpDoc doc, PipeModel meta)
{
    Init(meta);

    var sentence = doc.Sentences
        .Select(s => new Sentence { Text = s.Text, Words = s.Tokens })
        .First();

    // Results are ranked; the head is the winning (label, score) pair.
    var best = _classifier.Classify(sentence).First();

    doc.Sentences[0].Intent = new TextClassificationResult
    {
        Classifier = "BotSharpIntentClassifier",
        Label = best.Item1,
        Confidence = best.Item2
    };

    return true;
}
/// <summary>
/// Trains the SpaCy text categorizer: builds one-hot intent vectors ("cats")
/// for every utterance and POSTs texts, golds, and the label set to the
/// provider's "textcategorizer" endpoint.
/// </summary>
/// <returns>Always true; the response status is not checked.</returns>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    var texts = new List<String>();
    var golds = new List<JObject>();
    List<string> intentNames = agent.Intents.Select(x => x.Name).Distinct().ToList();

    agent.Intents.ForEach(intent =>
    {
        intent.UserSays.ForEach(userSay =>
        {
            // Concatenate the utterance parts into the full training text.
            var text = String.Join(string.Empty, userSay.Data.Select(say => say.Text));
            // One-hot vector over all intent names: 1 for this utterance's intent.
            var dim = JObject.FromObject(new { });
            intentNames.ForEach(name =>
            {
                dim[name] = (intent.Name == name) ? 1 : 0;
            });
            texts.Add(text);
            golds.Add(JObject.FromObject(new { cats = dim }));
        });
    });

    var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
    var request = new RestRequest("textcategorizer", Method.POST);
    request.RequestFormat = DataFormat.Json;
    // NOTE(review): only the first TWO examples are sent (.Take(2)) — this
    // looks like a debugging leftover; confirm whether the full corpus should
    // be posted here.
    request.AddParameter("application/json", JsonConvert.SerializeObject(new { Texts = texts.Take(2), Golds = golds.Take(2), Labels = intentNames }), ParameterType.RequestBody);

    var response = client.Execute<Result>(request);
    return (true);
}
/// <summary>
/// Posts an entity-recognizer training job to the SpaCy provider. The
/// corpus-to-training-data conversion is currently disabled, so the job is
/// submitted with an empty training set and only the de-duplicated entity
/// names accumulated so far.
/// </summary>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    String modelPath = "./entity_rec_output";
    String newModelName = "test";
    String outputDir = "./entity_rec_output2";
    int iterTimes = 20;
    var trainingData = new List<TrainingNode>();
    var dc = new DefaultDataContextLoader().GetDefaultDc();

    // NOTE(review): the loop that converted corpus utterances into TrainingNode
    // entries (and populated entitiesInTrainingSet) is commented out upstream,
    // so trainingData is empty here.
    entitiesInTrainingSet = entitiesInTrainingSet.Distinct().ToList();

    var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
    var request = new RestRequest("entityrecognizer", Method.POST);
    request.RequestFormat = DataFormat.Json;
    request.AddParameter("application/json", JsonConvert.SerializeObject(new NERTrainingModel(modelPath, newModelName, outputDir, iterTimes, trainingData, entitiesInTrainingSet)), ParameterType.RequestBody);

    var response = client.Execute<Result>(request);
    return true;
}
/// <summary>
/// Trains the Naive Bayes intent classifier on every sentence (using each
/// sentence's intent label as the class) and persists it to the model dir.
/// </summary>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    meta.Model = "classification-nb.model";
    string modelFileName = Path.Combine(Settings.ModelDir, meta.Model);

    var options = new ClassifyOptions { ModelFilePath = modelFileName };
    var classifier = new ClassifierFactory<NaiveBayesClassifier, SentenceFeatureExtractor>(options, SupportedLanguage.English);

    // Project pipeline sentences into the classifier's input shape.
    var trainingSentences = doc.Sentences
        .Select(x => new Sentence { Label = x.Intent.Label, Text = x.Text, Words = x.Tokens })
        .ToList();

    classifier.Train(trainingSentences);

    Console.WriteLine($"Saved model to {modelFileName}");
    return true;
}
/// <summary>This pipe has no prediction step; it always reports success.</summary>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta) => true;