/// <summary>
/// Trains the SVM intent classifier from the agent's corpus and writes the model
/// ("svm_classifier_model") and range transform ("transform_obj_data") under
/// <c>Settings.ModelDir</c>.
/// </summary>
/// <param name="agent">Supplies the intent list and the labelled utterances (<c>Corpus.UserSays</c>).</param>
/// <param name="doc">Not used by this step.</param>
/// <param name="meta">Pipe model descriptor; <c>Model</c> and <c>Meta</c> are overwritten.</param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// The body runs synchronously; the method stays <c>async</c> so the signature and the
/// way exceptions surface (through the returned task) are unchanged for callers.
/// </remarks>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    meta.Model = "classification-svm.model";

    // Each utterance is labelled with the index of its intent inside agent.Intents.
    // Labels and sentences are appended together so the two lists stay aligned:
    // an utterance whose intent is not found is skipped entirely instead of
    // contributing a sentence without a label (which would shift every later label).
    var labels = new List<string>();
    var sentences = new List<string>();
    foreach (var say in agent.Corpus.UserSays)
    {
        int intentIndex = agent.Intents.FindIndex(intent => intent.Name == say.Intent);
        if (intentIndex < 0)
        {
            continue;
        }

        labels.Add(intentIndex.ToString());
        sentences.Add(say.Text);
    }

    NLP.Classify.SVMClassifier svmClassifier = new NLP.Classify.SVMClassifier();

    // Word-vector model used to turn every sentence into a feature vector.
    Args args = new Args();
    args.ModelFile = Path.Combine(Configuration.GetValue<String>("BotSharpSVMClassifier:wordvec"), "wordvec_enu.bin");
    var featureSetList = svmClassifier.FeatureSetsGenerator(new VectorGenerator(args).Sentence2Vec(sentences), labels);

    // Output locations; Predict reads the same two file names back.
    ClassifyOptions classifyOptions = new ClassifyOptions();
    classifyOptions.ModelFilePath = Path.Combine(Settings.ModelDir, "svm_classifier_model");
    classifyOptions.TransformFilePath = Path.Combine(Settings.ModelDir, "transform_obj_data");

    svmClassifier.Train(featureSetList, classifyOptions);

    meta.Meta = new JObject();
    meta.Meta["compiled at"] = "Aug 31, 2018";

    return true;
}
/// <summary>
/// Classifies the first sentence of <paramref name="doc"/> with the persisted SVM model
/// and stores the best-scoring intent on that sentence.
/// </summary>
/// <param name="agent">Supplies the intent list; score index <c>i</c> is mapped to <c>agent.Intents[i]</c>.</param>
/// <param name="doc">Document whose first sentence is classified; its <c>Intent</c> is overwritten.</param>
/// <param name="meta">Not used by this step.</param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// The previous implementation wrote the sentence to a fixed-name temp file that nothing
/// read and then deleted it; that dead I/O raced between concurrent calls and leaked the
/// file when classification threw, so it has been removed.
/// The body runs synchronously; the method stays <c>async</c> so the signature and the
/// way exceptions surface (through the returned task) are unchanged for callers.
/// </remarks>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
{
    string text = doc.Sentences[0].Text;

    var svmClassifier = new NLP.Classify.SVMClassifier();

    // Same word-vector model that Train uses to build feature vectors.
    Args args = new Args();
    args.ModelFile = Path.Combine(Configuration.GetValue<String>("BotSharpSVMClassifier:wordvec"), "wordvec_enu.bin");
    var featureSet = svmClassifier.FeatureSetsGenerator(new VectorGenerator(args).SingleSentence2Vec(text), "");

    // Load the model and range transform from the file names Train writes.
    ClassifyOptions classifyOptions = new ClassifyOptions();
    classifyOptions.Model = SVM.BotSharp.MachineLearning.Model.Read(Path.Combine(Settings.ModelDir, "svm_classifier_model"));
    classifyOptions.Transform = SVM.BotSharp.MachineLearning.RangeTransform.Read(Path.Combine(Settings.ModelDir, "transform_obj_data"));

    double[] scores = svmClassifier.Predict(featureSet, classifyOptions)[0];

    // Arg-max over the scores; the first highest score wins on ties.
    int bestIndex = -1;
    double bestScore = Double.MinValue;
    for (int i = 0; i < scores.Length; i++)
    {
        if (scores[i] > bestScore)
        {
            bestScore = scores[i];
            bestIndex = i;
        }
    }

    // With no usable score the label stays null and the confidence 0, as before.
    string intent = null;
    decimal confidence = 0;
    if (bestIndex >= 0)
    {
        intent = agent.Intents[bestIndex].Name;
        confidence = (decimal)bestScore;
    }

    doc.Sentences[0].Intent = new TextClassificationResult
    {
        Classifier = "SVMClassifier",
        Label = intent,
        Confidence = confidence
    };

    return true;
}
/// <summary>
/// Trains an SVM classifier on the agent's corpus, numbering the intent labels in order
/// of first appearance, and writes the model to "svm_classifier_model" under
/// <c>Settings.ModelDir</c>.
/// </summary>
/// <param name="agent">Supplies the labelled utterances (<c>Corpus.UserSays</c>).</param>
/// <param name="doc">Not used by this step.</param>
/// <param name="meta">Pipe model descriptor; <c>Model</c> and <c>Meta</c> are overwritten.</param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// NOTE(review): <c>meta.Model</c> is set to a fastText-style name although the code
/// trains the SVM classifier — confirm this is intended.
/// The body runs synchronously; the method stays <c>async</c> so the signature and the
/// way exceptions surface (through the returned task) are unchanged for callers.
/// </remarks>
public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
{
    meta.Model = "classification-fasttext.model";

    // Single pass over the corpus: collect sentences and translate each intent name
    // into a numeric label ("0", "1", ...) assigned on first appearance.
    var sentences = new List<string>();
    var labelNums = new List<string>();
    var labelDic = new Dictionary<string, string>();
    foreach (var say in agent.Corpus.UserSays)
    {
        string labelNum;
        if (!labelDic.TryGetValue(say.Intent, out labelNum))
        {
            // Count equals the number of labels seen so far, i.e. the next free id.
            labelNum = labelDic.Count.ToString();
            labelDic.Add(say.Intent, labelNum);
        }

        labelNums.Add(labelNum);
        sentences.Add(say.Text);
    }

    NLP.Classify.SVMClassifier svmClassifier = new NLP.Classify.SVMClassifier();

    // Word-vector model used to turn every sentence into a feature vector.
    Args args = new Args();
    args.WordDecoderModelFile = Path.Combine(Settings.ModelDir, "wordvec_enu.bin");
    List<LabeledFeatureSet> featureSetList = svmClassifier.FeatureSetsGenerator(new VectorGenerator(args).Sentence2Vec(sentences), labelNums);

    svmClassifier.Train(featureSetList, new ClassifyOptions(Path.Combine(Settings.ModelDir, "svm_classifier_model")));

    meta.Meta = new JObject();
    meta.Meta["compiled at"] = "Aug 31, 2018";

    return true;
}