Пример #1
0
        /// <summary>
        /// Tokenizes every utterance in <c>agent.Corpus.UserSays</c> by calling the "tokenizer"
        /// resource of the service configured under "SpaCyProvider:Url", and rebuilds
        /// <c>doc.Sentences</c> with one entry per utterance.
        /// </summary>
        /// <param name="agent">Agent whose corpus supplies the utterances.</param>
        /// <param name="doc">Document whose <c>Sentences</c> list is replaced.</param>
        /// <param name="meta">Pipe metadata; not read by this method.</param>
        /// <returns><c>true</c> only when every request succeeded and returned data.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
            bool allSucceeded = true;

            // NOTE(review): dc is never read below; the call is kept in case GetDefaultDc() has side effects.
            var dc = new DefaultDataContextLoader().GetDefaultDc();
            var corpus = agent.Corpus;

            doc.Sentences = new List<NlpDocSentence>();

            foreach (var usersay in corpus.UserSays)
            {
                Console.WriteLine(usersay.Text);

                // Build a fresh request per utterance: AddParameter appends to the request,
                // so a shared request would also carry the text of all previous utterances.
                var request = new RestRequest("tokenizer", Method.GET);
                request.AddParameter("text", usersay.Text);

                var response = client.Execute<Result>(request);

                // A sentence is added even on failure (with null tokens) so that
                // doc.Sentences stays index-aligned with corpus.UserSays.
                doc.Sentences.Add(new NlpDocSentence
                {
                    Tokens = response.Data?.Tokens,
                    Text   = usersay.Text
                });

                allSucceeded = allSucceeded && response.IsSuccessful && response.Data != null;
            }

            return allSucceeded;
        }
Пример #2
0
        /// <summary>
        /// Classifies the first sentence of <paramref name="doc"/> with a
        /// <c>NaiveBayesClassifier</c> whose model path is built from <c>meta.Model</c>,
        /// and stores the first classification result as that sentence's intent.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose first sentence is classified and updated.</param>
        /// <param name="meta">Supplies the model file name.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var classifyOptions = new ClassifyOptions();
            classifyOptions.ModelFilePath = Path.Combine(Settings.ModelDir, meta.Model);

            var bayes = new ClassifierFactory<NaiveBayesClassifier, SentenceFeatureExtractor>(classifyOptions, SupportedLanguage.English);

            // Only the first sentence of the document is classified.
            var head = doc.Sentences.First();
            var input = new Sentence
            {
                Text  = head.Text,
                Words = head.Tokens
            };

            var ranked = bayes.Classify(input);
            var best   = ranked.First();

            var intent = new TextClassificationResult();
            intent.Classifier = "BotSharpNBayesClassifier";
            intent.Label      = best.Item1;
            intent.Confidence = (decimal)best.Item2;
            doc.Sentences[0].Intent = intent;

            return true;
        }
Пример #3
0
        /// <summary>
        /// Sends all utterances of <c>agent.Corpus.UserSays</c> in a single JSON POST to the
        /// "tokenizer" resource of the service configured under "SpaCyProvider:Url" and
        /// rebuilds <c>doc.Sentences</c> from the returned token lists.
        /// </summary>
        /// <param name="agent">Agent whose corpus supplies the utterances.</param>
        /// <param name="doc">Document whose <c>Sentences</c> list is replaced.</param>
        /// <param name="meta">Pipe metadata; not read by this method.</param>
        /// <returns>
        /// <c>true</c> when the request succeeded and returned a token list for every utterance;
        /// otherwise <c>false</c>, with <c>doc.Sentences</c> left empty.
        /// </returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var client  = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
            var request = new RestRequest("tokenizer", Method.POST);
            var corpus  = agent.Corpus;

            doc.Sentences = new List<NlpDocSentence>();
            List<string> sentencesList = new List<string>();

            foreach (var usersay in corpus.UserSays)
            {
                sentencesList.Add(usersay.Text);
            }

            request.RequestFormat = DataFormat.Json;
            request.AddHeader("Content-Type", "application/json; charset=utf-8");
            request.AddParameter("application/json", JsonConvert.SerializeObject(new Documents(sentencesList)), ParameterType.RequestBody);

            var response = client.Execute<Result>(request);

            // Data is null when the call or the deserialization failed; report failure
            // instead of dereferencing it.
            List<List<Token>> tokens = response.Data == null ? null : response.Data.TokensList;

            if (!response.IsSuccessful || tokens == null || tokens.Count < sentencesList.Count)
            {
                return false;
            }

            for (int i = 0; i < sentencesList.Count; i++)
            {
                doc.Sentences.Add(new NlpDocSentence
                {
                    Tokens = tokens[i],
                    Text   = sentencesList[i]
                });
            }

            return true;
        }
Пример #4
0
        /// <summary>
        /// Queries the service configured under "WitAiEntityRecognizer" with the text of the first
        /// sentence and appends the mapped datetime and location entities to that sentence.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose first sentence is sent and whose entity list is extended.</param>
        /// <param name="meta">Pipe metadata; not read by this method.</param>
        /// <returns>
        /// <c>true</c> when the response was usable; <c>false</c> when the request failed or
        /// returned no entity data.
        /// </returns>
        public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var client  = new RestClient($"{Configuration.GetSection("WitAiEntityRecognizer:url").Value}");
            var request = new RestRequest(Configuration.GetSection("WitAiEntityRecognizer:resource").Value, Method.GET);
            var sentence = doc.Sentences[0];

            request.AddHeader("Authorization", "Bearer " + Configuration.GetSection("WitAiEntityRecognizer:serverAccessToken").Value);
            request.AddQueryParameter("v", Configuration.GetSection("WitAiEntityRecognizer:version").Value);
            request.AddQueryParameter("q", sentence.Text);
            request.AddQueryParameter("verbose", "true");
            request.AddQueryParameter("autosuggest", "true");

            var result = client.Execute<WitAiResponse>(request);

            // Data is null when the call or the deserialization failed; report failure
            // instead of throwing a NullReferenceException.
            if (!result.IsSuccessful || result.Data == null || result.Data.Entities == null)
            {
                return false;
            }

            var entities = result.Data.Entities[0];

            if (entities.Datetime != null)
            {
                sentence.Entities.AddRange(entities.Datetime.Select(x => Map(x)));
            }

            if (entities.Location != null)
            {
                sentence.Entities.AddRange(entities.Location.Select(x => Map(x)));
            }

            return true;
        }
Пример #5
0
        /// <summary>
        /// Prepares SVM training data: maps every utterance to the index of its intent in
        /// <c>agent.Intents</c>, vectorizes the utterance texts and records model metadata on
        /// <paramref name="meta"/>.
        /// </summary>
        /// <remarks>
        /// The call that actually trains the classifier is commented out in this method, so no
        /// model is written here.
        /// </remarks>
        /// <param name="agent">Supplies the corpus and the intent list.</param>
        /// <param name="doc">Not read by this method.</param>
        /// <param name="meta">Receives the model name and a fresh <c>Meta</c> object.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            meta.Model = "classification-svm.model";

            List<string> labels    = new List<string>();
            List<string> sentences = new List<string>();

            foreach (var say in agent.Corpus.UserSays)
            {
                // Exactly one label per sentence: the first intent with a matching name.
                int intentIndex = agent.Intents.FindIndex(intent => intent.Name == say.Intent);

                if (intentIndex < 0)
                {
                    // No matching intent: skip the utterance so that labels and
                    // sentences keep the same length and stay index-aligned.
                    continue;
                }

                labels.Add(intentIndex.ToString());
                sentences.Add(say.Text);
            }

            NLP.Classify.SVMClassifier svmClassifier = new NLP.Classify.SVMClassifier();
            Args args = new Args();

            args.ModelFile = Path.Combine(Configuration.GetValue<String>("BotSharpSVMClassifier:wordvec"), "wordvec_enu.bin");
            var featureSetList = svmClassifier.FeatureSetsGenerator(new VectorGenerator(args).Sentence2Vec(sentences), labels);

            // An alternative that requested vectors from a spaCy REST resource ("batchdoc2vec")
            // was sketched here previously and is not active.

            ClassifyOptions classifyOptions = new ClassifyOptions();

            classifyOptions.ModelFilePath     = Path.Combine(Settings.ModelDir, "svm_classifier_model");
            classifyOptions.TransformFilePath = Path.Combine(Settings.ModelDir, "transform_obj_data");

            // NOTE(review): training is disabled; featureSetList and classifyOptions are currently unused.
            // svmClassifier.Train(featureSetList, classifyOptions);

            meta.Meta = new JObject();
            meta.Meta["compiled at"] = "Aug 31, 2018";
            return true;
        }
Пример #6
0
        /// <summary>
        /// Passes the tokens of every sentence in <paramref name="doc"/> to the tagger.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose sentences are tagged.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            foreach (var docSentence in doc.Sentences)
            {
                var input = new Sentence();
                input.Words = docSentence.Tokens;

                // The value returned by Tag is not used.
                _tagger.Tag(input);
            }

            return true;
        }
Пример #7
0
        /// <summary>
        /// Predicts the intent of the first sentence with the SVM classifier: vectorizes the text,
        /// loads the model and range transform from <c>Settings.ModelDir</c>, and stores the intent
        /// with the highest score on <c>doc.Sentences[0]</c>.
        /// </summary>
        /// <param name="agent">Supplies the intent list; score index i maps to <c>agent.Intents[i]</c>.</param>
        /// <param name="doc">Document whose first sentence is classified and updated.</param>
        /// <param name="meta">Pipe metadata; not read by this method.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
        {
            string predictFileName = Path.Combine(Settings.TempDir, "svm-predict-tempfile.txt");

            string  intent     = null;
            decimal confidence = 0;

            // NOTE(review): this file is written and deleted but not passed to any call in this method.
            File.WriteAllText(predictFileName, doc.Sentences[0].Text);

            try
            {
                var  svmClassifier = new NLP.Classify.SVMClassifier();
                Args args          = new Args();

                args.ModelFile = Path.Combine(Configuration.GetValue<String>("BotSharpSVMClassifier:wordvec"), "wordvec_enu.bin");
                var featureSet = svmClassifier.FeatureSetsGenerator(new VectorGenerator(args).SingleSentence2Vec(doc.Sentences[0].Text), "");

                // An alternative that requested the vector from a spaCy REST resource ("doc2vec")
                // was sketched here previously and is not active.

                ClassifyOptions classifyOptions = new ClassifyOptions();

                classifyOptions.Model     = SVM.BotSharp.MachineLearning.Model.Read(Path.Combine(Settings.ModelDir, "svm_classifier_model"));
                classifyOptions.Transform = SVM.BotSharp.MachineLearning.RangeTransform.Read(Path.Combine(Settings.ModelDir, "transform_obj_data"));

                double[] scores = svmClassifier.Predict(featureSet, classifyOptions)[0];
                double   max    = Double.MinValue;

                // Pick the highest score.
                for (int i = 0; i < scores.Length; i++)
                {
                    if (scores[i] > max)
                    {
                        max        = scores[i];
                        intent     = agent.Intents[i].Name;
                        confidence = (decimal)scores[i];
                    }
                }
            }
            finally
            {
                // Remove the temp file even when vectorization or prediction throws.
                File.Delete(predictFileName);
            }

            doc.Sentences[0].Intent = new TextClassificationResult
            {
                Classifier = "SVMClassifier",
                Label      = intent,
                Confidence = confidence
            };

            return true;
        }
Пример #8
0
        /// <summary>
        /// Calls the "load" resource of the service configured under "SpaCyProvider:Url" and copies
        /// the response into <paramref name="meta"/>: the whole payload minus its "models" entry
        /// goes to <c>meta.Meta</c>, and <c>Models</c> goes to <c>meta.Model</c>.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Not read by this method.</param>
        /// <param name="meta">Receives the model name and metadata.</param>
        /// <returns>
        /// <c>true</c> when the request succeeded and returned data; otherwise <c>false</c>,
        /// with <paramref name="meta"/> left unchanged.
        /// </returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var client   = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
            var request  = new RestRequest("load", Method.GET);
            var response = client.Execute<Result>(request);

            // Data is null when the call or the deserialization failed, and
            // JObject.FromObject(null) would throw; report failure instead.
            if (!response.IsSuccessful || response.Data == null)
            {
                return false;
            }

            meta.Meta = JObject.FromObject(response.Data);
            meta.Meta.Remove("models");
            meta.Model = response.Data.Models;

            return true;
        }
Пример #9
0
        /// <summary>
        /// Trains the SVM classifier on the agent's corpus: each distinct intent name is assigned a
        /// numeric label in order of first appearance, the utterance texts are vectorized, and the
        /// classifier is trained with options built from the "svm_classifier_model" path under
        /// <c>Settings.ModelDir</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>meta.Model</c> is set to a fasttext-style file name although the code
        /// below trains <c>SVMClassifier</c>; confirm which name consumers expect.
        /// </remarks>
        /// <param name="agent">Supplies the training corpus.</param>
        /// <param name="doc">Not read by this method.</param>
        /// <param name="meta">Receives the model name and a fresh <c>Meta</c> object.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            meta.Model = "classification-fasttext.model";

            List<string> sentences = new List<string>();
            List<string> labelNums = new List<string>();
            Dictionary<string, string> labelDic = new Dictionary<string, string>();

            // Single pass: number each intent on first sight and collect the
            // numeric label and text of every utterance in corpus order.
            foreach (var say in agent.Corpus.UserSays)
            {
                string labelNum;

                if (!labelDic.TryGetValue(say.Intent, out labelNum))
                {
                    labelNum = labelDic.Count.ToString();
                    labelDic.Add(say.Intent, labelNum);
                }

                labelNums.Add(labelNum);
                sentences.Add(say.Text);
            }

            NLP.Classify.SVMClassifier svmClassifier = new NLP.Classify.SVMClassifier();
            Args args = new Args();

            args.WordDecoderModelFile = Path.Combine(Settings.ModelDir, "wordvec_enu.bin");
            List<LabeledFeatureSet> featureSetList = svmClassifier.FeatureSetsGenerator(new VectorGenerator(args).Sentence2Vec(sentences), labelNums);

            svmClassifier.Train(featureSetList, new ClassifyOptions(Path.Combine(Settings.ModelDir, "svm_classifier_model")));

            meta.Meta = new JObject();
            meta.Meta["compiled at"] = "Aug 31, 2018";

            return true;
        }
Пример #10
0
        /// <summary>
        /// Registers this instance as the document's tokenizer and tokenizes the text of every
        /// sentence already present in <paramref name="doc"/>.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose sentences receive their tokens.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
        {
            doc.Tokenizer = this;

            foreach (var sentence in doc.Sentences)
            {
                var words = _tokenizer.Tokenize(sentence.Text);
                sentence.Tokens = words;
            }

            return true;
        }
Пример #11
0
        /// <summary>
        /// Issues a GET to the "entitize" resource of the service configured under
        /// "SpaCyProvider:Url" with an empty "text" parameter and reports whether the call succeeded.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Not read by this method.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>The <c>IsSuccessful</c> flag of the response.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var serviceUrl = Configuration.GetSection("SpaCyProvider:Url").Value;
            var spacy      = new RestClient(serviceUrl);

            var entitize = new RestRequest("entitize", Method.GET);
            entitize.AddParameter("text", "");

            // The response payload is not consumed; only the status is reported.
            var reply = spacy.Execute<Result>(entitize);

            return reply.IsSuccessful;
        }
Пример #12
0
        /// <summary>
        /// Tags every sentence of <paramref name="doc"/> with the CRF model named by
        /// <c>meta.Model</c> and stores the merged entities on each sentence.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose sentences must already carry tokens.</param>
        /// <param name="meta">Supplies the model file name under <c>Settings.ModelDir</c>.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Predict(AgentBase agent, NlpDoc doc, PipeModel meta)
        {
            var decoder = new CRFDecoder();
            var options = new DecoderOptions
            {
                ModelFileName = System.IO.Path.Combine(Settings.ModelDir, meta.Model)
            };

            // Load the encoded model from file.
            decoder.LoadModel(options.ModelFileName);

            // Create the decoder tagger instance.
            var tagger = decoder.CreateTagger(options.NBest, options.MaxWord);

            tagger.set_vlevel(options.ProbLevel);

            // Initialize the result buffers.
            var crf_out = new CRFSegOut[options.NBest];

            for (var i = 0; i < options.NBest; i++)
            {
                crf_out[i] = new CRFSegOut(options.MaxWord);
            }

            foreach (var sent in doc.Sentences)
            {
                List<List<string>> dataset = sent.Tokens
                    .Select(token => new List<string> { token.Text, token.Pos })
                    .ToList();

                // Predict the tags of the current sentence.
                decoder.Segment(crf_out, tagger, dataset);

                var entities = new List<NlpEntity>();

                for (int i = 0; i < sent.Tokens.Count; i++)
                {
                    // Use the tokens of the sentence being processed. Indexing
                    // doc.Sentences[0] here would pair later sentences' tags with the
                    // first sentence's tokens.
                    entities.Add(new NlpEntity
                    {
                        Entity     = crf_out[0].result_[i],
                        Start      = sent.Tokens[i].Start,
                        Value      = sent.Tokens[i].Text,
                        Confidence = 0,
                        Extrator   = "BotSharpNER"
                    });
                }

                sent.Entities = MergeEntity(sent.Text, entities);
            }

            return true;
        }
Пример #13
0
        /// <summary>
        /// Calls <c>Init()</c> and then passes the tokens and text of every sentence in
        /// <paramref name="doc"/> to the tagger.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose sentences are tagged.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Predict(AgentBase agent, NlpDoc doc, PipeModel meta)
        {
            Init();

            foreach (var docSentence in doc.Sentences)
            {
                var input = new Sentence();
                input.Words = docSentence.Tokens;
                input.Text  = docSentence.Text;

                // The value returned by Tag is not used.
                _tagger.Tag(input);
            }

            return true;
        }
Пример #14
0
        /// <summary>
        /// Trains a CRFsuite NER model: writes one "entity token pos chunk" line per token (with a
        /// blank line after each sentence) to a corpus file, runs <c>Ner.NerStart</c> on it, invokes
        /// the "crfsuite" executable with the <c>learn</c> command, and records the feature
        /// configuration on <paramref name="meta"/>.
        /// </summary>
        /// <param name="agent">Supplies the corpus; <c>UserSays[i]</c> is paired with <c>doc.Sentences[i]</c>.</param>
        /// <param name="doc">Document whose sentences must already carry tokens.</param>
        /// <param name="meta">Receives the model name and the feature settings.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            // NOTE(review): dc is never read below; the call is kept in case GetDefaultDc() has side effects.
            var dc     = new DefaultDataContextLoader().GetDefaultDc();
            var corpus = agent.Corpus;

            meta.Model = "ner-crf.model";

            List<TrainingIntentExpression<TrainingIntentExpressionPart>> userSays = corpus.UserSays;

            string rawTrainingDataFileName    = Path.Join(Settings.TrainDir, "ner-crf.corpus.txt");
            string parsedTrainingDataFileName = Path.Join(Settings.TrainDir, "ner-crf.parsed.txt");
            string modelFileName = Path.Join(Settings.ModelDir, meta.Model);

            using (FileStream fs = new FileStream(rawTrainingDataFileName, FileMode.Create))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                for (int i = 0; i < doc.Sentences.Count; i++)
                {
                    List<TrainingData> curLine = Merge(doc.Sentences[i].Tokens, userSays[i].Entities);

                    foreach (var trainingData in curLine)
                    {
                        string[] wordParams = { trainingData.Entity, trainingData.Token, trainingData.Pos, trainingData.Chunk };
                        sw.Write(string.Join(" ", wordParams) + "\n");
                    }

                    // A blank line terminates each sentence.
                    sw.Write("\n");
                }

                sw.Flush();
            }

            var fields      = Configuration.GetValue<String>("CRFsuiteEntityRecognizer:fields");
            var uniFeatures = Configuration.GetValue<String>("CRFsuiteEntityRecognizer:uniFeatures");
            var biFeatures  = Configuration.GetValue<String>("CRFsuiteEntityRecognizer:biFeatures");

            new MachineLearning.CRFsuite.Ner()
                .NerStart(rawTrainingDataFileName, parsedTrainingDataFileName, fields, uniFeatures.Split(" "), biFeatures.Split(" "));

            var algorithmDir = Path.Join(AppDomain.CurrentDomain.GetData("ContentRootPath").ToString(), "Algorithms");

            // Quote both paths so that directories containing spaces are passed as single arguments.
            CmdHelper.Run(Path.Join(algorithmDir, "crfsuite"), $"learn -m \"{modelFileName}\" \"{parsedTrainingDataFileName}\""); // --split=3 -x
            Console.WriteLine($"Saved model to {modelFileName}");

            meta.Meta                = new JObject();
            meta.Meta["fields"]      = fields;
            meta.Meta["uniFeatures"] = uniFeatures;
            meta.Meta["biFeatures"]  = biFeatures;

            return true;
        }
Пример #15
0
        /// <summary>
        /// Sends all utterances of <c>agent.Corpus.UserSays</c> in a single JSON POST to the
        /// "nltktokenizesentences" resource of the service configured under "NltkProvider:Url" and
        /// rebuilds <c>doc.Sentences</c> from the returned token lists.
        /// </summary>
        /// <param name="agent">Agent whose corpus supplies the utterances.</param>
        /// <param name="doc">Document whose <c>Sentences</c> list is replaced.</param>
        /// <param name="meta">Pipe metadata; not read by this method.</param>
        /// <returns>
        /// <c>true</c> when the request succeeded and returned a token list for every utterance;
        /// otherwise <c>false</c>, with <c>doc.Sentences</c> left empty.
        /// </returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var client  = new RestClient(Configuration.GetSection("NltkProvider:Url").Value);
            var request = new RestRequest("nltktokenizesentences", Method.POST);

            // NOTE(review): dc is never read below; the call is kept in case GetDefaultDc() has side effects.
            var dc     = new DefaultDataContextLoader().GetDefaultDc();
            var corpus = agent.Corpus;

            doc.Sentences = new List<NlpDocSentence>();
            List<string> sentencesList = new List<string>();

            foreach (var usersay in corpus.UserSays)
            {
                sentencesList.Add(usersay.Text);
            }

            request.RequestFormat = DataFormat.Json;
            request.AddParameter("application/json", JsonConvert.SerializeObject(new Documents(sentencesList)), ParameterType.RequestBody);

            var response = client.Execute<Result>(request);

            // Data is null when the call or the deserialization failed; report failure
            // instead of dereferencing it.
            List<List<Token>> tokens = response.Data == null ? null : response.Data.TokensList;

            if (!response.IsSuccessful || tokens == null || tokens.Count < sentencesList.Count)
            {
                return false;
            }

            for (int i = 0; i < sentencesList.Count; i++)
            {
                doc.Sentences.Add(new NlpDocSentence
                {
                    Tokens = tokens[i],
                    Text   = sentencesList[i]
                });
            }

            return true;
        }
Пример #16
0
        /// <summary>
        /// Registers this instance as the document's tokenizer and rebuilds <c>doc.Sentences</c>
        /// with one tokenized sentence per utterance in <c>agent.Corpus.UserSays</c>.
        /// </summary>
        /// <param name="agent">Agent whose corpus supplies the utterances.</param>
        /// <param name="doc">Document whose <c>Sentences</c> list is replaced.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            doc.Tokenizer = this;
            doc.Sentences = new List<NlpDocSentence>();

            foreach (var utterance in agent.Corpus.UserSays)
            {
                var entry = new NlpDocSentence();
                entry.Tokens = _tokenizer.Tokenize(utterance.Text);
                entry.Text   = utterance.Text;

                doc.Sentences.Add(entry);
            }

            return true;
        }
Пример #17
0
        /// <summary>
        /// Trains a CRF NER model: writes one tab-separated "token, pos, entity" line per token
        /// (with an empty line after each sentence) to a corpus file under <c>Settings.ModelDir</c>
        /// and runs <c>CRFEncoder.Learn</c> with the configured template.
        /// </summary>
        /// <param name="agent">Supplies the corpus; <c>UserSays[i]</c> is paired with <c>doc.Sentences[i]</c>.</param>
        /// <param name="doc">Document whose sentences must already carry tokens.</param>
        /// <param name="meta">Receives the model file name.</param>
        /// <returns>The value returned by <c>CRFEncoder.Learn</c>.</returns>
        public async Task<bool> Train(AgentBase agent, NlpDoc doc, PipeModel meta)
        {
            var trainingCorpus = agent.Corpus;

            meta.Model = "ner-crf.model";

            List<TrainingIntentExpression<TrainingIntentExpressionPart>> utterances = trainingCorpus.UserSays;

            string corpusPath = System.IO.Path.Combine(Settings.ModelDir, "ner-crf.corpus.txt");
            string modelPath  = System.IO.Path.Combine(Settings.ModelDir, meta.Model);

            using (var stream = new FileStream(corpusPath, FileMode.Create))
            using (var writer = new StreamWriter(stream))
            {
                for (int index = 0; index < doc.Sentences.Count; index++)
                {
                    List<TrainingData> rows = Merge(doc, doc.Sentences[index].Tokens, utterances[index].Entities);

                    foreach (var row in rows)
                    {
                        writer.WriteLine(string.Join("\t", new[] { row.Token, row.Pos, row.Entity }));
                    }

                    // An empty line closes the sentence.
                    writer.WriteLine();
                }

                writer.Flush();
            }

            string dataDir      = AppDomain.CurrentDomain.GetData("DataPath").ToString();
            string templatePath = Configuration.GetValue<String>("template");

            // Expand the |App_Data| placeholder to the data directory plus a separator.
            templatePath = templatePath.Replace("|App_Data|", dataDir + System.IO.Path.DirectorySeparatorChar);

            var learnOptions = new EncoderOptions();
            learnOptions.TrainingCorpusFileName = corpusPath;
            learnOptions.TemplateFileName       = templatePath;
            learnOptions.ModelFileName          = modelPath;

            return new CRFEncoder().Learn(learnOptions);
        }
Пример #18
0
        /// <summary>
        /// Calls <c>Init(meta)</c>, converts every sentence of <paramref name="doc"/> (intent label,
        /// text and tokens) into a <c>Sentence</c>, and trains the classifier on the result.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose sentences carry intent labels and tokens.</param>
        /// <param name="meta">Forwarded to <c>Init</c>.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Train(AgentBase agent, NlpDoc doc, PipeModel meta)
        {
            Init(meta);

            var trainingSet = new List<Sentence>();

            foreach (var item in doc.Sentences)
            {
                var sample = new Sentence();
                sample.Label = item.Intent.Label;
                sample.Text  = item.Text;
                sample.Words = item.Tokens;

                trainingSet.Add(sample);
            }

            _classifier.Train(trainingSet);

            Console.WriteLine($"Saved model to {Settings.ModelDir}");

            return true;
        }
Пример #19
0
        /// <summary>
        /// Calls the "tagger" resource of the service configured under "SpaCyProvider:Url" once per
        /// utterance in <c>agent.Corpus.UserSays</c> and reports whether every call succeeded.
        /// </summary>
        /// <param name="agent">Agent whose corpus supplies the utterances.</param>
        /// <param name="doc">Not read by this method.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns><c>true</c> only when every request succeeded and returned data.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var client = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
            bool allSucceeded = true;

            // NOTE(review): dc is never read below; the call is kept in case GetDefaultDc() has side effects.
            var dc     = new DefaultDataContextLoader().GetDefaultDc();
            var corpus = agent.Corpus;

            foreach (var usersay in corpus.UserSays)
            {
                // Build a fresh request per utterance: AddParameter appends to the request,
                // so a shared request would also carry the text of all previous utterances.
                var request = new RestRequest("tagger", Method.GET);
                request.AddParameter("text", usersay.Text);

                var response = client.Execute<Result>(request);

                // NOTE(review): the returned tags (response.Data.Tags) are not stored anywhere by this method.
                allSucceeded = allSucceeded && response.IsSuccessful && response.Data != null;
            }

            return allSucceeded;
        }
Пример #20
0
        /// <summary>
        /// Tokenizes the first sentence of <paramref name="doc"/> through the "tokenizer" resource
        /// of the service configured under "SpaCyProvider:Url" and stores the first returned token
        /// list on that sentence.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose first sentence is tokenized.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>
        /// <c>true</c> when the request succeeded and returned at least one token list; otherwise
        /// <c>false</c>, with the sentence left unchanged.
        /// </returns>
        public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var client  = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
            var request = new RestRequest("tokenizer", Method.GET);

            request.AddParameter("text", doc.Sentences[0].Text);
            var response = client.Execute<Result>(request);

            // Data is null when the call or the deserialization failed.
            List<List<Token>> tokens = response.Data == null ? null : response.Data.TokensList;

            // Report the failure instead of returning true unconditionally.
            if (!response.IsSuccessful || tokens == null || tokens.Count == 0)
            {
                return false;
            }

            doc.Sentences[0].Tokens = tokens[0];

            return true;
        }
Пример #21
0
        /// <summary>
        /// Predicts the intent of the first sentence with the "fasttext" executable: writes the text
        /// to a file under <c>Settings.PredictDir</c>, runs <c>predict-prob</c> against the model
        /// named by <c>meta.Model</c>, and parses "__label__&lt;name&gt; &lt;probability&gt;" from the output.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose first sentence is classified and updated.</param>
        /// <param name="meta">Supplies the model file name (".bin" is appended).</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
        {
            string modelFileName   = Path.Join(Settings.ModelDir, meta.Model);
            string predictFileName = Path.Join(Settings.PredictDir, "fasttext.txt");

            File.WriteAllText(predictFileName, doc.Sentences[0].Text);

            try
            {
                // Quote both paths so that directories containing spaces are passed as single arguments.
                var output = Engines.Classifiers.CmdHelper.Run(Path.Join(Settings.AlgorithmDir, "fasttext"), $"predict-prob \"{modelFileName}.bin\" \"{predictFileName}\"");
                var parts  = output.Split(' ');

                doc.Sentences[0].Intent = new TextClassificationResult
                {
                    Label      = parts[0].Split("__label__")[1],
                    // The tool's output is machine-formatted, so parse it culture-independently.
                    Confidence = decimal.Parse(parts[1], System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture)
                };
            }
            finally
            {
                // Remove the temp file even when the tool or the parsing fails.
                File.Delete(predictFileName);
            }

            return true;
        }
Пример #22
0
        /// <summary>
        /// Trains a fasttext classifier: writes one "__label__&lt;intent&gt; &lt;text&gt;" line per utterance
        /// to a file under <c>Settings.TempDir</c> and runs the "fasttext" executable in
        /// <c>supervised</c> mode with that file as input.
        /// </summary>
        /// <param name="agent">Supplies the training corpus.</param>
        /// <param name="doc">Not read by this method.</param>
        /// <param name="meta">Receives the model file name.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            meta.Model = "classification-fasttext.model";

            string trainingFile = Path.Combine(Settings.TempDir, "classification-fasttext.parsed.txt");
            string modelFile    = Path.Combine(Settings.ModelDir, meta.Model);

            // Assemble the labelled corpus, one line per utterance.
            var lines = new StringBuilder();

            foreach (var say in agent.Corpus.UserSays)
            {
                lines.AppendLine($"__label__{say.Intent} {say.Text}");
            }

            File.WriteAllText(trainingFile, lines.ToString());

            string executable = Path.Combine(Settings.AlgorithmDir, "fasttext");
            string arguments  = $"supervised -input \"{trainingFile}\" -output \"{modelFile}\"";

            // The value returned by the tool run is not inspected.
            CmdHelper.Run(executable, arguments, false);

            Console.WriteLine($"Saved model to {modelFile}");

            return true;
        }
Пример #23
0
        /// <summary>
        /// Predicts the intent of the first sentence with the "fasttext" executable: writes the text
        /// to a file under <c>Settings.TempDir</c>, runs <c>predict-prob</c> against the model named
        /// by <c>meta.Model</c>, and parses "__label__&lt;name&gt; &lt;probability&gt;" from the output.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose first sentence is classified and updated.</param>
        /// <param name="meta">Supplies the model file name (".bin" is appended).</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
        {
            string modelFileName   = Path.Combine(Settings.ModelDir, meta.Model);
            string predictFileName = Path.Combine(Settings.TempDir, "fasttext.txt");

            File.WriteAllText(predictFileName, doc.Sentences[0].Text);

            try
            {
                var output = CmdHelper.Run(Path.Combine(Settings.AlgorithmDir, "fasttext"), $"predict-prob \"{modelFileName}.bin\" \"{predictFileName}\"");
                var parts  = output.Split(' ');

                doc.Sentences[0].Intent = new TextClassificationResult
                {
                    Classifier = "FasttextClassifier",
                    Label      = parts[0].Split(new string[] { "__label__" }, StringSplitOptions.None)[1],
                    // The tool's output is machine-formatted, so parse it culture-independently.
                    Confidence = decimal.Parse(parts[1], System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture)
                };
            }
            finally
            {
                // Remove the temp file even when the tool or the parsing fails.
                File.Delete(predictFileName);
            }

            return true;
        }
Пример #24
0
        /// <summary>
        /// Placeholder training step for the "featurize" resource: it constructs the client, the
        /// request and an empty vector list, but sends nothing.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Not read by this method.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var spacyClient      = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
            var featurizeRequest = new RestRequest("featurize", Method.GET);
            var featureVectors   = new List<List<decimal>>();
            var dataContext      = new DefaultDataContextLoader().GetDefaultDc();

            // The request above is never executed in this method, so there is
            // no response status to fold into the result.
            return true;
        }
Пример #25
0
        /// <summary>
        /// Calls <c>Init()</c>, registers this instance as the document's tokenizer and rebuilds
        /// <c>doc.Sentences</c> with one tokenized, intent-labelled sentence per utterance in
        /// <c>agent.Corpus.UserSays</c>.
        /// </summary>
        /// <param name="agent">Agent whose corpus supplies the utterances.</param>
        /// <param name="doc">Document whose <c>Sentences</c> list is replaced.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Train(AgentBase agent, NlpDoc doc, PipeModel meta)
        {
            Init();

            doc.Tokenizer = this;
            doc.Sentences = new List<NlpDocSentence>();

            foreach (var utterance in agent.Corpus.UserSays)
            {
                var entry = new NlpDocSentence();
                entry.Tokens = _tokenizer.Tokenize(utterance.Text);
                entry.Text   = utterance.Text;

                // Carry the utterance's intent over as the sentence label.
                var labelled = new TextClassificationResult();
                labelled.Label = utterance.Intent;
                entry.Intent   = labelled;

                doc.Sentences.Add(entry);
            }

            return true;
        }
Пример #26
0
        /// <summary>
        /// Calls <c>Init(meta)</c>, classifies the first sentence of <paramref name="doc"/> and
        /// stores the first classification result as that sentence's intent.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose first sentence is classified and updated.</param>
        /// <param name="meta">Forwarded to <c>Init</c>.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Predict(AgentBase agent, NlpDoc doc, PipeModel meta)
        {
            Init(meta);

            // Only the first sentence of the document is classified.
            var head  = doc.Sentences.First();
            var input = new Sentence
            {
                Text  = head.Text,
                Words = head.Tokens
            };

            var ranked = _classifier.Classify(input);
            var best   = ranked.First();

            var intent = new TextClassificationResult();
            intent.Classifier = "BotSharpIntentClassifier";
            intent.Label      = best.Item1;
            intent.Confidence = best.Item2;
            doc.Sentences[0].Intent = intent;

            return true;
        }
Пример #27
0
        /// <summary>
        /// Builds text-categorizer training data from the agent's intents (one text plus a
        /// one-hot "cats" object per user say) and POSTs it as JSON to the "textcategorizer"
        /// resource of the service configured under "SpaCyProvider:Url".
        /// </summary>
        /// <param name="agent">Supplies the intents and their user says.</param>
        /// <param name="doc">Not read by this method.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>The <c>IsSuccessful</c> flag of the response.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            var texts = new List<String>();
            var golds = new List<JObject>();

            List<string> intentNames = agent.Intents.Select(x => x.Name).Distinct().ToList();

            foreach (var intent in agent.Intents)
            {
                foreach (var userSay in intent.UserSays)
                {
                    var text = String.Join(string.Empty, userSay.Data.Select(say => say.Text));
                    var dim  = new JObject();

                    // One-hot encode the owning intent over all known intent names.
                    foreach (var name in intentNames)
                    {
                        dim[name] = (intent.Name == name) ? 1 : 0;
                    }

                    texts.Add(text);
                    golds.Add(JObject.FromObject(new { cats = dim }));
                }
            }

            var client  = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
            var request = new RestRequest("textcategorizer", Method.POST);

            request.RequestFormat = DataFormat.Json;

            // NOTE(review): only the first two samples are sent (Take(2)); this looks like a
            // debugging limit. Confirm before relying on the trained model.
            request.AddParameter("application/json", JsonConvert.SerializeObject(new { Texts = texts.Take(2), Golds = golds.Take(2), Labels = intentNames }), ParameterType.RequestBody);

            var response = client.Execute<Result>(request);

            // Report the outcome of the call instead of returning true unconditionally.
            return response.IsSuccessful;
        }
Пример #28
0
        /// <summary>
        /// POSTs a <c>NERTrainingModel</c> as JSON to the "entityrecognizer" resource of the service
        /// configured under "SpaCyProvider:Url", using fixed model paths and iteration count.
        /// </summary>
        /// <remarks>
        /// The training-node list sent here is always empty: nothing in this method fills it.
        /// <c>entitiesInTrainingSet</c> is declared outside this method and is de-duplicated in place.
        /// </remarks>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Not read by this method.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>The <c>IsSuccessful</c> flag of the response.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            String modelPath    = "./entity_rec_output";
            String newModelName = "test";
            String outputDir    = "./entity_rec_output2";
            int    iterTimes    = 20;

            List<TrainingNode> trainingData = new List<TrainingNode>();

            // NOTE(review): dc is never read below; the call is kept in case GetDefaultDc() has side effects.
            var dc = new DefaultDataContextLoader().GetDefaultDc();

            entitiesInTrainingSet = entitiesInTrainingSet.Distinct().ToList();

            var client  = new RestClient(Configuration.GetSection("SpaCyProvider:Url").Value);
            var request = new RestRequest("entityrecognizer", Method.POST);

            request.RequestFormat = DataFormat.Json;
            request.AddParameter("application/json", JsonConvert.SerializeObject(new NERTrainingModel(modelPath, newModelName, outputDir, iterTimes, trainingData, entitiesInTrainingSet)), ParameterType.RequestBody);

            var response = client.Execute<Result>(request);

            // Report the outcome of the call instead of returning true unconditionally.
            return response.IsSuccessful;
        }
Пример #29
0
        /// <summary>
        /// Trains a <c>NaiveBayesClassifier</c> on the sentences of <paramref name="doc"/> (intent
        /// label, text and tokens), configured with the model path
        /// "classification-nb.model" under <c>Settings.ModelDir</c>.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose sentences carry intent labels and tokens.</param>
        /// <param name="meta">Receives the model file name.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
        {
            meta.Model = "classification-nb.model";
            string modelPath = Path.Combine(Settings.ModelDir, meta.Model);

            var classifyOptions = new ClassifyOptions();
            classifyOptions.ModelFilePath = modelPath;

            var bayes = new ClassifierFactory<NaiveBayesClassifier, SentenceFeatureExtractor>(classifyOptions, SupportedLanguage.English);

            var trainingSet = new List<Sentence>();

            foreach (var item in doc.Sentences)
            {
                var sample = new Sentence();
                sample.Label = item.Intent.Label;
                sample.Text  = item.Text;
                sample.Words = item.Tokens;

                trainingSet.Add(sample);
            }

            bayes.Train(trainingSet);

            Console.WriteLine($"Saved model to {modelPath}");

            return true;
        }
Пример #30
0
 /// <summary>
 /// No-op prediction step: none of the arguments are read or modified.
 /// </summary>
 /// <param name="agent">Not read by this method.</param>
 /// <param name="doc">Not read by this method.</param>
 /// <param name="meta">Not read by this method.</param>
 /// <returns>Always <c>true</c>.</returns>
 public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta) => true;