Esempio n. 1
0
        /// <summary>
        /// Trains a CRF model from the English 1k training corpus and asserts that
        /// <c>CRFEncoder.Learn</c> reports success.
        /// </summary>
        /// <remarks>
        /// Training corpus format: one token per row with tab-separated columns;
        /// sentences are separated by a blank row. Sample:
        /// <code>
        /// !         PUN S
        /// Tokyo     NNP S_LOCATION
        /// and       CC  S
        /// New       NNP B_LOCATION
        /// York      NNP E_LOCATION
        /// are       VBP S
        /// major     JJ  S
        /// financial JJ  S
        /// centers   NNS S
        /// .         PUN S
        /// </code>
        /// </remarks>
        public void TestEncode()
        {
            var crf = new CRFEncoder();

            // Corpus, feature template and output model all live under the same CRF data folder.
            var options = new EncoderOptions
            {
                TrainingCorpusFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\CRF\eng.1k.training",
                TemplateFileName       = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\CRF\template.en",
                ModelFileName          = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\CRF\ner_model"
            };

            bool learned = crf.Learn(options);

            Assert.IsTrue(learned);
        }
Esempio n. 2
0
 /// <summary>
 /// Runs CRF training against the English 1K corpus using the named-entity
 /// template, writing the model to the English model folder.
 /// </summary>
 public void TestEncode()
 {
     var crf = new CRFEncoder();

     // Input corpus, feature template and output model locations.
     var options = new EncoderOptions
     {
         TrainingCorpusFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\English\corpus\eng.1K.training",
         TemplateFileName       = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\English\template.NE",
         ModelFileName          = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\English\model\ner_model_eng"
     };

     // NOTE(review): the outcome of Learn is captured but never asserted, so this
     // test only fails if Learn throws - consider asserting on the result.
     bool learned = crf.Learn(options);
 }
Esempio n. 3
0
        /// <summary>
        /// Writes the training sentences to a tab-separated corpus file in
        /// <c>Settings.ModelDir</c> and trains a CRF model from that file.
        /// </summary>
        /// <param name="agent">Agent whose <c>Corpus.UserSays</c> supplies the entity annotations.</param>
        /// <param name="doc">
        /// Document whose sentences supply the tokens. Sentence <c>i</c> is paired with
        /// <c>UserSays[i]</c>, so both collections must be aligned index-for-index.
        /// </param>
        /// <param name="meta">Pipe metadata; its <c>Model</c> is set to <c>"ner-crf.model"</c>.</param>
        /// <returns>The value returned by <c>CRFEncoder.Learn</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// The "DataPath" application-domain value or the "template" configuration value is missing.
        /// </exception>
        /// <remarks>
        /// The method contains no awaits and therefore runs synchronously; <c>async</c> is kept so
        /// that exceptions keep surfacing through the returned task, as callers already expect.
        /// </remarks>
        public async Task <bool> Train(AgentBase agent, NlpDoc doc, PipeModel meta)
        {
            var userSays = agent.Corpus.UserSays;

            meta.Model = "ner-crf.model";

            string rawTrainingDataFileName = System.IO.Path.Combine(Settings.ModelDir, "ner-crf.corpus.txt");
            string modelFileName           = System.IO.Path.Combine(Settings.ModelDir, meta.Model);

            // Disposing the writer flushes it and closes the underlying stream.
            using (FileStream fs = new FileStream(rawTrainingDataFileName, FileMode.Create))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                for (int i = 0; i < doc.Sentences.Count; i++)
                {
                    // One row per token: token, POS tag, entity label.
                    foreach (TrainingData trainingData in Merge(doc, doc.Sentences[i].Tokens, userSays[i].Entities))
                    {
                        string[] wordParams = { trainingData.Token, trainingData.Pos, trainingData.Entity };
                        sw.WriteLine(string.Join("\t", wordParams));
                    }

                    // A blank row terminates the sentence.
                    sw.WriteLine();
                }
            }

            string contentDir = AppDomain.CurrentDomain.GetData("DataPath")?.ToString();
            if (contentDir == null)
            {
                throw new InvalidOperationException("The application-domain value \"DataPath\" is not set.");
            }

            string template = Configuration.GetValue <String>("template");
            if (string.IsNullOrEmpty(template))
            {
                throw new InvalidOperationException("The configuration value \"template\" is missing or empty.");
            }

            // Expand the |App_Data| placeholder to the content directory (with trailing separator).
            template = template.Replace("|App_Data|", contentDir + System.IO.Path.DirectorySeparatorChar);

            var  encoder = new CRFEncoder();
            bool result  = encoder.Learn(new EncoderOptions
            {
                TrainingCorpusFileName = rawTrainingDataFileName,
                TemplateFileName       = template,
                ModelFileName          = modelFileName,
            });

            return(result);
        }