/// <summary>
/// Runs <c>CRFEncoder.Learn</c> against the English 1k training corpus and
/// asserts that it reports success.
/// </summary>
public void TestEncode()
{
    /*
     * Training corpus format: columns are split by tab, and sentences are
     * separated by a blank row.
     *
     * !          PUN   S
     * Tokyo      NNP   S_LOCATION
     * and        CC    S
     * New        NNP   B_LOCATION
     * York       NNP   E_LOCATION
     * are        VBP   S
     * major      JJ    S
     * financial  JJ    S
     * centers    NNS   S
     * .          PUN   S
     */
    var encoder = new CRFEncoder();

    var options = new EncoderOptions
    {
        TrainingCorpusFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\CRF\eng.1k.training",
        TemplateFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\CRF\template.en",
        ModelFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\CRF\ner_model"
    };

    bool learned = encoder.Learn(options);

    Assert.IsTrue(learned);
}
/// <summary>
/// Runs <c>CRFEncoder.Learn</c> with the English NE template and the
/// eng.1K training corpus, writing the model to the English model directory.
/// </summary>
public void TestEncode()
{
    var trainer = new CRFEncoder();

    var settings = new EncoderOptions
    {
        TrainingCorpusFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\English\corpus\eng.1K.training",
        TemplateFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\English\template.NE",
        ModelFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\English\model\ner_model_eng"
    };

    // NOTE(review): the outcome of Learn is captured but never asserted, so this
    // only fails if Learn throws - confirm whether an assertion is intended.
    bool succeeded = trainer.Learn(settings);
}
/// <summary>
/// Exports the sentences of <paramref name="doc"/> to a tab-separated corpus file
/// ("ner-crf.corpus.txt" in <c>Settings.ModelDir</c>) and runs <c>CRFEncoder.Learn</c>
/// on it, writing the model to "ner-crf.model" in the same directory.
/// </summary>
/// <param name="agent">Agent whose <c>Corpus.UserSays</c> supplies the entities for each sentence.</param>
/// <param name="doc">Document whose sentences are exported; sentence <c>i</c> is paired with <c>UserSays[i]</c>.</param>
/// <param name="meta">Pipe metadata; its <c>Model</c> property is set to "ner-crf.model".</param>
/// <returns>The value returned by <c>CRFEncoder.Learn</c>.</returns>
/// <exception cref="InvalidOperationException">
/// The "DataPath" AppDomain value or the "template" configuration value is missing.
/// </exception>
/// <remarks>
/// The body contains no awaits, so all work runs synchronously. The <c>async</c>
/// modifier is kept so that exceptions still surface through the returned task.
/// </remarks>
public async Task<bool> Train(AgentBase agent, NlpDoc doc, PipeModel meta)
{
    var corpus = agent.Corpus;
    meta.Model = "ner-crf.model";

    var userSays = corpus.UserSays;

    string rawTrainingDataFileName = System.IO.Path.Combine(Settings.ModelDir, "ner-crf.corpus.txt");
    string modelFileName = System.IO.Path.Combine(Settings.ModelDir, meta.Model);

    // Disposing the writer flushes it and closes the underlying stream,
    // so no explicit Flush call is needed.
    using (var fs = new FileStream(rawTrainingDataFileName, FileMode.Create))
    using (var sw = new StreamWriter(fs))
    {
        for (int i = 0; i < doc.Sentences.Count; i++)
        {
            List<TrainingData> curLine = Merge(doc, doc.Sentences[i].Tokens, userSays[i].Entities);

            // One row per token: token, POS tag and entity label separated by tabs.
            foreach (TrainingData trainingData in curLine)
            {
                sw.WriteLine(string.Join("\t", trainingData.Token, trainingData.Pos, trainingData.Entity));
            }

            // A blank row terminates the sentence.
            sw.WriteLine();
        }
    }

    object dataPath = AppDomain.CurrentDomain.GetData("DataPath");
    if (dataPath == null)
    {
        throw new InvalidOperationException(
            "AppDomain data \"DataPath\" is not set; it is required to resolve the CRF template path.");
    }

    string template = Configuration.GetValue<string>("template");
    if (template == null)
    {
        throw new InvalidOperationException(
            "Configuration value \"template\" is missing; it must point to the CRF template file.");
    }

    // Expand the |App_Data| placeholder to the content directory (with trailing separator).
    string contentDir = dataPath.ToString();
    template = template.Replace("|App_Data|", contentDir + System.IO.Path.DirectorySeparatorChar);

    var encoder = new CRFEncoder();
    bool result = encoder.Learn(new EncoderOptions
    {
        TrainingCorpusFileName = rawTrainingDataFileName,
        TemplateFileName = template,
        ModelFileName = modelFileName,
    });

    return result;
}