/// <summary>
/// Smoke test: loads the NER CRF model from a fixed local path and runs the
/// decoder once over the data returned by <c>GetTestData</c>.
/// The test makes no assertions; it only passes if nothing throws.
/// </summary>
public void TestDecode()
{
    var crf = new CRFDecoder();
    var settings = new DecoderOptions
    {
        ModelFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\CRF\ner_model"
    };

    // The encoded model must be loaded before a tagger can be created from it.
    crf.LoadModel(settings.ModelFileName);

    // Tagger configured from the option values for n-best, max word count and probability level.
    var tagger = crf.CreateTagger(settings.NBest, settings.MaxWord);
    tagger.set_vlevel(settings.ProbLevel);

    // One output buffer per requested n-best result.
    var results = new CRFSegOut[settings.NBest];
    var slot = 0;
    while (slot < settings.NBest)
    {
        results[slot] = new CRFSegOut(settings.MaxWord);
        slot++;
    }

    // Run the prediction over the sample data.
    var samples = GetTestData();
    crf.Segment(results, tagger, samples);
}
/// <summary>
/// Runs the CRF model named by <paramref name="meta"/> over every sentence of
/// <paramref name="doc"/> and stores the merged entities on each sentence.
/// </summary>
/// <param name="agent">Not used by this method.</param>
/// <param name="doc">Document whose sentences are tagged; each sentence's <c>Entities</c> is overwritten.</param>
/// <param name="meta">Supplies the model file name, resolved relative to <c>Settings.ModelDir</c>.</param>
/// <returns>Always <c>true</c>; failures surface as exceptions through the returned task.</returns>
public async Task<bool> Predict(AgentBase agent, NlpDoc doc, PipeModel meta)
{
    var decoder = new CRFDecoder();
    var options = new DecoderOptions
    {
        ModelFileName = System.IO.Path.Combine(Settings.ModelDir, meta.Model)
    };

    // Load encoded model from file
    decoder.LoadModel(options.ModelFileName);

    // Create decoder tagger instance.
    var tagger = decoder.CreateTagger(options.NBest, options.MaxWord);
    tagger.set_vlevel(options.ProbLevel);

    // Initialize result buffers, one per n-best slot. They are reused for every sentence.
    var crf_out = new CRFSegOut[options.NBest];
    for (var i = 0; i < options.NBest; i++)
    {
        crf_out[i] = new CRFSegOut(options.MaxWord);
    }

    doc.Sentences.ForEach(sent =>
    {
        // Each token becomes one feature row: [text, part-of-speech].
        List<List<string>> dataset = sent.Tokens
            .Select(token => new List<string> { token.Text, token.Pos })
            .ToList();

        // Predict the tags of the current sentence.
        decoder.Segment(crf_out, tagger, dataset);

        // Only the first (index 0) result is consumed.
        var labels = crf_out[0].result_;

        var entities = new List<NlpEntity>();
        for (int i = 0; i < sent.Tokens.Count; i++)
        {
            // Read token data from the sentence being processed. Indexing
            // doc.Sentences[0] here would attach the first sentence's offsets
            // and text to every later sentence.
            var token = sent.Tokens[i];
            entities.Add(new NlpEntity
            {
                Entity = labels[i],
                Start = token.Start,
                Value = token.Text,
                Confidence = 0,
                Extrator = "BotSharpNER"
            });
        }

        sent.Entities = MergeEntity(sent.Text, entities);
    });

    return true;
}
/// <summary>
/// Decodes the records of <c>options.InputFileName</c> with the CRF model at
/// <c>options.ModelFileName</c> on <c>options.Thread</c> parallel workers, and
/// writes the results to the configured output files in the order the
/// records were read.
/// </summary>
/// <param name="options">
/// Decoder settings. <c>OutputFileName</c> (raw tagged result) and
/// <c>OutputSegFileName</c> (segmented result) are each optional; an output is
/// skipped when its file name is null or empty.
/// </param>
/// <returns>Always <c>true</c>; failures surface as exceptions.</returns>
bool Decode(DecoderOptions options)
{
    var parallelOption = new ParallelOptions();
    var watch = Stopwatch.StartNew();

    StreamReader sr = null;
    StreamWriter sw = null, swSeg = null;
    try
    {
        sr = new StreamReader(options.InputFileName);
        if (!string.IsNullOrEmpty(options.OutputFileName))
        {
            sw = new StreamWriter(options.OutputFileName);
        }
        if (!string.IsNullOrEmpty(options.OutputSegFileName))
        {
            swSeg = new StreamWriter(options.OutputSegFileName);
        }

        // Create CRFSharp wrapper instance. It is shared by all workers.
        var crfWrapper = new CRFDecoder();

        // Load encoded model from file
        crfWrapper.LoadModel(options.ModelFileName);

        // Each queue records the read order of the buffers still waiting to be
        // written to the corresponding output file.
        var queueRecords = new ConcurrentQueue<List<List<string>>>();
        var queueSegRecords = new ConcurrentQueue<List<List<string>>>();

        parallelOption.MaxDegreeOfParallelism = options.Thread;
        Parallel.For(0, options.Thread, parallelOption, t =>
        {
            // Create decoder tagger instance. In a multi-threaded environment
            // each thread needs its own instance.
            var tagger = crfWrapper.CreateTagger(options.NBest, options.MaxWord);
            tagger.set_vlevel(options.ProbLevel);

            // Initialize result
            var crf_out = new crf_seg_out[options.NBest];
            for (var i = 0; i < options.NBest; i++)
            {
                crf_out[i] = new crf_seg_out(tagger.crf_max_word_num);
            }

            // One buffer per worker, reused for every record this worker reads.
            var inbuf = new List<List<string>>();
            while (true)
            {
                // Reading and enqueuing happen under the same lock so that the
                // queue order matches the order of records in the input file.
                lock (rdLocker)
                {
                    if (ReadRecord(inbuf, sr) == false)
                    {
                        break;
                    }

                    // Only enqueue for outputs that will actually be written;
                    // an entry is dequeued only after its write, so enqueuing
                    // for a disabled output would grow that queue forever.
                    if (sw != null)
                    {
                        queueRecords.Enqueue(inbuf);
                    }
                    if (swSeg != null)
                    {
                        queueSegRecords.Enqueue(inbuf);
                    }
                }

                // Call CRFSharp wrapper to predict given string's tags
                if (swSeg != null)
                {
                    crfWrapper.Segment(crf_out, tagger, inbuf);
                }
                else
                {
                    crfWrapper.Segment((CRFTermOut[])crf_out, (DecoderTagger)tagger, inbuf);
                }

                List<List<string>> peek = null;

                // Save segmented tagged result into file
                if (swSeg != null)
                {
                    var rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);

                    // Spin until this worker's buffer is at the head of the
                    // queue, i.e. all earlier records have been written.
                    while (peek != inbuf)
                    {
                        queueSegRecords.TryPeek(out peek);
                    }

                    for (int index = 0; index < rstList.Count; index++)
                    {
                        var item = rstList[index];
                        swSeg.WriteLine(item);
                    }

                    queueSegRecords.TryDequeue(out peek);
                    peek = null;
                }

                // Save raw tagged result (with probability) into file
                if (sw != null)
                {
                    // Same head-of-queue wait as above, for the raw output.
                    while (peek != inbuf)
                    {
                        queueRecords.TryPeek(out peek);
                    }

                    OutputRawResultToFile(inbuf, crf_out, tagger, sw);
                    queueRecords.TryDequeue(out peek);
                }
            }
        });
    }
    finally
    {
        // Close the streams even when model loading or a worker throws, so
        // file handles are released and buffered output is flushed.
        if (sr != null)
        {
            sr.Close();
        }
        if (sw != null)
        {
            sw.Close();
        }
        if (swSeg != null)
        {
            swSeg.Close();
        }
        watch.Stop();
    }

    return true;
}