/// <summary>
/// Smoke test: loads the NER model from a fixed local path, builds a tagger and
/// result buffers from the default decoder options, and runs one segmentation pass
/// over the data returned by <c>GetTestData()</c>. Nothing is asserted on the output.
/// </summary>
public void TestDecode()
        {
            var crfDecoder = new CRFDecoder();

            // Only the model path is overridden; every other option keeps its default.
            var settings = new DecoderOptions();
            settings.ModelFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\CRF\ner_model";

            // The encoded model must be loaded before a tagger can be created from it.
            crfDecoder.LoadModel(settings.ModelFileName);

            var decoderTagger = crfDecoder.CreateTagger(settings.NBest, settings.MaxWord);
            decoderTagger.set_vlevel(settings.ProbLevel);

            // One output buffer per requested n-best result.
            var results = new CRFSegOut[settings.NBest];
            var slot = 0;
            while (slot < settings.NBest)
            {
                results[slot] = new CRFSegOut(settings.MaxWord);
                slot++;
            }

            // Predict the tags for the test records.
            var records = GetTestData();
            crfDecoder.Segment(results, decoderTagger, records);
        }
Пример #2
0
 /// <summary>
 /// Runs <c>Decode</c> against the English test file with the English NER model and
 /// fails if <c>Decode</c> reports failure.
 /// </summary>
 /// <exception cref="System.InvalidOperationException">Thrown when <c>Decode</c> returns false.</exception>
 public void TestDecode()
 {
     // Decode creates its own decoder instance, so none is constructed here.
     bool result = Decode(new DecoderOptions
     {
         InputFileName  = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\English\test\test.txt",
         ModelFileName  = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\English\model\ner_model_eng",
         OutputFileName = @"C:\Users\haipi\Documents\Projects\BotSharp\Data\English\test\output.txt"
     });

     // Previously the result was silently discarded, so the test could never fail on it.
     if (!result)
     {
         throw new System.InvalidOperationException("Decode reported failure.");
     }
 }
Пример #3
0
        /// <summary>
        /// Tags every sentence of <paramref name="doc"/> with the CRF model named by
        /// <paramref name="meta"/> and stores the merged entities on each sentence.
        /// </summary>
        /// <param name="agent">Not used by this method.</param>
        /// <param name="doc">Document to tag; each sentence's <c>Entities</c> is overwritten.</param>
        /// <param name="meta">Supplies the model file name (<c>meta.Model</c>), resolved against <c>Settings.ModelDir</c>.</param>
        /// <returns>Always <c>true</c>.</returns>
        public async Task <bool> Predict(AgentBase agent, NlpDoc doc, PipeModel meta)
        {
            var decoder = new CRFDecoder();
            var options = new DecoderOptions
            {
                ModelFileName = System.IO.Path.Combine(Settings.ModelDir, meta.Model)
            };

            //Load encoded model from file
            decoder.LoadModel(options.ModelFileName);

            //Create decoder tagger instance.
            var tagger = decoder.CreateTagger(options.NBest, options.MaxWord);

            tagger.set_vlevel(options.ProbLevel);

            //Initialize result: one output buffer per n-best slot
            var crf_out = new CRFSegOut[options.NBest];

            for (var i = 0; i < options.NBest; i++)
            {
                crf_out[i] = new CRFSegOut(options.MaxWord);
            }

            doc.Sentences.ForEach(sent =>
            {
                // One feature row per token: surface text followed by its POS tag.
                List <List <String> > dataset = sent.Tokens
                    .Select(token => new List <String> { token.Text, token.Pos })
                    .ToList();

                //predict given string's tags
                decoder.Segment(crf_out, tagger, dataset);

                // Only the first output buffer is consumed.
                var labels   = crf_out[0].result_;
                var entities = new List <NlpEntity>();

                for (int i = 0; i < sent.Tokens.Count; i++)
                {
                    // Read offsets and text from the sentence being processed. The earlier
                    // code always read doc.Sentences[0], which mislabelled every sentence
                    // after the first.
                    var token = sent.Tokens[i];
                    entities.Add(new NlpEntity
                    {
                        Entity     = labels[i],
                        Start      = token.Start,
                        Value      = token.Text,
                        Confidence = 0,
                        Extrator   = "BotSharpNER"
                    });
                }

                sent.Entities = MergeEntity(sent.Text, entities);
            });

            return(true);
        }
Пример #4
0
        /// <summary>
        /// Decodes the records read from <c>options.InputFileName</c> with the CRF model at
        /// <c>options.ModelFileName</c>, using <c>options.Thread</c> parallel workers, and
        /// writes results to whichever of <c>options.OutputFileName</c> (raw tagged output)
        /// and <c>options.OutputSegFileName</c> (segmented output) are set.
        /// </summary>
        /// <param name="options">Input/output/model file names plus NBest, MaxWord, ProbLevel and Thread.</param>
        /// <returns>Always <c>true</c>; failures surface as exceptions.</returns>
        bool Decode(DecoderOptions options)
        {
            var watch = Stopwatch.StartNew();

            StreamReader sr = null;
            StreamWriter sw = null, swSeg = null;

            try
            {
                sr = new StreamReader(options.InputFileName);

                if (!string.IsNullOrEmpty(options.OutputFileName))
                {
                    sw = new StreamWriter(options.OutputFileName);
                }
                if (!string.IsNullOrEmpty(options.OutputSegFileName))
                {
                    swSeg = new StreamWriter(options.OutputSegFileName);
                }

                //Create CRFSharp wrapper instance. It is shared by all workers.
                var crfWrapper = new CRFDecoder();

                //Load encoded model from file
                //Logger.WriteLine("Loading model from {0}", options.strModelFileName);
                crfWrapper.LoadModel(options.ModelFileName);

                // Records are enqueued in read order; each queue lets the workers emit
                // their results to the matching writer in that same order.
                var queueRecords    = new ConcurrentQueue <List <List <string> > >();
                var queueSegRecords = new ConcurrentQueue <List <List <string> > >();

                var parallelOption = new ParallelOptions
                {
                    MaxDegreeOfParallelism = options.Thread
                };

                Parallel.For(0, options.Thread, parallelOption, t =>
                {
                    //Create decoder tagger instance. If the running environment is multi-threads, each thread needs a separated instance
                    var tagger = crfWrapper.CreateTagger(options.NBest, options.MaxWord);
                    tagger.set_vlevel(options.ProbLevel);

                    //Initialize result
                    var crf_out = new crf_seg_out[options.NBest];
                    for (var i = 0; i < options.NBest; i++)
                    {
                        crf_out[i] = new crf_seg_out(tagger.crf_max_word_num);
                    }

                    // Per-worker record buffer, reused for every record this worker handles.
                    // NOTE(review): presumably ReadRecord clears and refills it — confirm.
                    var inbuf = new List <List <string> >();
                    while (true)
                    {
                        // Reading and enqueueing happen under one lock so that queue order
                        // matches input order.
                        lock (rdLocker)
                        {
                            if (ReadRecord(inbuf, sr) == false)
                            {
                                break;
                            }

                            // Only track ordering for writers that exist; a queue whose
                            // writer is absent is never dequeued and would grow unbounded.
                            if (sw != null)
                            {
                                queueRecords.Enqueue(inbuf);
                            }
                            if (swSeg != null)
                            {
                                queueSegRecords.Enqueue(inbuf);
                            }
                        }

                        //Call CRFSharp wrapper to predict given string's tags
                        if (swSeg != null)
                        {
                            crfWrapper.Segment(crf_out, tagger, inbuf);
                        }
                        else
                        {
                            crfWrapper.Segment((CRFTermOut[])crf_out, (DecoderTagger)tagger, inbuf);
                        }

                        List <List <string> > peek = null;
                        //Save segmented tagged result into file
                        if (swSeg != null)
                        {
                            var rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);

                            // Spin until this worker's record is at the head of the queue.
                            while (peek != inbuf)
                            {
                                queueSegRecords.TryPeek(out peek);
                            }
                            for (int index = 0; index < rstList.Count; index++)
                            {
                                var item = rstList[index];
                                swSeg.WriteLine(item);
                            }
                            queueSegRecords.TryDequeue(out peek);
                            peek = null;
                        }

                        //Save raw tagged result (with probability) into file
                        if (sw != null)
                        {
                            // Same ordering gate as above, for the raw output writer.
                            while (peek != inbuf)
                            {
                                queueRecords.TryPeek(out peek);
                            }
                            OutputRawResultToFile(inbuf, crf_out, tagger, sw);
                            queueRecords.TryDequeue(out peek);
                        }
                    }
                });
            }
            finally
            {
                // Release the file handles even when loading the model or a worker throws.
                if (sr != null)
                {
                    sr.Close();
                }
                if (sw != null)
                {
                    sw.Close();
                }
                if (swSeg != null)
                {
                    swSeg.Close();
                }
            }

            watch.Stop();
            //Logger.WriteLine("Elapsed: {0} ms", watch.ElapsedMilliseconds);

            return(true);
        }