Beispiel #1
0
        /// <summary>
        /// Builds a new decoder tagger bound to the currently held model reader.
        /// </summary>
        /// <returns>
        /// A freshly initialized <c>SegDecoderTagger</c>, or <c>null</c> when
        /// <c>modelReader</c> is <c>null</c>.
        /// </returns>
        public SegDecoderTagger CreateTagger()
        {
            if (modelReader != null)
            {
                SegDecoderTagger decoder = new SegDecoderTagger();
                decoder.init_by_model(modelReader);
                return decoder;
            }

            // No model reader available, so there is nothing to initialize a tagger from.
            return null;
        }
Beispiel #2
0
        /// <summary>
        /// Segments one record by driving the tagger through reset, add, parse and output.
        /// </summary>
        /// <param name="pout">Receives the segment result.</param>
        /// <param name="tagger">Tagger to use; one instance per thread.</param>
        /// <param name="inbuf">Feature set of the record to segment.</param>
        /// <returns>
        /// <c>Utils.ERROR_SUCCESS</c> when the input is empty or every step succeeds;
        /// otherwise the negative code returned by the first failing step.
        /// </returns>
        public int Segment(crf_seg_out[] pout, SegDecoderTagger tagger, List<List<string>> inbuf)
        {
            // Empty input string: nothing to decode.
            if (inbuf.Count == 0)
            {
                return Utils.ERROR_SUCCESS;
            }

            // Each step runs only while every previous one reported a non-negative code.
            int status = tagger.reset();
            if (status >= 0)
            {
                status = tagger.add(inbuf);
            }
            if (status >= 0)
            {
                // parse
                status = tagger.parse();
            }
            if (status >= 0)
            {
                // wrap result
                status = tagger.output(pout);
            }

            return status < 0 ? status : Utils.ERROR_SUCCESS;
        }
Beispiel #3
0
        /// <summary>
        /// Decodes every record found in <c>_property.predictstring</c> and returns the
        /// formatted results as one string.
        /// </summary>
        /// <remarks>
        /// The work is spread over <c>_property.thread</c> workers, each owning its own tagger.
        /// Records are read one at a time under <c>rdLocker</c> and registered in a queue in
        /// read order. A worker writes its result only once its record has reached the head
        /// of that queue, so the output keeps the order of the input.
        /// </remarks>
        /// <param name="_property">
        /// Decoder settings. This method reads <c>predictstring</c> (the input text),
        /// <c>thread</c> (worker count), <c>nbest</c> and <c>probLevel</c> (passed to each
        /// tagger) and <c>outputstyle</c> (0 selects the raw output written by
        /// <c>OutputRawResult</c>; any other value selects the segmented output produced by
        /// <c>ConvertCRFTermOutToStringList</c>).
        /// </param>
        /// <returns>All formatted results, in input order.</returns>
        public string Predict(DecoderArgs _property)
        {
            // Exactly one output style is active per call, so a single writer is enough.
            bool rawOutput = (_property.outputstyle == 0);
            StringWriter writer = new StringWriter();

            // Records in the order they were read; the head is the next one allowed to be written.
            ConcurrentQueue<List<List<string>>> pendingRecords = new ConcurrentQueue<List<List<string>>>();

            using (StringReader sr = new StringReader(_property.predictstring))
            {
                ParallelOptions parallelOption = new ParallelOptions();
                parallelOption.MaxDegreeOfParallelism = _property.thread;

                Parallel.For(0, parallelOption.MaxDegreeOfParallelism, parallelOption, t =>
                {
                    //Create decoder tagger instance. If the running environment is multi-threads, each thread needs a separated instance
                    SegDecoderTagger tagger = _crfWrapper.CreateTagger();
                    if (tagger == null)
                    {
                        // Fail with a descriptive error instead of a NullReferenceException below.
                        throw new System.InvalidOperationException("Unable to create a decoder tagger; has a model been loaded?");
                    }
                    tagger.set_nbest(_property.nbest);
                    tagger.set_vlevel(_property.probLevel);

                    //Initialize result
                    crf_seg_out[] crf_out = new crf_seg_out[_property.nbest];
                    for (int i = 0; i < _property.nbest; i++)
                    {
                        crf_out[i] = new crf_seg_out();
                    }

                    // This buffer is reused for every record the worker handles. Its reference
                    // doubles as the worker's ticket in pendingRecords (at most one entry per
                    // worker is queued at any time).
                    List<List<string>> inbuf = new List<List<string>>();
                    while (true)
                    {
                        // Reading and enqueueing happen under the same lock so the queue order
                        // matches the read order.
                        lock (rdLocker)
                        {
                            if (ReadRecord(inbuf, sr) == false)
                            {
                                break;
                            }

                            pendingRecords.Enqueue(inbuf);
                        }

                        //Call CRFSharp wrapper to predict given string's tags
                        // NOTE(review): the status code returned by Segment is ignored, so a failed
                        // decode still emits whatever crf_out currently holds. Confirm this is intended.
                        List<string> rstList = null;
                        if (rawOutput)
                        {
                            _crfWrapper.Segment((crf_term_out[])crf_out, (DecoderTagger)tagger, inbuf);
                        }
                        else
                        {
                            _crfWrapper.Segment(crf_out, tagger, inbuf);
                            // Format before waiting so this work overlaps with other workers.
                            rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);
                        }

                        // Wait until this worker's record is at the head of the queue. SpinWait
                        // backs off (yield/sleep) instead of burning a core while waiting.
                        List<List<string>> head = null;
                        System.Threading.SpinWait spinner = new System.Threading.SpinWait();
                        while (!(pendingRecords.TryPeek(out head) && head == inbuf))
                        {
                            spinner.SpinOnce();
                        }

                        if (rawOutput)
                        {
                            //Save raw tagged result (with probability)
                            OutputRawResult(inbuf, crf_out, tagger, writer);
                        }
                        else
                        {
                            //Save segmented tagged result
                            foreach (string item in rstList)
                            {
                                writer.WriteLine(item);
                            }
                        }

                        // Hand the turn over to the next record in read order.
                        pendingRecords.TryDequeue(out head);
                    }
                });
            }

            writer.Close();
            return writer.ToString();
        }
Beispiel #4
0
        /// <summary>
        /// Writes the raw tagging result of one record to <paramref name="sw"/>, one block
        /// per n-best candidate.
        /// </summary>
        /// <remarks>
        /// Each token line holds the token's features and the chosen tag, tab-separated.
        /// When <c>tagger.vlevel_</c> is above 1 the line also carries the tag's weight and
        /// a <c>name/probability</c> pair for every tag. When it is above 0 each block is
        /// preceded by a <c>#</c> line with the candidate's <c>prob</c> value. Every block
        /// is followed by an empty line.
        /// </remarks>
        /// <param name="inbuf">Feature set of the record, one inner list per token.</param>
        /// <param name="crf_out">N-best results; the first <c>null</c> entry ends the output.</param>
        /// <param name="tagger">Tagger that produced the results.</param>
        /// <param name="sw">Writer that receives the output.</param>
        /// <returns>The complete current content of <paramref name="sw"/>.</returns>
        private string OutputRawResult(List<List<string>> inbuf, crf_term_out[] crf_out, SegDecoderTagger tagger, StringWriter sw)
        {
            //best N-result
            foreach (crf_term_out candidate in crf_out)
            {
                if (candidate == null)
                {
                    //No more result
                    break;
                }

                StringBuilder block = new StringBuilder();

                //For each token
                for (int tokenIdx = 0; tokenIdx < inbuf.Count; tokenIdx++)
                {
                    //Show all features
                    foreach (string feature in inbuf[tokenIdx])
                    {
                        block.Append(feature).Append("\t");
                    }

                    //Show the best result and its probability
                    block.Append(candidate.result_[tokenIdx]);

                    if (tagger.vlevel_ > 1)
                    {
                        block.Append("\t").Append(candidate.weight_[tokenIdx]).Append("\t");

                        //Show the probability of all tags, tab-separated
                        for (int tagIdx = 0; tagIdx < tagger.ysize_; tagIdx++)
                        {
                            if (tagIdx > 0)
                            {
                                block.Append("\t");
                            }

                            block.Append(tagger.yname(tagIdx)).Append("/").Append(tagger.prob(tokenIdx, tagIdx));
                        }
                    }

                    block.AppendLine();
                }

                //Show the entire sequence probability
                if (tagger.vlevel_ > 0)
                {
                    sw.WriteLine("#{0}", candidate.prob);
                }

                sw.WriteLine(block.ToString().Trim());
                sw.WriteLine();
            }

            return sw.ToString();
        }