/// <summary>
/// Builds a new tagger bound to the currently loaded model.
/// </summary>
/// <returns>
/// A freshly initialized <c>SegDecoderTagger</c>, or <c>null</c> when
/// <c>modelReader</c> is null.
/// </returns>
public SegDecoderTagger CreateTagger()
{
    if (modelReader != null)
    {
        SegDecoderTagger created = new SegDecoderTagger();
        created.init_by_model(modelReader);
        return created;
    }

    // Without a model reader there is nothing to initialize the tagger from.
    return null;
}
/// <summary>
/// Segments the given text by running it through the tagger pipeline:
/// reset, add, parse, output.
/// </summary>
/// <param name="pout">Receives the segment result.</param>
/// <param name="tagger">Tagger instance (one per thread).</param>
/// <param name="inbuf">Feature set of the text to segment.</param>
/// <returns>
/// <c>Utils.ERROR_SUCCESS</c> when the input is empty or every stage succeeds;
/// otherwise the negative code returned by the first stage that failed.
/// </returns>
public int Segment(crf_seg_out[] pout, SegDecoderTagger tagger, List<List<string>> inbuf)
{
    // An empty input string is trivially successful; the tagger is not touched.
    if (inbuf.Count == 0)
    {
        return Utils.ERROR_SUCCESS;
    }

    // Each stage runs only if all previous stages reported a non-negative status.
    int status = tagger.reset();
    if (status >= 0)
    {
        status = tagger.add(inbuf);
    }
    if (status >= 0)
    {
        // Parse
        status = tagger.parse();
    }
    if (status >= 0)
    {
        // Wrap result
        status = tagger.output(pout);
    }

    return status < 0 ? status : Utils.ERROR_SUCCESS;
}
/// <summary>
/// Tags every record found in <c>_property.predictstring</c> and returns the
/// formatted results as one string, in the order the records were read.
/// </summary>
/// <param name="_property">
/// Decoder settings: <c>predictstring</c> (input text), <c>thread</c> (worker count),
/// <c>nbest</c>, <c>probLevel</c> and <c>outputstyle</c> (0 selects the raw per-token
/// output written by <c>OutputRawResult</c>; any other value selects the segmented output).
/// </param>
/// <returns>The accumulated output text for all records.</returns>
/// <remarks>
/// Workers publish results in read order by waiting until their own record is at the
/// head of a shared queue. If a worker throws between enqueueing and dequeueing its
/// record, that record stays at the head and the remaining workers never get their turn.
/// </remarks>
public string Predict(DecoderArgs _property)
{
    // Exactly one output style is active per call.
    bool rawOutput = _property.outputstyle == 0;

    ParallelOptions parallelOption = new ParallelOptions();

    // Records in read order; only the worker owning the head record may write.
    // A single queue is enough because only one output style is produced per call.
    ConcurrentQueue<List<List<string>>> pendingRecords = new ConcurrentQueue<List<List<string>>>();

    using (StringReader sr = new StringReader(_property.predictstring))
    using (StringWriter writer = new StringWriter())
    {
        parallelOption.MaxDegreeOfParallelism = _property.thread;

        // Pass the options so the configured degree of parallelism is actually applied.
        Parallel.For(0, parallelOption.MaxDegreeOfParallelism, parallelOption, t =>
        {
            // Create decoder tagger instance. In a multi-threaded run each thread
            // needs its own separate instance.
            SegDecoderTagger tagger = _crfWrapper.CreateTagger();
            if (tagger == null)
            {
                // Fail with a clear message instead of a NullReferenceException below.
                throw new System.InvalidOperationException("Unable to create a decoder tagger; no model appears to be loaded.");
            }
            tagger.set_nbest(_property.nbest);
            tagger.set_vlevel(_property.probLevel);

            // Initialize result slots, one per requested n-best entry.
            crf_seg_out[] crf_out = new crf_seg_out[_property.nbest];
            for (int i = 0; i < _property.nbest; i++)
            {
                crf_out[i] = new crf_seg_out();
            }

            // The same buffer instance is reused for every record this worker handles;
            // its reference identity is what marks "this worker's turn" in the queue.
            List<List<string>> inbuf = new List<List<string>>();
            while (true)
            {
                // Reading and enqueueing happen under one lock so that queue order
                // matches read order.
                lock (rdLocker)
                {
                    if (!ReadRecord(inbuf, sr))
                    {
                        break;
                    }

                    pendingRecords.Enqueue(inbuf);
                }

                // Call CRFSharp wrapper to predict the given string's tags.
                List<string> rstList = null;
                if (rawOutput)
                {
                    _crfWrapper.Segment((crf_term_out[])crf_out, (DecoderTagger)tagger, inbuf);
                }
                else
                {
                    _crfWrapper.Segment(crf_out, tagger, inbuf);
                    rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);
                }

                // Wait until this worker's record reaches the head of the queue.
                // SpinWait backs off and yields instead of burning a core.
                System.Threading.SpinWait spinner = new System.Threading.SpinWait();
                List<List<string>> head;
                while (!pendingRecords.TryPeek(out head) || head != inbuf)
                {
                    spinner.SpinOnce();
                }

                if (rawOutput)
                {
                    // Save raw tagged result (with probability).
                    OutputRawResult(inbuf, crf_out, tagger, writer);
                }
                else
                {
                    // Save segmented tagged result.
                    foreach (string item in rstList)
                    {
                        writer.WriteLine(item);
                    }
                }

                // Release the turn to the worker owning the next record.
                pendingRecords.TryDequeue(out head);
            }
        });

        return writer.ToString();
    }
}
/// <summary>
/// Writes the raw tagging result (optionally with probabilities) for one record to
/// <paramref name="sw"/>, one section per n-best entry.
/// </summary>
/// <param name="inbuf">Tokens of the record; each token is its list of feature strings.</param>
/// <param name="crf_out">N-best results; a null entry ends the output.</param>
/// <param name="tagger">Tagger supplying <c>vlevel_</c>, <c>ysize_</c>, tag names and per-tag probabilities.</param>
/// <param name="sw">Writer that receives the formatted text.</param>
/// <returns>Everything written to <paramref name="sw"/> so far, including earlier content.</returns>
private string OutputRawResult(List<List<string>> inbuf, crf_term_out[] crf_out, SegDecoderTagger tagger, StringWriter sw)
{
    // Walk the best-N results.
    for (int rank = 0; rank < crf_out.Length; rank++)
    {
        crf_term_out candidate = crf_out[rank];
        if (candidate == null)
        {
            // No more results
            break;
        }

        StringBuilder text = new StringBuilder();

        // One output line per token.
        for (int tokenIdx = 0; tokenIdx < inbuf.Count; tokenIdx++)
        {
            // All features of the token, each followed by a tab.
            foreach (string feature in inbuf[tokenIdx])
            {
                text.Append(feature).Append("\t");
            }

            // The chosen tag for this token.
            text.Append(candidate.result_[tokenIdx]);

            if (tagger.vlevel_ <= 1)
            {
                text.AppendLine();
                continue;
            }

            // The chosen tag's weight, then name/probability for every tag.
            text.Append("\t").Append(candidate.weight_[tokenIdx]).Append("\t");
            int lastTag = tagger.ysize_ - 1;
            for (int tagIdx = 0; tagIdx < tagger.ysize_; tagIdx++)
            {
                text.Append(tagger.yname(tagIdx));
                text.Append("/");
                text.Append(tagger.prob(tokenIdx, tagIdx));
                if (tagIdx < lastTag)
                {
                    text.Append("\t");
                }
            }
            text.AppendLine();
        }

        // Header line with the probability of the entire sequence.
        if (tagger.vlevel_ > 0)
        {
            sw.WriteLine("#{0}", candidate.prob);
        }

        sw.WriteLine(text.ToString().Trim());
        sw.WriteLine();
    }

    return sw.ToString();
}