/// <summary>
/// Converts CRFSharp segmentation output into a list of display strings, one per n-best result.
/// </summary>
/// <param name="inbuf">Input record; the first column of every row is concatenated to rebuild the source text.</param>
/// <param name="crf_out">N-best results; scanning stops at the first null entry.</param>
/// <returns>One string per result: tokens separated by single spaces, each followed by "[tag]" when its tag is non-empty.</returns>
private List<string> ConvertCRFTermOutToStringList(List<List<string>> inbuf, crf_seg_out[] crf_out)
{
    //Rebuild the original text from the first feature column of each row
    StringBuilder textBuilder = new StringBuilder();
    foreach (List<string> row in inbuf)
    {
        textBuilder.Append(row[0]);
    }
    string fullText = textBuilder.ToString();

    List<string> lines = new List<string>();
    foreach (crf_seg_out segOut in crf_out)
    {
        if (segOut == null)
        {
            //No more result
            break;
        }

        StringBuilder line = new StringBuilder();
        for (int k = 0; k < segOut.Count; k++)
        {
            var token = segOut.tokenList[k];

            //Token offsets and lengths index into the rebuilt text
            line.Append(fullText.Substring(token.offset, token.length));

            string tag = token.strTag;
            if (tag.Length > 0)
            {
                line.Append("[");
                line.Append(tag);
                line.Append("]");
            }
            line.Append(" ");
        }

        //Trim drops the trailing separator (and any other surrounding whitespace)
        lines.Add(line.ToString().Trim());
    }

    return lines;
}
/// <summary>
/// Tags the text in <c>_property.predictstring</c> with the CRF model and returns the complete output as one string.
/// </summary>
/// <param name="_property">
/// Decoder settings read here: <c>predictstring</c> (input text), <c>outputstyle</c> (0 selects raw tagged
/// output, any other value selects segmented output), <c>thread</c> (number of parallel workers),
/// <c>nbest</c> and <c>probLevel</c> (passed to each tagger).
/// </param>
/// <returns>The accumulated output text. Records are written in the order they were read from the input.</returns>
public string Predict(DecoderArgs _property)
{
    ParallelOptions parallelOption = new ParallelOptions();

    //Exactly one writer is created: raw tagged output for outputstyle 0, segmented output otherwise
    StringWriter sw = null, swSeg = null;
    if (_property.outputstyle == 0)
    {
        sw = new StringWriter();
    }
    else
    {
        swSeg = new StringWriter();
    }

    //Records in read order. A worker may only write its result while its record is at the head,
    //which keeps the output in input order. A single queue is enough because only one writer is active.
    ConcurrentQueue<List<List<string>>> pendingRecords = new ConcurrentQueue<List<List<string>>>();

    using (StringReader sr = new StringReader(_property.predictstring))
    {
        parallelOption.MaxDegreeOfParallelism = _property.thread;
        Parallel.For(0, parallelOption.MaxDegreeOfParallelism, t =>
        {
            //Create decoder tagger instance. If the running environment is multi-threads, each thread needs a separated instance
            SegDecoderTagger tagger = _crfWrapper.CreateTagger();
            tagger.set_nbest(_property.nbest);
            tagger.set_vlevel(_property.probLevel);

            //Initialize result
            crf_seg_out[] crf_out = new crf_seg_out[_property.nbest];
            for (int i = 0; i < _property.nbest; i++)
            {
                crf_out[i] = new crf_seg_out();
            }

            //Each worker reuses one buffer; its reference identifies the worker's record in the queue.
            //NOTE(review): this relies on ReadRecord refilling inbuf from scratch on each call - confirm.
            List<List<string>> inbuf = new List<List<string>>();
            while (true)
            {
                //Reading and enqueueing happen under the same lock so queue order equals read order
                lock (rdLocker)
                {
                    if (ReadRecord(inbuf, sr) == false)
                    {
                        break;
                    }
                    pendingRecords.Enqueue(inbuf);
                }

                if (swSeg != null)
                {
                    //Call CRFSharp wrapper to predict given string's tags
                    _crfWrapper.Segment(crf_out, tagger, inbuf);

                    //Convert before waiting so only the actual write is serialized
                    List<string> rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);

                    //Save segmented tagged result
                    WaitUntilAtHead(pendingRecords, inbuf);
                    foreach (string item in rstList)
                    {
                        swSeg.WriteLine(item);
                    }
                }
                else
                {
                    //Call CRFSharp wrapper to predict given string's tags
                    _crfWrapper.Segment((crf_term_out[])crf_out, (DecoderTagger)tagger, inbuf);

                    //Save raw tagged result (with probability)
                    WaitUntilAtHead(pendingRecords, inbuf);
                    OutputRawResult(inbuf, crf_out, tagger, sw);
                }

                //Release the turn to the worker holding the next record
                List<List<string>> finished;
                pendingRecords.TryDequeue(out finished);
            }
        });
    }

    if (sw != null)
    {
        sw.Close();
        return(sw.ToString());
    }
    if (swSeg != null)
    {
        swSeg.Close();
        return(swSeg.ToString());
    }
    return("");
}

//Blocks until the given record is at the head of the queue, backing off with SpinWait instead of a hot loop
private static void WaitUntilAtHead(ConcurrentQueue<List<List<string>>> queue, List<List<string>> record)
{
    System.Threading.SpinWait spinner = new System.Threading.SpinWait();
    List<List<string>> head;
    while (!(queue.TryPeek(out head) && object.ReferenceEquals(head, record)))
    {
        spinner.SpinOnce();
    }
}
/// <summary>
/// Fills <paramref name="term_buf"/> with the raw per-term result (via termbuf_build) and then groups
/// consecutive terms into tokens appended to <c>term_buf.tokenList</c>.
/// </summary>
/// <param name="term_buf">Output buffer; cleared first, then populated.</param>
/// <returns>The termbuf_build error code if it fails, otherwise <c>Utils.ERROR_SUCCESS</c>.</returns>
int seg_termbuf_build(crf_seg_out term_buf)
{
    term_buf.Clear();

    //build raw result at first
    int iRet = termbuf_build(term_buf);
    if (iRet != Utils.ERROR_SUCCESS)
    {
        return(iRet);
    }

    //Then build token result
    int term_len = 0;
    double weight = 0.0;
    int num = 0;
    for (int i = 0; i < x_.Count; i++)
    {
        //Adding the length of current token
        string strTag = term_buf.result_[i];
        term_len += x_[i][0].Length;
        weight += term_buf.weight_[i];
        num++;

        //Tags are machine identifiers, so compare ordinally rather than by current culture.
        //A "B_" or "M_" prefix means the token continues into the next term.
        bool continuesToken =
            strTag.StartsWith("B_", System.StringComparison.Ordinal) ||
            strTag.StartsWith("M_", System.StringComparison.Ordinal);

        //The last term always closes the token, whatever its tag
        if (continuesToken && i != x_.Count - 1)
        {
            continue;
        }

        SegToken tkn = new SegToken();
        tkn.length = term_len;
        tkn.offset = term_buf.termTotalLength;

        //Token tag is the part after the first '_'; a tag without '_' is kept as is, except "NOR" which maps to empty
        int spos = strTag.IndexOf('_');
        if (spos >= 0)
        {
            tkn.strTag = strTag.Substring(spos + 1);
        }
        else if (strTag == "NOR")
        {
            tkn.strTag = "";
        }
        else
        {
            tkn.strTag = strTag;
        }

        term_buf.termTotalLength += term_len;

        //Calculate each token's weight; levels other than 0 and 2 leave fWeight at SegToken's initial value
        switch (vlevel_)
        {
            case 0:
                tkn.fWeight = 0.0;
                break;
            case 2:
                //Average of the term weights that make up this token
                tkn.fWeight = weight / num;
                break;
        }

        term_buf.tokenList.Add(tkn);

        //Start accumulating the next token from scratch
        term_len = 0;
        weight = 0.0;
        num = 0;
    }

    return(Utils.ERROR_SUCCESS);
}