/// <summary>
/// Runs the CRF model over <paramref name="termList"/> and converts the 1st-best
/// result into one <see cref="Token"/> per input term.
/// </summary>
/// <param name="instance">
/// Working buffers for the decoder; its <c>crf_seg_out</c> and <c>crf_tag</c>
/// members are handed to <c>crf.Segment</c>, and <c>crf_seg_out[0]</c> is read back.
/// </param>
/// <param name="termList">Terms to label, in order.</param>
/// <returns>
/// A list with one token per input term (term text plus parsed rank id), or
/// <c>null</c> when <c>crf.Segment</c> returns a negative value or the number of
/// decoded tokens differs from the number of input terms.
/// </returns>
/// <exception cref="System.FormatException">
/// A decoded tag, after dropping its first <c>strRankTagPrefix.Length</c>
/// characters, is not a valid integer.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// A decoded tag is shorter than <c>strRankTagPrefix</c>.
/// </exception>
private List<Token> LabelString(Instance instance, List<string> termList)
{
    //Extract features from given text
    List<List<string>> sinbuf = featureGenerator.GenerateFeature(termList);

    //Call CRFSharp to predict word formation tags
    int ret = crf.Segment(instance.crf_seg_out, instance.crf_tag, sinbuf);

    //Only use 1st-best result
    crf_seg_out item = instance.crf_seg_out[0];
    if (ret < 0 || item.Count != termList.Count)
    {
        //CRF parsing is failed
        string strMessage = "Failed to parse word formation by model. RetVal: " + ret.ToString()
            + ", Parsed Token Count: " + item.Count.ToString()
            + ", Input Token Count: " + termList.Count.ToString();
        Console.WriteLine(strMessage);
        return null;
    }

    //Fill the token list. The final size is known up front, so pre-size the list.
    List<Token> tknList = new List<Token>(item.Count);
    for (int j = 0; j < item.Count; j++)
    {
        string strNE = item.tokenList[j].strTag;

        Token token = new Token();
        token.strTerm = termList[j];
        // The rank id is whatever follows the first strRankTagPrefix.Length characters
        // of the tag. The prefix text itself is not verified here.
        token.rankId = int.Parse(strNE.Substring(strRankTagPrefix.Length));
        tknList.Add(token);
    }

    return tknList;
}
/// <summary>
/// Tags every record of the input file with a CRF model and writes the results
/// to the configured output file(s), using <c>options.thread</c> parallel workers.
/// </summary>
/// <param name="options">
/// Decoder settings. This method reads <c>strInputFileName</c>, <c>strModelFileName</c>,
/// <c>strOutputFileName</c>, <c>strOutputSegFileName</c>, <c>thread</c>, <c>nBest</c>,
/// <c>maxword</c> and <c>probLevel</c>. An output file is written only when its
/// file name is non-null and non-empty.
/// </param>
/// <returns>
/// <c>false</c> when the input or model file does not exist or the model fails to
/// load; <c>true</c> once all workers have finished.
/// </returns>
bool Decode(CRFSharpWrapper.DecoderArgs options)
{
    if (!File.Exists(options.strInputFileName))
    {
        Console.WriteLine("FAILED: Open {0} file failed.", options.strInputFileName);
        return false;
    }

    if (!File.Exists(options.strModelFileName))
    {
        Console.WriteLine("FAILED: Open {0} file failed.", options.strModelFileName);
        return false;
    }

    StreamReader sr = null;
    StreamWriter sw = null, swSeg = null;

    // Everything that can fail after a stream has been opened lives inside this
    // try block so the finally clause releases the file handles on every path,
    // including the early return when the model cannot be loaded.
    try
    {
        sr = new StreamReader(options.strInputFileName);

        if (!string.IsNullOrEmpty(options.strOutputFileName))
        {
            sw = new StreamWriter(options.strOutputFileName);
        }

        if (!string.IsNullOrEmpty(options.strOutputSegFileName))
        {
            swSeg = new StreamWriter(options.strOutputSegFileName);
        }

        //Create CRFSharp wrapper instance. It's a global instance
        var crfWrapper = new CRFSharpWrapper.Decoder();

        //Load model from file
        if (!crfWrapper.LoadModel(options.strModelFileName))
        {
            return false;
        }

        // One ordering queue per output file. A worker may only write its result
        // when its own buffer is at the head of the queue.
        var queueRecords = new ConcurrentQueue<List<List<string>>>();
        var queueSegRecords = new ConcurrentQueue<List<List<string>>>();

        var parallelOption = new ParallelOptions();
        parallelOption.MaxDegreeOfParallelism = options.thread;

        Parallel.For(0, options.thread, parallelOption, t =>
        {
            //Create decoder tagger instance. If the running environment is multi-threads,
            //each thread needs a separated instance
            var tagger = crfWrapper.CreateTagger(options.nBest, options.maxword);
            tagger.set_vlevel(options.probLevel);

            //Initialize result: one output buffer per requested result (options.nBest)
            var crf_out = new crf_seg_out[options.nBest];
            for (var i = 0; i < options.nBest; i++)
            {
                crf_out[i] = new crf_seg_out(tagger.crf_max_word_num);
            }

            // This buffer is reused for every record the worker handles. Its reference
            // identity is what the ordering queues compare against.
            var inbuf = new List<List<string>>();
            while (true)
            {
                lock (rdLocker)
                {
                    // ReadRecord returning false ends this worker
                    // (presumably end of input - confirm against ReadRecord).
                    if (!ReadRecord(inbuf, sr))
                    {
                        break;
                    }

                    // Enqueue while still holding the read lock so queue order matches
                    // read order. Only feed a queue whose writer exists: a queue without
                    // a writer is never dequeued and would grow by one entry per record.
                    if (sw != null)
                    {
                        queueRecords.Enqueue(inbuf);
                    }

                    if (swSeg != null)
                    {
                        queueSegRecords.Enqueue(inbuf);
                    }
                }

                //Call CRFSharp wrapper to predict given string's tags
                if (swSeg != null)
                {
                    crfWrapper.Segment(crf_out, tagger, inbuf);
                }
                else
                {
                    crfWrapper.Segment((crf_term_out[])crf_out, (DecoderTagger)tagger, inbuf);
                }

                List<List<string>> peek = null;

                //Save segmented tagged result into file
                if (swSeg != null)
                {
                    var rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);

                    // Busy-wait until this worker's record is at the head of the queue.
                    while (peek != inbuf)
                    {
                        queueSegRecords.TryPeek(out peek);
                    }

                    for (int index = 0; index < rstList.Count; index++)
                    {
                        var item = rstList[index];
                        swSeg.WriteLine(item);
                    }

                    queueSegRecords.TryDequeue(out peek);
                    peek = null;
                }

                //Save raw tagged result (with probability) into file
                if (sw != null)
                {
                    // Busy-wait until this worker's record is at the head of the queue.
                    while (peek != inbuf)
                    {
                        queueRecords.TryPeek(out peek);
                    }

                    OutputRawResultToFile(inbuf, crf_out, tagger, sw);
                    queueRecords.TryDequeue(out peek);
                }
            }
        });
    }
    finally
    {
        if (sr != null)
        {
            sr.Close();
        }

        if (sw != null)
        {
            sw.Close();
        }

        if (swSeg != null)
        {
            swSeg.Close();
        }
    }

    return true;
}
/// <summary>
/// Tags every record of the input file with a CRF model and writes the results
/// to the configured output file(s), using <c>options.thread</c> parallel workers.
/// Logs the elapsed wall-clock time when finished.
/// </summary>
/// <param name="options">
/// Decoder settings. This method reads <c>strInputFileName</c>, <c>strModelFileName</c>,
/// <c>strOutputFileName</c>, <c>strOutputSegFileName</c>, <c>thread</c>, <c>nBest</c>,
/// <c>maxword</c> and <c>probLevel</c>. An output file is written only when its
/// file name is non-null and non-empty.
/// </param>
/// <returns>
/// <c>false</c> when the input or model file does not exist; <c>true</c> once all
/// workers have finished.
/// </returns>
bool Decode(CRFSharpWrapper.DecoderArgs options)
{
    var watch = Stopwatch.StartNew();

    if (!File.Exists(options.strInputFileName))
    {
        Logger.WriteLine("FAILED: Open {0} file failed.", options.strInputFileName);
        return false;
    }

    if (!File.Exists(options.strModelFileName))
    {
        Logger.WriteLine("FAILED: Open {0} file failed.", options.strModelFileName);
        return false;
    }

    StreamReader sr = null;
    StreamWriter sw = null, swSeg = null;

    // Everything that can throw after a stream has been opened lives inside this
    // try block so the finally clause always releases the file handles.
    try
    {
        sr = new StreamReader(options.strInputFileName);

        if (!string.IsNullOrEmpty(options.strOutputFileName))
        {
            sw = new StreamWriter(options.strOutputFileName);
        }

        if (!string.IsNullOrEmpty(options.strOutputSegFileName))
        {
            swSeg = new StreamWriter(options.strOutputSegFileName);
        }

        //Create CRFSharp wrapper instance. It's a global instance
        var crfWrapper = new CRFSharpWrapper.Decoder();

        //Load encoded model from file
        // NOTE(review): any result returned by LoadModel is not inspected here;
        // failures are presumably reported by exception - confirm.
        Logger.WriteLine("Loading model from {0}", options.strModelFileName);
        crfWrapper.LoadModel(options.strModelFileName);

        // One ordering queue per output file. A worker may only write its result
        // when its own buffer is at the head of the queue.
        var queueRecords = new ConcurrentQueue<List<List<string>>>();
        var queueSegRecords = new ConcurrentQueue<List<List<string>>>();

        var parallelOption = new ParallelOptions();
        parallelOption.MaxDegreeOfParallelism = options.thread;

        Parallel.For(0, options.thread, parallelOption, t =>
        {
            //Create decoder tagger instance. If the running environment is multi-threads,
            //each thread needs a separated instance
            var tagger = crfWrapper.CreateTagger(options.nBest, options.maxword);
            tagger.set_vlevel(options.probLevel);

            //Initialize result: one output buffer per requested result (options.nBest)
            var crf_out = new crf_seg_out[options.nBest];
            for (var i = 0; i < options.nBest; i++)
            {
                crf_out[i] = new crf_seg_out(tagger.crf_max_word_num);
            }

            // This buffer is reused for every record the worker handles. Its reference
            // identity is what the ordering queues compare against.
            var inbuf = new List<List<string>>();
            while (true)
            {
                lock (rdLocker)
                {
                    // ReadRecord returning false ends this worker
                    // (presumably end of input - confirm against ReadRecord).
                    if (!ReadRecord(inbuf, sr))
                    {
                        break;
                    }

                    // Enqueue while still holding the read lock so queue order matches
                    // read order. Only feed a queue whose writer exists: a queue without
                    // a writer is never dequeued and would grow by one entry per record.
                    if (sw != null)
                    {
                        queueRecords.Enqueue(inbuf);
                    }

                    if (swSeg != null)
                    {
                        queueSegRecords.Enqueue(inbuf);
                    }
                }

                //Call CRFSharp wrapper to predict given string's tags
                if (swSeg != null)
                {
                    crfWrapper.Segment(crf_out, tagger, inbuf);
                }
                else
                {
                    crfWrapper.Segment((crf_term_out[])crf_out, (DecoderTagger)tagger, inbuf);
                }

                List<List<string>> peek = null;

                //Save segmented tagged result into file
                if (swSeg != null)
                {
                    var rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);

                    // Busy-wait until this worker's record is at the head of the queue.
                    while (peek != inbuf)
                    {
                        queueSegRecords.TryPeek(out peek);
                    }

                    for (int index = 0; index < rstList.Count; index++)
                    {
                        var item = rstList[index];
                        swSeg.WriteLine(item);
                    }

                    queueSegRecords.TryDequeue(out peek);
                    peek = null;
                }

                //Save raw tagged result (with probability) into file
                if (sw != null)
                {
                    // Busy-wait until this worker's record is at the head of the queue.
                    while (peek != inbuf)
                    {
                        queueRecords.TryPeek(out peek);
                    }

                    OutputRawResultToFile(inbuf, crf_out, tagger, sw);
                    queueRecords.TryDequeue(out peek);
                }
            }
        });
    }
    finally
    {
        if (sr != null)
        {
            sr.Close();
        }

        if (sw != null)
        {
            sw.Close();
        }

        if (swSeg != null)
        {
            swSeg.Close();
        }
    }

    watch.Stop();
    Logger.WriteLine("Elapsed: {0} ms", watch.ElapsedMilliseconds);

    return true;
}
/// <summary>
/// Tags every record of the input file with a CRF model and writes the results
/// to the configured output file(s), using <c>options.thread</c> parallel workers.
/// </summary>
/// <param name="options">
/// Decoder settings. This method reads <c>strInputFileName</c>, <c>strModelFileName</c>,
/// <c>strOutputFileName</c>, <c>strOutputSegFileName</c>, <c>thread</c>, <c>nBest</c>
/// and <c>probLevel</c>. An output file is written only when its file name is
/// non-null and non-empty.
/// </param>
/// <returns>
/// <c>false</c> when the input or model file does not exist or the model fails to
/// load; <c>true</c> once all workers have finished.
/// </returns>
bool Decode(CRFSharpWrapper.DecoderArgs options)
{
    if (!File.Exists(options.strInputFileName))
    {
        Console.WriteLine("FAILED: Open {0} file failed.", options.strInputFileName);
        return false;
    }

    if (!File.Exists(options.strModelFileName))
    {
        Console.WriteLine("FAILED: Open {0} file failed.", options.strModelFileName);
        return false;
    }

    StreamReader sr = null;
    StreamWriter sw = null, swSeg = null;

    // Everything that can fail after a stream has been opened lives inside this
    // try block so the finally clause releases the file handles on every path,
    // including the early return when the model cannot be loaded.
    try
    {
        sr = new StreamReader(options.strInputFileName);

        if (!string.IsNullOrEmpty(options.strOutputFileName))
        {
            sw = new StreamWriter(options.strOutputFileName);
        }

        if (!string.IsNullOrEmpty(options.strOutputSegFileName))
        {
            swSeg = new StreamWriter(options.strOutputSegFileName);
        }

        //Create CRFSharp wrapper instance. It's a global instance
        CRFSharpWrapper.Decoder crfWrapper = new CRFSharpWrapper.Decoder();

        //Load model from file
        if (!crfWrapper.LoadModel(options.strModelFileName))
        {
            return false;
        }

        // One ordering queue per output file. A worker may only write its result
        // when its own buffer is at the head of the queue.
        ConcurrentQueue<List<List<string>>> queueRecords = new ConcurrentQueue<List<List<string>>>();
        ConcurrentQueue<List<List<string>>> queueSegRecords = new ConcurrentQueue<List<List<string>>>();

        ParallelOptions parallelOption = new ParallelOptions();
        parallelOption.MaxDegreeOfParallelism = options.thread;

        Parallel.For(0, options.thread, parallelOption, t =>
        {
            //Create decoder tagger instance. If the running environment is multi-threads,
            //each thread needs a separated instance
            SegDecoderTagger tagger = crfWrapper.CreateTagger(options.nBest);
            tagger.set_vlevel(options.probLevel);

            //Initialize result: one output buffer per requested result (options.nBest)
            crf_seg_out[] crf_out = new crf_seg_out[options.nBest];
            for (int i = 0; i < options.nBest; i++)
            {
                crf_out[i] = new crf_seg_out();
            }

            // This buffer is reused for every record the worker handles. Its reference
            // identity is what the ordering queues compare against.
            List<List<string>> inbuf = new List<List<string>>();
            while (true)
            {
                lock (rdLocker)
                {
                    // ReadRecord returning false ends this worker
                    // (presumably end of input - confirm against ReadRecord).
                    if (!ReadRecord(inbuf, sr))
                    {
                        break;
                    }

                    // Enqueue while still holding the read lock so queue order matches
                    // read order. Only feed a queue whose writer exists: a queue without
                    // a writer is never dequeued and would grow by one entry per record.
                    if (sw != null)
                    {
                        queueRecords.Enqueue(inbuf);
                    }

                    if (swSeg != null)
                    {
                        queueSegRecords.Enqueue(inbuf);
                    }
                }

                //Call CRFSharp wrapper to predict given string's tags
                if (swSeg != null)
                {
                    crfWrapper.Segment(crf_out, tagger, inbuf);
                }
                else
                {
                    crfWrapper.Segment((crf_term_out[])crf_out, (DecoderTagger)tagger, inbuf);
                }

                List<List<string>> peek = null;

                //Save segmented tagged result into file
                if (swSeg != null)
                {
                    List<string> rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);

                    // Busy-wait until this worker's record is at the head of the queue.
                    while (peek != inbuf)
                    {
                        queueSegRecords.TryPeek(out peek);
                    }

                    foreach (string item in rstList)
                    {
                        swSeg.WriteLine(item);
                    }

                    queueSegRecords.TryDequeue(out peek);
                    peek = null;
                }

                //Save raw tagged result (with probability) into file
                if (sw != null)
                {
                    // Busy-wait until this worker's record is at the head of the queue.
                    while (peek != inbuf)
                    {
                        queueRecords.TryPeek(out peek);
                    }

                    OutputRawResultToFile(inbuf, crf_out, tagger, sw);
                    queueRecords.TryDequeue(out peek);
                }
            }
        });
    }
    finally
    {
        if (sr != null)
        {
            sr.Close();
        }

        if (sw != null)
        {
            sw.Close();
        }

        if (swSeg != null)
        {
            swSeg.Close();
        }
    }

    return true;
}