/// <summary>
/// Reads <paramref name="strTextFileName"/> line by line, searches every non-empty line with
/// <paramref name="match"/>, and prints each matched term to the console together with its
/// offset, length and property string.
/// </summary>
/// <param name="strTextFileName">Path of the text file to scan.</param>
/// <param name="match">Dictionary matcher used to search each line.</param>
public static void Match(string strTextFileName, DictMatch match)
{
    List<Lemma> dm_r = new List<Lemma>();
    List<int> offsetList = new List<int>();

    // The using statement closes the reader even when Search or Substring throws.
    using (StreamReader sr = new StreamReader(strTextFileName))
    {
        string? strLine;
        while ((strLine = sr.ReadLine()) != null)
        {
            if (strLine.Length == 0)
            {
                continue;
            }

            // The result lists are reused across lines, so reset them before every search.
            dm_r.Clear();
            offsetList.Clear();
            match.Search(strLine, ref dm_r, ref offsetList, DictMatch.DM_OUT_FMM);

            // dm_r.Count > 0 means some contiguous terms in strLine matched dictionary entries;
            // offsetList[i] is the start position of dm_r[i] inside strLine.
            for (int i = 0; i < dm_r.Count; i++)
            {
                uint len = dm_r[i].len;
                int offset = offsetList[i];
                string strProp = dm_r[i].strProp;
                string strTerm = strLine.Substring(offset, (int)len);

                Console.WriteLine("Matched term: {0}[offset:{1}, len:{2}, prop:{3}]", strTerm, offset, len, strProp);
            }
        }
    }
}
/// <summary>
/// Reads <paramref name="inputFilePath"/> line by line, searches every non-empty line with
/// <paramref name="match"/>, and writes the line to <paramref name="outputFilePath"/> with each
/// matched term wrapped as <c>&lt;prop&gt; term &lt;/prop&gt;</c>, where <c>prop</c> is the
/// match's property string. Empty input lines are skipped and produce no output line.
/// </summary>
/// <param name="inputFilePath">Path of the text file to scan.</param>
/// <param name="outputFilePath">Path of the annotated file to create.</param>
/// <param name="match">Dictionary matcher used to search each line.</param>
public static void Match(string inputFilePath, string outputFilePath, DictMatch match)
{
    List<Lemma> dm_r = new List<Lemma>();
    List<int> offsetList = new List<int>();

    // using declarations dispose both streams on every exit path, including exceptions
    // thrown while opening the writer or while processing a line.
    using StreamReader sr = new StreamReader(inputFilePath);
    using StreamWriter sw = new StreamWriter(outputFilePath);

    StringBuilder sb = new StringBuilder();
    string? line;
    while ((line = sr.ReadLine()) != null)
    {
        if (line.Length == 0)
        {
            continue;
        }

        // The result lists are reused across lines, so reset them before every search.
        dm_r.Clear();
        offsetList.Clear();
        match.Search(line, ref dm_r, ref offsetList, DictMatch.DM_OUT_FMM);

        sb.Clear();

        // Index of the first character of line that has not been copied to sb yet.
        int currOffset = 0;
        for (int i = 0; i < dm_r.Count; i++)
        {
            uint len = dm_r[i].len;
            int offset = offsetList[i];
            string strProp = dm_r[i].strProp;
            string strTerm = line.Substring(offset, (int)len);

            // Copy the unmatched text that sits between the previous match and this one.
            if (offset > currOffset)
            {
                sb.Append(line, currOffset, offset - currOffset);
            }

            sb.Append($" <{strProp}> {strTerm} </{strProp}> ");
            currOffset = (int)(offset + len);
        }

        // Copy whatever follows the last match (or the whole line when nothing matched).
        if (currOffset < line.Length)
        {
            sb.Append(line, currOffset, line.Length - currOffset);
        }

        // The tags are padded with spaces on both sides, so adjacent tags or neighbouring
        // blanks yield double spaces; collapse them. The previous call replaced a single
        // space with a single space, which had no effect.
        sw.WriteLine(sb.ToString().Replace("  ", " "));
    }
}
/// <summary>
/// Builds one feature row per character of <paramref name="strText"/>. Each row holds the
/// character itself followed by the property string of the dictionary match covering that
/// position, or "NOR" when no match covers it.
/// </summary>
/// <param name="strText">Text to search and turn into per-character features.</param>
/// <returns>
/// A list with one two-element row per character, or an empty list when no dictionary
/// matcher is set.
/// </returns>
public List<List<string>> GenerateFeature(string strText)
{
    var featureRows = new List<List<string>>();
    if (dictmatch != null)
    {
        // The shared result buffers are reused between calls, so reset them first.
        dm_r.Clear();
        dm_offsetList.Clear();
        dictmatch.Search(strText, ref dm_r, ref dm_offsetList, DictMatch.DM_OUT_FMM);

        // labels[k] is the property of the match covering character k; null means uncovered.
        var labels = new string[strText.Length];
        for (var m = 0; m < dm_r.Count; m++)
        {
            var start = dm_offsetList[m];
            var end = start + (int)dm_r[m].len;
            for (var pos = start; pos < end; pos++)
            {
                labels[pos] = dm_r[m].strProp;
            }
        }

        for (var k = 0; k < strText.Length; k++)
        {
            featureRows.Add(new List<string> { strText[k].ToString(), labels[k] ?? "NOR" });
        }
    }

    return featureRows;
}