Ejemplo n.º 1
0
        /// <summary>
        /// Reads <paramref name="strTextFileName"/> line by line, runs <paramref name="match"/>
        /// over every non-empty line and prints each matched term to the console.
        /// </summary>
        /// <param name="strTextFileName">Path of the text file to scan.</param>
        /// <param name="match">Matcher whose <c>Search</c> method is applied to each line.</param>
        public static void Match(string strTextFileName, DictMatch match)
        {
            // Result buffers are reused across lines and cleared before every search.
            List <Lemma> dm_r       = new List <Lemma>();
            List <int>   offsetList = new List <int>();

            // The using statement guarantees the reader is closed even when
            // Search or Substring throws part-way through the file.
            using (StreamReader sr = new StreamReader(strTextFileName))
            {
                string strLine;
                while ((strLine = sr.ReadLine()) != null)
                {
                    // Empty lines are never handed to the matcher.
                    if (strLine.Length == 0)
                    {
                        continue;
                    }

                    dm_r.Clear();
                    offsetList.Clear();
                    match.Search(strLine, ref dm_r, ref offsetList, DictMatch.DM_OUT_FMM);

                    // dm_r[i] and offsetList[i] describe the same match: the lemma
                    // carries length and property, the offset list its start index.
                    for (int i = 0; i < dm_r.Count; i++)
                    {
                        uint   len     = dm_r[i].len;
                        int    offset  = offsetList[i];
                        string strProp = dm_r[i].strProp;
                        string strTerm = strLine.Substring(offset, (int)len);
                        Console.WriteLine("Matched term: {0}[offset:{1}, len:{2}, prop:{3}]", strTerm, offset, len, strProp);
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads <paramref name="inputFilePath"/> line by line, runs <paramref name="match"/>
        /// over every non-empty line and writes the line to <paramref name="outputFilePath"/>
        /// with each matched term wrapped as <c> &lt;prop&gt; term &lt;/prop&gt; </c>.
        /// </summary>
        /// <remarks>
        /// Empty input lines are skipped and produce no output line. Before a line is written,
        /// every pair of consecutive spaces is replaced by one space in a single pass.
        /// </remarks>
        /// <param name="inputFilePath">Path of the text file to scan.</param>
        /// <param name="outputFilePath">Path of the file that receives the tagged lines.</param>
        /// <param name="match">Matcher whose <c>Search</c> method is applied to each line.</param>
        public static void Match(string inputFilePath, string outputFilePath, DictMatch match)
        {
            // Result buffers are reused across lines and cleared before every search.
            List <Lemma> dm_r       = new List <Lemma>();
            List <int>   offsetList = new List <int>();

            // using declarations dispose both streams (flushing the writer) on every
            // exit path, including exceptions thrown while a line is being processed.
            using StreamReader sr = new StreamReader(inputFilePath);
            using StreamWriter sw = new StreamWriter(outputFilePath);

            StringBuilder sb = new StringBuilder();

            string? line;
            while ((line = sr.ReadLine()) != null)
            {
                if (line.Length == 0)
                {
                    continue;
                }

                dm_r.Clear();
                offsetList.Clear();
                match.Search(line, ref dm_r, ref offsetList, DictMatch.DM_OUT_FMM);

                sb.Clear();

                // Index of the first character of the line not yet copied to the output.
                int currOffset = 0;

                for (int i = 0; i < dm_r.Count; i++)
                {
                    int    len     = (int)dm_r[i].len;
                    int    offset  = offsetList[i];
                    string strProp = dm_r[i].strProp;
                    string strTerm = line.Substring(offset, len);

                    // Copy the unmatched text between the previous match and this one.
                    if (offset > currOffset)
                    {
                        sb.Append(line, currOffset, offset - currOffset);
                    }

                    sb.Append($" <{strProp}> {strTerm} </{strProp}> ");

                    currOffset = offset + len;
                }

                // Copy whatever follows the last match (or the whole line when nothing matched).
                if (currOffset < line.Length)
                {
                    sb.Append(line, currOffset, line.Length - currOffset);
                }

                sw.WriteLine(sb.ToString().Replace("  ", " "));
            }
        }
        /// <summary>
        /// Builds one feature row per character of <paramref name="strText"/>. Each row holds the
        /// character itself followed by the property of the match covering that position, or
        /// <c>"NOR"</c> when no match covers it.
        /// </summary>
        /// <param name="strText">Text to search.</param>
        /// <returns>
        /// A list with <c>strText.Length</c> rows of two strings each; an empty list when no
        /// matcher is available or <paramref name="strText"/> is null or empty.
        /// </returns>
        public List <List <string> > GenerateFeature(string strText)
        {
            if (dictmatch == null)
            {
                return(new List <List <string> >());
            }

            dm_r.Clear();
            dm_offsetList.Clear();

            // Null or empty text has no characters to describe; returning early mirrors the
            // missing-matcher case and avoids handing the matcher an empty string.
            if (string.IsNullOrEmpty(strText))
            {
                return(new List <List <string> >());
            }

            dictmatch.Search(strText, ref dm_r, ref dm_offsetList, DictMatch.DM_OUT_FMM);

            // Per-character property; entries stay null where no match covers the position.
            var astrDictMatch = new string[strText.Length];

            for (var i = 0; i < dm_r.Count; i++)
            {
                var start = dm_offsetList[i];
                var end   = start + (int)dm_r[i].len;
                var prop  = dm_r[i].strProp;

                // Matches are applied in order, so a later match overwrites an earlier
                // one on any position they share.
                for (var j = start; j < end; j++)
                {
                    astrDictMatch[j] = prop;
                }
            }

            var rstListList = new List <List <string> >(strText.Length);

            for (var i = 0; i < strText.Length; i++)
            {
                rstListList.Add(new List <string>
                {
                    strText[i].ToString(),
                    astrDictMatch[i] ?? "NOR"
                });
            }

            return(rstListList);
        }