Пример #1
0
        /// <summary>
        /// Initializes the DictMatch feature generator: creates the matcher and its
        /// result buffers, reads "GenerateFeatureDictMatch.ini" and, when a dictionary
        /// file name is configured, loads that dictionary.
        /// </summary>
        /// <returns>
        /// <c>false</c> when either required configuration key is missing or the
        /// binary-dictionary flag cannot be parsed as a boolean; otherwise <c>true</c>.
        /// An empty dictionary file name also yields <c>true</c>, but no dictionary
        /// is loaded in that case.
        /// </returns>
        public bool Initialize()
        {
            dictmatch = new DictMatch();
            dm_r = new List<Lemma>();
            dm_offsetList = new List<int>();

            Dictionary<string, string> configDict = LoadConfigFile("GenerateFeatureDictMatch.ini");

            // Keys are looked up in lower case, exactly as before.
            // NOTE(review): presumably LoadConfigFile stores lower-cased keys - confirm.
            string strDictMatchFileName;
            string strBinaryDictType;
            if (!configDict.TryGetValue(KEY_LEXICAL_DICT_FILE_NAME.ToLower(), out strDictMatchFileName) ||
                !configDict.TryGetValue(KEY_BINARY_DICT_TYPE.ToLower(), out strBinaryDictType))
            {
                return false;
            }

            // A malformed flag is a configuration error: report it through the return
            // value, like a missing key, instead of letting bool.Parse throw.
            bool bBinaryDict;
            if (!bool.TryParse(strBinaryDictType, out bBinaryDict))
            {
                return false;
            }

            // No dictionary configured: nothing to load, initialization still succeeds.
            if (string.IsNullOrEmpty(strDictMatchFileName))
            {
                return true;
            }

            if (bBinaryDict)
            {
                dictmatch.LoadDictFromBinary(strDictMatchFileName);
            }
            else
            {
                dictmatch.LoadDictFromRawText(strDictMatchFileName);
            }

            return true;
        }
Пример #2
0
        /// <summary>
        /// Reads <paramref name="strTextFileName"/> line by line, searches every
        /// non-empty line with <paramref name="match"/> and writes each matched term
        /// (with its offset, length and property string) to the console.
        /// </summary>
        /// <param name="strTextFileName">Path of the text file whose lines are searched.</param>
        /// <param name="match">Matcher used to search each line; called with the DictMatch.DM_OUT_FMM flag.</param>
        public static void Match(string strTextFileName, DictMatch match)
        {
            List<Lemma> dm_r = new List<Lemma>();
            List<int> offsetList = new List<int>();

            // The using block disposes the reader even when Search or Substring throws,
            // which the previous explicit Close() call did not guarantee.
            using (StreamReader sr = new StreamReader(strTextFileName))
            {
                string strLine;
                while ((strLine = sr.ReadLine()) != null)
                {
                    if (strLine.Length == 0)
                    {
                        continue;
                    }

                    // The result buffers are reused across lines, so reset them first.
                    dm_r.Clear();
                    offsetList.Clear();
                    match.Search(strLine, ref dm_r, ref offsetList, DictMatch.DM_OUT_FMM);

                    // A non-empty dm_r means some contiguous terms in strLine matched
                    // terms in the dictionary; offsetList[i] is the offset for dm_r[i].
                    for (int i = 0; i < dm_r.Count; i++)
                    {
                        uint len = dm_r[i].len;
                        int offset = offsetList[i];
                        string strProp = dm_r[i].strProp;
                        string strTerm = strLine.Substring(offset, (int)len);
                        Console.WriteLine("Matched term: {0}[offset:{1}, len:{2}, prop:{3}]", strTerm, offset, len, strProp);
                    }
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Loads a raw-text dictionary into a new <c>DictMatch</c> and runs
        /// <c>Match</c> over the test file with it, reporting progress on the console.
        /// </summary>
        /// <param name="strTestFileName">Path of the text file passed to <c>Match</c>.</param>
        /// <param name="strRawDictFileName">Path of the raw-text dictionary to load.</param>
        public static void VerifyRawTextDict(string strTestFileName, string strRawDictFileName)
        {
            // Step 1: build the matcher from the raw-text dictionary.
            Console.WriteLine("Load raw text dictionary...");
            var rawTextMatcher = new DictMatch();
            rawTextMatcher.LoadDictFromRawText(strRawDictFileName);

            // Step 2: run the test file through it.
            Console.WriteLine("Verify raw text dictionary...");
            Match(strTestFileName, rawTextMatcher);
        }
Пример #4
0
        /// <summary>
        /// Converts a raw-text dictionary to binary format (written next to the source
        /// file with a ".bin" suffix), loads that binary file into a fresh
        /// <c>DictMatch</c> and runs <c>Match</c> over the test file with it.
        /// </summary>
        /// <param name="strTestFileName">Path of the text file passed to <c>Match</c>.</param>
        /// <param name="strRawDictFileName">Path of the raw-text dictionary to convert.</param>
        public static void VerifyBinaryDict(string strTestFileName, string strRawDictFileName)
        {
            // The binary dictionary is written to, and read back from, this path.
            string binaryDictFileName = strRawDictFileName + ".bin";

            Console.WriteLine("Convert dictionary from raw text to binary format.");
            var converter = new DictMatch();
            converter.ConvertDictFromRawTextToBinary(strRawDictFileName, binaryDictFileName);

            // A separate instance is used for loading, so nothing from the conversion step is reused.
            Console.WriteLine("Load binary dictionary...");
            var binaryMatcher = new DictMatch();
            binaryMatcher.LoadDictFromBinary(binaryDictFileName);

            Console.WriteLine("Verify binary dictionary...");
            Match(strTestFileName, binaryMatcher);
        }