Exemple #1
0
        // public methods
        // private methods
        // Ad-hoc console demo: builds a MergeObj for merging "dia gnosed" into
        // "diagnosed.", tokenizes a sample sentence, and prints the non-merged
        // term that GetNonMergeTerm rebuilds from the non-space tokens.
        private static void Test()
        {
            // words involved in the merge
            const string target     = "gnosed";      // target term
            const string unmerged   = "dia gnosed";  // original words before the merge
            const string merged     = "diagnosed.";  // suggested merged term
            const string mergedCore = "diagnosed";   // core form of the suggested merged term

            // index values of the merge (start / target / end)
            const int firstIndex  = 4;
            const int targetIndex = 6;
            const int lastIndex   = 6;

            // pos values of the merge (start / target / end); GetNonMergeTerm
            // uses the start and end pos to index the non-space token list
            const int firstPos  = 2;
            const int targetPos = 3;
            const int lastPos   = 3;

            // total number of merged tokens
            const int mergedCount = 1;

            MergeObj demoMerge = new MergeObj(
                target, unmerged, merged, mergedCore, mergedCount,
                firstIndex, targetIndex, lastIndex,
                firstPos, targetPos, lastPos);

            // tokenize the sample sentence, then drop the space tokens
            const string sentence = "He is dia gnosed last week.";
            List<TokenObj> allTokens      = TextObj.TextToTokenList(sentence);
            List<TokenObj> nonSpaceTokens = TextObj.GetNonSpaceTokenObjList(allTokens);

            Console.WriteLine("------ Merge Obj -------");
            Console.WriteLine(demoMerge.ToString());

            Console.WriteLine("------ Non Merge Term -------");
            string rebuiltTerm = GetNonMergeTerm(demoMerge, nonSpaceTokens);
            Console.WriteLine("- inText: [" + sentence + "]");
            Console.WriteLine("- nonMergeTerm: [" + rebuiltTerm + "]");
        }
        // private methods
        // Returns true when the core merged word of mergeObj passes both checks:
        //   1. the Word2Vec object from cSpellApi.GetWord2VecOm() has a word
        //      vector for it, and
        //   2. its word count, looked up via WordCountScore.GetWc in the map from
        //      cSpellApi.GetWordWcMap(), is at least
        //      cSpellApi.GetCanRwMergeCandMinWc().
        // Returns false otherwise.
        private static bool IsValidMergeCand(MergeObj mergeObj, CSpellApi cSpellApi)
        {
            // collect everything needed from the API and the merge object first
            WordWcMap wcMap    = cSpellApi.GetWordWcMap();
            Word2Vec  vectors  = cSpellApi.GetWord2VecOm();
            string    coreWord = mergeObj.GetCoreMergeWord();
            int       minWc    = cSpellApi.GetCanRwMergeCandMinWc();

            // no word vector: reject without looking up the word count
            if (!vectors.HasWordVec(coreWord))
            {
                return false;
            }

            return WordCountScore.GetWc(coreWord, wcMap) >= minWc;
        }
Exemple #3
0
        // Builds a MergeObj from the given merge description and adds it to
        // mergeSet, but only when the core form of mergeWord (mergeWord passed
        // through TermUtil.StripEndPuncSpace) is a word in suggestDic and is not
        // a word in aADic. Otherwise mergeSet is left untouched.
        //
        // tarWord, orgMergeWord, mergeWord, mergeNo and the index/pos values are
        // passed to the MergeObj constructor unchanged.
        private static void AddMergeObj(string tarWord, string orgMergeWord, string mergeWord, int mergeNo, int startIndex, int tarIndex, int endIndex, int startPos, int tarPos, int endPos, HashSet <MergeObj> mergeSet, RootDictionary suggestDic, RootDictionary aADic)
        {
            // 1. convert merged word to coreTerm
            // only take care of the end punctuation for the coreTerm
            string coreStr = TermUtil.StripEndPuncSpace(mergeWord);

            // 2. check if the coreStr of mergeWord is in the suggest dictionary
            // and is not an Aa (a word in aADic): no merge is assumed for Aa
            // because Aa is short enough
            if (suggestDic.IsDicWord(coreStr) && !aADic.IsDicWord(coreStr))
            {
                MergeObj mergeObj = new MergeObj(tarWord, orgMergeWord, mergeWord, coreStr, mergeNo, startIndex, tarIndex, endIndex, startPos, tarPos, endPos);
                mergeSet.Add(mergeObj);
            }
        }
Exemple #4
0
        // Get the simulated original (non-merged) term: the token strings of
        // nonSpaceTextList from mergeObj.GetStartPos() through
        // mergeObj.GetEndPos() (inclusive), joined with GlobalVars.SPACE_STR.
        //
        // Best-effort on bad input:
        //   - returns "" when mergeObj or nonSpaceTextList is null;
        //   - returns "" when the start pos is outside the list (including an
        //     empty list), instead of throwing on the first lookup;
        //   - stops at the last token of the list when the end pos runs past it.
        public static string GetNonMergeTerm(MergeObj mergeObj, List <TokenObj> nonSpaceTextList)
        {
            if ((mergeObj == null) || (nonSpaceTextList == null))
            {
                return("");
            }

            int startPos = mergeObj.GetStartPos();
            int endPos   = mergeObj.GetEndPos();

            // illegal start index: nothing can be rebuilt
            if ((startPos < 0) || (startPos >= nonSpaceTextList.Count))
            {
                return("");
            }

            string nonMergeTerm = nonSpaceTextList[startPos].GetTokenStr();

            // i starts at startPos + 1 >= 1, so only the upper bound can be illegal
            for (int i = startPos + 1; (i <= endPos) && (i < nonSpaceTextList.Count); i++)
            {
                nonMergeTerm += GlobalVars.SPACE_STR + nonSpaceTextList[i].GetTokenStr();
            }
            return(nonMergeTerm);
        }