// public methods
// private methods
/// <summary>
/// Ad-hoc console driver: builds a fixed MergeObj for the sample sentence
/// "He is dia gnosed last week.", prints it, then prints the term that
/// GetNonMergeTerm rebuilds from the non-space tokens of that sentence.
/// </summary>
private static void Test()
{
    // Sample sentence in which "diagnosed" appears split as "dia gnosed".
    const string sampleText = "He is dia gnosed last week.";

    // The *Index values are used with the full token list and the *Pos values
    // with the non-space token list (presumably; GetNonMergeTerm only shows
    // the latter) - TODO confirm against MergeObj.
    MergeObj sampleMerge = new MergeObj(
        "gnosed",      // target term
        "dia gnosed",  // original words before the merge
        "diagnosed.",  // suggested merged term
        "diagnosed",   // core of the suggested merged term
        1,             // total no of merged tokens
        4,             // start index of merge
        6,             // target index
        6,             // end index of merge
        2,             // start pos of merge
        3,             // target pos
        3);            // end pos of merge

    List<TokenObj> allTokens = TextObj.TextToTokenList(sampleText);
    List<TokenObj> nonSpaceTokens = TextObj.GetNonSpaceTokenObjList(allTokens);

    Console.WriteLine("------ Merge Obj -------");
    Console.WriteLine(sampleMerge.ToString());

    Console.WriteLine("------ Non Merge Term -------");
    string rebuiltTerm = GetNonMergeTerm(sampleMerge, nonSpaceTokens);
    Console.WriteLine("- inText: [" + sampleText + "]");
    Console.WriteLine("- nonMergeTerm: [" + rebuiltTerm + "]");
}
// private methods
/// <summary>
/// Decides whether a merge candidate is acceptable: the core merged word
/// must have a word vector in the Word2Vec model returned by
/// cSpellApi.GetWord2VecOm() and its word count must reach the minimum
/// returned by cSpellApi.GetCanRwMergeCandMinWc().
/// </summary>
/// <param name="mergeObj">Candidate whose core merged word is checked.</param>
/// <param name="cSpellApi">Supplies the word-count map, the Word2Vec model and the minimum word count.</param>
/// <returns>True when both conditions hold; otherwise false.</returns>
private static bool IsValidMergeCand(MergeObj mergeObj, CSpellApi cSpellApi)
{
    WordWcMap wcMap = cSpellApi.GetWordWcMap();
    Word2Vec wordVectors = cSpellApi.GetWord2VecOm();
    string coreTerm = mergeObj.GetCoreMergeWord();
    int minWordCount = cSpellApi.GetCanRwMergeCandMinWc();

    // No word vector: reject without looking up the word count.
    if (!wordVectors.HasWordVec(coreTerm))
    {
        return false;
    }

    return WordCountScore.GetWc(coreTerm, wcMap) >= minWordCount;
}
/// <summary>
/// Builds a MergeObj for the suggested merged word and adds it to
/// <paramref name="mergeSet"/>, but only when the core form of the merged
/// word is a word in <paramref name="suggestDic"/> and is not a word in
/// <paramref name="aADic"/>.
/// </summary>
/// <param name="tarWord">Target term.</param>
/// <param name="orgMergeWord">Original words before the merge.</param>
/// <param name="mergeWord">Suggested merged term.</param>
/// <param name="mergeNo">Total number of merged tokens.</param>
/// <param name="startIndex">Start index of the merge.</param>
/// <param name="tarIndex">Target index.</param>
/// <param name="endIndex">End index of the merge.</param>
/// <param name="startPos">Start position of the merge.</param>
/// <param name="tarPos">Target position.</param>
/// <param name="endPos">End position of the merge.</param>
/// <param name="mergeSet">Set that receives the new MergeObj when it qualifies.</param>
/// <param name="suggestDic">Dictionary the core merged word must belong to.</param>
/// <param name="aADic">Dictionary the core merged word must NOT belong to (presumably abbreviations/acronyms - TODO confirm).</param>
private static void AddMergeObj(string tarWord, string orgMergeWord, string mergeWord, int mergeNo, int startIndex, int tarIndex, int endIndex, int startPos, int tarPos, int endPos, HashSet<MergeObj> mergeSet, RootDictionary suggestDic, RootDictionary aADic)
{
    // 1. convert the merged word to its core term; only the end
    //    punctuation/space is handled here (via TermUtil.StripEndPuncSpace)
    string coreStr = TermUtil.StripEndPuncSpace(mergeWord);

    // 2. keep the candidate only if its core term is in the suggestion
    //    dictionary and is not an Aa entry: no merge is assumed for Aa
    //    because Aa is short enough
    if (suggestDic.IsDicWord(coreStr) && !aADic.IsDicWord(coreStr))
    {
        MergeObj mergeObj = new MergeObj(tarWord, orgMergeWord, mergeWord, coreStr, mergeNo, startIndex, tarIndex, endIndex, startPos, tarPos, endPos);
        mergeSet.Add(mergeObj);
    }
}
/// <summary>
/// Gets the simulated original (non-merged) term by joining the non-space
/// tokens from the merge start position through the merge end position
/// with GlobalVars.SPACE_STR.
/// </summary>
/// <param name="mergeObj">Merge whose start and end positions select the tokens.</param>
/// <param name="nonSpaceTextList">Token list indexed by those positions.</param>
/// <returns>
/// The joined token strings. An empty string when either argument is null
/// or the start position is outside the list. Tokens past the end of the
/// list are ignored.
/// </returns>
public static string GetNonMergeTerm(MergeObj mergeObj, List<TokenObj> nonSpaceTextList)
{
    if ((mergeObj == null) || (nonSpaceTextList == null))
    {
        return "";
    }

    int startPos = mergeObj.GetStartPos();
    int endPos = mergeObj.GetEndPos();

    // Illegal start position: treat it like any other illegal index instead
    // of letting the list indexer throw ArgumentOutOfRangeException.
    if ((startPos < 0) || (startPos >= nonSpaceTextList.Count))
    {
        return "";
    }

    string nonMergeTerm = nonSpaceTextList[startPos].GetTokenStr();

    // startPos is known to be >= 0 here, so only the upper bound can be violated.
    for (int i = startPos + 1; (i <= endPos) && (i < nonSpaceTextList.Count); i++)
    {
        nonMergeTerm += GlobalVars.SPACE_STR + nonSpaceTextList[i].GetTokenStr();
    }

    return nonMergeTerm;
}