/// <summary>
/// Adds a merge candidate to <paramref name="mergeSet"/> when the merged word,
/// after stripping its ending punctuation/space, is found in the suggestion
/// dictionary and is not found in the Aa dictionary.
/// </summary>
/// <param name="tarWord"> target word; passed through to the new MergeObj </param>
/// <param name="orgMergeWord"> original merged word; passed through to the new MergeObj </param>
/// <param name="mergeWord"> merged word whose core term is looked up in the dictionaries </param>
/// <param name="mergeNo"> merge number; passed through to the new MergeObj </param>
/// <param name="startIndex"> start index; passed through to the new MergeObj </param>
/// <param name="tarIndex"> target index; passed through to the new MergeObj </param>
/// <param name="endIndex"> end index; passed through to the new MergeObj </param>
/// <param name="startPos"> start position; passed through to the new MergeObj </param>
/// <param name="tarPos"> target position; passed through to the new MergeObj </param>
/// <param name="endPos"> end position; passed through to the new MergeObj </param>
/// <param name="mergeSet"> set that receives the candidate when it qualifies </param>
/// <param name="suggestDic"> dictionary the core term must be in </param>
/// <param name="aADic"> Aa dictionary the core term must not be in
/// (presumably abbreviations/acronyms - confirm against the dictionary setup) </param>
private static void AddMergeObj(string tarWord, string orgMergeWord,
    string mergeWord, int mergeNo, int startIndex, int tarIndex,
    int endIndex, int startPos, int tarPos, int endPos,
    HashSet<MergeObj> mergeSet, RootDictionary suggestDic,
    RootDictionary aADic)
{
    // 1. convert the merged word to its core term:
    // only the ending punctuation/space is stripped
    string coreStr = TermUtil.StripEndPuncSpace(mergeWord);

    // 2. keep the candidate only if the core term is a suggestion word
    // and is not an Aa; Aa entries are assumed to be short enough that
    // they are never the result of a merge
    if (!suggestDic.IsDicWord(coreStr) || aADic.IsDicWord(coreStr))
    {
        return;
    }

    mergeSet.Add(new MergeObj(tarWord, orgMergeWord, mergeWord, coreStr,
        mergeNo, startIndex, tarIndex, endIndex, startPos, tarPos, endPos));
}
// public method
/// <summary>
/// The core method to correct a word by merging, using the following steps:
/// <list type="bullet">
/// <item><description>Convert the target token to a core term by stripping
/// its ending punctuation/space and lowercasing it</description></item>
/// <item><description>Detect if it is a misspelling (OOV) - non-word,
/// exclude Aa</description></item>
/// <item><description>Get candidates from merge</description></item>
/// <item><description>Rank candidates by frequency or context
/// (orthographic ranking is not used for merges)</description></item>
/// <item><description>Update information (the detect count on
/// <paramref name="cSpellApi"/>)</description></item>
/// </list>
/// </summary>
/// <param name="tarPos"> position of target token </param>
/// <param name="nonSpaceTokenList"> token list without space token(s) </param>
/// <param name="cSpellApi"> CSpell Api object </param>
/// <param name="debugFlag"> flag for debug print </param>
/// <returns> the corrected merged word in MergeObj if the token is OOV
/// and a suggested merged word is found.
/// Otherwise, a null MergeObj is returned, meaning no merge. </returns>
public static MergeObj GetCorrectTerm(int tarPos,
    List<TokenObj> nonSpaceTokenList, CSpellApi cSpellApi, bool debugFlag)
{
    // get tarWord from the target token
    TokenObj tarTokenObj = nonSpaceTokenList[tarPos];
    string tarWord = tarTokenObj.GetTokenStr();

    // 1. only remove ending punctuation for the core term.
    // Lowercase with the invariant culture so the lookup key does not
    // depend on the current thread culture (e.g. Turkish dotless i).
    string coreStr = TermUtil.StripEndPuncSpace(tarWord).ToLowerInvariant();

    // 2. non-word detection: nothing to merge when the word is not detected
    if (!NonWordMergeDetector.IsDetect(tarWord, coreStr, cSpellApi, debugFlag))
    {
        return null;
    }

    cSpellApi.UpdateDetectNo();

    // 3. get candidates from merge
    HashSet<MergeObj> mergeSet = NonWordMergeCandidates.GetCandidates(
        tarPos, nonSpaceTokenList, cSpellApi);

    // 4. ranking: take the top ranked candidate as the corrected term,
    // using frequency or context only (no orthographic score)
    return RankNonWordMergeByMode.GetTopRankMergeObj(mergeSet, cSpellApi,
        tarPos, nonSpaceTokenList, debugFlag);
}