/// <summary>
/// Runs the real-word merge step ("5. RealWord-Merge") over a token list.
/// </summary>
/// <remarks>
/// Merge corrections are collected for the whole list first and applied afterwards,
/// because a merge might involve a token that precedes the one being examined.
/// </remarks>
/// <param name="inTokenList">
/// Tokens to process. Their positions are refreshed through
/// <c>TextObj.UpdateIndexPos</c> before the scan starts.
/// </param>
/// <param name="cSpellApi">
/// Supplies the maximum legit token length and is handed on to the merge
/// detection and correction calls.
/// </param>
/// <param name="debugFlag">Passed to the debug printing and corrector calls.</param>
/// <returns>The list produced by <c>MergeCorrector.CorrectTokenListByMerge</c>.</returns>
public static List<TokenObj> Process(List<TokenObj> inTokenList, CSpellApi cSpellApi, bool debugFlag)
{
    DebugPrint.PrintProcess("5. RealWord-Merge", debugFlag);
    DebugPrint.PrintInText(TextObj.TokenListToText(inTokenList), debugFlag);

    // Pre-process: update the position recorded on every token of the input list.
    TextObj.UpdateIndexPos(inTokenList);

    // Step 1: build the non-space token list that merge detection works on.
    List<TokenObj> nonSpaceTokens = TextObj.GetNonSpaceTokenObjList(inTokenList);

    // Step 2: walk the input list and gather merge corrections.
    List<MergeObj> merges = new List<MergeObj>();
    int maxLegitLength = cSpellApi.GetMaxLegitTokenLength();
    int cursor = 0;
    while (cursor < inTokenList.Count)
    {
        TokenObj token = inTokenList[cursor];

        // SCR-3: only legit tokens are considered; anything else
        // (e.g. a space token) is stepped over.
        if (!token.IsLegitToken(maxLegitLength))
        {
            cursor++;
            continue;
        }

        // The corrector is expected to return the highest ranked candidate,
        // or null when there is no merge correction for this token.
        MergeObj merge = RealWordMergeCorrector.GetCorrectTerm(
            token.GetPos(), nonSpaceTokens, cSpellApi, debugFlag);
        if (merge == null)
        {
            cursor++;
            continue;
        }

        merges.Add(merge);

        // Resume right after the merge's end token so that merges never overlap.
        cursor = merge.GetEndIndex() + 1;
    }

    // Step 3: apply all collected merges in one pass; this also updates the
    // operation info, tagging the changes with the real-word merge history marker.
    return MergeCorrector.CorrectTokenListByMerge(
        inTokenList, merges, TokenObj.HIST_RW_M, debugFlag, cSpellApi);
}