// public method
// TBD, dummy, use MergeCandidates.java.new as reference
/// <summary>
/// Runs the "NonWord-Merge" step: collects merge corrections for the tokens
/// of <paramref name="inTokenList"/> and returns the token list produced by
/// applying them.
/// </summary>
/// <param name="inTokenList">Tokens of the input text; their positions are refreshed in place via <c>TextObj.UpdateIndexPos</c>.</param>
/// <param name="cSpellApi">Supplies the maximum legit token length and is forwarded to the correctors.</param>
/// <param name="debugFlag">Forwarded to the debug printers and the correctors.</param>
/// <returns>The list returned by <c>MergeCorrector.CorrectTokenListByMerge</c>.</returns>
public static List<TokenObj> Process(List<TokenObj> inTokenList, CSpellApi cSpellApi, bool debugFlag)
{
    DebugPrint.PrintProcess("2. NonWord-Merge", debugFlag);
    DebugPrint.PrintInText(TextObj.TokenListToText(inTokenList), debugFlag);

    // Pre-process: refresh the position information on the incoming tokens.
    TextObj.UpdateIndexPos(inTokenList);

    // Step 1: build the non-space view of the tokens that the merge lookup works on.
    List<TokenObj> nonSpaceTokens = TextObj.GetNonSpaceTokenObjList(inTokenList);

    // Step 2: walk the full token list and gather merge corrections.
    int maxLegitTokenLength = cSpellApi.GetMaxLegitTokenLength();
    List<MergeObj> merges = new List<MergeObj>();
    int cursor = 0;
    while (cursor < inTokenList.Count)
    {
        TokenObj token = inTokenList[cursor];

        // SCR-3: only legit tokens are candidates; anything else (e.g. a space token) is skipped.
        if (!token.IsLegitToken(maxLegitTokenLength))
        {
            cursor++;
            continue;
        }

        // Per the original note, the returned merge is the highest ranked candidate.
        MergeObj merge = NonWordMergeCorrector.GetCorrectTerm(token.GetPos(), nonSpaceTokens, cSpellApi, debugFlag);
        if (merge == null)
        {
            // No merge correction for this token.
            cursor++;
            continue;
        }

        merges.Add(merge);
        // Resume right after the merge's end token so that merges never overlap.
        cursor = merge.GetEndIndex() + 1;
    }

    // The output is rewritten only after the scan, because a merge may involve
    // a token that precedes the one that triggered it. The operation history
    // (TokenObj.HIST_NW_M) is passed along with the merges.
    return MergeCorrector.CorrectTokenListByMerge(inTokenList, merges, TokenObj.HIST_NW_M, debugFlag, cSpellApi);
}
/// <summary>
/// Manual driver: looks up the merge correction for the first non-space token
/// of a fixed sample sentence and prints the input, the non-space token count
/// and the resulting merge to the console.
/// </summary>
/// <param name="cSpellApi">Forwarded to <c>NonWordMergeCorrector.GetCorrectTerm</c>.</param>
private static void TestGetCorrectTerm(CSpellApi cSpellApi)
{
    // init: sample text containing a split word ("Dur ing")
    string inText = "Dur ing my absent.";
    bool debugFlag = false;
    List<TokenObj> inTokenList = TextObj.TextToTokenList(inText);

    // 1. convert to the non-space token list
    List<TokenObj> nonSpaceTokenList = TextObj.GetNonSpaceTokenObjList(inTokenList);

    // result
    int tarPos = 0;
    MergeObj mergeObj = NonWordMergeCorrector.GetCorrectTerm(tarPos, nonSpaceTokenList, cSpellApi, debugFlag);

    // GetCorrectTerm returns null when there is no merge correction (Process
    // handles that case explicitly), so do not dereference it blindly here.
    string mergeText = (mergeObj == null) ? "null" : mergeObj.ToString();

    // print out
    Console.WriteLine("--------- GetCorrectTerm( ) -----------");
    Console.WriteLine("In: [" + inText + "]");
    Console.WriteLine("In nonSpaceTokenList: [" + nonSpaceTokenList.Count + "]");
    Console.WriteLine("Out MergeObj: [" + mergeText + "]");
}