示例#1
0
        // public method
        // TBD, dummy, use MergeCandidates.java.new as reference
        /// <summary>
        /// Runs the "NonWord-Merge" step over a token list: scans the tokens for
        /// merge corrections, collects them, and then applies all of them at once.
        /// </summary>
        /// <param name="inTokenList">Tokens to scan. Their position info is refreshed through TextObj.UpdateIndexPos before scanning.</param>
        /// <param name="cSpellApi">Supplies the max legit token length and is forwarded to the corrector calls.</param>
        /// <param name="debugFlag">Forwarded to the debug printers and to the corrector calls.</param>
        /// <returns>The token list produced by MergeCorrector.CorrectTokenListByMerge.</returns>
        public static List <TokenObj> Process(List <TokenObj> inTokenList, CSpellApi cSpellApi, bool debugFlag)
        {
            DebugPrint.PrintProcess("2. NonWord-Merge", debugFlag);
            DebugPrint.PrintInText(TextObj.TokenListToText(inTokenList), debugFlag);

            // Pre-process: refresh the position of every input token, then build
            // the non-space token list that the merge corrector searches in.
            TextObj.UpdateIndexPos(inTokenList);
            List<TokenObj> nonSpaceTokens = TextObj.GetNonSpaceTokenObjList(inTokenList);

            int legitLengthLimit = cSpellApi.GetMaxLegitTokenLength();
            List<MergeObj> merges = new List<MergeObj>();

            // Walk the input tokens and collect merge corrections. The cursor
            // either advances by one or jumps past the end of a found merge.
            int cursor = 0;
            while (cursor < inTokenList.Count)
            {
                TokenObj token = inTokenList[cursor];

                // SCR-3: only legit tokens are candidates for a merge
                if (!token.IsLegitToken(legitLengthLimit))
                {
                    cursor++;
                    continue;
                }

                // The target position is the token's own position value
                MergeObj merge = NonWordMergeCorrector.GetCorrectTerm(token.GetPos(), nonSpaceTokens, cSpellApi, debugFlag);
                if (merge == null)
                {
                    // no merge correction for this token
                    cursor++;
                    continue;
                }

                merges.Add(merge);
                // Resume right after the merge's end index so that two merges
                // can never overlap.
                cursor = merge.GetEndIndex() + 1;
            }

            // The corrections are applied only after the scan has finished,
            // because a merge may involve a token before the current one.
            return MergeCorrector.CorrectTokenListByMerge(inTokenList, merges, TokenObj.HIST_NW_M, debugFlag, cSpellApi);
        }
示例#2
0
        /// <summary>
        /// Manual smoke test for NonWordMergeCorrector.GetCorrectTerm: tokenizes a
        /// fixed sample sentence, asks for the merge correction at target
        /// position 0, and prints the input and the result to the console.
        /// </summary>
        /// <param name="cSpellApi">API object forwarded to GetCorrectTerm.</param>
        private static void TestGetCorrectTerm(CSpellApi cSpellApi)
        {
            // init: sample text containing a split word ("Dur ing")
            string          inText      = "Dur ing my absent.";
            bool            debugFlag   = false;
            List <TokenObj> inTokenList = TextObj.TextToTokenList(inText);
            // 1. convert to the non-space token list
            List <TokenObj> nonSpaceTokenList = TextObj.GetNonSpaceTokenObjList(inTokenList);
            // result
            int      tarPos   = 0;
            MergeObj mergeObj = NonWordMergeCorrector.GetCorrectTerm(tarPos, nonSpaceTokenList, cSpellApi, debugFlag);

            // GetCorrectTerm returns null when there is no merge correction;
            // print a placeholder in that case instead of dereferencing null.
            string mergeText = (mergeObj == null) ? "null" : mergeObj.ToString();

            // print out
            Console.WriteLine("--------- GetCorrectTerm( ) -----------");
            Console.WriteLine("In: [" + inText + "]");
            Console.WriteLine("In nonSpaceTokenList: [" + nonSpaceTokenList.Count + "]");
            Console.WriteLine("Out MergeObj: [" + mergeText + "]");
        }