Exemplo n.º 1
0
        /// <summary>
        /// Manual smoke check for the two-argument <c>NonWordCorrector.GetCorrectTerm</c>:
        /// wraps a fixed sample word in a token, runs it through the corrector and
        /// prints the input and the resulting token string to the console.
        /// </summary>
        /// <param name="cSpellApi">API object handed straight to the corrector.</param>
        private static void TestGetCorrectTerm(CSpellApi cSpellApi)
        {
            // fixed sample input, all lower case
            string sampleWord = "hotflashes";

            // run the corrector on a token built from the sample word
            TokenObj correctedToken = NonWordCorrector.GetCorrectTerm(new TokenObj(sampleWord), cSpellApi);

            // read the result before printing so nothing is written if the lookup fails
            string correctedWord = correctedToken.GetTokenStr();

            // report
            Console.WriteLine("--------- GetCorrectTerm( ) -----------");
            Console.WriteLine("In: [" + sampleWord + "]");
            Console.WriteLine("Out: [" + correctedWord + "]");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Core non-word spell-correction pass (1-to-1 and split corrections) over a
        /// whole text, implemented as a single loop over the input tokens.
        /// </summary>
        /// <param name="inTokenList">
        /// Tokens of the whole text. Every token is visited in order, so tokens that
        /// are not corrected (those failing the legit-token check) are carried over
        /// to the output as they are.
        /// </param>
        /// <param name="cSpellApi">
        /// Supplies the maximum legit token length and is forwarded to the corrector.
        /// </param>
        /// <param name="debugFlag">Forwarded to the debug printers and to the corrector.</param>
        /// <returns>A new token list holding the (possibly corrected) tokens.</returns>
        public static List <TokenObj> Process(List <TokenObj> inTokenList, CSpellApi cSpellApi, bool debugFlag)
        {
            DebugPrint.PrintProcess("3-4. NonWord-Split & 1To1", debugFlag);
            DebugPrint.PrintInText(TextObj.TokenListToText(inTokenList), debugFlag);

            // space-free view of the text, handed to the corrector together with a position
            List <TokenObj> contextTokenList = TextObj.GetNonSpaceTokenObjList(inTokenList);
            int             legitLengthLimit = cSpellApi.GetMaxLegitTokenLength();

            List <TokenObj> correctedTokenList = new List <TokenObj>();

            // Position passed to the corrector alongside contextTokenList; it only
            // advances for tokens that pass the legit-token check.
            // NOTE(review): contextTokenList is built by a different filter (non-space)
            // than the one that advances this index (legit token) - confirm the two
            // stay aligned for non-space tokens that fail the legit check.
            int contextPos = 0;

            foreach (TokenObj currentToken in inTokenList)
            {
                // by default the token is passed through untouched
                TokenObj resultToken = currentToken;

                // SCR-3: only legit tokens are sent to the corrector
                if (currentToken.IsLegitToken(legitLengthLimit))
                {
                    // the corrected term is the highest ranked candidate
                    resultToken = NonWordCorrector.GetCorrectTerm(currentToken, cSpellApi, debugFlag, contextPos, contextTokenList);
                    contextPos++;
                }

                // appended through the split-aware helper because a correction might be a split
                Split1To1Corrector.AddSplit1To1Correction(correctedTokenList, resultToken);
            }

            return correctedTokenList;
        }