/// <summary>
        /// Decides whether <paramref name="inWord"/> qualifies as a real word for
        /// real-word split detection.
        /// </summary>
        /// <remarks>
        /// All of the following must hold, evaluated in this order with
        /// short-circuiting:
        /// 1. the word is valid in the check dictionary;
        /// 2. the word is not a real-word exception (e.g. url, email, digit),
        ///    since exceptions are never corrected;
        /// 3. the word length reaches the configured minimum length;
        /// 4. the word has a word2vec entry;
        /// 5. the word count reaches the configured minimum word count.
        /// Per the original notes, the word2vec and word-count lookups lowercase
        /// the word themselves (not verifiable from this method alone).
        /// </remarks>
        /// <param name="inWord">The token to test.</param>
        /// <param name="cSpellApi">Source of the dictionaries, word2vec model,
        /// word-count map and thresholds.</param>
        /// <param name="debugFlag">When true, every individual check is
        /// re-evaluated and handed to <c>DebugPrint.PrintRwSplitDetect</c>.</param>
        /// <returns>True when every condition above is satisfied.</returns>
        public static bool IsRealWord(string inWord, CSpellApi cSpellApi, bool debugFlag)
        {
            // Resources pulled from the API object.
            RootDictionary dictionary = cSpellApi.GetCheckDic();
            RootDictionary units      = cSpellApi.GetUnitDic();
            WordWcMap      wcMap      = cSpellApi.GetWordWcMap();
            Word2Vec       embeddings = cSpellApi.GetWord2VecOm();
            int            length     = inWord.Length;
            // TBD: the getter names below should be changed, and the thresholds
            // should be configurable (200 was noted as a value).
            int minLength = cSpellApi.GetDetectorRwSplitWordMinLength();
            int minWc     = cSpellApi.GetDetectorRwSplitWordMinWc();

            // Build the verdict one condition at a time; once it turns false the
            // remaining checks are skipped thanks to && short-circuiting.
            bool isRealWord = dictionary.IsValidWord(inWord);
            isRealWord = isRealWord && !IsRealWordExceptions(inWord, units);
            isRealWord = isRealWord && (length >= minLength);
            isRealWord = isRealWord && embeddings.HasWordVec(inWord);
            isRealWord = isRealWord && (WordCountScore.GetWc(inWord, wcMap) >= minWc);

            if (!debugFlag)
            {
                return isRealWord;
            }

            // Debug mode: report each check individually (arguments are
            // evaluated left to right, so every check runs unconditionally).
            DebugPrint.PrintRwSplitDetect(
                inWord,
                isRealWord,
                dictionary.IsValidWord(inWord),                         // in dictionary
                IsRealWordExceptions(inWord, units),                    // is exception
                length >= minLength,                                    // long enough
                embeddings.HasWordVec(inWord),                          // has word2vec
                WordCountScore.GetWc(inWord, wcMap) >= minWc,           // frequent enough
                debugFlag);
            return isRealWord;
        }
        // Example no. 2
        // 0
        /// <summary>
        /// Decides whether <paramref name="inWord"/> is a non-word: it is not
        /// valid in the check dictionary and it is not a non-word exception
        /// (e.g. url, email, digit). An exception is never treated as a
        /// misspelling even when the dictionary does not know it.
        /// </summary>
        /// <param name="inWord">The token to test.</param>
        /// <param name="cSpellApi">Source of the check and unit dictionaries.</param>
        /// <param name="debugFlag">When true, both checks are re-evaluated and
        /// handed to <c>DebugPrint.PrintNwDetect</c>.</param>
        /// <returns>True when the word is neither a dictionary word nor an
        /// exception.</returns>
        public static bool IsNonWord(string inWord, CSpellApi cSpellApi, bool debugFlag)
        {
            RootDictionary dictionary = cSpellApi.GetCheckDic();
            RootDictionary units      = cSpellApi.GetUnitDic();

            // A non-word is neither known nor an exception. The exception check
            // is skipped when the dictionary already recognizes the word.
            bool isNonWord = !(dictionary.IsValidWord(inWord) || IsNonWordExceptions(inWord, units));

            if (debugFlag)
            {
                // Report each check individually; arguments run left to right.
                DebugPrint.PrintNwDetect(
                    inWord,
                    isNonWord,
                    dictionary.IsValidWord(inWord),
                    IsNonWordExceptions(inWord, units),
                    debugFlag);
            }

            return isNonWord;
        }
        /// <summary>
        /// Decides whether <paramref name="inWord"/> qualifies as a real word for
        /// real-word merge detection: it must be valid in the split-word (merge)
        /// dictionary and must not be a real-word exception (e.g. url, email,
        /// digit).
        /// </summary>
        /// <param name="inWord">The token to test.</param>
        /// <param name="cSpellApi">Source of the split-word and unit dictionaries.</param>
        /// <param name="debugFlag">When true, both checks are re-evaluated and
        /// handed to <c>DebugPrint.PrintRwMergeDetect</c>.</param>
        /// <returns>True when the word is in the merge dictionary and is not an
        /// exception.</returns>
        private static bool IsRealWordMerge(string inWord, CSpellApi cSpellApi, bool debugFlag)
        {
            RootDictionary mergeDic = cSpellApi.GetSplitWordDic();
            RootDictionary units    = cSpellApi.GetUnitDic();

            // The exception check only runs for words the merge dictionary knows.
            bool isMergeWord = false;
            if (mergeDic.IsValidWord(inWord))
            {
                isMergeWord = !IsRealWordExceptions(inWord, units);
            }

            if (debugFlag)
            {
                // Report each check individually; arguments run left to right.
                DebugPrint.PrintRwMergeDetect(
                    inWord,
                    isMergeWord,
                    mergeDic.IsValidWord(inWord),
                    IsRealWordExceptions(inWord, units),
                    debugFlag);
            }

            return isMergeWord;
        }