// Core process for detecting a real-word 1-to-1 candidate.
//
// Returns true only when inWord passes every check below. The dictionaries,
// word-count map, word2Vec model and thresholds are all read from cSpellApi.
//   1. it is a valid word in the check dictionary
//   2. it is not a real-word exception (original note: such as url, email,
//      digit, ...) => if it is an exception, no correction is attempted
//   3. its length is at least the configured rw1To1 minimum word length
//   4. it has a word2Vec value (original note: inWord is auto converted
//      to lower case by the callee)
//   5. its word count is at least the configured rw1To1 minimum word count
//      (original note: inWord is auto converted to lower case by the callee)
// Original TBD note: thresholds need to be configurable (3 and 65 mentioned).
//
// When debugFlag is true, each check is evaluated again on its own and the
// individual results are handed to DebugPrint.PrintRw1To1Detect.
public static bool IsRealWord(string inWord, CSpellApi cSpellApi,
    bool debugFlag)
{
    // init: resources and thresholds supplied by the API object
    RootDictionary checkDic = cSpellApi.GetCheckDic();
    RootDictionary pnDic = cSpellApi.GetPnDic();
    RootDictionary aaDic = cSpellApi.GetAaDic();
    RootDictionary unitDic = cSpellApi.GetUnitDic();
    WordWcMap wordWcMap = cSpellApi.GetWordWcMap();
    Word2Vec word2VecOm = cSpellApi.GetWord2VecOm();
    int rw1To1WordMinLength = cSpellApi.GetDetectorRw1To1WordMinLength();
    int rw1To1WordMinWc = cSpellApi.GetDetectorRw1To1WordMinWc();
    int inWordLen = inWord.Length;

    // The && chain short-circuits: later checks are skipped as soon as
    // an earlier one fails, so the order of the operands is kept as is.
    bool realWordFlag = checkDic.IsValidWord(inWord)
        && !IsRealWordExceptions(inWord, pnDic, aaDic, unitDic)
        && (inWordLen >= rw1To1WordMinLength)
        && word2VecOm.HasWordVec(inWord)
        && (WordCountScore.GetWc(inWord, wordWcMap) >= rw1To1WordMinWc);

    if (debugFlag)
    {
        // Evaluate every check independently (no short-circuit) so the
        // debug output shows the result of each individual criterion.
        bool wordInDicFlag = checkDic.IsValidWord(inWord);
        bool wordExceptionFlag
            = IsRealWordExceptions(inWord, pnDic, aaDic, unitDic);
        bool lengthFlag = (inWordLen >= rw1To1WordMinLength);
        bool word2VecFlag = word2VecOm.HasWordVec(inWord);
        bool wcFlag
            = (WordCountScore.GetWc(inWord, wordWcMap) >= rw1To1WordMinWc);
        DebugPrint.PrintRw1To1Detect(inWord, realWordFlag, wordInDicFlag,
            wordExceptionFlag, lengthFlag, word2VecFlag, wcFlag, debugFlag);
    }

    return realWordFlag;
}