/// <summary>
/// Encodes <paramref name="InputString"/> with the phonetic algorithm selected by
/// <paramref name="PhoneticType"/> and returns the encoder's output.
/// </summary>
/// <param name="PhoneticType">
/// Algorithm selector: 0 = Soundex, 1 = RefinedSoundex, 2 = NYSIIS, 3 = DaitchMokotoff,
/// 4 = Metaphone, 6 = ColognePhonetic. SQL NULL or any other value falls back to Soundex.
/// </param>
/// <param name="InputString">
/// Text to encode. SQL NULL, empty, or whitespace-only input is replaced by a single space
/// before it is handed to the encoder.
/// </param>
/// <returns>The value produced by the encoder's ReadOutput() after Iterate() has run.</returns>
public static SqlString LongPhonetic(SqlInt16 PhoneticType, SqlString InputString)
{
    // Encoders always receive a non-empty string; blank input is normalised to one space.
    string text;
    if (InputString.IsNull || InputString.Value.Trim() == String.Empty)
    {
        text = " ";
    }
    else
    {
        text = InputString.Value;
    }

    // SqlInt16.Value throws SqlNullValueException on NULL, so a NULL selector is
    // mapped to 0 (Soundex), the same algorithm used for unrecognised selectors.
    short selector = PhoneticType.IsNull ? (short)0 : PhoneticType.Value;

    BasePhonetics encoder;
    switch (selector)
    {
        case 1:
            encoder = new RefinedSoundex(text);
            break;
        case 2:
            encoder = new NYSIIS(text);
            break;
        case 3:
            encoder = new DaitchMokotoff(text);
            break;
        case 4:
            encoder = new Metaphone(text);
            break;
        case 6:
            encoder = new ColognePhonetic(text);
            break;
        default:
            // Covers 0 as well as every unknown selector value.
            encoder = new Soundex(text);
            break;
    }

    encoder.Iterate();
    return encoder.ReadOutput();
}
// Core validity check for real-word 1-to-1 candidates.
// Real-word candidates are held to stricter rules than non-word candidates.
// TODO (carried over from the original notes): organize this code; the check
// arguably belongs in the ranking step.
//
// A candidate is accepted only when BOTH stages pass:
//   1. Eligibility: cand is in the suggestion dictionary, has a word vector,
//      meets the configured minimum length and minimum word count, and is not
//      an inflectional variant of the (lower-cased) input word.
//   2. Similarity: either both phonetic distances are 0 (they sound the same),
//      or the combined orthographic/phonetic distances are below fixed limits.
private static bool IsValid1To1Cand(string inWord, string cand, CSpellApi cSpellApi)
{
    RootDictionary suggestionDictionary = cSpellApi.GetSuggestDic();
    Word2Vec wordVectors = cSpellApi.GetWord2VecOm();
    WordWcMap wordCounts = cSpellApi.GetWordWcMap();
    int minWordCount = cSpellApi.GetCanRw1To1CandMinWc();
    int minLength = cSpellApi.GetCanRw1To1CandMinLength();
    string loweredInput = inWord.ToLower();

    // Stage 1: dictionary, word-vector, length, frequency and inflection gates.
    // (A length-difference gate of +/-1 existed in the original as disabled code.)
    bool isEligible = suggestionDictionary.IsDicWord(cand)
        && wordVectors.HasWordVec(cand)
        && (cand.Length >= minLength)
        && (WordCountScore.GetWc(cand, wordCounts) >= minWordCount)
        && !InflVarsUtil.IsInflectionVar(loweredInput, cand);
    if (!isEligible)
    {
        return false;
    }

    // Stage 2: phonetic and orthographic distances between input and candidate.
    int metaphoneDist = Metaphone2.GetDistance(loweredInput, cand);
    int refinedSoundexDist = RefinedSoundex.GetDistance(loweredInput, cand);
    int leadCharDist = GetLeadCharDist(loweredInput, cand);
    int endCharDist = GetEndCharDist(loweredInput, cand);
    int lengthDist = GetLengthDist(loweredInput, cand);
    int shapeAndSoundDist = leadCharDist + endCharDist + lengthDist
        + metaphoneDist + refinedSoundexDist;
    int editDist = EditDistance.GetDistanceForRealWord(loweredInput, cand);
    int editAndSoundDist = editDist + metaphoneDist + refinedSoundexDist;

    // Both phonetic encodings agree exactly: the words sound the same.
    bool soundsIdentical = (metaphoneDist == 0) && (refinedSoundexDist == 0);

    // Sounds similar and is spelled similarly. The limits 3 and 4 were fixed
    // from empirical tests and are not configurable. The product test requires
    // at least one of the two phonetic distances to be 0.
    bool soundsAndLooksSimilar = (shapeAndSoundDist < 3)
        && (editAndSoundDist < 4)
        && (metaphoneDist * refinedSoundexDist == 0);

    return soundsIdentical || soundsAndLooksSimilar;
}
// Test helper: checks whether str2 appears among the candidates generated for
// the lower-cased str1, updates the running counters, and prints one
// pipe-delimited report line to the console.
//
// Output columns: hit flag | totalFpNo_ | totalFpStrNo_ | str1 | str2 |
// real-word edit distance | RefinedSoundex detail | Metaphone2 detail.
private static void TestFpStr(string str1, string str2, CSpellApi cSpellApi)
{
    HashSet<string> candidates = GetCandidates(str1.ToLower(), cSpellApi);
    bool isCandidate = candidates.Contains(str2);

    // totalFpNo_ counts pairs where str2 was produced as a candidate;
    // totalFpStrNo_ counts every pair tested.
    if (isCandidate)
    {
        totalFpNo_++;
    }
    totalFpStrNo_++;

    // NOTE(review): the meaning of the trailing 10 passed to
    // Metaphone2.GetDistanceDetailStr is not visible here - confirm.
    string reportLine = string.Join("|",
        isCandidate,
        totalFpNo_,
        totalFpStrNo_,
        str1,
        str2,
        EditDistance.GetDistanceForRealWord(str1, str2),
        RefinedSoundex.GetDistanceDetailStr(str1, str2),
        Metaphone2.GetDistanceDetailStr(str1, str2, 10));
    Console.WriteLine(reportLine);
}