// Picks the top-ranked candidate for inStr using the rank mode configured in cSpellApi.
//
// inStr:             the input string being ranked against; returned unchanged when no
//                    candidate is selected.
// candidates:        the candidate strings to rank.
// cSpellApi:         supplies the rank mode, score weights, word-count map, word2Vec models
//                    and context settings read below.
// debugFlag:         forwarded to the ScoreDetailByMode print helpers.
// tarPos:            target position; starts from 0 and does not include empty space tokens.
// nonSpaceTokenList: token list forwarded to the context-based rankers.
//
// Returns inStr when candidates is empty or the rank mode matches none of the cases below;
// otherwise the string returned by the ranker selected by the rank mode.
public static string GetTopRankStr(string inStr, HashSet<string> candidates, CSpellApi cSpellApi, bool debugFlag, int tarPos, List<TokenObj> nonSpaceTokenList)
{
    // Read every setting from the API up front.
    int rankMode = cSpellApi.GetRankMode();
    double edDistWeight = cSpellApi.GetOrthoScoreEdDistFac();
    double phoneticWeight = cSpellApi.GetOrthoScorePhoneticFac();
    double overlapWeight = cSpellApi.GetOrthoScoreOverlapFac();
    WordWcMap wordWcMap = cSpellApi.GetWordWcMap();
    int maxCandNo = cSpellApi.GetCanMaxCandNo();
    Word2Vec word2VecIm = cSpellApi.GetWord2VecIm();
    Word2Vec word2VecOm = cSpellApi.GetWord2VecOm();
    int contextRadius = cSpellApi.GetNw1To1ContextRadius();
    bool word2VecSkipWord = cSpellApi.GetWord2VecSkipWord();
    double rangeFactor = cSpellApi.GetRankNwS1RankRangeFac();
    double nwS1MinOScore = cSpellApi.GetRankNwS1MinOScore();

    // Only for one-to-one or split; no merge here.
    int tarSize = 1;

    // Nothing to rank: keep the input string.
    if (candidates.Count <= 0)
    {
        return inStr;
    }

    // Rank by the configured mode, then print the matching score details.
    string best = inStr;
    switch (rankMode)
    {
        case CSpellApi.RANK_MODE_ORTHOGRAPHIC:
            best = RankByOrthographic.GetTopRankStr(inStr, candidates,
                edDistWeight, phoneticWeight, overlapWeight);
            ScoreDetailByMode.PrintOrthographicScore(inStr, candidates, maxCandNo,
                edDistWeight, phoneticWeight, overlapWeight, debugFlag);
            break;

        case CSpellApi.RANK_MODE_FREQUENCY:
            best = RankByFrequency.GetTopRankStr(candidates, wordWcMap);
            ScoreDetailByMode.PrintFrequencyScore(candidates, wordWcMap, maxCandNo, debugFlag);
            break;

        case CSpellApi.RANK_MODE_CONTEXT:
            best = RankByContext.GetTopRankStr(inStr, candidates, tarPos, tarSize,
                nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius);
            ScoreDetailByMode.PrintContextScore(candidates, tarPos, tarSize,
                nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius,
                maxCandNo, debugFlag);
            break;

        case CSpellApi.RANK_MODE_NOISY_CHANNEL:
            best = RankByNoisyChannel.GetTopRankStr(inStr, candidates, wordWcMap,
                edDistWeight, phoneticWeight, overlapWeight);
            ScoreDetailByMode.PrintNoisyChannelScore(inStr, candidates, wordWcMap, maxCandNo,
                edDistWeight, phoneticWeight, overlapWeight, debugFlag);
            break;

        case CSpellApi.RANK_MODE_ENSEMBLE:
            best = RankByEnsemble.GetTopRankStr(inStr, candidates, wordWcMap, tarPos, tarSize,
                nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius,
                rangeFactor, edDistWeight, phoneticWeight, overlapWeight);
            // Ensemble uses the same basic score as CSpell.
            ScoreDetailByMode.PrintCSpellScore(inStr, candidates, wordWcMap, maxCandNo,
                tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord,
                contextRadius, edDistWeight, phoneticWeight, overlapWeight, debugFlag);
            break;

        case CSpellApi.RANK_MODE_CSPELL:
            best = RankByCSpellNonWord.GetTopRankStr(inStr, candidates, wordWcMap, tarPos,
                tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord,
                contextRadius, rangeFactor, nwS1MinOScore,
                edDistWeight, phoneticWeight, overlapWeight);
            ScoreDetailByMode.PrintCSpellScore(inStr, candidates, wordWcMap, maxCandNo,
                tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord,
                contextRadius, edDistWeight, phoneticWeight, overlapWeight, debugFlag);
            break;
    }

    return best;
}
// Debug helper: prints the noisy-channel scores of the candidates in candSet for inStr.
// Does nothing unless debugFlag is true. The scores come from
// RankByNoisyChannel.GetCandidateScoreSet, are ordered with NoisyChannelScoreComparator,
// limited to the first maxCandNo entries, and each is printed via DebugPrint.PrintNScore.
public static void PrintNoisyChannelScore(string inStr, HashSet<string> candSet, WordWcMap wordWcMap, int maxCandNo, double wf1, double wf2, double wf3, bool debugFlag)
{
    if (!debugFlag)
    {
        return;
    }

    var comparator = new NoisyChannelScoreComparator<NoisyChannelScore>();
    HashSet<NoisyChannelScore> scores =
        RankByNoisyChannel.GetCandidateScoreSet(inStr, candSet, wordWcMap, wf1, wf2, wf3);

    // Build all output lines before printing any of them.
    List<string> lines = scores
        .OrderBy(score => score, comparator)
        .Take(maxCandNo)
        .Select(score => score.ToString())
        .ToList();

    foreach (string line in lines)
    {
        DebugPrint.PrintNScore(line, debugFlag);
    }
}