示例#1
0
        // tarPos: starts from 0 and does not count empty space tokens
        // Selects the top-ranked candidate for inStr according to the rank mode
        // configured in cSpellApi, and hands the same inputs to the matching
        // ScoreDetailByMode printer (debugFlag is forwarded to that printer).
        // Returns inStr unchanged when candidates is empty or when the rank mode
        // matches none of the cases handled below.
        public static string GetTopRankStr(string inStr, HashSet <string> candidates, CSpellApi cSpellApi, bool debugFlag, int tarPos, List <TokenObj> nonSpaceTokenList)
        {
            // Read every setting up front, in a fixed order, before looking at the candidates.
            int       mode         = cSpellApi.GetRankMode();
            double    edDistFac    = cSpellApi.GetOrthoScoreEdDistFac();
            double    phoneticFac  = cSpellApi.GetOrthoScorePhoneticFac();
            double    overlapFac   = cSpellApi.GetOrthoScoreOverlapFac();
            WordWcMap wcMap        = cSpellApi.GetWordWcMap();
            int       maxShown     = cSpellApi.GetCanMaxCandNo();
            Word2Vec  w2vIm        = cSpellApi.GetWord2VecIm();
            Word2Vec  w2vOm        = cSpellApi.GetWord2VecOm();
            int       radius       = cSpellApi.GetNw1To1ContextRadius();
            bool      skipWord     = cSpellApi.GetWord2VecSkipWord();
            double    rankRangeFac = cSpellApi.GetRankNwS1RankRangeFac();
            double    minOScore    = cSpellApi.GetRankNwS1MinOScore();

            // Target size is fixed at 1: only one-to-one or split here, no merge.
            const int targetSize = 1;

            // Nothing to rank: the input string is its own best candidate.
            if (candidates.Count <= 0)
            {
                return inStr;
            }

            // Falls through as the result when no case below matches the mode.
            string best = inStr;
            switch (mode)
            {
            case CSpellApi.RANK_MODE_ORTHOGRAPHIC:
                best = RankByOrthographic.GetTopRankStr(inStr, candidates, edDistFac, phoneticFac, overlapFac);
                ScoreDetailByMode.PrintOrthographicScore(inStr, candidates, maxShown, edDistFac, phoneticFac, overlapFac, debugFlag);
                break;

            case CSpellApi.RANK_MODE_FREQUENCY:
                best = RankByFrequency.GetTopRankStr(candidates, wcMap);
                ScoreDetailByMode.PrintFrequencyScore(candidates, wcMap, maxShown, debugFlag);
                break;

            case CSpellApi.RANK_MODE_CONTEXT:
                best = RankByContext.GetTopRankStr(inStr, candidates, tarPos, targetSize, nonSpaceTokenList, w2vIm, w2vOm, skipWord, radius);
                ScoreDetailByMode.PrintContextScore(candidates, tarPos, targetSize, nonSpaceTokenList, w2vIm, w2vOm, skipWord, radius, maxShown, debugFlag);
                break;

            case CSpellApi.RANK_MODE_NOISY_CHANNEL:
                best = RankByNoisyChannel.GetTopRankStr(inStr, candidates, wcMap, edDistFac, phoneticFac, overlapFac);
                ScoreDetailByMode.PrintNoisyChannelScore(inStr, candidates, wcMap, maxShown, edDistFac, phoneticFac, overlapFac, debugFlag);
                break;

            case CSpellApi.RANK_MODE_ENSEMBLE:
                best = RankByEnsemble.GetTopRankStr(inStr, candidates, wcMap, tarPos, targetSize, nonSpaceTokenList, w2vIm, w2vOm, skipWord, radius, rankRangeFac, edDistFac, phoneticFac, overlapFac);
                // Ensemble uses the same basic score as CSpell, so the CSpell printer is reused.
                ScoreDetailByMode.PrintCSpellScore(inStr, candidates, wcMap, maxShown, tarPos, targetSize, nonSpaceTokenList, w2vIm, w2vOm, skipWord, radius, edDistFac, phoneticFac, overlapFac, debugFlag);
                break;

            case CSpellApi.RANK_MODE_CSPELL:
                best = RankByCSpellNonWord.GetTopRankStr(inStr, candidates, wcMap, tarPos, targetSize, nonSpaceTokenList, w2vIm, w2vOm, skipWord, radius, rankRangeFac, minOScore, edDistFac, phoneticFac, overlapFac);
                ScoreDetailByMode.PrintCSpellScore(inStr, candidates, wcMap, maxShown, tarPos, targetSize, nonSpaceTokenList, w2vIm, w2vOm, skipWord, radius, edDistFac, phoneticFac, overlapFac, debugFlag);
                break;
            }

            return best;
        }
 // This detail does not show how cSpell really finds the top rank:
 // the output is sorted by CSpell score only, whereas
 // CSpell uses the CSpell score plus context and frequency to find the top.
 // Debug-only printer: when debugFlag is set, builds the CSpell score set for
 // the candidates, orders it with the NW 1-to-1 comparator, and prints the
 // first maxCandNo entries through DebugPrint.PrintScore.
 // Does nothing when debugFlag is false.
 public static void PrintCSpellScore(string inStr, HashSet <string> candSet, WordWcMap wordWcMap, int maxCandNo, int tarPos, int tarSize, List <TokenObj> nonSpaceTokenList, Word2Vec word2VecIm, Word2Vec word2VecOm, bool word2VecSkipWord, int contextRadius, double wf1, double wf2, double wf3, bool debugFlag)
 {
     if (!debugFlag)
     {
         return;
     }

     // NW 1To1
     CSpellScoreNw1To1Comparator <CSpellScore> comparator = new CSpellScoreNw1To1Comparator <CSpellScore>();
     HashSet <CSpellScore> scoreSet = RankByCSpellNonWord.GetCandidateScoreSet(inStr, candSet, wordWcMap, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, wf1, wf2, wf3, debugFlag);

     // Materialise the formatted lines first, then print them in order.
     List <string> lines = scoreSet
                           .OrderBy(score => score, comparator)
                           .Take(maxCandNo)
                           .Select(score => score.ToString())
                           .ToList();

     lines.ForEach(line => DebugPrint.PrintScore(line, debugFlag));
 }
        // Returns the top-ranked candidate for inStr using the ensemble comparison
        // mode: the candidates are scored and sorted by
        // RankByCSpellNonWord.GetCandidateScoreList, and the first entry of that
        // list is taken as the winner.
        // Returns inStr unchanged when the score list is empty.
        // NOTE(review): rangeFactor is accepted but not used in this body; the
        // original comment mentions setting a candidate range and re-ranking by
        // context and frequency, which is not implemented here.
        public static string GetTopRankStr(string inStr, HashSet <string> candidates, WordWcMap wordWcMap, int tarPos, int tarSize, List <TokenObj> nonSpaceTokenList, Word2Vec word2VecIm, Word2Vec word2VecOm, bool word2VecSkipWord, int contextRadius, double rangeFactor, double wf1, double wf2, double wf3, bool debugFlag)
        {
            // Default result: keep the input when there is nothing to choose from.
            string topRankStr = inStr;

            // Get the list sorted by the ensemble comparison mode.
            int compareMode = CSpellScoreNw1To1Comparator <int> .COMPARE_BY_ENSEMBLE;
            List <CSpellScore> candScoreList = RankByCSpellNonWord.GetCandidateScoreList(inStr, candidates, wordWcMap, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, compareMode, wf1, wf2, wf3, debugFlag);

            // Only index into the list when it has at least one element; the
            // previous ">= 0" check was always true and made candScoreList[0]
            // throw ArgumentOutOfRangeException on an empty list.
            if (candScoreList.Count > 0)
            {
                topRankStr = candScoreList[0].GetCandStr();
            }
            return(topRankStr);
        }