// return candidate scoreObj list sorted by score, higher first
        public static List <ContextScore> GetCandidateScoreList(HashSet <string> candidates, int tarPos, int tarSize, List <TokenObj> nonSpaceTokenList, Word2Vec word2VecIm, Word2Vec word2VecOm, bool word2VecSkipWord, int contextRadius, bool debugFlag)
        {
            // Step 1: score every candidate; all arguments are forwarded unchanged to GetCandidateScoreSet.
            HashSet <ContextScore> scoredCandidates = GetCandidateScoreSet(candidates, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, debugFlag);

            // Step 2: copy the set into a list so it can be ordered in place.
            // ContextScoreComparator defines the order (higher score first).
            var rankedScores = new List <ContextScore>(scoredCandidates);
            rankedScores.Sort(new ContextScoreComparator <ContextScore>());
            return rankedScores;
        }
 // TBD, this file should be deleted by moving each method to
 // the associated ranking class
 // public method
 // Debug helper: scores the candidates via RankByContext.GetCandidateScoreSet and prints
 // at most maxCandNo of them, in ContextScoreComparator order, through DebugPrint.PrintCScore.
 // Does nothing at all when debugFlag is false.
 public static void PrintContextScore(HashSet <string> candSet, int tarPos, int tarSize, List <TokenObj> inTextList, Word2Vec word2VecIm, Word2Vec word2VecOm, bool word2VecSkipWord, int contextRadius, int maxCandNo, bool debugFlag)
 {
     if (!debugFlag)
     {
         return;
     }

     var comparator = new ContextScoreComparator <ContextScore>();
     HashSet <ContextScore> scoreSet = RankByContext.GetCandidateScoreSet(candSet, tarPos, tarSize, inTextList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, debugFlag);

     // Render every line before printing anything, so output starts only after all scores are formatted.
     List <string> lines = scoreSet
                           .OrderBy(score => score, comparator)
                           .Take(maxCandNo)
                           .Select(score => score.ToString())
                           .ToList();

     lines.ForEach(line => DebugPrint.PrintCScore(line, debugFlag));
 }
        // Build the context-score list for a set of merge candidates, ordered by
        // ContextScoreComparator (higher score first). Each entry of the ordered list is
        // also handed to DebugPrint.PrintCScore together with debugFlag.
        public static List <ContextScore> GetCandidateScoreList(HashSet <MergeObj> candidates, List <TokenObj> nonSpaceTokenList, Word2Vec word2VecIm, Word2Vec word2VecOm, bool word2VecSkipWord, int contextRadius, bool debugFlag)
        {
            // score each candidate
            HashSet <ContextScore> scoredCandidates = GetCandidateScoreSet(candidates, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, debugFlag);

            // order the scores in place
            var rankedScores = new List <ContextScore>(scoredCandidates);
            rankedScores.Sort(new ContextScoreComparator <ContextScore>());

            // print detail, in ranked order
            rankedScores.ForEach(score => DebugPrint.PrintCScore(score.ToString(), debugFlag));

            return rankedScores;
        }
Beispiel #4
0
        // Compare two CSpellScore objects by a cascade of component scores:
        // orthographic (O), noisy channel (N), frequency (F), context (C).
        // The first component whose two scores differ decides the result, using that
        // component's own comparator; if all four are equal the candidate strings decide.
        private int compareByCombo(CSpellScore o1, CSpellScore o2)
        {
            // Fetch all component scores up front, in O, N, F, C order.
            OrthographicScore orthoA = o1.GetOScore();
            OrthographicScore orthoB = o2.GetOScore();
            NoisyChannelScore noisyA = o1.GetNScore();
            NoisyChannelScore noisyB = o2.GetNScore();
            FrequencyScore    freqA  = o1.GetFScore();
            FrequencyScore    freqB  = o2.GetFScore();
            ContextScore      ctxA   = o1.GetCScore();
            ContextScore      ctxB   = o2.GetCScore();

            // 1. orthographic score
            if (orthoA.GetScore() != orthoB.GetScore())
            {
                return new OrthographicScoreComparator <OrthographicScore>().Compare(orthoA, orthoB);
            }

            // 2. noisy channel score
            if (noisyA.GetScore() != noisyB.GetScore())
            {
                return new NoisyChannelScoreComparator <NoisyChannelScore>().Compare(noisyA, noisyB);
            }

            // 3. frequency score
            if (freqA.GetScore() != freqB.GetScore())
            {
                return new FrequencyScoreComparator <FrequencyScore>().Compare(freqA, freqB);
            }

            // 4. context score
            if (ctxA.GetScore() != ctxB.GetScore())
            {
                return new ContextScoreComparator <ContextScore>().Compare(ctxA, ctxB);
            }

            // 5. final tie-break on the candidate strings; note the operands are
            //    reversed (second candidate compared to the first).
            string firstCand  = o1.GetCandStr();
            string secondCand = o2.GetCandStr();
            return secondCand.CompareTo(firstCand);
        }
Beispiel #5
0
        // Compare two CSpellScore objects by context score only; when the context
        // scores are equal, fall back to comparing the candidate strings.
        private int compareByContext(CSpellScore o1, CSpellScore o2)
        {
            ContextScore ctxA = o1.GetCScore();
            ContextScore ctxB = o2.GetCScore();

            // 1. context score decides whenever the two scores differ
            if (ctxA.GetScore() != ctxB.GetScore())
            {
                return new ContextScoreComparator <ContextScore>().Compare(ctxA, ctxB);
            }

            // 2. tie-break on the candidate strings; note the operands are
            //    reversed (second candidate compared to the first).
            string firstCand  = o1.GetCandStr();
            string secondCand = o2.GetCandStr();
            return secondCand.CompareTo(firstCand);
        }
Beispiel #6
0
        // Return the top-ranked candidate string according to the word2Vec context score.
        // The original inStr is returned when there are no candidates, or when
        // IsTopCandValid rejects the top candidate after comparing it with the
        // context score of inStr itself.
        // shortSplitWordLength and maxShortSplitWordNo are accepted but not used in this method.
        // When debugFlag is set, the score of inStr and up to maxCandNo candidate scores are printed.
        public static string GetTopRankStr(string inStr, HashSet <string> candidates, int tarPos, int tarSize, List <TokenObj> nonSpaceTokenList, Word2Vec word2VecIm, Word2Vec word2VecOm, bool word2VecSkipWord, int contextRadius, int shortSplitWordLength, int maxShortSplitWordNo, double rwSplitFactor, int maxCandNo, bool debugFlag)
        {
            // nothing to rank: keep the input as is
            if (candidates.Count <= 0)
            {
                return inStr;
            }

            // 1. rank all candidates; the first entry of the sorted list is the top candidate
            // This ranking can be improved if an n-gram model (frequency) is used
            List <ContextScore> rankedScores = RankByContext.GetCandidateScoreList(candidates, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, debugFlag);
            ContextScore        bestScore    = rankedScores[0];

            // 2. score the original string against the same context
            DoubleVec    contextVec    = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList, word2VecIm, contextRadius, word2VecSkipWord, debugFlag);
            ContextScore originalScore = new ContextScore(inStr, contextVec, word2VecOm);

            // 3. accept the top candidate only if it passes validation against the original;
            //    otherwise no correction is made and inStr is returned
            string result = inStr;
            if (IsTopCandValid(inStr, originalScore, bestScore, rwSplitFactor, debugFlag))
            {
                result = bestScore.GetTerm();
            }

            // 4. debug print: the original first, then the leading candidates
            if (debugFlag)
            {
                DebugPrint.PrintCScore(originalScore.ToString(), debugFlag);

                var comparator = new ContextScoreComparator <ContextScore>();
                List <string> lines = rankedScores
                                      .OrderBy(score => score, comparator)
                                      .Take(maxCandNo)
                                      .Select(score => score.ToString())
                                      .ToList();
                lines.ForEach(line => DebugPrint.PrintCScore(line, debugFlag));
            }

            return result;
        }
        // private methods
        // this test is not verified
        // Interactive console driver: reads lines of text from standard input until
        // end of input and, for each line, prints the target word(s), the number of
        // candidates and the top-ranked string.
        //   detailFlag    - forwarded as the debug flag to the ranking calls; when true
        //                   the scored candidates are listed as well
        //   tarPos        - index of the first target word, counted without space tokens
        //   tarSize       - number of consecutive words that make up the target
        //   contextRadius - forwarded to the ranking calls
        //   limitNo       - maximum number of candidates listed in detail mode
        // Returns 0 on normal completion, -1 if an exception was caught.
        private static int RunTest(bool detailFlag, int tarPos, int tarSize, int contextRadius, long limitNo)
        {
            // init dic
            string    configFile = "../data/Config/cSpell.properties";
            CSpellApi cSpellApi  = new CSpellApi(configFile);

            cSpellApi.SetRankMode(CSpellApi.RANK_MODE_CONTEXT);
            Word2Vec word2VecIm       = cSpellApi.GetWord2VecIm();
            Word2Vec word2VecOm       = cSpellApi.GetWord2VecOm();
            bool     word2VecSkipWord = cSpellApi.GetWord2VecSkipWord();
            ContextScoreComparator <ContextScore> csc = new ContextScoreComparator <ContextScore>();
            // provide cmdLine interface
            int returnValue = 0;

            try
            {
                // the reader is disposed when the loop ends or an exception escapes it
                using (StreamReader stdInput = new StreamReader(Console.OpenStandardInput()))
                {
                    string inText;
                    Console.WriteLine("- Please input a text, only a spell error allowed (type \"Ctl-d\" to quit) > ");
                    while ((inText = stdInput.ReadLine()) != null)
                    {
                        // ---------------------------------
                        // Get spell correction on the input
                        // ---------------------------------
                        // convert input text to TokenObj
                        TextObj         textObj    = new TextObj(inText);
                        List <TokenObj> inTextList = textObj.GetTokenList();
                        // *2 because tokenList include space
                        string tarWord = inTextList[tarPos * 2].GetTokenStr();
                        for (int i = 1; i < tarSize; i++)
                        {
                            // advance by i words so every word of a multi-word target is appended once
                            int ii = (tarPos + i) * 2;
                            tarWord += " " + inTextList[ii].GetTokenStr();
                        }
                        Console.WriteLine("- input text: [" + inText + "]");
                        Console.WriteLine("- target: [" + tarPos + "|" + tarSize + "|" + tarWord + "]");
                        Console.WriteLine("- context radius: " + contextRadius);
                        // get all possible candidates
                        HashSet <string> candSet = NonWord1To1Candidates.GetCandidates(tarWord, cSpellApi);
                        candSet.Add(tarWord);                         // add the original word
                        Console.WriteLine("-- canSet.size(): " + candSet.Count);
                        // get final suggestion
                        // remove space token from the list
                        List <TokenObj> nonSpaceTokenList = TextObj.GetNonSpaceTokenObjList(inTextList);
                        // NOTE(review): this call passes 10 arguments; presumably it binds to a
                        // GetTopRankStr overload with that shape - confirm it exists in this class.
                        string topRankStr = GetTopRankStr(tarWord, candSet, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, detailFlag);
                        Console.WriteLine("- top rank str: " + topRankStr);
                        // print details
                        if (detailFlag)
                        {
                            // NOTE(review): inTextList (with space tokens) is passed here while the
                            // ranking above uses nonSpaceTokenList - confirm which one is intended.
                            HashSet <ContextScore> candScoreSet = GetCandidateScoreSet(candSet, tarPos, tarSize, inTextList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, detailFlag);
                            Console.WriteLine("------ Suggestion List ------");
                            var list = candScoreSet.OrderBy(x => x, csc).Take((int)limitNo).Select(obj => obj.ToString());
                            foreach (var item in list)
                            {
                                Console.WriteLine(item);
                            }
                        }
                        // print the prompt
                        Console.WriteLine("- Please input a text, only a spell error allowed (type \"Ctl-d\" to quit) > ");
                    }
                }
            }
            catch (Exception e)
            {
                // any failure (opening stdin, tokenizing, ranking) is reported and mapped to -1
                Console.Error.WriteLine(e.Message);
                returnValue = -1;
            }
            return(returnValue);
        }