TermUtil C# (CSharp) Code-Beispiele

Beispiel #1

0

Datei anzeigen

Datei: WordCountScore.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Scores a term, which may be a multiword, as the average of the
        /// <c>GetScoreDev1</c> scores of its individual words.
        /// </summary>
        /// <param name="inWord">  term to score; split into words by <c>TermUtil.ToWordList</c> </param>
        /// <param name="wordWcMap">  word count map handed to <c>GetScoreDev1</c> for every word </param>
        /// <returns>  the summed per-word score divided by the number of words, or 0.0
        ///          when the summed score is not greater than 0.0 </returns>
        private static double GetScoreDev2(string inWord, WordWcMap wordWcMap)
        {
            // normFlag is false (original note: don't use punctuation for determiner)
            List<string> words = TermUtil.ToWordList(inWord, false);

            // Per-word scorers that were tried here before GetScoreDev1:
            // GetScoreByChurch, GetScoreByCrowell, GetScoreByPeter,
            // GetUnigramFreqScore and GetWc.
            double sum = 0.0;
            for (int i = 0; i < words.Count; i++)
            {
                sum += GetScoreDev1(words[i], wordWcMap);
            }

            // A non-positive sum (which includes the empty word list) scores 0.0,
            // so the division below never sees a zero word count.
            return (sum > 0.0) ? (sum / words.Count) : 0.0;
        }

Beispiel #2

0

Datei anzeigen

Datei: NonWordSplitCandidates.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Verifies that every word obtained by splitting a term is a valid split word.
        /// </summary>
        /// <param name="inTerm">  the term to be split; the caller passes a coreTerm </param>
        /// <param name="cSpellApi">  CSpell Api object handed to <c>IsValidSplitWord</c> </param>
        /// <returns>  false as soon as one word is rejected by <c>IsValidSplitWord</c>;
        ///          true otherwise (also when the word list is empty) </returns>
        public static bool IsValidSplitWords(string inTerm, CSpellApi cSpellApi)
        {
            // Digits and units are deliberately NOT accepted as a shortcut here:
            // 1. they are handled in ND
            // 2. some units are Aa, such as "ng", and cause noise [FP],
            //    e.g. seing => "se i ng" is not a good split
            // so each word only has to pass IsValidSplitWord.
            foreach (string candidate in TermUtil.ToWordList(inTerm))
            {
                if (!IsValidSplitWord(candidate, cSpellApi))
                {
                    return false;
                }
            }
            return true;
        }

Beispiel #3

0

Datei anzeigen

Datei: RealWordSplitCandidates.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Heuristic rule for real-word split: counts the short words among the
        /// split words of a candidate term and rejects the candidate when there
        /// are too many of them.
        /// </summary>
        /// <remarks>
        /// Both the length that makes a word "short" (such as 2 or 3) and the
        /// limit on the number of short words (default 2) are read from the
        /// configuration. Examples from the original notes:
        /// filter out some -> "so me";
        /// filter out another -> "a not her" (shortSplitWordNo = 3);
        /// filter out anyone -> "any one" (shortSplitWordNo = 2);
        /// keep away -> "a way" (shortSplitWordNo = 1);
        /// filter out soon -> "so on" (shortSplitWordNo = 2);
        /// filter out anyway -> "any way" (shortSplitWordNo = 2).
        /// </remarks>
        /// <param name="inTerm">  candidate term whose split words are inspected </param>
        /// <param name="cSpellApi">  CSpell Api object providing the two thresholds </param>
        /// <returns>  true when the number of short split words is below the
        ///          configured maximum; false otherwise </returns>
        private static bool CheckShortSplitWords(string inTerm, CSpellApi cSpellApi)
        {
            int shortLength = cSpellApi.GetCanRwShortSplitWordLength();
            int shortLimit  = cSpellApi.GetCanRwMaxShortSplitWordNo();

            List<string> words = TermUtil.ToWordList(inTerm);

            // count the words that are no longer than the short-word length
            int shortCount = 0;
            for (int i = 0; i < words.Count; i++)
            {
                if (words[i].Length <= shortLength)
                {
                    shortCount++;
                }
            }

            // reaching the limit already disqualifies the candidate
            return shortCount < shortLimit;
        }

Beispiel #4

0

Datei anzeigen

Datei: Split1To1Corrector.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Adds the correction result of one token to a token list. Depending on
        /// the token string, one of three things happens:
        /// 1. nothing is added when the string is null or empty (merge/delete),
        /// 2. <c>Add1To1Correction</c> is used when the string is a single word,
        /// 3. <c>AddSplitCorrection</c> is used when the string is a multiword.
        /// </summary>
        /// <param name="inList">  list that receives the resulting token(s) </param>
        /// <param name="inToken">  token whose string decides the operation </param>
        public static void AddSplit1To1Correction(List<TokenObj> inList, TokenObj inToken)
        {
            string text = inToken.GetTokenStr();

            // 1. empty token: do not add anything to the list
            if (string.IsNullOrEmpty(text))
            {
                return;
            }

            if (TermUtil.IsMultiword(text))
            {
                // 3. multiword: split the token into several tokens
                AddSplitCorrection(inList, inToken);
            }
            else
            {
                // 2. single word: 1-to-1 correction
                Add1To1Correction(inList, inToken);
            }
        }

Beispiel #5

0

Datei anzeigen

        /// <summary>
        /// Compares two NoisyChannelScore objects. The criteria are applied in
        /// order and the first one that differs decides:
        /// 1. number of words in the candidate (fewer words ranks first),
        /// 2. noisy channel score (higher score ranks first),
        /// 3. orthographic score, via OrthographicScoreComparator,
        /// 4. frequency score, via FrequencyScoreComparator,
        /// 5. candidate string, compared as <c>cand2.CompareTo(cand1)</c>.
        /// </summary>
        /// <param name="o1">  first object to be compared </param>
        /// <param name="o2">  second object to be compared
        /// </param>
        /// <returns>  a negative integer, 0, or positive integer to represent that
        ///          object o1 ranks before, equal to, or after object o2. </returns>
        public virtual int Compare(NoisyChannelScore o1, NoisyChannelScore o2)
        {
            string candA = o1.GetCandStr();
            string candB = o2.GetCandStr();

            // 1. for now, fewer words is assumed better,
            // i.e. "whatever" is better than "what ever"
            int wordsA = TermUtil.GetWordNo(candA);
            int wordsB = TermUtil.GetWordNo(candB);
            if (wordsA != wordsB)
            {
                return wordsA - wordsB;
            }

            // 2. noisy channel score, from high to low
            // SCR-2: use a fixed number to ensure result is not 0.
            double scoreA = o1.GetScore();
            double scoreB = o2.GetScore();
            if (scoreB > scoreA)
            {
                return 1;
            }
            if (scoreB < scoreA)
            {
                return -1;
            }

            // 3. orthographic score
            OrthographicScore orthoA = o1.GetOScore();
            OrthographicScore orthoB = o2.GetOScore();
            if (orthoA.GetScore() != orthoB.GetScore())
            {
                OrthographicScoreComparator<OrthographicScore> orthoComparator = new OrthographicScoreComparator<OrthographicScore>();
                return orthoComparator.Compare(orthoA, orthoB);
            }

            // 4. frequency score
            FrequencyScore freqA = o1.GetFScore();
            FrequencyScore freqB = o2.GetFScore();
            if (freqA.GetScore() != freqB.GetScore())
            {
                FrequencyScoreComparator<FrequencyScore> freqComparator = new FrequencyScoreComparator<FrequencyScore>();
                return freqComparator.Compare(freqA, freqB);
            }

            // 5. alphabetic order of the candidate strings
            return candB.CompareTo(candA);
        }

Beispiel #6

0

Datei anzeigen

Datei: Word2VecScore.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Gets the word vector of a term by splitting the term into words and
        /// delegating to <c>GetAvgWordVecForList</c>.
        /// </summary>
        /// <param name="inTerm">  term to look up; may be a multiword </param>
        /// <param name="w2vOm">  word2Vec Om passed to <c>GetAvgWordVecForList</c> </param>
        /// <returns>  the vector returned by <c>GetAvgWordVecForList</c> for the words of the term </returns>
        private static DoubleVec GetWordVecForTerm(string inTerm, Word2Vec w2vOm)
        {
            // TBD: take care of possessive
            return GetAvgWordVecForList(TermUtil.ToWordList(inTerm), w2vOm);
        }

Beispiel #7

0

Datei anzeigen

Datei: ContextScore.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Test driver for merge and split: prints the context score of a merged
        /// word, of the split words taken as one term, and the average context
        /// score of the split words taken one by one. Every score is computed
        /// twice: with a context limited by the window radius and with a context
        /// from the GetContextVec overload that takes no radius.
        /// </summary>
        /// <param name="inText">  input text that is tokenized into the context </param>
        /// <param name="tarPos">  position of the target in the non-space token list </param>
        /// <param name="tarSize">  target size passed to <c>GetContextVec</c> </param>
        /// <param name="radius">  window radius for the windowed context </param>
        /// <param name="mergedWord">  the merged candidate </param>
        /// <param name="splitWords">  the split candidate </param>
        /// <param name="w2vIm">  word2Vec Im used to build the context vectors </param>
        /// <param name="w2vOm">  word2Vec Om used to score the candidates </param>
        private static void Test(string inText, int tarPos, int tarSize, int radius, string mergedWord, string splitWords, Word2Vec w2vIm, Word2Vec w2vOm)
        {
            bool skipWord = true;
            bool debug    = false;

            // 0. tokenize inText and drop the space tokens
            TextObj         parsedText = new TextObj(inText);
            List<TokenObj>  allTokens  = parsedText.GetTokenList();
            List<TokenObj>  tokens     = TextObj.GetNonSpaceTokenObjList(allTokens);

            Console.WriteLine("==========================================");
            Console.WriteLine("-- inTextList: [" + inText + "]");

            // 1. context vectors: a. within the window radius, b. without a radius
            DoubleVec windowContext = Word2VecContext.GetContextVec(tarPos, tarSize, tokens, w2vIm, radius, skipWord, debug);
            DoubleVec fullContext   = Word2VecContext.GetContextVec(tarPos, tarSize, tokens, w2vIm, skipWord, debug);

            // 1.c merged word
            ContextScore mergedWindowScore = new ContextScore(mergedWord, windowContext, w2vOm);
            ContextScore mergedFullScore   = new ContextScore(mergedWord, fullContext, w2vOm);
            Console.WriteLine(mergedWindowScore.ToString() + "|" + string.Format("{0,1:F8}", mergedFullScore.GetScore()));

            // 2. split words scored as one term
            ContextScore splitWindowScore = new ContextScore(splitWords, windowContext, w2vOm);
            ContextScore splitFullScore   = new ContextScore(splitWords, fullContext, w2vOm);
            Console.WriteLine(splitWindowScore.ToString() + "|" + string.Format("{0,1:F8}", splitFullScore.GetScore()));

            // 3. average score over the single split words;
            // each single word gets its own context, shifted by its offset
            List<string> parts     = TermUtil.ToWordList(splitWords);
            double       windowSum = 0.0d;
            double       fullSum   = 0.0d;
            for (int offset = 0; offset < parts.Count; offset++)
            {
                string part = parts[offset];

                // window radius
                DoubleVec partWindowContext = Word2VecContext.GetContextVec(tarPos + offset, 1, tokens, w2vIm, radius, skipWord, debug);
                windowSum += new ContextScore(part, partWindowContext, w2vOm).GetScore();

                // all text
                DoubleVec partFullContext = Word2VecContext.GetContextVec(tarPos + offset, 1, tokens, w2vIm, skipWord, debug);
                fullSum += new ContextScore(part, partFullContext, w2vOm).GetScore();
            }

            double windowAvg = windowSum / parts.Count;
            double fullAvg   = fullSum / parts.Count;
            Console.WriteLine("Avg. Single Word|" + string.Format("{0,1:F8}", windowAvg) + "|" + string.Format("{0,1:F8}", fullAvg));
        }

Beispiel #8

0

Datei anzeigen

        /// <summary>
        /// Builds a split candidate by replacing every occurrence of a
        /// punctuation character (for example a hyphen) with a space and passing
        /// the result through <c>TermUtil.Trim</c>.
        /// </summary>
        /// <param name="inWord">  the word to be split </param>
        /// <param name="puncChar">  the character to be replaced by a space </param>
        /// <returns>  the value returned by <c>TermUtil.Trim</c> for the replaced string </returns>
        protected internal static string GetSplitByPunc(string inWord, char puncChar)
        {
            string spaced = inWord.Replace(puncChar, ' ');
            return TermUtil.Trim(spaced);
        }

Beispiel #9

0

Datei anzeigen

Datei: RankRealWord1To1ByCSpell.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Heuristic rule for real-word one-to-one correction: every word of the
        /// candidate term must have a word vector.
        /// </summary>
        /// <param name="inTerm">  candidate term; split into words by <c>TermUtil.ToWordList</c> </param>
        /// <param name="word2VecOm">  word2Vec Om that is asked <c>HasWordVec</c> for each word </param>
        /// <returns>  false as soon as one word has no word vector; true otherwise
        ///          (also when the word list is empty) </returns>
        private static bool Check1To1Words(string inTerm, Word2Vec word2VecOm)
        {
            foreach (string candidate in TermUtil.ToWordList(inTerm))
            {
                if (!word2VecOm.HasWordVec(candidate))
                {
                    return false;
                }
            }
            return true;
        }

Beispiel #10

0

Datei anzeigen

        /// <summary>
        /// Core split process by space: generates every split of a word that is
        /// obtained by inserting one space between two characters. The word is
        /// lowercased first and duplicates are dropped because the results are
        /// collected in a set.
        /// </summary>
        /// <param name="inWord">  the word to be split </param>
        /// <returns>  the set of split candidates; empty when the word has fewer
        ///          than two characters </returns>
        protected internal static HashSet<string> GetSplitSetBy1Space(string inWord)
        {
            string          lowered = inWord.ToLower();
            HashSet<string> results = new HashSet<string>();

            // the space goes inside the word only, never at either end
            int cut = 1;
            while (cut < lowered.Length)
            {
                string head = lowered.Substring(0, cut);
                string tail = lowered.Substring(cut);

                // StringTrim is needed when a space is inserted next to a space,
                // it converts "a  b" to "a b"
                results.Add(TermUtil.StringTrim(string.Concat(head, GlobalVars.SPACE_STR, tail)));
                cut++;
            }
            return results;
        }

Beispiel #11

0

Datei anzeigen

        /// <summary>
        /// Adds a merge candidate to the merge set when the merged word, with its
        /// ending punctuation and space stripped, is in the suggestion dictionary
        /// and is not in the Aa dictionary.
        /// </summary>
        /// <remarks>
        /// All parameters other than <paramref name="mergeWord"/>,
        /// <paramref name="mergeSet"/> and the two dictionaries are only passed
        /// through to the <c>MergeObj</c> constructor.
        /// </remarks>
        /// <param name="mergeWord">  the merged word to be checked </param>
        /// <param name="mergeSet">  set that receives the new <c>MergeObj</c> </param>
        /// <param name="suggestDic">  dictionary the stripped merged word must be in </param>
        /// <param name="aADic">  dictionary the stripped merged word must not be in </param>
        private static void AddMergeObj(string tarWord, string orgMergeWord, string mergeWord, int mergeNo, int startIndex, int tarIndex, int endIndex, int startPos, int tarPos, int endPos, HashSet <MergeObj> mergeSet, RootDictionary suggestDic, RootDictionary aADic)
        {
            // 1. only take care of the end punctuation of the merged word
            // (the unused ctType/lcFlag locals of an earlier coreTerm conversion
            // were removed; they never took part in the computation)
            string coreStr = TermUtil.StripEndPuncSpace(mergeWord);

            // 2. the coreStr must be in the suggest Dic and must not be an Aa,
            // assuming there is no merge for Aa because Aa is short enough
            if (suggestDic.IsDicWord(coreStr) && !aADic.IsDicWord(coreStr))
            {
                MergeObj mergeObj = new MergeObj(tarWord, orgMergeWord, mergeWord, coreStr, mergeNo, startIndex, tarIndex, endIndex, startPos, tarPos, endPos);
                mergeSet.Add(mergeObj);
            }
        }

Beispiel #12

0

Datei anzeigen

Datei: Word2VecScore.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Scores a term against a context vector: the <c>GetCwobScore</c> values
        /// of all words that have a word vector are summed up and the sum is
        /// divided by the total number of words (words without a word vector
        /// contribute 0 but still count).
        /// </summary>
        /// <remarks>
        /// Original note: this method is to be deleted because it has the same
        /// result as GetScore().
        /// </remarks>
        /// <param name="inTerm">  term to score; may be a multiword </param>
        /// <param name="contextVec">  context vector passed to <c>GetCwobScore</c> </param>
        /// <param name="w2vOm">  word2Vec Om used to look up the word vectors </param>
        /// <returns>  the average score, or 0.0 when the term has no words </returns>
        public static double GetScore2(string inTerm, DoubleVec contextVec, Word2Vec w2vOm)
        {
            List<string> inWordList = TermUtil.ToWordList(inTerm);

            // Without this guard an empty word list computes 0.0 / 0, which is
            // NaN for doubles and would poison every later score comparison.
            if (inWordList.Count == 0)
            {
                return 0.0d;
            }

            // add the scores first, then calculate the avg.
            double total = 0.0d;
            foreach (string word in inWordList)
            {
                DoubleVec wordVec = w2vOm.GetWordVec(word);
                if (wordVec != null)
                {
                    total += GetCwobScore(wordVec, contextVec);
                }
            }
            return total / inWordList.Count;
        }

Beispiel #13

0

Datei anzeigen

Datei: WordCountScore.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Gets the score for a single word or a multiword (for split cases) as
        /// the average of the <c>GetWordScore</c> values of all its words.
        /// </summary>
        /// <remarks>
        /// Original note on the single word score:
        /// score = log(adjusted WC) / log(adjusted max. WC).
        /// </remarks>
        /// <param name="inWord">  term to score; may be a multiword </param>
        /// <param name="wordWcMap">  word count map providing the max. word count
        ///          and passed to <c>GetWordScore</c> </param>
        /// <returns>  the average word score, or 0.0 when the term has no words </returns>
        public static double GetAdjustScoreAvg(string inWord, WordWcMap wordWcMap)
        {
            // normFlag is false (original note: don't use punctuation for determiner)
            List<string> words = TermUtil.ToWordList(inWord, false);
            double       maxWc = GetAdjustedWc(wordWcMap.GetMaxWc());

            if (words.Count == 0)
            {
                return 0.0;
            }

            double sum = 0.0;
            for (int i = 0; i < words.Count; i++)
            {
                sum += GetWordScore(words[i], maxWc, wordWcMap);
            }
            return sum / words.Count;
        }

Beispiel #14

0

Datei anzeigen

Datei: RealWordSplitCandidates.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Checks that every split word of a term passes <c>IsValidSplitWord</c>.
        /// </summary>
        /// <remarks>
        /// Original note on what a split word can be: a digit (pure number), a
        /// unit, or a word in the split word dictionary (English + ProperNoun,
        /// not Aa). The visible check is delegated entirely to
        /// <c>IsValidSplitWord</c>.
        /// </remarks>
        /// <param name="inTerm">  term to be converted to a word list and checked </param>
        /// <param name="cSpellApi">  CSpell Api object handed to <c>IsValidSplitWord</c> </param>
        /// <returns>  false as soon as one split word is rejected; true otherwise
        ///          (also when the word list is empty) </returns>
        private static bool CheckSplitWords(string inTerm, CSpellApi cSpellApi)
        {
            List<string> parts = TermUtil.ToWordList(inTerm);
            for (int i = 0; i < parts.Count; i++)
            {
                if (!IsValidSplitWord(parts[i], cSpellApi))
                {
                    return false;
                }
            }
            return true;
        }

Beispiel #15

0

Datei anzeigen

        /// <summary>
        /// The core method to correct a word by merge, in the following steps:
        /// <list type="number">
        /// <item><description>strip the ending punctuation/space from the target
        /// token string and lowercase it</description></item>
        /// <item><description>detect a misspelling (OOV, non-word, excluding Aa)
        /// with <c>NonWordMergeDetector.IsDetect</c></description></item>
        /// <item><description>get the candidates from merge</description></item>
        /// <item><description>rank the candidates (frequency or context, no
        /// orthographic) and take the top ranked one</description></item>
        /// </list>
        /// </summary>
        /// <param name="tarPos">    position of target token </param>
        /// <param name="nonSpaceTokenList"> token list without space token(s) </param>
        /// <param name="cSpellApi"> CSpell Api object </param>
        /// <param name="debugFlag"> flag for debug print
        /// </param>
        /// <returns>    null when the target token is not detected as a non-word
        ///             merge case; otherwise whatever
        ///             <c>RankNonWordMergeByMode.GetTopRankMergeObj</c> returns
        ///             for the merge candidates. </returns>
        public static MergeObj GetCorrectTerm(int tarPos, List<TokenObj> nonSpaceTokenList, CSpellApi cSpellApi, bool debugFlag)
        {
            // 1. only the ending punctuation is removed for the core string
            string tarWord = nonSpaceTokenList[tarPos].GetTokenStr();
            string coreStr = TermUtil.StripEndPuncSpace(tarWord).ToLower();

            // 2. no detection means no merge, signalled by null
            if (!NonWordMergeDetector.IsDetect(tarWord, coreStr, cSpellApi, debugFlag))
            {
                return null;
            }
            cSpellApi.UpdateDetectNo();

            // 3. candidates from merge
            HashSet<MergeObj> candidates = NonWordMergeCandidates.GetCandidates(tarPos, nonSpaceTokenList, cSpellApi);

            // 4. ranking; the position and token list are passed along in case
            // the context is used
            return RankNonWordMergeByMode.GetTopRankMergeObj(candidates, cSpellApi, tarPos, nonSpaceTokenList, debugFlag);
        }

Beispiel #16

0

Datei anzeigen

Datei: WordCountScore.cs Projekt: alishchytovych/CSpell.Net

        /// <summary>
        /// Gets the score for a single word or a multiword as the minimum of the
        /// <c>GetWordScore</c> values of all its words.
        /// </summary>
        /// <param name="inWord">  term to score; may be a multiword </param>
        /// <param name="wordWcMap">  word count map providing the max. word count
        ///          and passed to <c>GetWordScore</c> </param>
        /// <returns>  the smallest word score, or 0.0 when no word produced a
        ///          score below positive infinity (e.g. the term has no words) </returns>
        public static double GetAdjustScoreMin(string inWord, WordWcMap wordWcMap)
        {
            // normFlag is false (original note: don't use punctuation for determiner)
            List<string> wordList = TermUtil.ToWordList(inWord, false);
            double       maxWc    = GetAdjustedWc(wordWcMap.GetMaxWc());

            // Positive infinity is the neutral start value for a double minimum.
            // The former int.MaxValue sentinel silently mapped any score that was
            // not below 2147483647 to 0.0.
            double minScore = double.PositiveInfinity;
            foreach (string word in wordList)
            {
                double curScore = GetWordScore(word, maxWc, wordWcMap);
                // a plain comparison (not Math.Min) keeps NaN scores ignored
                if (curScore < minScore)
                {
                    minScore = curScore;
                }
            }

            // nothing was found below the start value: fall back to 0.0
            return double.IsPositiveInfinity(minScore) ? 0.0 : minScore;
        }

Beispiel #17

0

Datei anzeigen

        /// <summary>
        /// Compares two FrequencyScore objects. The criteria are applied in order
        /// and the first one that differs decides:
        /// 1. number of words (fewer words ranks first),
        /// 2. score (higher score ranks first),
        /// 3. the word itself, compared as <c>word2.CompareTo(word1)</c>.
        /// </summary>
        /// <param name="o1">  first object to be compared </param>
        /// <param name="o2">  second object to be compared
        /// </param>
        /// <returns>  a negative integer, 0, or positive integer to represent that
        ///          object o1 ranks before, equal to, or after object o2. </returns>
        public virtual int Compare(FrequencyScore o1, FrequencyScore o2)
        {
            string wordA = o1.GetWord();
            string wordB = o2.GetWord();

            // 1. for now, fewer words is assumed better,
            // i.e. "whatever" is better than "what ever"
            int countA = TermUtil.GetWordNo(wordA);
            int countB = TermUtil.GetWordNo(wordB);
            if (countA != countB)
            {
                return countA - countB;
            }

            // 2. same number of words: total score, from high to low
            // SCR-2: use a fixed number to ensure result is not 0.
            double scoreA = o1.GetScore();
            double scoreB = o2.GetScore();
            if (scoreB > scoreA)
            {
                return 1;
            }
            if (scoreB < scoreA)
            {
                return -1;
            }

            // 3. alphabetic order of the word
            return wordB.CompareTo(wordA);
        }

Beispiel #18

0

Datei anzeigen

        /// <summary>
        /// This method uses context scores to find the correct term for a
        /// non-word: the coreTerm of the token is checked by
        /// <c>NonWordDetector.IsDetect</c>; when detected, 1-to-1 and (depending
        /// on the function mode) split candidates are collected, ranked, and the
        /// top ranked candidate replaces the coreTerm.
        /// </summary>
        /// <param name="inTokenObj">    the input tokenObj (single word) </param>
        /// <param name="cSpellApi"> CSpell Api object </param>
        /// <param name="debugFlag"> flag for debug print </param>
        /// <param name="tarPos"> position for target token </param>
        /// <param name="nonSpaceTokenList"> token list without space token(s)
        /// </param>
        /// <returns>    a new tokenObj copied from the input token; its token
        ///             string and process history are updated only when the
        ///             coreTerm is detected as a non-word and the top ranked
        ///             candidate changes the word. </returns>
        public static TokenObj GetCorrectTerm(TokenObj inTokenObj, CSpellApi cSpellApi, bool debugFlag, int tarPos, List <TokenObj> nonSpaceTokenList)
        {
            // init
            int funcMode = cSpellApi.GetFuncMode();

            // get inWord from inTokenObj and init outTokenObj
            string   inWord      = inTokenObj.GetTokenStr();
            TokenObj outTokenObj = new TokenObj(inTokenObj);

            // 1. convert a word to coreTerm (no leading/ending space, punc, digit)
            int         ctType      = CoreTermUtil.CT_TYPE_SPACE_PUNC_DIGIT;
            CoreTermObj coreTermObj = new CoreTermObj(inWord, ctType);
            string      coreStr     = coreTermObj.GetCoreTerm();

            // 2. non-word detection: nothing to correct when not detected
            // TBD .. need to separate 1-to-1 and split
            if (!NonWordDetector.IsDetect(inWord, coreStr, cSpellApi, debugFlag))
            {
                return outTokenObj;
            }
            cSpellApi.UpdateDetectNo();

            // TBD, should take care of possessive xxx's here
            // 3.1 get 1-to-1 candidates set from correction, no split
            HashSet<string> candSet = NonWord1To1Candidates.GetCandidates(coreStr, cSpellApi);
            if (funcMode != CSpellApi.FUNC_MODE_NW_1)
            {
                // 3.2 get candidates from split
                int             maxSplitNo = cSpellApi.GetCanNwMaxSplitNo();
                HashSet<string> splitSet   = NonWordSplitCandidates.GetCandidates(coreStr, cSpellApi, maxSplitNo);
                if (funcMode == CSpellApi.FUNC_MODE_NW_S)
                {
                    // 3.3 split only: the split candidates replace the 1-to-1 candidates
                    candSet = new HashSet<string>(splitSet);
                }
                else
                {
                    // 3.4 add the split candidates to the 1-to-1 candidates;
                    // UnionWith is the HashSet<T> counterpart of Java's addAll
                    candSet.UnionWith(splitSet);
                }
            }

            // 4. Ranking: get top ranked candidate as corrected term; the position
            // and token list are passed along in case the context is used
            string topRankStr = RankNonWordByMode.GetTopRankStr(coreStr, candSet, cSpellApi, debugFlag, tarPos, nonSpaceTokenList);

            // 5. update coreTerm and convert back to the full word
            coreTermObj.SetCoreTerm(topRankStr);
            string outWord = coreTermObj.ToString();

            // 6. update info only if the word was changed
            if (!inWord.Equals(outWord))
            {
                outTokenObj.SetTokenStr(outWord);
                bool isSplit = TermUtil.IsMultiword(outWord);

                // both kinds of correction count as one correction
                cSpellApi.UpdateCorrectNo();
                if (isSplit)
                {
                    // split
                    outTokenObj.AddProcToHist(TokenObj.HIST_NW_S);
                    DebugPrint.PrintCorrect("NW", "NonWordCorrector-Split", inWord, outWord, debugFlag);
                }
                else
                {
                    // 1To1 correct
                    outTokenObj.AddProcToHist(TokenObj.HIST_NW_1);
                    DebugPrint.PrintCorrect("NW", "NonWordCorrector-1To1", inWord, outWord, debugFlag);
                }
            }
            return outTokenObj;
        }

C# (CSharp) TermUtil Beispiele