C# (CSharp) TermUtil.IsMultiword примеры использования

Язык программирования: C# (CSharp)

Класс/Тип: TermUtil

Метод/Функция: IsMultiword

Примеров на hotexamples.com: 2

C# (CSharp) TermUtil.IsMultiword - 2 примера найдено. Это лучшие примеры C# (CSharp) кода для TermUtil.IsMultiword, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ToWordList(10)

GetWordNo(2)

IsMultiword(2)

StripEndPuncSpace(2)

StringTrim(1)

Trim(1)

Пример #1

Показать файл

Файл: Split1To1Corrector.cs Проект: alishchytovych/CSpell.Net

        /// <summary>
        /// Appends the correction result for <paramref name="inToken"/> to
        /// <paramref name="inList"/>. One of three operations is chosen from the token string:
        /// 1. merge (delete): nothing is appended when the string is null or empty;
        /// 2. 1-to-1: <c>Add1To1Correction</c> is used when the string is not a multiword;
        /// 3. split: <c>AddSplitCorrection</c> is used when the string is a multiword.
        /// </summary>
        /// <param name="inList"> list that receives the resulting tokenObj(s) </param>
        /// <param name="inToken"> tokenObj whose token string selects the operation </param>
        public static void AddSplit1To1Correction(List <TokenObj> inList, TokenObj inToken)
        {
            string text = inToken.GetTokenStr();

            // 1. an empty token contributes nothing to the list
            if (string.IsNullOrEmpty(text))
            {
                return;
            }

            if (TermUtil.IsMultiword(text))
            {
                // 3. multiword string: hand over to the split path
                AddSplitCorrection(inList, inToken);
            }
            else
            {
                // 2. single word: 1-to-1 correction
                Add1To1Correction(inList, inToken);
            }
        }

Пример #2

Показать файл

        /// <summary>
        /// This method uses context scores to find the correct term.
        /// The input word is reduced to its coreTerm; if the non-word detector flags it,
        /// 1-to-1 and/or split candidates are collected according to the function mode,
        /// the top ranked candidate replaces the coreTerm, and the resulting word is
        /// written to a copy of the input token together with a history entry.
        /// </summary>
        /// <param name="inTokenObj">    the input tokenObj (single word) </param>
        /// <param name="cSpellApi"> CSpell Api object </param>
        /// <param name="debugFlag"> flag for debug print </param>
        /// <param name="tarPos"> position for target token </param>
        /// <param name="nonSpaceTokenList"> token list without space token(s)
        /// </param>
        /// <returns>    the corrected word in tokenObj if the coreTerm is OOV
        ///             and suggested word found. Otherwise, the original input token
        ///             is returned (as a copy constructed from the input). </returns>
        public static TokenObj GetCorrectTerm(TokenObj inTokenObj, CSpellApi cSpellApi, bool debugFlag, int tarPos, List <TokenObj> nonSpaceTokenList)
        {
            // init
            int funcMode = cSpellApi.GetFuncMode();

            // get inWord from inTokenObj and init outTokenObj
            string   inWord      = inTokenObj.GetTokenStr();
            TokenObj outTokenObj = new TokenObj(inTokenObj);

            // 1. convert a word to coreTerm (no leading/ending space, punc, digit)
            int         ctType      = CoreTermUtil.CT_TYPE_SPACE_PUNC_DIGIT;
            CoreTermObj coreTermObj = new CoreTermObj(inWord, ctType);
            string      coreStr     = coreTermObj.GetCoreTerm();

            // 2. non-word detection: nothing to correct unless the detector fires
            // TBD .. need to separate 1-to-1 and split
            if (!NonWordDetector.IsDetect(inWord, coreStr, cSpellApi, debugFlag))
            {
                return outTokenObj;
            }

            cSpellApi.UpdateDetectNo();

            // TBD, should take care of possessive xxx's here
            // 3.1 get 1-to-1 candidates set from correction, no split
            HashSet <string> candSet = NonWord1To1Candidates.GetCandidates(coreStr, cSpellApi);

            if (funcMode != CSpellApi.FUNC_MODE_NW_1)
            {
                // 3.2 get candidates from split
                int maxSplitNo            = cSpellApi.GetCanNwMaxSplitNo();
                HashSet <string> splitSet = NonWordSplitCandidates.GetCandidates(coreStr, cSpellApi, maxSplitNo);

                if (funcMode == CSpellApi.FUNC_MODE_NW_S)
                {
                    // 3.3 split-only mode: split candidates replace the 1-to-1 candidates
                    candSet = new HashSet <string>(splitSet);
                }
                else
                {
                    // 3.4 otherwise add split candidates to the 1-to-1 candidates.
                    // UnionWith is the HashSet<T> in-place union (Java's addAll does not exist here).
                    candSet.UnionWith(splitSet);
                }
            }

            // 4. Ranking: get top ranked candidate as corrected term, using context
            string topRankStr = RankNonWordByMode.GetTopRankStr(coreStr, candSet, cSpellApi, debugFlag, tarPos, nonSpaceTokenList);

            // 5. update coreTerm and convert back to the full word
            coreTermObj.SetCoreTerm(topRankStr);
            string outWord = coreTermObj.ToString();

            // 6. update info only if the word actually changed
            if (!inWord.Equals(outWord))
            {
                outTokenObj.SetTokenStr(outWord);
                bool isSplit = TermUtil.IsMultiword(outWord);

                // the correction counter is bumped once for either kind of correction
                cSpellApi.UpdateCorrectNo();
                if (isSplit)
                {
                    // split
                    outTokenObj.AddProcToHist(TokenObj.HIST_NW_S);
                    DebugPrint.PrintCorrect("NW", "NonWordCorrector-Split", inWord, outWord, debugFlag);
                }
                else
                {
                    // 1To1 correct
                    outTokenObj.AddProcToHist(TokenObj.HIST_NW_1);
                    DebugPrint.PrintCorrect("NW", "NonWordCorrector-1To1", inWord, outWord, debugFlag);
                }
            }

            return outTokenObj;
        }