示例#1
0
        /// <summary>
        /// Adds a merge candidate to <paramref name="mergeSet"/> when the core term of
        /// <paramref name="mergeWord"/> (ending punctuation/space stripped) is a word in
        /// <paramref name="suggestDic"/> and is not a word in <paramref name="aADic"/>.
        /// Otherwise the set is left untouched.
        /// </summary>
        /// <param name="tarWord"> target word; forwarded to the MergeObj constructor </param>
        /// <param name="orgMergeWord"> original merge word; forwarded to the MergeObj constructor </param>
        /// <param name="mergeWord"> merged word whose core term is checked against the dictionaries </param>
        /// <param name="mergeNo"> forwarded to the MergeObj constructor </param>
        /// <param name="startIndex"> forwarded to the MergeObj constructor </param>
        /// <param name="tarIndex"> forwarded to the MergeObj constructor </param>
        /// <param name="endIndex"> forwarded to the MergeObj constructor </param>
        /// <param name="startPos"> forwarded to the MergeObj constructor </param>
        /// <param name="tarPos"> forwarded to the MergeObj constructor </param>
        /// <param name="endPos"> forwarded to the MergeObj constructor </param>
        /// <param name="mergeSet"> candidate set that receives the new MergeObj </param>
        /// <param name="suggestDic"> dictionary the core term must be found in </param>
        /// <param name="aADic"> dictionary the core term must NOT be found in </param>
        private static void AddMergeObj(string tarWord, string orgMergeWord, string mergeWord, int mergeNo, int startIndex, int tarIndex, int endIndex, int startPos, int tarPos, int endPos, HashSet <MergeObj> mergeSet, RootDictionary suggestDic, RootDictionary aADic)
        {
            // 1. convert merged word to coreTerm:
            // only take care of the end punctuation for the coreTerm
            string coreStr = TermUtil.StripEndPuncSpace(mergeWord);

            // 2. keep the candidate only if its coreStr is in the suggest dictionary.
            // Words found in aADic are skipped: the original author assumes no merge
            // is needed for Aa because Aa is short enough.
            if (suggestDic.IsDicWord(coreStr) && !aADic.IsDicWord(coreStr))
            {
                mergeSet.Add(new MergeObj(tarWord, orgMergeWord, mergeWord, coreStr, mergeNo, startIndex, tarIndex, endIndex, startPos, tarPos, endPos));
            }
        }
示例#2
0
        // public method
        /// <summary>
        /// The core method to correct a word by the following steps:
        /// <list type="bullet">
        /// <item><description>Convert the target token to its core string (ending punctuation/space removed, lowercased)</description></item>
        /// <item><description>Detect if misspelled (OOV) - non-word, exclude Aa</description></item>
        /// <item><description>Get candidates from merge</description></item>
        /// <item><description>Rank candidates (frequency or context; no orthographic)</description></item>
        /// <item><description>Update information (the detect counter on <paramref name="cSpellApi"/>)</description></item>
        /// </list>
        /// </summary>
        /// <param name="tarPos"> position of the target token in <paramref name="nonSpaceTokenList"/> </param>
        /// <param name="nonSpaceTokenList"> token list without space token(s) </param>
        /// <param name="cSpellApi"> CSpell Api object </param>
        /// <param name="debugFlag"> flag for debug print </param>
        /// <returns>
        /// The top-ranked merge candidate as a MergeObj when the token is detected as a
        /// non-word merge case; otherwise <c>null</c> (meaning no merge).
        /// </returns>
        public static MergeObj GetCorrectTerm(int tarPos, List <TokenObj> nonSpaceTokenList, CSpellApi cSpellApi, bool debugFlag)
        {
            // get tarWord from the target token
            TokenObj tarTokenObj = nonSpaceTokenList[tarPos];
            string   tarWord     = tarTokenObj.GetTokenStr();

            // 1. only remove ending punctuation for coreTerm.
            // Lowercase with the invariant culture so the lookup key does not depend on
            // the current thread culture (e.g. Turkish 'I' -> dotless 'i').
            string coreStr = TermUtil.StripEndPuncSpace(tarWord).ToLowerInvariant();

            // 2. non-word detection: nothing to merge unless the detector fires
            if (!NonWordMergeDetector.IsDetect(tarWord, coreStr, cSpellApi, debugFlag))
            {
                return null;             // no merge if it is null
            }

            cSpellApi.UpdateDetectNo();

            // 3. get candidates from merge
            HashSet <MergeObj> mergeSet = NonWordMergeCandidates.GetCandidates(tarPos, nonSpaceTokenList, cSpellApi);

            // 4. Ranking: get the top ranked candidate as the corrected term.
            // Just use frequency or context, no orthographic score;
            // tarPos and the token list are passed in case context is used.
            return RankNonWordMergeByMode.GetTopRankMergeObj(mergeSet, cSpellApi, tarPos, nonSpaceTokenList, debugFlag);
        }