// private methods
        /// <summary>
        /// Interactive console driver. Sets the rank mode to
        /// RANK_MODE_NOISY_CHANNEL, then reads lines from standard input until
        /// end of input. For each line it gets the 1-to-1 non-word candidates,
        /// prints the top ranked string and, when requested, the ordered
        /// suggestion list.
        /// </summary>
        /// <param name="detailFlag"> when true, the ordered suggestion list is printed </param>
        /// <param name="limitNo"> maximum number of suggestions printed in the detail list </param>
        /// <returns> 0 when input ends normally, -1 if an exception was caught </returns>
        private static int RunTest(bool detailFlag, long limitNo)
        {
            // init dic
            string    configFile = "../data/Config/cSpell.properties";
            CSpellApi cSpellApi  = new CSpellApi(configFile);
            WordWcMap wordWcMap  = cSpellApi.GetWordWcMap();
            double    wf1        = cSpellApi.GetOrthoScoreEdDistFac();
            double    wf2        = cSpellApi.GetOrthoScorePhoneticFac();
            double    wf3        = cSpellApi.GetOrthoScoreOverlapFac();

            cSpellApi.SetRankMode(CSpellApi.RANK_MODE_NOISY_CHANNEL);
            NoisyChannelScoreComparator <NoisyChannelScore> ncsc = new NoisyChannelScoreComparator <NoisyChannelScore>();

            // Clamp before narrowing: a plain (int) cast of a large long wraps
            // around (e.g. long.MaxValue becomes -1) and would print nothing.
            int maxShown = (int)Math.Max(0L, Math.Min(limitNo, (long)int.MaxValue));

            // provide cmdLine interface
            int returnValue = 0;

            try {
                // the using statement releases the reader on every exit path
                using (StreamReader stdInput = new StreamReader(Console.OpenStandardInput()))
                {
                    string inText;
                    Console.WriteLine("- Please input a text (type \"Ctl-d\" to quit) > ");
                    while ((inText = stdInput.ReadLine()) != null)
                    {
                        // ---------------------------------
                        // Get spell correction on the input
                        // ---------------------------------
                        // get all possible candidates
                        HashSet <string> candSet = NonWord1To1Candidates.GetCandidates(inText, cSpellApi);
                        Console.WriteLine("-- canSet.size(): " + candSet.Count);
                        // get final suggestion
                        string topRankStr = GetTopRankStr(inText, candSet, wordWcMap, wf1, wf2, wf3);
                        Console.WriteLine("- top rank str: " + topRankStr);
                        // print details
                        if (detailFlag)
                        {
                            HashSet <NoisyChannelScore> candScoreSet = GetCandidateScoreSet(inText, candSet, wordWcMap, wf1, wf2, wf3);
                            Console.WriteLine("------ Suggestion List ------");
                            foreach (string item in candScoreSet.OrderBy(x => x, ncsc).Take(maxShown).Select(obj => obj.ToString()))
                            {
                                Console.WriteLine(item);
                            }
                        }
                    }
                }
            } catch (Exception e) {
                // any failure ends the session; report it and signal an error code
                Console.Error.WriteLine(e.Message);
                returnValue = -1;
            }
            return(returnValue);
        }
// Example #2
// 0
        /// <summary>
        /// Non-word correction for a single token. The token string is reduced to
        /// its core term; if the non-word detector flags it, candidates are
        /// collected (1-to-1 and/or split, depending on the function mode) and the
        /// top ranked candidate, as chosen by RankNonWordByMode.GetTopRankStr with
        /// the target position and the non-space token list, replaces the core term.
        /// </summary>
        /// <param name="inTokenObj"> the input tokenObj (single word) </param>
        /// <param name="cSpellApi"> CSpell Api object </param>
        /// <param name="debugFlag"> flag for debug print </param>
        /// <param name="tarPos"> position for target token </param>
        /// <param name="nonSpaceTokenList"> token list without space token(s) </param>
        /// <returns> a new TokenObj built from the input token; its token string
        ///           and process history are updated only when a non-word was
        ///           detected and the ranked result differs from the input word.
        ///           </returns>
        public static TokenObj GetCorrectTerm(TokenObj inTokenObj, CSpellApi cSpellApi, bool debugFlag, int tarPos, List <TokenObj> nonSpaceTokenList)
        {
            // init
            int funcMode = cSpellApi.GetFuncMode();

            // get inWord from inTokenObj and init outTokenObj
            string   inWord      = inTokenObj.GetTokenStr();
            TokenObj outTokenObj = new TokenObj(inTokenObj);
            // 1. convert a word to coreTerm (no leading/ending space, punc, digit)
            int         ctType      = CoreTermUtil.CT_TYPE_SPACE_PUNC_DIGIT;
            CoreTermObj coreTermObj = new CoreTermObj(inWord, ctType);
            string      coreStr     = coreTermObj.GetCoreTerm();

            // 2. non-word detection: nothing to do unless the coreTerm is flagged
            // TBD .. need to separate 1-to-1 and split
            if (!NonWordDetector.IsDetect(inWord, coreStr, cSpellApi, debugFlag))
            {
                return(outTokenObj);
            }
            cSpellApi.UpdateDetectNo();

            // TBD, should take care of possessive xxx's here
            // 3.1 get 1-to-1 candidates set from correction, no split
            HashSet <string> candSet = NonWord1To1Candidates.GetCandidates(coreStr, cSpellApi);
            if (funcMode != CSpellApi.FUNC_MODE_NW_1)
            {
                // 3.2 get candidates from split
                int maxSplitNo            = cSpellApi.GetCanNwMaxSplitNo();
                HashSet <string> splitSet = NonWordSplitCandidates.GetCandidates(coreStr, cSpellApi, maxSplitNo);
                if (funcMode == CSpellApi.FUNC_MODE_NW_S)
                {
                    // 3.3 split only: the split candidates replace the 1-to-1 set
                    candSet = new HashSet <string>(splitSet);
                }
                else
                {
                    // 3.4 otherwise merge the split candidates into the 1-to-1 set
                    // (HashSet<T> has no addAll; UnionWith is the in-place union)
                    candSet.UnionWith(splitSet);
                }
            }

            // 4. Ranking: get top ranked candidate as the corrected term,
            //    passing the target position and token list for context
            string topRankStr = RankNonWordByMode.GetTopRankStr(coreStr, candSet, cSpellApi, debugFlag, tarPos, nonSpaceTokenList);
            // 5. update coreTerm and convert back to the full word
            coreTermObj.SetCoreTerm(topRankStr);
            string outWord = coreTermObj.ToString();
            // 6. update info if there is a process
            if (!inWord.Equals(outWord))
            {
                outTokenObj.SetTokenStr(outWord);
                cSpellApi.UpdateCorrectNo();
                if (TermUtil.IsMultiword(outWord))
                {
                    // split correction
                    outTokenObj.AddProcToHist(TokenObj.HIST_NW_S);
                    DebugPrint.PrintCorrect("NW", "NonWordCorrector-Split", inWord, outWord, debugFlag);
                }
                else
                {
                    // 1To1 correction
                    outTokenObj.AddProcToHist(TokenObj.HIST_NW_1);
                    DebugPrint.PrintCorrect("NW", "NonWordCorrector-1To1", inWord, outWord, debugFlag);
                }
            }
            return(outTokenObj);
        }
// Example #3
// 0
        // private methods
        // this test is not verified
        /// <summary>
        /// Interactive console driver. Sets the rank mode to RANK_MODE_CONTEXT,
        /// then reads lines from standard input until end of input. For each line
        /// it builds the target word from tarSize tokens starting at tarPos, gets
        /// the 1-to-1 candidates (plus the original target), prints the top ranked
        /// string and, when requested, the ordered suggestion list.
        /// </summary>
        /// <param name="detailFlag"> when true, the ordered suggestion list is printed;
        ///     also passed through to the ranking calls </param>
        /// <param name="tarPos"> position of the target token, counted without space tokens </param>
        /// <param name="tarSize"> number of consecutive tokens that form the target </param>
        /// <param name="contextRadius"> context radius passed to the ranking calls </param>
        /// <param name="limitNo"> maximum number of suggestions printed in the detail list </param>
        /// <returns> 0 when input ends normally, -1 if an exception was caught </returns>
        private static int RunTest(bool detailFlag, int tarPos, int tarSize, int contextRadius, long limitNo)
        {
            // init dic
            string    configFile = "../data/Config/cSpell.properties";
            CSpellApi cSpellApi  = new CSpellApi(configFile);

            cSpellApi.SetRankMode(CSpellApi.RANK_MODE_CONTEXT);
            Word2Vec word2VecIm       = cSpellApi.GetWord2VecIm();
            Word2Vec word2VecOm       = cSpellApi.GetWord2VecOm();
            bool     word2VecSkipWord = cSpellApi.GetWord2VecSkipWord();
            ContextScoreComparator <ContextScore> csc = new ContextScoreComparator <ContextScore>();

            // Clamp before narrowing: a plain (int) cast of a large long wraps
            // around (e.g. long.MaxValue becomes -1) and would print nothing.
            int maxShown = (int)Math.Max(0L, Math.Min(limitNo, (long)int.MaxValue));

            // provide cmdLine interface
            int returnValue = 0;

            try {
                // the using statement releases the reader on every exit path
                using (StreamReader stdInput = new StreamReader(Console.OpenStandardInput()))
                {
                    string inText;
                    Console.WriteLine("- Please input a text, only a spell error allowed (type \"Ctl-d\" to quit) > ");
                    while ((inText = stdInput.ReadLine()) != null)
                    {
                        // ---------------------------------
                        // Get spell correction on the input
                        // ---------------------------------
                        // convert input text to TokenObj
                        TextObj         textObj    = new TextObj(inText);
                        List <TokenObj> inTextList = textObj.GetTokenList();
                        // *2 because tokenList include space
                        string tarWord = inTextList[tarPos * 2].GetTokenStr();
                        for (int i = 1; i < tarSize; i++)
                        {
                            // advance by i non-space tokens; a fixed offset of 1
                            // would append the same token on every iteration
                            int ii = (tarPos + i) * 2;
                            tarWord += " " + inTextList[ii].GetTokenStr();
                        }
                        Console.WriteLine("- input text: [" + inText + "]");
                        Console.WriteLine("- target: [" + tarPos + "|" + tarSize + "|" + tarWord + "]");
                        Console.WriteLine("- context radius: " + contextRadius);
                        // get all possible candidates
                        HashSet <string> candSet = NonWord1To1Candidates.GetCandidates(tarWord, cSpellApi);
                        candSet.Add(tarWord);                         // add the original word
                        Console.WriteLine("-- canSet.size(): " + candSet.Count);
                        // get final suggestion
                        // remove space token from the list
                        List <TokenObj> nonSpaceTokenList = TextObj.GetNonSpaceTokenObjList(inTextList);
                        string          topRankStr        = GetTopRankStr(tarWord, candSet, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, detailFlag);
                        Console.WriteLine("- top rank str: " + topRankStr);
                        // print details
                        if (detailFlag)
                        {
                            // NOTE(review): this call receives inTextList (with space
                            // tokens) while GetTopRankStr above receives
                            // nonSpaceTokenList for the same tarPos - confirm which
                            // list GetCandidateScoreSet expects.
                            HashSet <ContextScore> candScoreSet = GetCandidateScoreSet(candSet, tarPos, tarSize, inTextList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, detailFlag);
                            Console.WriteLine("------ Suggestion List ------");
                            foreach (string item in candScoreSet.OrderBy(x => x, csc).Take(maxShown).Select(obj => obj.ToString()))
                            {
                                Console.WriteLine(item);
                            }
                        }
                        // print the prompt
                        Console.WriteLine("- Please input a text, only a spell error allowed (type \"Ctl-d\" to quit) > ");
                    }
                }
            } catch (Exception e) {
                // any failure (including a target index past the end of the
                // token list) ends the session and signals an error code
                Console.Error.WriteLine(e.Message);
                returnValue = -1;
            }
            return(returnValue);
        }