예제 #1
0
        /// <summary>
        /// Tokenizes the input text and runs the correction step over the
        /// resulting tokens, passing this instance to the correction API.
        /// A trace line with the current funcMode and rankMode is printed first.
        /// </summary>
        /// <param name="inText">Text to be tokenized and corrected.</param>
        /// <param name="debugFlag">Forwarded to the debug printer and to the correction API.</param>
        /// <returns>The token list returned by <c>CorrectionApi.ProcessByTokenObj</c>.</returns>
        public virtual List<TokenObj> ProcessToTokenObj(string inText, bool debugFlag)
        {
            // Trace header showing the modes currently held by this instance.
            string banner = "====== SpellApi.Process( ), funcMode: " + funcMode_ + ", rankMode: " + rankMode_ + " ======";
            DebugPrint.Println(banner, debugFlag);

            // Tokenize first, then hand the tokens to the correction API.
            List<TokenObj> tokens = TextObj.TextToTokenList(inText);
            return CorrectionApi.ProcessByTokenObj(tokens, this, debugFlag);
        }
예제 #2
0
 /// <summary>
 /// Splits a ":"-separated list of dictionary files and passes each entry,
 /// in order, to <c>AddDictionary</c>.
 /// </summary>
 /// <param name="inFilePaths">Dictionary file entries separated by ":".</param>
 /// <param name="debugFlag">Forwarded to the debug printer.</param>
 public virtual void AddDictionaries(string inFilePaths, bool debugFlag)
 {
     DebugPrint.Println($"- Dictionary Files: [{inFilePaths}].", debugFlag);

     // NOTE(review): Split(string, bool) is not a framework overload; presumably a
     // project extension method - confirm what the bool argument controls.
     string[] dictionaryPaths = inFilePaths.Split(":", true);
     for (int i = 0; i < dictionaryPaths.Length; i++)
     {
         string dictionaryPath = dictionaryPaths[i];
         DebugPrint.Println($"--- Add Dictionary: [{dictionaryPath}].", debugFlag);
         AddDictionary(dictionaryPath);
     }
 }
예제 #3
0
 /// <summary>
 /// Splits a "|"-separated list of dictionary files and passes each entry,
 /// in order, to <c>AddDictionary</c>.
 /// </summary>
 /// <param name="inFiles">Dictionary file entries separated by "|".</param>
 /// <param name="debugFlag">Forwarded to the debug printer.</param>
 public virtual void AddDictionaries2(string inFiles, bool debugFlag)
 {
     DebugPrint.Println($"- Dictionary Files: [{inFiles}].", debugFlag);

     // NOTE(review): Split(string, bool) is presumably a project extension method;
     // confirm how it interprets the "|" separator and the bool argument.
     string[] dictionaryFiles = inFiles.Split("|", true);
     foreach (string dictionaryFile in dictionaryFiles)
     {
         DebugPrint.Println($"--- Add Dictionary: [{dictionaryFile}].", debugFlag);
         AddDictionary(dictionaryFile);
     }
 }
예제 #4
0
 /// <summary>
 /// Splits a ":"-separated list of dictionary files, prefixes each entry with
 /// <paramref name="rootPath"/> and passes the result to <c>AddDictionary</c>.
 /// </summary>
 /// <param name="inFiles">Dictionary file entries separated by ":".</param>
 /// <param name="rootPath">
 /// Prefix prepended verbatim to every entry; no separator is inserted between
 /// the prefix and the entry.
 /// </param>
 /// <param name="debugFlag">Forwarded to the debug printer.</param>
 public virtual void AddDictionaries(string inFiles, string rootPath, bool debugFlag)
 {
     DebugPrint.Println($"- Dictionary Files: [{inFiles}].", debugFlag);

     // NOTE(review): Split(string, bool) is presumably a project extension method;
     // confirm what the bool argument controls.
     string[] relativeFiles = inFiles.Split(":", true);
     for (int i = 0; i < relativeFiles.Length; i++)
     {
         // Plain concatenation: rootPath must already carry any trailing separator.
         string fullPath = rootPath + relativeFiles[i];
         DebugPrint.Println($"--- Add Dictionary: [{fullPath}].", debugFlag);
         AddDictionary(fullPath);
     }
 }
        /// <summary>
        /// Decides whether the top-ranked one-to-one candidate should replace the
        /// original term, by comparing the context score of the original term
        /// against the candidate's score.
        /// </summary>
        /// <param name="topContextScore">Score object of the top candidate; may be null.</param>
        /// <param name="inStr">The original term.</param>
        /// <param name="tarPos">Target position, forwarded to the context-vector lookup.</param>
        /// <param name="tarSize">Target size, forwarded to the context-vector lookup.</param>
        /// <param name="nonSpaceTokenList">Token list forwarded to the context-vector lookup.</param>
        /// <param name="word2VecIm">Word2Vec model used to build the context vector.</param>
        /// <param name="word2VecOm">Word2Vec model used to score the original term.</param>
        /// <param name="word2VecSkipWord">Forwarded to the context-vector lookup.</param>
        /// <param name="contextRadius">Forwarded to the context-vector lookup.</param>
        /// <param name="rw1To1Factor">Multiplier applied when comparing same-sign scores.</param>
        /// <param name="debugFlag">Forwarded to the debug printer and the context-vector lookup.</param>
        /// <returns>
        /// <c>true</c> when one of the score rules accepts the candidate;
        /// <c>false</c> otherwise, including when there is no candidate, the candidate
        /// equals the original term, or the original score is exactly zero.
        /// </returns>
        private static bool CheckRealWord1To1Rules(ContextScore topContextScore, string inStr, int tarPos, int tarSize, List<TokenObj> nonSpaceTokenList, Word2Vec word2VecIm, Word2Vec word2VecOm, bool word2VecSkipWord, int contextRadius, double rw1To1Factor, bool debugFlag)
        {
            // Gap/limit used by the mixed-sign rule below.
            // Original author's note: this rule "help from 0.6812 to 0.6877" (metric not shown here).
            const double MixedSignThreshold = 0.085;

            // Nothing to do without a candidate ...
            if (topContextScore == null)
            {
                return false;
            }
            // ... or when the candidate is the original term itself.
            if (topContextScore.GetTerm().Equals(inStr))
            {
                return false;
            }

            // 1. Score the original term against its surrounding context.
            DoubleVec surroundingVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList, word2VecIm, contextRadius, word2VecSkipWord, debugFlag);
            ContextScore originalCs = new ContextScore(inStr, surroundingVec, word2VecOm);

            DebugPrint.Println("--- Real-Word One-To-One Context Score Detail: ---", debugFlag);
            DebugPrint.Println("- Score - orgTerm: " + originalCs.ToString(), debugFlag);
            DebugPrint.Println("- Score - top 1-to-1: " + topContextScore.ToString(), debugFlag);
            DebugPrint.Println("- rw1To1Factor: " + rw1To1Factor, debugFlag);

            // 2. Apply the score rules.
            double orgScore = originalCs.GetScore();
            double topScore = topContextScore.GetScore();

            if (orgScore < 0.0d)
            {
                if (topScore > 0.0d)
                {
                    // 2.2a original negative, candidate positive: require a clear gap
                    // while the original is only mildly negative.
                    return ((topScore - orgScore) > MixedSignThreshold) && (orgScore > -MixedSignThreshold);
                }

                // 2.2b both negative: candidate must beat the scaled original score.
                // A candidate score of exactly zero is rejected.
                return (topScore < 0.0d) && (topScore > orgScore * rw1To1Factor);
            }

            // 2.3 original positive: the scaled candidate score must exceed it.
            // 2.1 an original score of exactly 0.0 falls through here and yields false.
            return (orgScore > 0.0d) && (topScore * rw1To1Factor > orgScore);
        }