/// <summary>
/// Runs the cSpell correction process on the given text and returns the
/// result as a list of <c>TokenObj</c>. The input is first tokenized with
/// <c>TextObj.TextToTokenList</c> and the tokens are then handed, together
/// with this instance, to <c>CorrectionApi.ProcessByTokenObj</c>.
/// The current <c>funcMode_</c> and <c>rankMode_</c> values are reported in
/// the debug banner.
/// </summary>
/// <param name="inText"> input text to be corrected </param>
/// <param name="debugFlag"> boolean flag for debug print </param>
/// <returns> the token list produced by the correction step </returns>
public virtual List<TokenObj> ProcessToTokenObj(string inText, bool debugFlag)
{
    // Banner is always built, and handed to DebugPrint together with the flag.
    string banner = "====== SpellApi.Process( ), funcMode: " + funcMode_
        + ", rankMode: " + rankMode_ + " ======";
    DebugPrint.Println(banner, debugFlag);

    // non-dictionary and dictionary based correction:
    // tokenize first, then let CorrectionApi work on the tokens
    List<TokenObj> tokens = TextObj.TextToTokenList(inText);
    return CorrectionApi.ProcessByTokenObj(tokens, this, debugFlag);
}
/// <summary>
/// Adds every dictionary named in a ":"-separated list of file paths by
/// calling <c>AddDictionary</c> once per entry, in list order.
/// </summary>
/// <param name="inFilePaths"> dictionary file paths, separated by ":" </param>
/// <param name="debugFlag"> boolean flag for debug print </param>
public virtual void AddDictionaries(string inFilePaths, bool debugFlag)
{
    DebugPrint.Println("- Dictionary Files: [" + inFilePaths + "].", debugFlag);

    // Two-argument Split is not a BCL overload; presumably a project-level
    // extension helper - the call is kept exactly as the file uses it.
    string[] dictionaryPaths = inFilePaths.Split(":", true);
    for (int i = 0; i < dictionaryPaths.Length; i++)
    {
        string dictionaryPath = dictionaryPaths[i];
        DebugPrint.Println("--- Add Dictionary: [" + dictionaryPath + "].", debugFlag);
        AddDictionary(dictionaryPath);
    }
}
/// <summary>
/// Variant of <c>AddDictionaries</c> that passes "|" instead of ":" as the
/// delimiter when splitting the list, then calls <c>AddDictionary</c> once
/// per resulting entry, in list order.
/// </summary>
/// <param name="inFiles"> dictionary files, separated by "|" </param>
/// <param name="debugFlag"> boolean flag for debug print </param>
public virtual void AddDictionaries2(string inFiles, bool debugFlag)
{
    DebugPrint.Println("- Dictionary Files: [" + inFiles + "].", debugFlag);

    // NOTE(review): the two-argument Split used here is not a BCL overload,
    // so it is presumably a project helper. If that helper treats its first
    // argument as a regular expression, an unescaped "|" would not split on
    // the literal pipe character - confirm against the helper.
    string[] dictionaryFiles = inFiles.Split("|", true);
    for (int i = 0; i < dictionaryFiles.Length; i++)
    {
        string dictionaryFile = dictionaryFiles[i];
        DebugPrint.Println("--- Add Dictionary: [" + dictionaryFile + "].", debugFlag);
        AddDictionary(dictionaryFile);
    }
}
/// <summary>
/// Adds every dictionary named in a ":"-separated list of file names, each
/// one prefixed with <paramref name="rootPath"/>, by calling
/// <c>AddDictionary</c> once per entry, in list order.
/// </summary>
/// <param name="inFiles"> dictionary file names, separated by ":" </param>
/// <param name="rootPath">
/// prefix prepended to each file name by plain string concatenation; no
/// separator is inserted, so it must already end with one if one is needed
/// </param>
/// <param name="debugFlag"> boolean flag for debug print </param>
public virtual void AddDictionaries(string inFiles, string rootPath, bool debugFlag)
{
    DebugPrint.Println("- Dictionary Files: [" + inFiles + "].", debugFlag);

    // Two-argument Split is not a BCL overload; presumably a project-level
    // extension helper - the call is kept exactly as the file uses it.
    string[] fileNames = inFiles.Split(":", true);
    for (int i = 0; i < fileNames.Length; i++)
    {
        // rootPath and file name are joined verbatim
        string dictionaryPath = rootPath + fileNames[i];
        DebugPrint.Println("--- Add Dictionary: [" + dictionaryPath + "].", debugFlag);
        AddDictionary(dictionaryPath);
    }
}
/// <summary>
/// Decides whether the top-ranked real-word one-to-one candidate should
/// replace the original term, by comparing the context score of the
/// candidate with the context score of the original term.
/// </summary>
/// <param name="topContextScore"> context score of the top candidate; may be null </param>
/// <param name="inStr"> the original term </param>
/// <param name="tarPos"> target position, forwarded to <c>Word2VecContext.GetContextVec</c> </param>
/// <param name="tarSize"> target size, forwarded to <c>Word2VecContext.GetContextVec</c> </param>
/// <param name="nonSpaceTokenList"> token list the context vector is built from </param>
/// <param name="word2VecIm"> word2Vec model used to build the context vector </param>
/// <param name="word2VecOm"> word2Vec model used to score the original term </param>
/// <param name="word2VecSkipWord"> forwarded to <c>Word2VecContext.GetContextVec</c> </param>
/// <param name="contextRadius"> forwarded to <c>Word2VecContext.GetContextVec</c> </param>
/// <param name="rw1To1Factor"> scaling factor applied in the score comparisons </param>
/// <param name="debugFlag"> boolean flag for debug print </param>
/// <returns>
/// true if the scoring rules accept the candidate; false otherwise,
/// including when there is no candidate, when the candidate equals the
/// original term, or when the original score is exactly 0.0
/// </returns>
private static bool CheckRealWord1To1Rules(ContextScore topContextScore, string inStr, int tarPos, int tarSize, List<TokenObj> nonSpaceTokenList, Word2Vec word2VecIm, Word2Vec word2VecOm, bool word2VecSkipWord, int contextRadius, double rw1To1Factor, bool debugFlag)
{
    // Threshold used by rule 2.2a, both for the minimum score gap and for
    // how far below zero the original score may be.
    const double ScoreGapThreshold = 0.085;

    // No top candidate found: nothing to correct to.
    if (topContextScore == null)
    {
        return false;
    }

    // Top candidate is the original term itself: nothing to correct.
    if (topContextScore.GetTerm().Equals(inStr))
    {
        return false;
    }

    // 1. word2Vec score of the original term, before one-to-one
    // 1.1 context vector around the target
    DoubleVec contextVec = Word2VecContext.GetContextVec(tarPos, tarSize,
        nonSpaceTokenList, word2VecIm, contextRadius, word2VecSkipWord,
        debugFlag);
    // 1.2 score of the original term against that context
    ContextScore orgCs = new ContextScore(inStr, contextVec, word2VecOm);

    DebugPrint.Println("--- Real-Word One-To-One Context Score Detail: ---", debugFlag);
    DebugPrint.Println("- Score - orgTerm: " + orgCs.ToString(), debugFlag);
    DebugPrint.Println("- Score - top 1-to-1: " + topContextScore.ToString(), debugFlag);
    DebugPrint.Println("- rw1To1Factor: " + rw1To1Factor, debugFlag);

    // 2. scoring rules for one-to-one
    double orgScore = orgCs.GetScore();
    double topScore = topContextScore.GetScore();

    if (orgScore < 0.0d)
    {
        if (topScore > 0.0d)
        {
            // 2.2a original is negative, candidate is positive: accept only
            // when the gap is large enough and the original is not too far
            // below zero (original note: help from 0.6812 to 0.6877)
            return ((topScore - orgScore) > ScoreGapThreshold)
                && (orgScore > -ScoreGapThreshold);
        }

        // 2.2b both negative: accept when the candidate beats the scaled
        // original score (a candidate score of exactly 0.0 is rejected)
        return (topScore < 0.0d) && (topScore > orgScore * rw1To1Factor);
    }

    if (orgScore > 0.0d)
    {
        // 2.3a original is positive: accept when the scaled candidate
        // score is still better than the original
        return topScore * rw1To1Factor > orgScore;
    }

    // 2.1 original score is 0.0 (no word2Vec information): no correction
    return false;
}