// private methods
/// <summary>
/// Interactive command-line test driver. Reads lines from standard input
/// until end of input; for each line it collects the 1-to-1 non-word
/// candidates, prints the candidate count and prints the string returned by
/// GetTopRankStr. The rank mode of the API is set to
/// RANK_MODE_NOISY_CHANNEL before reading.
/// </summary>
/// <param name="detailFlag"> when true, the ordered candidate scores are
/// printed as well </param>
/// <param name="limitNo"> maximum number of candidate scores printed in
/// detail mode </param>
/// <returns> 0 on success, -1 if an exception was caught </returns>
private static int RunTest(bool detailFlag, long limitNo)
{
    // init dic
    string configFile = "../data/Config/cSpell.properties";
    CSpellApi cSpellApi = new CSpellApi(configFile);
    WordWcMap wordWcMap = cSpellApi.GetWordWcMap();
    double wf1 = cSpellApi.GetOrthoScoreEdDistFac();
    double wf2 = cSpellApi.GetOrthoScorePhoneticFac();
    double wf3 = cSpellApi.GetOrthoScoreOverlapFac();
    cSpellApi.SetRankMode(CSpellApi.RANK_MODE_NOISY_CHANNEL);

    // provide cmdLine interface
    int returnValue = 0;
    NoisyChannelScoreComparator<NoisyChannelScore> ncsc = new NoisyChannelScoreComparator<NoisyChannelScore>();

    // Enumerable.Take only accepts an int. Clamp instead of casting blindly,
    // because an unchecked (int) cast of a large long wraps around and can
    // turn "print everything" into "print nothing".
    int maxItems = (int)Math.Max(0L, Math.Min(limitNo, (long)int.MaxValue));

    try
    {
        // The using statement disposes the reader on every exit path.
        using (StreamReader stdInput = new StreamReader(Console.OpenStandardInput()))
        {
            Console.WriteLine("- Please input a text (type \"Ctl-d\" to quit) > ");
            string inText;
            while ((inText = stdInput.ReadLine()) != null)
            {
                // ---------------------------------
                // Get spell correction on the input
                // ---------------------------------
                // get all possible candidates
                HashSet<string> candSet = NonWord1To1Candidates.GetCandidates(inText, cSpellApi);
                Console.WriteLine("-- canSet.size(): " + candSet.Count);

                // get final suggestion
                string topRankStr = GetTopRankStr(inText, candSet, wordWcMap, wf1, wf2, wf3);
                Console.WriteLine("- top rank str: " + topRankStr);

                // print details
                if (detailFlag)
                {
                    HashSet<NoisyChannelScore> candScoreSet = GetCandidateScoreSet(inText, candSet, wordWcMap, wf1, wf2, wf3);
                    Console.WriteLine("------ Suggestion List ------");
                    foreach (NoisyChannelScore score in candScoreSet.OrderBy(x => x, ncsc).Take(maxItems))
                    {
                        Console.WriteLine(score.ToString());
                    }
                }
            }
        }
    }
    catch (Exception e)
    {
        // Any failure (opening stdin, reading, ranking) ends the session
        // and is reported through the return code.
        Console.Error.WriteLine(e.Message);
        returnValue = -1;
    }

    return returnValue;
}
/// <summary>
/// Checks whether the input token is detected as a non-word and, if so,
/// replaces its core term with the top ranked correction candidate.
/// Ranking is delegated to RankNonWordByMode.GetTopRankStr, which also
/// receives the target position and the non-space token list.
/// </summary>
/// <param name="inTokenObj"> the input tokenObj (single word) </param>
/// <param name="cSpellApi"> CSpell Api object </param>
/// <param name="debugFlag"> flag for debug print </param>
/// <param name="tarPos"> position for target token </param>
/// <param name="nonSpaceTokenList"> token list without space token(s)
/// </param>
/// <returns> a new TokenObj built from the input token. Its token string is
/// set to the corrected word when a non-word is detected and the top ranked
/// candidate differs from the input word; otherwise it is left as
/// constructed from the input token. </returns>
public static TokenObj GetCorrectTerm(TokenObj inTokenObj, CSpellApi cSpellApi, bool debugFlag, int tarPos, List<TokenObj> nonSpaceTokenList)
{
    // init
    int funcMode = cSpellApi.GetFuncMode();

    // get inWord from inTokenObj and init outTokenObj
    string inWord = inTokenObj.GetTokenStr();
    TokenObj outTokenObj = new TokenObj(inTokenObj);

    // 1. convert a word to coreTerm (no leading/ending space, punc, digit)
    int ctType = CoreTermUtil.CT_TYPE_SPACE_PUNC_DIGIT;
    CoreTermObj coreTermObj = new CoreTermObj(inWord, ctType);
    string coreStr = coreTermObj.GetCoreTerm();

    // 2. non-word detection and correction
    // check if the coreTerm is spelling errors - non-word
    // TBD .. need to separate 1-to-1 and split
    if (!NonWordDetector.IsDetect(inWord, coreStr, cSpellApi, debugFlag))
    {
        // nothing detected: hand back the untouched copy
        return outTokenObj;
    }

    cSpellApi.UpdateDetectNo();

    // TBD, should take care of possessive xxx's here
    // 3.1 get 1-to-1 candidates set from correction, no split
    HashSet<string> candSet = NonWord1To1Candidates.GetCandidates(coreStr, cSpellApi);

    if (funcMode != CSpellApi.FUNC_MODE_NW_1)
    {
        // 3.2 get candidates from split
        int maxSplitNo = cSpellApi.GetCanNwMaxSplitNo();
        HashSet<string> splitSet = NonWordSplitCandidates.GetCandidates(coreStr, cSpellApi, maxSplitNo);

        if (funcMode == CSpellApi.FUNC_MODE_NW_S)
        {
            // 3.4 split only: the split candidates replace the 1-to-1 set
            candSet = new HashSet<string>(splitSet);
        }
        else
        {
            // 3.4 add split candidates to the 1-to-1 candidates.
            // UnionWith is the HashSet<T> counterpart of Java's addAll.
            candSet.UnionWith(splitSet);
        }
    }

    // 4. Ranking: get top ranked candidate as corrected term
    // (the overload taking tarPos/nonSpaceTokenList is used, i.e. with context)
    string topRankStr = RankNonWordByMode.GetTopRankStr(coreStr, candSet, cSpellApi, debugFlag, tarPos, nonSpaceTokenList);

    // 5. update coreTerm and convert back to the full word
    coreTermObj.SetCoreTerm(topRankStr);
    string outWord = coreTermObj.ToString();

    // 6. update info if there is a process
    if (!inWord.Equals(outWord))
    {
        outTokenObj.SetTokenStr(outWord);

        // A multiword result means the correction was a split; otherwise 1-to-1.
        bool isSplit = TermUtil.IsMultiword(outWord);
        cSpellApi.UpdateCorrectNo();
        if (isSplit)
        {
            outTokenObj.AddProcToHist(TokenObj.HIST_NW_S); //split
            DebugPrint.PrintCorrect("NW", "NonWordCorrector-Split", inWord, outWord, debugFlag);
        }
        else
        {
            outTokenObj.AddProcToHist(TokenObj.HIST_NW_1); // 1To1 correct
            DebugPrint.PrintCorrect("NW", "NonWordCorrector-1To1", inWord, outWord, debugFlag);
        }
    }

    return outTokenObj;
}
// private methods
// this test is not verified
/// <summary>
/// Interactive command-line test driver. Reads lines from standard input
/// until end of input; for each line it builds the target word from the
/// tokenized text, collects its 1-to-1 non-word candidates plus the target
/// itself, and prints the string returned by GetTopRankStr. The rank mode
/// of the API is set to RANK_MODE_CONTEXT before reading.
/// </summary>
/// <param name="detailFlag"> when true, the ordered candidate scores are
/// printed as well; it is also forwarded to the ranking calls </param>
/// <param name="tarPos"> position of the target token, counted without
/// space tokens </param>
/// <param name="tarSize"> number of tokens that make up the target </param>
/// <param name="contextRadius"> context radius forwarded to the ranking
/// calls </param>
/// <param name="limitNo"> maximum number of candidate scores printed in
/// detail mode </param>
/// <returns> 0 on success, -1 if an exception was caught </returns>
private static int RunTest(bool detailFlag, int tarPos, int tarSize, int contextRadius, long limitNo)
{
    const string prompt = "- Please input a text, only a spell error allowed (type \"Ctl-d\" to quit) > ";

    // init dic
    string configFile = "../data/Config/cSpell.properties";
    CSpellApi cSpellApi = new CSpellApi(configFile);
    cSpellApi.SetRankMode(CSpellApi.RANK_MODE_CONTEXT);
    Word2Vec word2VecIm = cSpellApi.GetWord2VecIm();
    Word2Vec word2VecOm = cSpellApi.GetWord2VecOm();
    bool word2VecSkipWord = cSpellApi.GetWord2VecSkipWord();
    ContextScoreComparator<ContextScore> csc = new ContextScoreComparator<ContextScore>();

    // Enumerable.Take only accepts an int. Clamp instead of casting blindly,
    // because an unchecked (int) cast of a large long wraps around.
    int maxItems = (int)Math.Max(0L, Math.Min(limitNo, (long)int.MaxValue));

    // provide cmdLine interface
    int returnValue = 0;
    try
    {
        // The using statement disposes the reader on every exit path.
        using (StreamReader stdInput = new StreamReader(Console.OpenStandardInput()))
        {
            Console.WriteLine(prompt);
            string inText;
            while ((inText = stdInput.ReadLine()) != null)
            {
                // ---------------------------------
                // Get spell correction on the input
                // ---------------------------------
                // convert input text to TokenObj
                TextObj textObj = new TextObj(inText);
                List<TokenObj> inTextList = textObj.GetTokenList();

                // *2 because tokenList include space
                string tarWord = inTextList[tarPos * 2].GetTokenStr();
                for (int i = 1; i < tarSize; i++)
                {
                    // Step through the i-th following word. The index must
                    // depend on i, otherwise the same token is appended on
                    // every iteration.
                    int ii = (tarPos + i) * 2;
                    tarWord += " " + inTextList[ii].GetTokenStr();
                }

                Console.WriteLine("- input text: [" + inText + "]");
                Console.WriteLine("- target: [" + tarPos + "|" + tarSize + "|" + tarWord + "]");
                Console.WriteLine("- context radius: " + contextRadius);

                // get all possible candidates
                HashSet<string> candSet = NonWord1To1Candidates.GetCandidates(tarWord, cSpellApi);
                candSet.Add(tarWord); // add the original word
                Console.WriteLine("-- canSet.size(): " + candSet.Count);

                // get final suggestion
                // remove space token from the list
                List<TokenObj> nonSpaceTokenList = TextObj.GetNonSpaceTokenObjList(inTextList);
                string topRankStr = GetTopRankStr(tarWord, candSet, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, detailFlag);
                Console.WriteLine("- top rank str: " + topRankStr);

                // print details
                if (detailFlag)
                {
                    // NOTE(review): GetTopRankStr above receives nonSpaceTokenList
                    // while this call receives inTextList (with space tokens) -
                    // confirm which list GetCandidateScoreSet expects.
                    HashSet<ContextScore> candScoreSet = GetCandidateScoreSet(candSet, tarPos, tarSize, inTextList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, detailFlag);
                    Console.WriteLine("------ Suggestion List ------");
                    foreach (ContextScore score in candScoreSet.OrderBy(x => x, csc).Take(maxItems))
                    {
                        Console.WriteLine(score.ToString());
                    }
                }

                // print the prompt
                Console.WriteLine(prompt);
            }
        }
    }
    catch (Exception e)
    {
        // Any failure (opening stdin, reading, an out-of-range target
        // position, ranking) ends the session and is reported through the
        // return code.
        Console.Error.WriteLine(e.Message);
        returnValue = -1;
    }

    return returnValue;
}