/// <summary>
        /// Console demo: picks one token out of a fixed sample sentence, passes it
        /// to GetCorrectTerm and prints the token before and after the call.
        /// </summary>
        /// <param name="cSpellApi">Handed through unchanged to GetCorrectTerm.</param>
        private static void TestSplit(CSpellApi cSpellApi)
        {
            // Sample sentence taken from 13864.txt. An alternative sample (10349.txt) is
            // "sounding in my ear every time for along time." with target index 7.
            string sampleText  = "I donate my self to be apart of this study.";
            int    targetIndex = 6;
            bool   verbose     = false;

            // Tokenize, then keep only the non-space tokens; targetIndex is an
            // index into that filtered list.
            List<TokenObj> allTokens  = new TextObj(sampleText).GetTokenList();
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(allTokens);
            TokenObj       target     = wordTokens[targetIndex];

            // NOTE(review): the banner reads "One-To-One" although this method is
            // named TestSplit -- confirm whether the text is intentional.
            Console.WriteLine("====== Real-Word One-To-One Correction Test =====");
            Console.WriteLine("-- inTextList: [" + sampleText + "]");
            Console.WriteLine("-- tarPos: [" + targetIndex + "]");
            Console.WriteLine("-- inTokenObj: [" + target.ToString() + "]");

            // Ask the corrector for its suggestion for the target token.
            TokenObj corrected = GetCorrectTerm(target, cSpellApi, verbose, targetIndex, wordTokens);

            // Report the result.
            Console.WriteLine("--------- GetCorrectTermStr( ) -----------");
            Console.WriteLine("-- outTokenObj: [" + corrected.ToString() + "]");
        }
        /// <summary>
        /// Console demo: builds a context vector around the two tokens "on set" in
        /// a fixed sentence and prints the ContextScore of "onset" and of "on set"
        /// against that context.
        /// </summary>
        /// <param name="w2vIm">Word2Vec object passed to Word2VecContext.GetContextVec.</param>
        /// <param name="w2vOm">Word2Vec object passed to the ContextScore constructor.</param>
        private static void TestOnSet(Word2Vec w2vIm, Word2Vec w2vOm)
        {
            string sentence = "He was diagnosed early on set dementia 3 years ago.";

            // Tokenize and drop the space tokens.
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(new TextObj(sentence).GetTokenList());

            Console.WriteLine("==========================================");
            Console.WriteLine("-- inTextList: [" + sentence + "]");

            int  targetIndex = 4;
            int  targetSize  = 2;      // "on set" has 2 tokens
            int  windowSize  = 2;      // context window radius
            bool skipWord    = true;
            bool verbose     = false;

            // Context vector restricted to the window radius around the target.
            DoubleVec context = Word2VecContext.GetContextVec(targetIndex, targetSize, wordTokens, w2vIm, windowSize, skipWord, verbose);

            // Score the merged form first, then the split form, against the same context.
            string       merged      = "onset";
            ContextScore mergedScore = new ContextScore(merged, context, w2vOm);
            string       split       = "on set";
            ContextScore splitScore  = new ContextScore(split, context, w2vOm);

            Console.WriteLine("- [" + merged + "]: " + mergedScore.ToString());
            Console.WriteLine("- [" + split + "]: " + splitScore.ToString());
        }
        // Example #3
        // 0
        /// <summary>
        /// Console demo for the one-to-one correction: selects one token of a
        /// fixed sample sentence, calls GetCorrectTerm on it and prints the token
        /// before and after.
        /// </summary>
        /// <param name="cSpellApi">Handed through unchanged to GetCorrectTerm.</param>
        private static void Test1To1(CSpellApi cSpellApi)
        {
            // Sample derived from 51.txt. Other variants that were tried:
            //   "You'd thing that this is good."
            //   "The doctor thing that this is good."
            string sampleText  = "you would thing that is good.";
            int    targetIndex = 2;
            bool   verbose     = false;

            // targetIndex addresses the token list with space tokens removed.
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(new TextObj(sampleText).GetTokenList());
            TokenObj       target     = wordTokens[targetIndex];

            Console.WriteLine("====== Real-Word One-To-One Correction Test =====");
            Console.WriteLine("-- inTextList: [" + sampleText + "]");
            Console.WriteLine("-- tarPos: [" + targetIndex + "]");
            Console.WriteLine("-- inTokenObj: [" + target.ToString() + "]");

            // Ask the corrector for its suggestion for the target token.
            TokenObj corrected = GetCorrectTerm(target, cSpellApi, verbose, targetIndex, wordTokens);

            // Report the result.
            Console.WriteLine("--------- GetCorrectTermStr( ) -----------");
            Console.WriteLine("-- outTokenObj: [" + corrected.ToString() + "]");
        }
        // private method
        // Test merge and Split
        /// <summary>
        /// Prints three lines of context scores for one target in <paramref name="inText"/>:
        /// the merged word, the split words as one term, and the average over the
        /// individual split words. Each line shows the score for the windowed
        /// context and for the context built without a radius argument.
        /// </summary>
        /// <param name="inText">Sentence to tokenize.</param>
        /// <param name="tarPos">Index of the target in the non-space token list.</param>
        /// <param name="tarSize">Number of tokens the target spans.</param>
        /// <param name="radius">Window radius passed to Word2VecContext.GetContextVec.</param>
        /// <param name="mergedWord">Merged candidate, e.g. a single word.</param>
        /// <param name="splitWords">Split candidate; also broken up via TermUtil.ToWordList.</param>
        /// <param name="w2vIm">Word2Vec object used to build the context vectors.</param>
        /// <param name="w2vOm">Word2Vec object used by ContextScore.</param>
        private static void Test(string inText, int tarPos, int tarSize, int radius, string mergedWord, string splitWords, Word2Vec w2vIm, Word2Vec w2vOm)
        {
            bool   skipWord    = true;
            bool   verbose     = false;
            string scoreFormat = "{0,1:F8}";   // shared numeric format for the right-hand columns

            // 0. Tokenize the input and drop the space tokens.
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(new TextObj(inText).GetTokenList());

            Console.WriteLine("==========================================");
            Console.WriteLine("-- inTextList: [" + inText + "]");

            // 1. Two contexts for the whole target: with a window radius, and the
            //    overload without a radius (labelled "all inText" in the output).
            DoubleVec windowContext = Word2VecContext.GetContextVec(tarPos, tarSize, wordTokens, w2vIm, radius, skipWord, verbose);
            DoubleVec fullContext   = Word2VecContext.GetContextVec(tarPos, tarSize, wordTokens, w2vIm, skipWord, verbose);

            // 1.c Merged word against both contexts.
            ContextScore mergedWindow = new ContextScore(mergedWord, windowContext, w2vOm);
            ContextScore mergedFull   = new ContextScore(mergedWord, fullContext, w2vOm);
            Console.WriteLine(mergedWindow.ToString() + "|" + string.Format(scoreFormat, mergedFull.GetScore()));

            // 2. Split words (as one term) against both contexts.
            ContextScore splitWindow = new ContextScore(splitWords, windowContext, w2vOm);
            ContextScore splitFull   = new ContextScore(splitWords, fullContext, w2vOm);
            Console.WriteLine(splitWindow.ToString() + "|" + string.Format(scoreFormat, splitFull.GetScore()));

            // 3. Average score over the single words. Each word gets its own
            //    context, positioned at tarPos + its offset, with target size 1.
            List<string> singleWords = TermUtil.ToWordList(splitWords);
            double       windowSum   = 0.0d;
            double       fullSum     = 0.0d;
            int          offset      = 0;

            for (; offset < singleWords.Count; offset++)
            {
                string word = singleWords[offset];

                // windowed context first ...
                DoubleVec    wordWindowContext = Word2VecContext.GetContextVec(tarPos + offset, 1, wordTokens, w2vIm, radius, skipWord, verbose);
                ContextScore wordWindowScore   = new ContextScore(word, wordWindowContext, w2vOm);
                windowSum += wordWindowScore.GetScore();

                // ... then the context without a radius
                DoubleVec    wordFullContext = Word2VecContext.GetContextVec(tarPos + offset, 1, wordTokens, w2vIm, skipWord, verbose);
                ContextScore wordFullScore   = new ContextScore(word, wordFullContext, w2vOm);
                fullSum += wordFullScore.GetScore();
            }

            // offset now equals the number of single words processed.
            double windowAvg = windowSum / offset;
            double fullAvg   = fullSum / offset;
            Console.WriteLine("Avg. Single Word|" + string.Format(scoreFormat, windowAvg) + "|" + string.Format(scoreFormat, fullAvg));
        }
        // recursively process
        /// <summary>
        /// Applies the single-argument Process to <paramref name="inWord"/>, then
        /// keeps re-processing the result token by token until a pass leaves the
        /// text unchanged or <paramref name="maxProcess"/> extra passes have run.
        /// </summary>
        /// <param name="inWord">Text to process.</param>
        /// <param name="maxProcess">
        /// Upper bound on the number of token-by-token passes performed after the
        /// initial call; zero or a negative value means only the initial call runs.
        /// </param>
        /// <returns>The text produced by the last pass.</returns>
        public static string Process(string inWord, int maxProcess)
        {
            string previous = inWord;
            string current  = Process(inWord);

            for (int passesLeft = maxProcess; passesLeft > 0 && !current.Equals(previous); passesLeft--)
            {
                previous = current;

                // Re-tokenize the latest text and run every token through the
                // single-argument Process, wrapping each result in a new TokenObj.
                List<TokenObj> processedTokens = new List<TokenObj>();
                foreach (TokenObj token in new TextObj(previous).GetTokenList())
                {
                    string processedStr = Process(token.GetTokenStr());
                    processedTokens.Add(new TokenObj(processedStr));
                }

                current = TextObj.TokenListToText(processedTokens);
            }
            return current;
        }
        // private methods
        // this test is not verified
        /// <summary>
        /// Interactive console loop. Reads one line of text at a time from standard
        /// input, takes the target term at <paramref name="tarPos"/>, collects its
        /// candidates, and prints the top-ranked candidate by context score.
        /// </summary>
        /// <param name="detailFlag">
        /// Passed to the ranking helpers; when true the ranked suggestion list is
        /// printed as well.
        /// </param>
        /// <param name="tarPos">Index of the target's first token in the non-space token list.</param>
        /// <param name="tarSize">Number of consecutive tokens that make up the target.</param>
        /// <param name="contextRadius">Context radius passed to the ranking helpers.</param>
        /// <param name="limitNo">Maximum number of suggestions printed in detail mode (cast to int).</param>
        /// <returns>0 when input ends without error; -1 when an exception was caught.</returns>
        private static int RunTest(bool detailFlag, int tarPos, int tarSize, int contextRadius, long limitNo)
        {
            // init dic
            string    configFile = "../data/Config/cSpell.properties";
            CSpellApi cSpellApi  = new CSpellApi(configFile);

            cSpellApi.SetRankMode(CSpellApi.RANK_MODE_CONTEXT);
            Word2Vec word2VecIm       = cSpellApi.GetWord2VecIm();
            Word2Vec word2VecOm       = cSpellApi.GetWord2VecOm();
            bool     word2VecSkipWord = cSpellApi.GetWord2VecSkipWord();
            ContextScoreComparator <ContextScore> csc = new ContextScoreComparator <ContextScore>();

            string prompt      = "- Please input a text, only a spell error allowed (type \"Ctl-d\" to quit) > ";
            int    returnValue = 0;

            try {
                // The reader is disposed when the loop ends or an exception escapes it.
                using (StreamReader stdInput = new StreamReader(Console.OpenStandardInput()))
                {
                    string inText;
                    Console.WriteLine(prompt);
                    while ((inText = stdInput.ReadLine()) != null)
                    {
                        // convert input text to TokenObj
                        TextObj         textObj    = new TextObj(inText);
                        List <TokenObj> inTextList = textObj.GetTokenList();

                        // *2 because the token list includes the space tokens
                        // (assumes word and space tokens strictly alternate -- TODO confirm).
                        string tarWord = inTextList[tarPos * 2].GetTokenStr();
                        for (int i = 1; i < tarSize; i++)
                        {
                            // Offset by i so every following token of a multi-token
                            // target is appended, not the same neighbour each time.
                            tarWord += " " + inTextList[(tarPos + i) * 2].GetTokenStr();
                        }
                        Console.WriteLine("- input text: [" + inText + "]");
                        Console.WriteLine("- target: [" + tarPos + "|" + tarSize + "|" + tarWord + "]");
                        Console.WriteLine("- context radius: " + contextRadius);

                        // get all possible candidates, plus the original word
                        HashSet <string> candSet = NonWord1To1Candidates.GetCandidates(tarWord, cSpellApi);
                        candSet.Add(tarWord);
                        Console.WriteLine("-- canSet.size(): " + candSet.Count);

                        // get final suggestion; tarPos indexes the list without space tokens
                        List <TokenObj> nonSpaceTokenList = TextObj.GetNonSpaceTokenObjList(inTextList);
                        string          topRankStr        = GetTopRankStr(tarWord, candSet, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, detailFlag);
                        Console.WriteLine("- top rank str: " + topRankStr);

                        // print details
                        if (detailFlag)
                        {
                            // Score against the same space-free token list used for the
                            // top-rank call, so tarPos addresses the same token in both.
                            HashSet <ContextScore> candScoreSet = GetCandidateScoreSet(candSet, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, detailFlag);
                            Console.WriteLine("------ Suggestion List ------");
                            foreach (ContextScore candScore in candScoreSet.OrderBy(x => x, csc).Take((int)limitNo))
                            {
                                Console.WriteLine(candScore.ToString());
                            }
                        }

                        // print the prompt for the next line
                        Console.WriteLine(prompt);
                    }
                }
            } catch (Exception e) {
                // Any failure (opening stdin, bad index, ranking error) ends the
                // session: report the message and signal failure to the caller.
                Console.Error.WriteLine(e.Message);
                returnValue = -1;
            }
            return(returnValue);
        }