private static void TestSplit(CSpellApi cSpellApi)
        {
            // Console driver: runs GetCorrectTerm on one fixed token of a
            // sample sentence and prints the input and the returned token.
            // Sample sentence is from 13864.txt; another sample (10349.txt) is
            // "sounding in my ear every time for along time."
            const string sampleText = "I donate my self to be apart of this study.";
            // index of the token under test within the non-space token list
            const int targetPos = 6;
            const bool verbose = false;

            // tokenize the sample, then keep only the non-space tokens
            List<TokenObj> allTokens = new TextObj(sampleText).GetTokenList();
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(allTokens);
            TokenObj targetToken = wordTokens[targetPos];

            Console.WriteLine("====== Real-Word One-To-One Correction Test =====");
            Console.WriteLine($"-- inTextList: [{sampleText}]");
            Console.WriteLine($"-- tarPos: [{targetPos}]");
            Console.WriteLine($"-- inTokenObj: [{targetToken.ToString()}]");

            // ask the corrector for its result on the target token
            TokenObj correctedToken = GetCorrectTerm(targetToken, cSpellApi, verbose, targetPos, wordTokens);

            // report the result
            Console.WriteLine("--------- GetCorrectTermStr( ) -----------");
            Console.WriteLine($"-- outTokenObj: [{correctedToken.ToString()}]");
        }
Exemplo n.º 2
0
        // public methods
        // private methods
        private static void Test()
        {
            // Console driver: builds one hand-written MergeObj for the sample
            // sentence below and prints it together with GetNonMergeTerm's result.

            // token indexes of the merge
            int mergeStartIndex = 4;
            int targetIndex     = 6;
            int mergeEndIndex   = 6;
            // positions of the merge
            int mergeStartPos = 2;
            int targetPos     = 3;
            int mergeEndPos   = 3;
            // total number of merged tokens
            int mergedCount = 1;
            // target term, the original text before the merge, the suggested
            // merged term and the core of the suggested merged term
            string targetWord     = "gnosed";
            string wordsBefore    = "dia gnosed";
            string mergedWord     = "diagnosed.";
            string mergedCoreWord = "diagnosed";

            MergeObj sampleMerge = new MergeObj(
                targetWord, wordsBefore, mergedWord, mergedCoreWord, mergedCount,
                mergeStartIndex, targetIndex, mergeEndIndex,
                mergeStartPos, targetPos, mergeEndPos);

            // tokenize the sentence and drop the space tokens
            string         sentence   = "He is dia gnosed last week.";
            List<TokenObj> allTokens  = TextObj.TextToTokenList(sentence);
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(allTokens);

            Console.WriteLine("------ Merge Obj -------");
            Console.WriteLine(sampleMerge.ToString());
            Console.WriteLine("------ Non Merge Term -------");

            string unmergedTerm = GetNonMergeTerm(sampleMerge, wordTokens);

            Console.WriteLine($"- inText: [{sentence}]");
            Console.WriteLine($"- nonMergeTerm: [{unmergedTerm}]");
        }
Exemplo n.º 3
0
        private static void TestOnSet(Word2Vec w2vIm, Word2Vec w2vOm)
        {
            // Console driver: scores "onset" and "on set" against the same
            // context vector built around the two-token target "on set".
            const string sentence = "He was diagnosed early on set dementia 3 years ago.";

            // tokenize and keep only the non-space tokens
            List<TokenObj> allTokens  = new TextObj(sentence).GetTokenList();
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(allTokens);

            Console.WriteLine("==========================================");
            Console.WriteLine($"-- inTextList: [{sentence}]");

            const int  targetPos    = 4;
            const int  targetSize   = 2;    // "on set" spans 2 tokens
            const int  windowRadius = 2;
            const bool skipWord     = true;
            const bool verbose      = false;

            // context vector limited to the window radius around the target
            DoubleVec context = Word2VecContext.GetContextVec(targetPos, targetSize, wordTokens, w2vIm, windowRadius, skipWord, verbose);

            // score the merged and the split form against that context
            const string merged = "onset";
            ContextScore mergedScore = new ContextScore(merged, context, w2vOm);
            const string split = "on set";
            ContextScore splitScore = new ContextScore(split, context, w2vOm);

            Console.WriteLine($"- [{merged}]: {mergedScore.ToString()}");
            Console.WriteLine($"- [{split}]: {splitScore.ToString()}");
        }
Exemplo n.º 4
0
        private static void Test1To1(CSpellApi cSpellApi)
        {
            // Console driver: runs GetCorrectTerm on one fixed token of a
            // sample sentence and prints the input and the returned token.
            // Other samples tried for 51.txt:
            //   "You'd thing that this is good."
            //   "The doctor thing that this is good."
            const string sampleText = "you would thing that is good.";
            // index of the token under test within the non-space token list
            const int  targetPos = 2;
            const bool verbose   = false;

            // tokenize the sample, then keep only the non-space tokens
            List<TokenObj> allTokens   = new TextObj(sampleText).GetTokenList();
            List<TokenObj> wordTokens  = TextObj.GetNonSpaceTokenObjList(allTokens);
            TokenObj       targetToken = wordTokens[targetPos];

            Console.WriteLine("====== Real-Word One-To-One Correction Test =====");
            Console.WriteLine($"-- inTextList: [{sampleText}]");
            Console.WriteLine($"-- tarPos: [{targetPos}]");
            Console.WriteLine($"-- inTokenObj: [{targetToken.ToString()}]");

            // ask the corrector for its result on the target token
            TokenObj correctedToken = GetCorrectTerm(targetToken, cSpellApi, verbose, targetPos, wordTokens);

            // report the result
            Console.WriteLine("--------- GetCorrectTermStr( ) -----------");
            Console.WriteLine($"-- outTokenObj: [{correctedToken.ToString()}]");
        }
        // public method
        // process
        public static List <TokenObj> Process(List <TokenObj> inTokenList, CSpellApi cSpellApi, bool debugFlag)
        {
            // Real-word merge step: walks inTokenList, collects one MergeObj per
            // detected merge, then applies all merges to the list in one pass.
            DebugPrint.PrintProcess("5. RealWord-Merge", debugFlag);
            DebugPrint.PrintInText(TextObj.TokenListToText(inTokenList), debugFlag);

            // pre-process: refresh the pos stored on each token, then build the
            // list of non-space tokens used by the corrector
            TextObj.UpdateIndexPos(inTokenList);
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(inTokenList);

            List<MergeObj> merges = new List<MergeObj>();
            int legitLengthLimit = cSpellApi.GetMaxLegitTokenLength();

            int cursor = 0;
            while (cursor < inTokenList.Count)
            {
                TokenObj current = inTokenList[cursor];
                MergeObj found = null;

                // SCR-3: only legit tokens are sent to the corrector
                if (current.IsLegitToken(legitLengthLimit))
                {
                    // the returned merge is the highest ranked candidate
                    found = RealWordMergeCorrector.GetCorrectTerm(current.GetPos(), wordTokens, cSpellApi, debugFlag);
                }

                if (found == null)
                {
                    // non-legit token, or no merge correction: step to the next token
                    cursor++;
                    continue;
                }

                merges.Add(found);
                // resume right after the merge's end token so merges never overlap
                cursor = found.GetEndIndex() + 1;
            }

            // The merges are applied only after the scan because a merge can
            // involve tokens before the current one; the call also records the
            // operation history (HIST_RW_M).
            return MergeCorrector.CorrectTokenListByMerge(inTokenList, merges, TokenObj.HIST_RW_M, debugFlag, cSpellApi);
        }
Exemplo n.º 6
0
        // private method
        // Test merge and Split
        private static void Test(string inText, int tarPos, int tarSize, int radius, string mergedWord, string splitWords, Word2Vec w2vIm, Word2Vec w2vOm)
        {
            // Prints three score lines for one target in inText:
            //   1. mergedWord  (window context | whole-text context)
            //   2. splitWords  (window context | whole-text context)
            //   3. average over the single words of splitWords, each scored
            //      against its own one-token context
            const bool skipWord = true;
            const bool verbose  = false;

            // 0. tokenize the text and drop the space tokens
            List<TokenObj> allTokens  = new TextObj(inText).GetTokenList();
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(allTokens);

            Console.WriteLine("==========================================");
            Console.WriteLine($"-- inTextList: [{inText}]");

            // 1. context of the whole target: window radius, then all of inText
            DoubleVec windowContext = Word2VecContext.GetContextVec(tarPos, tarSize, wordTokens, w2vIm, radius, skipWord, verbose);
            DoubleVec fullContext   = Word2VecContext.GetContextVec(tarPos, tarSize, wordTokens, w2vIm, skipWord, verbose);

            // 1.c merged word
            ContextScore mergedWindow = new ContextScore(mergedWord, windowContext, w2vOm);
            ContextScore mergedFull   = new ContextScore(mergedWord, fullContext, w2vOm);
            Console.WriteLine($"{mergedWindow.ToString()}|{mergedFull.GetScore(),1:F8}");

            // 2. split words
            ContextScore splitWindow = new ContextScore(splitWords, windowContext, w2vOm);
            ContextScore splitFull   = new ContextScore(splitWords, fullContext, w2vOm);
            Console.WriteLine($"{splitWindow.ToString()}|{splitFull.GetScore(),1:F8}");

            // 3. average score over the single words; the i-th word is scored
            //    against the context of the single token at tarPos + i
            List<string> singleWords = TermUtil.ToWordList(splitWords);
            double windowSum = 0.0d;
            double fullSum   = 0.0d;
            for (int offset = 0; offset < singleWords.Count; offset++)
            {
                string word = singleWords[offset];

                // window radius
                DoubleVec wordWindowContext = Word2VecContext.GetContextVec(tarPos + offset, 1, wordTokens, w2vIm, radius, skipWord, verbose);
                windowSum += new ContextScore(word, wordWindowContext, w2vOm).GetScore();

                // all text
                DoubleVec wordFullContext = Word2VecContext.GetContextVec(tarPos + offset, 1, wordTokens, w2vIm, skipWord, verbose);
                fullSum += new ContextScore(word, wordFullContext, w2vOm).GetScore();
            }
            double windowAvg = windowSum / singleWords.Count;
            double fullAvg   = fullSum / singleWords.Count;
            Console.WriteLine($"Avg. Single Word|{windowAvg,1:F8}|{fullAvg,1:F8}");
        }
Exemplo n.º 7
0
        private static void TestGetCorrectTerm(CSpellApi cSpellApi)
        {
            // Console driver: asks NonWordMergeCorrector for the merge at
            // position 0 of a fixed sample sentence and prints the outcome.
            string          inText      = "Dur ing my absent.";
            bool            debugFlag   = false;
            List <TokenObj> inTokenList = TextObj.TextToTokenList(inText);
            // 1. convert to the non-space token list
            List <TokenObj> nonSpaceTokenList = TextObj.GetNonSpaceTokenObjList(inTokenList);
            // result
            int      tarPos   = 0;
            MergeObj mergeObj = NonWordMergeCorrector.GetCorrectTerm(tarPos, nonSpaceTokenList, cSpellApi, debugFlag);
            // The real-word Process routine in this code base treats a null
            // result from the same-shaped GetCorrectTerm call as "no merge
            // correction". Presumably the non-word variant can return null too
            // (confirm against NonWordMergeCorrector), so print "null" rather
            // than crash the driver on ToString().
            string mergeStr = (mergeObj == null) ? "null" : mergeObj.ToString();

            // print out
            Console.WriteLine("--------- GetCorrectTerm( ) -----------");
            Console.WriteLine("In: [" + inText + "]");
            Console.WriteLine("In nonSpaceTokenList: [" + nonSpaceTokenList.Count + "]");
            Console.WriteLine("Out MergeObj: [" + mergeStr + "]");
        }
Exemplo n.º 8
0
        // public method
        // Use: for loop, the latest and greatest implementation
        // original implementation with for loop, To be deleted
        // the core of spell-correction, include split
        // inTokenList is the whole text
        public static List <TokenObj> Process(List <TokenObj> inTokenList, CSpellApi cSpellApi, bool debugFlag)
        {
            // Non-word split and 1-to-1 step: every legit token of inTokenList
            // is sent to NonWordCorrector with its context position; all other
            // tokens are passed through unchanged, so the original space
            // tokens are preserved in the output.
            DebugPrint.PrintProcess("3-4. NonWord-Split & 1To1", debugFlag);
            DebugPrint.PrintInText(TextObj.TokenListToText(inTokenList), debugFlag);

            List<TokenObj> corrected = new List<TokenObj>();
            // context for the corrector: the text without its space tokens
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(inTokenList);
            int legitLengthLimit = cSpellApi.GetMaxLegitTokenLength();

            // position handed to the context module; advanced once per legit token
            // NOTE(review): contextPos indexes wordTokens but only counts legit
            // tokens. If a non-space token can fail IsLegitToken (e.g. too long),
            // the two would drift apart - confirm against IsLegitToken.
            int contextPos = 0;
            foreach (TokenObj token in inTokenList)
            {
                // default: space / non-legit tokens are copied as they are
                TokenObj result = token;

                // SCR-3: only legit tokens are corrected
                if (token.IsLegitToken(legitLengthLimit))
                {
                    // the returned token is the highest ranked candidate
                    result = NonWordCorrector.GetCorrectTerm(token, cSpellApi, debugFlag, contextPos, wordTokens);
                    contextPos++;
                }

                // a correction may be a split, so let the helper append it
                Split1To1Corrector.AddSplit1To1Correction(corrected, result);
            }
            return corrected;
        }
Exemplo n.º 9
0
        // private methods
        private static void Test()
        {
            // Console driver: loads cSpell from its properties file, tokenizes
            // a fixed sentence, dumps the tokens, and prints every merge
            // candidate returned by GetCandidates for one target position.
            CSpellApi cSpellApi = new CSpellApi("../data/Config/cSpell.properties");

            Console.WriteLine("===== Unit Test of MergeCandidates =====");
            // Other samples:
            //   "He was dia gnosed  early onset deminita 3 year ago."
            //   (73.txt) "I have seven live births with no problems dur ing my pregnancies. That is a dis appoint ment"
            const string sentence = "That is a disa ppoint ment.";
            List<TokenObj> allTokens = TextObj.TextToTokenList(sentence);
            string joinedTokens = String.Join("|", allTokens.Select(token => token.GetTokenStr()));

            Console.WriteLine($" - inTextList ({allTokens.Count}): [{joinedTokens}]");
            Console.WriteLine("-------------------------");
            allTokens.ForEach(token => Console.WriteLine(token.ToString()));

            const int targetPos = 4;

            Console.WriteLine("-------------------------");
            Console.WriteLine($"- tarPos: {targetPos}");
            Console.WriteLine($"- maxMergeNo: {cSpellApi.GetCanNwMaxMergeNo()}");
            Console.WriteLine("------ merge set -------");

            // pre-process: keep only the non-space tokens
            List<TokenObj> wordTokens = TextObj.GetNonSpaceTokenObjList(allTokens);
            // candidates for the chosen target position
            HashSet<MergeObj> candidates = GetCandidates(targetPos, wordTokens, cSpellApi);

            // print out
            foreach (MergeObj candidate in candidates)
            {
                Console.WriteLine(candidate.ToString());
            }
        }
Exemplo n.º 10
0
        // Console driver for GetContext: for every non-space token of a fixed
        // sample text, prints the context words returned with
        //   1. radius 3 and word2VecSkipWord = false,
        //   2. radius 3 and word2VecSkipWord = true,
        //   3. the overload without a radius and word2VecSkipWord = true.
        // Passes 1 and 2 print "tarPos|index|token", where tarPos is the position
        // in the non-space list and index the position in the full token list.
        // w2vOm is accepted but not used by this routine.
        private static void Tests(Word2Vec w2vIm, Word2Vec w2vOm)
        {
            string          inText     = "... last 10 years #$% was dianosed test123 yahoo.com early on set deminita 3 year ago.";
            List <TokenObj> inTextList = TextObj.TextToTokenList(inText);

            Console.WriteLine("======= Word2VecContext ======================");
            Console.WriteLine(" - inText: [" + inText + "]");
            string inStr = String.Join("|", inTextList.Select(obj => obj.GetTokenStr()));

            Console.WriteLine(" - inTextList (" + inTextList.Count + "): [" + inStr + "]");

            int  tarPos    = 0;
            int  tarSize   = 1;
            int  index     = 0;
            int  radius    = 3;
            bool debugFlag = false;

            Console.WriteLine("------ Test GetContext (no skip), radius=3 ...");
            // remove space token from the list
            List <TokenObj> nonSpaceTokenList = TextObj.GetNonSpaceTokenObjList(inTextList);

            foreach (TokenObj tokenObj in inTextList)
            {
                // not the space token
                if (tokenObj.IsSpaceToken() == false)
                {
                    string tokenStr = tokenObj.GetTokenStr();
                    // word2VecSkipWord = false (no skip)
                    List <string> contextList = GetContext(tarPos, tarSize, nonSpaceTokenList, w2vIm, radius, false, debugFlag);
                    string        contextStr  = String.Join("|", contextList);
                    Console.WriteLine(tarPos + "|" + index + "|" + tokenStr + ": [" + contextStr + "]");
                    tarPos++;
                }
                index++;
            }
            Console.WriteLine("------ Test GetContext (skip) , radius=3 ...");
            Console.WriteLine(" - inText: [" + inText + "]");
            // restart both counters: without resetting index the second pass
            // would keep counting from the end of the first pass and print
            // indexes that are not positions in inTextList
            tarPos = 0;
            index  = 0;
            foreach (TokenObj tokenObj in inTextList)
            {
                // not the space token
                if (tokenObj.IsSpaceToken() == false)
                {
                    string tokenStr = tokenObj.GetTokenStr();
                    // word2VecSkipWord = true (skip)
                    List <string> contextList2 = GetContext(tarPos, tarSize, nonSpaceTokenList, w2vIm, radius, true, debugFlag);
                    string        contextStr2  = String.Join("|", contextList2);
                    Console.WriteLine(tarPos + "|" + index + "|" + tokenStr + ": [" + contextStr2 + "]");
                    tarPos++;
                }
                index++;
            }
            Console.WriteLine("------ Test GetContext (skip) , all ...");
            Console.WriteLine(" - inText: [" + inText + "]");
            tarPos = 0;
            // the list holds non-space tokens only, so no filtering is needed
            foreach (TokenObj tokenObj in nonSpaceTokenList)
            {
                string tokenStr = tokenObj.GetTokenStr();
                // word2VecSkipWord = true (skip), no radius argument
                List <string> contextList3 = GetContext(tarPos, tarSize, nonSpaceTokenList, w2vIm, true, debugFlag);
                string        contextStr3  = String.Join("|", contextList3);
                Console.WriteLine(tarPos + "|" + tokenStr + ": [" + contextStr3 + "]");
                tarPos++;
            }
        }
Exemplo n.º 11
0
        // Console driver that prints context scores for hand-picked candidates:
        //   1. diagnosed|diagnose|dianosed  (window radius 2, then whole text)
        //   2. "know about", "know", "about" (window radius 2 and whole text each)
        //   3. onset|on set, plus "on" and "set" alone (window radius 3),
        //      then onset|on set against the whole text
        //   4. every token of a sample sentence with window radius 1..9
        // tarPos values are positions in the non-space token list of each text.
        private static void Tests(Word2Vec w2vIm, Word2Vec w2vOm)
        {
            bool word2VecSkipWord = true;
            bool debugFlag        = false;

            // ---- 1. diagnosed|diagnose|dianosed ----
            string          inText     = "for the last 10 years    was dianosed\n early on set deminita 3 years ago";
            List <TokenObj> inTextList = TextObj.TextToTokenList(inText);
            // remove space token from the list
            List <TokenObj> nonSpaceTokenList = TextObj.GetNonSpaceTokenObjList(inTextList);
            List <string>   testStrList       = new List <string> { "diagnosed", "diagnose", "dianosed" };

            // init context
            int       tarPos     = 6;
            int       tarSize    = 1;
            int       radius     = 2;
            DoubleVec contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList, w2vIm, radius, word2VecSkipWord, debugFlag);

            Console.WriteLine("===== Test diagnosed|diagnose|dianosed (window-2) =====");
            Console.WriteLine("inText: [" + inText + "]");
            Console.WriteLine("============================================");
            Console.WriteLine("Candidates|CBOW score|CBOW score 2|Similarity score");
            Console.WriteLine("============================================");
            foreach (string testStr in testStrList)
            {
                Test(testStr, contextVec, w2vIm, w2vOm);
            }
            Console.WriteLine("===== Test diagnosed|diagnose|dianosed (whole text) =====");
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList, w2vIm, word2VecSkipWord, debugFlag);
            foreach (string testStr in testStrList)
            {
                Test(testStr, contextVec, w2vIm, w2vOm);
            }

            // ---- 2. know about|know|about ----
            string          inText1     = "Not all doctors know about this syndrome.";
            List <TokenObj> inTextList1 = TextObj.TextToTokenList(inText1);
            // remove space token from the list
            List <TokenObj> nonSpaceTokenList1 = TextObj.GetNonSpaceTokenObjList(inTextList1);

            Console.WriteLine("===== Test know about|know|about (window) =====");
            List <string> testStrList1 = new List <string> { "know about", "know", "about" };

            // "know about": two tokens starting at position 3
            tarPos     = 3;
            tarSize    = 2;
            radius     = 2;
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList1, w2vIm, radius, word2VecSkipWord, debugFlag);
            Test(testStrList1[0], contextVec, w2vIm, w2vOm);
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList1, w2vIm, word2VecSkipWord, debugFlag);
            Test(testStrList1[0], contextVec, w2vIm, w2vOm);
            // "know": one token at position 3
            tarPos     = 3;
            tarSize    = 1;
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList1, w2vIm, radius, word2VecSkipWord, debugFlag);
            Test(testStrList1[1], contextVec, w2vIm, w2vOm);
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList1, w2vIm, word2VecSkipWord, debugFlag);
            Test(testStrList1[1], contextVec, w2vIm, w2vOm);
            // "about": one token at position 4
            tarPos     = 4;
            tarSize    = 1;
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList1, w2vIm, radius, word2VecSkipWord, debugFlag);
            Test(testStrList1[2], contextVec, w2vIm, w2vOm);
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList1, w2vIm, word2VecSkipWord, debugFlag);
            Test(testStrList1[2], contextVec, w2vIm, w2vOm);

            // ---- 3. onset|on set ----
            string          inText2     = "for the last   10 years was diagnosed early on set dementia 3 years ago.";
            List <TokenObj> inTextList2 = TextObj.TextToTokenList(inText2);
            // remove space token from the list
            List <TokenObj> nonSpaceTokenList2 = TextObj.GetNonSpaceTokenObjList(inTextList2);
            List <string>   testStrList2       = new List <string> { "onset", "on set" };

            Console.WriteLine("===== Test onset|on set (window-3) =====");
            // report the text actually used for this section (was: inText)
            Console.WriteLine("inText: [" + inText2 + "]");
            // "on set": two tokens starting at position 8
            tarPos     = 8;
            tarSize    = 2;
            radius     = 3;
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList2, w2vIm, radius, word2VecSkipWord, debugFlag);
            foreach (string testStr in testStrList2)
            {
                Test(testStr, contextVec, w2vIm, w2vOm);
            }
            // "on" alone
            tarPos     = 8;
            tarSize    = 1;
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList2, w2vIm, radius, word2VecSkipWord, debugFlag);
            Test("on", contextVec, w2vIm, w2vOm);
            // "set" alone
            tarPos     = 9;
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList2, w2vIm, radius, word2VecSkipWord, debugFlag);
            Test("set", contextVec, w2vIm, w2vOm);

            Console.WriteLine("===== Test onset|on set (whole text) =====");
            // point back at the two-token target "on set"; the previous code
            // reused tarPos=9/tarSize=1 left over from the "set" test above,
            // which kept "on" inside the context of the candidates
            tarPos     = 8;
            tarSize    = 2;
            contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList2, w2vIm, word2VecSkipWord, debugFlag);
            foreach (string testStr in testStrList2)
            {
                Test(testStr, contextVec, w2vIm, w2vOm);
            }

            // ---- 4. every token, radius 1..9 ----
            Console.WriteLine("===== Go through each tokens with diff radius 1-9) =====");
            Console.WriteLine("tarPos|tarWord|r=1|r=2|r=3|r=4|r=5|r=6|r=7|r=8|r=9");
            // alternative sample: "Broken bones can not sleep at night!"
            string          inText3     = "not xyxy all doctors know about this syndrome.";
            List <TokenObj> inTextList3 = TextObj.TextToTokenList(inText3);
            // remove space token from the list
            List <TokenObj> nonSpaceTokenList3 = TextObj.GetNonSpaceTokenObjList(inTextList3);

            tarPos  = 0;
            tarSize = 1;
            foreach (TokenObj tokenObj in nonSpaceTokenList3)
            {
                string tokenStr = tokenObj.GetTokenStr();
                string inStr    = Word2VecContext.NormWordForWord2Vec(tokenStr);
                Console.Write(tarPos + "|" + tokenStr + "|");
                // print out all radius
                for (int r = 1; r < 10; r++)
                {
                    // context must come from the list being iterated; the
                    // previous code passed inTextList2 (another sentence,
                    // space tokens included), so tarPos pointed at unrelated tokens
                    contextVec = Word2VecContext.GetContextVec(tarPos, tarSize, nonSpaceTokenList3, w2vIm, r, word2VecSkipWord, debugFlag);
                    TestWin(inStr, contextVec, w2vIm, w2vOm);
                }
                Console.WriteLine("");
                tarPos++;
            }
        }
Exemplo n.º 12
0
        // private methods
        // this test is not verified
        // Interactive console driver: loads cSpell in context rank mode, then
        // for every line read from standard input prints the target term, the
        // number of candidates and the top ranked candidate; with detailFlag
        // it also lists up to limitNo scored candidates.
        //   detailFlag    - print the ranked suggestion list (also forwarded
        //                   to the ranking helpers)
        //   tarPos        - position of the target in the non-space token list
        //   tarSize       - number of tokens that make up the target
        //   contextRadius - window radius handed to the ranking helpers
        //   limitNo       - maximum number of suggestions listed
        // Returns 0 when input ends normally, -1 when any exception was caught
        // (its message is written to standard error and the loop stops).
        private static int RunTest(bool detailFlag, int tarPos, int tarSize, int contextRadius, long limitNo)
        {
            const string prompt = "- Please input a text, only a spell error allowed (type \"Ctl-d\" to quit) > ";

            // init dic
            string    configFile = "../data/Config/cSpell.properties";
            CSpellApi cSpellApi  = new CSpellApi(configFile);

            cSpellApi.SetRankMode(CSpellApi.RANK_MODE_CONTEXT);
            Word2Vec word2VecIm       = cSpellApi.GetWord2VecIm();
            Word2Vec word2VecOm       = cSpellApi.GetWord2VecOm();
            bool     word2VecSkipWord = cSpellApi.GetWord2VecSkipWord();
            ContextScoreComparator <ContextScore> csc = new ContextScoreComparator <ContextScore>();
            // provide cmdLine interface
            int returnValue = 0;

            try
            {
                // dispose the reader when the loop ends or an exception escapes
                using (StreamReader stdInput = new StreamReader(Console.OpenStandardInput()))
                {
                    Console.WriteLine(prompt);
                    string inText;
                    while ((inText = stdInput.ReadLine()) != null)
                    {
                        // ---------------------------------
                        // Get spell correction on the input
                        // ---------------------------------
                        // convert input text to TokenObj
                        TextObj         textObj    = new TextObj(inText);
                        List <TokenObj> inTextList = textObj.GetTokenList();
                        // *2 because the token list includes the space tokens
                        // (assumes words separated by single spaces)
                        string tarWord = inTextList[tarPos * 2].GetTokenStr();
                        for (int i = 1; i < tarSize; i++)
                        {
                            // i-th token of the target; the previous code used
                            // (tarPos + 1) for every i and so repeated the
                            // second token when tarSize > 2
                            int ii = (tarPos + i) * 2;
                            tarWord += " " + inTextList[ii].GetTokenStr();
                        }
                        Console.WriteLine("- input text: [" + inText + "]");
                        Console.WriteLine("- target: [" + tarPos + "|" + tarSize + "|" + tarWord + "]");
                        Console.WriteLine("- context radius: " + contextRadius);
                        // get all possible candidates
                        HashSet <string> candSet = NonWord1To1Candidates.GetCandidates(tarWord, cSpellApi);
                        candSet.Add(tarWord);                         // add the original word
                        Console.WriteLine("-- canSet.size(): " + candSet.Count);
                        // get final suggestion
                        // remove space token from the list
                        List <TokenObj> nonSpaceTokenList = TextObj.GetNonSpaceTokenObjList(inTextList);
                        string          topRankStr        = GetTopRankStr(tarWord, candSet, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, detailFlag);
                        Console.WriteLine("- top rank str: " + topRankStr);
                        // print details
                        if (detailFlag)
                        {
                            // score against the same non-space list used for the
                            // top rank above; tarPos is a non-space position, so
                            // the raw list (with spaces) would shift the context
                            HashSet <ContextScore> candScoreSet = GetCandidateScoreSet(candSet, tarPos, tarSize, nonSpaceTokenList, word2VecIm, word2VecOm, word2VecSkipWord, contextRadius, detailFlag);
                            Console.WriteLine("------ Suggestion List ------");
                            var list = candScoreSet.OrderBy(x => x, csc).Take((int)limitNo).Select(obj => obj.ToString());
                            foreach (var item in list)
                            {
                                Console.WriteLine(item);
                            }
                        }
                        // print the prompt
                        Console.WriteLine(prompt);
                    }
                }
            }
            catch (Exception e)
            {
                // best-effort driver: report the problem and signal failure
                Console.Error.WriteLine(e.Message);
                returnValue = -1;
            }
            return(returnValue);
        }