Ejemplo n.º 1
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testDelimitersSA() throws Exception
        /// <summary>
        /// Checks that "-", "," and "/" between words each yield separate tokens
        /// with the delimiter itself dropped.
        /// </summary>
        public virtual void testDelimitersSA()
        {
            string[] dashedTokens = { "some", "dashed", "phrase" };
            string[] commaTokens = { "dogs", "chase", "cats" };
            string[] slashTokens = { "ac", "dc" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "some-dashed-phrase", dashedTokens);
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "dogs,chase,cats", commaTokens);
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "ac/dc", slashTokens);
        }
Ejemplo n.º 2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testNumericSA() throws Exception
        /// <summary>
        /// Checks numeric-looking input: a decimal number and a dotted quad are each
        /// expected as one token, and mixed letter/digit words are kept whole.
        /// </summary>
        public virtual void testNumericSA()
        {
            // Floating point value: expected as a single token.
            string[] decimalTokens = { "21.35" };
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", decimalTokens);

            // Serial / model style words: split only on the space.
            string[] modelTokens = { "R2D2", "C3PO" };
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", modelTokens);

            // IP address: expected as a single token.
            string[] ipTokens = { "216.239.63.104" };
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", ipTokens);
        }
Ejemplo n.º 3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testVariousTextSA() throws Exception
        /// <summary>
        /// Miscellaneous plain-text cases: single-letter words, mixed case,
        /// repeated whitespace with stray punctuation, and quoted words.
        /// </summary>
        public virtual void testVariousTextSA()
        {
            string[] jobAdTokens = { "C", "embedded", "developers", "wanted" };
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", jobAdTokens);

            // Case is expected to be preserved.
            string[] mixedCaseTokens = { "foo", "bar", "FOO", "BAR" };
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", mixedCaseTokens);

            // Extra spaces and the punctuation ".", "<>" are expected to produce no tokens.
            string[] noisyTokens = { "foo", "bar", "FOO", "BAR" };
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo      bar .  FOO <> BAR", noisyTokens);

            // Surrounding double quotes are expected to be stripped.
            string[] quotedTokens = { "QUOTED", "word" };
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", quotedTokens);
        }
Ejemplo n.º 4
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testMailtoSchemeEmails() throws Exception
        /// <summary>
        /// Checks how "mailto:" scheme input is tokenized: the scheme is expected as an
        /// &lt;ALPHANUM&gt; token and each address as an &lt;EMAIL&gt; token. The expected
        /// tokens are lower-cased even where the input is not.
        /// </summary>
        public virtual void testMailtoSchemeEmails()
        {
            // See LUCENE-3880
            // NOTE(review): the e-mail literals in this method had been replaced by obfuscation
            // placeholders ("[email protected]", "*****@*****.**"), which made the inputs and the
            // expected tokens inconsistent. They are restored here with example.org/example.com
            // addresses that agree with the token and type lists - confirm against the upstream test.
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "MAILTO:Test@Example.ORG", new string[] { "mailto", "test@example.org" }, new string[] { "<ALPHANUM>", "<EMAIL>" });

            // TODO: Support full mailto: scheme URIs. See RFC 6068: http://tools.ietf.org/html/rfc6068
            // TODO: recognize ',' address delimiter. Also, see examples of ';' delimiter use at: http://www.mailto.co.uk/
            // TODO: split field keys/values (the "?cc=" prefix is currently expected inside the third e-mail token)
            // TODO: Hex decoding + re-tokenization (the "%20" escapes currently surface as "20or", "20something", ...)
            BaseTokenStreamTestCase.assertAnalyzesTo(
                a,
                "mailto:personA@example.com,personB@example.com?cc=personC@example.com" + "&subject=Subjectivity&body=Corpusivity%20or%20something%20like%20that",
                new string[] { "mailto", "persona@example.com", ",personb@example.com", "?cc=personc@example.com", "subject", "subjectivity", "body", "corpusivity", "20or", "20something", "20like", "20that" },
                new string[] { "<ALPHANUM>", "<EMAIL>", "<EMAIL>", "<EMAIL>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
        }
Ejemplo n.º 5
0
        /* test various jira issues this analyzer is related to */

//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testLUCENE1545() throws Exception
        /// <summary>
        /// Regression check for LUCENE-1545: a word containing the combining character
        /// U+0364 COMBINING LATIN SMALL LETTER E must come out as exactly one token.
        /// </summary>
        public virtual void testLUCENE1545()
        {
            // The reported bug: "moͤchte" was split into "mo" and "chte" and the
            // combining character was lost. The expected result is the single token "moͤchte".
            const string wordWithCombiningMark = "moͤchte";
            string[] expectedTokens = { "moͤchte" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, wordWithCombiningMark, expectedTokens);
        }
Ejemplo n.º 6
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testApostrophesSA() throws Exception
        /// <summary>
        /// Checks that words with internal apostrophes (O'Reilly, you're, O'Reilly's, ...)
        /// are each kept as one unmodified token.
        /// </summary>
        public virtual void testApostrophesSA()
        {
            string[] apostropheWords = { "O'Reilly", "you're", "she's", "Jim's", "don't", "O'Reilly's" };

            // Every word is expected to come back as a single token equal to the input.
            foreach (string word in apostropheWords)
            {
                BaseTokenStreamTestCase.assertAnalyzesTo(a, word, new string[] { word });
            }
        }
Ejemplo n.º 7
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testHugeDoc() throws java.io.IOException
        /// <summary>
        /// Checks that two words preceded by 4094 spaces of padding are still
        /// tokenized as "testing" and "1234".
        /// </summary>
        public virtual void testHugeDoc()
        {
            StringBuilder sb = new StringBuilder();

            // The converted code filled a char[] with Java's Arrays.fill, which does not exist
            // in .NET. StringBuilder.Append(char, int) writes the same 4094 spaces directly.
            sb.Append(' ', 4094);
            sb.Append("testing 1234");
            string input = sb.ToString();

            BaseTokenStreamTestCase.assertAnalyzesTo(a, input, new string[] { "testing", "1234" });
        }
Ejemplo n.º 8
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testURLs() throws Exception
        /// <summary>
        /// Reads the resource "random.text.with.urls.txt" as UTF-8 text and checks that
        /// <c>urlAnalyzer</c> tokenizes it into exactly the non-empty, trimmed lines of the
        /// resource "urls.from.random.text.with.urls.txt".
        /// </summary>
        public virtual void testURLs()
        {
            // NOTE(review): getResourceAsStream is carried over unchanged from the converted source;
            // it is not a member of System.Type, so presumably a project helper supplies it - confirm.
            string randomTextWithURLs;
            using (System.IO.StreamReader reader = new System.IO.StreamReader(this.GetType().getResourceAsStream("random.text.with.urls.txt"), Encoding.UTF8))
            {
                // ReadToEnd replaces the Java-style read loop, whose -1 end-of-stream sentinel
                // does not apply to .NET readers.
                randomTextWithURLs = reader.ReadToEnd();
            }
            assertTrue(null != randomTextWithURLs && randomTextWithURLs.Length > 0);

            // Collect the expected tokens: one per line, ignoring blank lines and surrounding whitespace.
            List<string> urlList = new List<string>();
            using (System.IO.StreamReader bufferedReader = new System.IO.StreamReader(this.GetType().getResourceAsStream("urls.from.random.text.with.urls.txt"), Encoding.UTF8))
            {
                string line;
                while (null != (line = bufferedReader.ReadLine()))
                {
                    line = line.Trim();
                    if (line.Length > 0)
                    {
                        urlList.Add(line);
                    }
                }
            }
            string[] urls = urlList.ToArray();
            assertTrue(null != urls && urls.Length > 0);

            BaseTokenStreamTestCase.assertAnalyzesTo(urlAnalyzer, randomTextWithURLs, urls);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Multiple consecutive chars in \p{WB:MidLetter}, \p{WB:MidNumLet},
        /// and/or \p{MidNum} should trigger a token split, whereas a single such
        /// char between suitable neighbours should not.
        /// </summary>
        public virtual void testMid()
        {
            // \p{WB:MidLetter} (':'): no split for a single char with a Letter on both sides.
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:B", new[] { "A:B" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "A::B", new[] { "A", "B" });

            // \p{WB:MidNumLet} ('.'): no split for a single char with a Letter or Numeric on both sides.
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.2", new[] { "1.2" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.B", new[] { "A.B" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "1..2", new[] { "1", "2" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "A..B", new[] { "A", "B" });

            // \p{WB:MidNum} (','): no split for a single char with a Numeric on both sides.
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,2", new[] { "1,2" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,,2", new[] { "1", "2" });

            // A \p{WB:MidLetter} next to a \p{WB:MidNumLet} always splits.
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.:B", new[] { "A", "B" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:.B", new[] { "A", "B" });

            // A \p{WB:MidNum} next to a \p{WB:MidNumLet} always splits.
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,.2", new[] { "1", "2" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.,2", new[] { "1", "2" });

            // Same cases again with '_' (\p{WB:ExtendNumLet}) joining a prefix onto the word.

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:B_A:B", new[] { "A:B_A:B" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:B_A::B", new[] { "A:B_A", "B" });

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.2_1.2", new[] { "1.2_1.2" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.B_A.B", new[] { "A.B_A.B" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.2_1..2", new[] { "1.2_1", "2" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.B_A..B", new[] { "A.B_A", "B" });

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,2_1,2", new[] { "1,2_1,2" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,2_1,,2", new[] { "1,2_1", "2" });

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "C_A.:B", new[] { "C_A", "B" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "C_A:.B", new[] { "C_A", "B" });

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "3_1,.2", new[] { "3_1", "2" });
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "3_1.,2", new[] { "3_1", "2" });
        }
Ejemplo n.º 10
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testNoSchemeURLs() throws Exception
        /// <summary>
        /// Checks scheme-less host names and URLs: text ending in a top level domain is
        /// expected as a &lt;URL&gt; token (optionally with port, path, query and fragment),
        /// while look-alikes such as "index.php" are expected as &lt;ALPHANUM&gt; tokens.
        /// </summary>
        public virtual void testNoSchemeURLs()
        {
            // ".ph" is a Top Level Domain
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "<index.ph>", new[] { "index.ph" }, new[] { "<URL>" });

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "index.ph", new[] { "index.ph" }, new[] { "<URL>" });

            // Suffixes that are not the ".ph" TLD are expected to give plain alphanumeric tokens.
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "index.php", new[] { "index.php" }, new[] { "<ALPHANUM>" });

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "index.phα", new[] { "index.phα" }, new[] { "<ALPHANUM>" });

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "index-h.php", new[] { "index", "h.php" }, new[] { "<ALPHANUM>", "<ALPHANUM>" });

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "index2.php", new[] { "index2", "php" }, new[] { "<ALPHANUM>", "<ALPHANUM>" });

            BaseTokenStreamTestCase.assertAnalyzesTo(a, "index2.ph9,", new[] { "index2", "ph9" }, new[] { "<ALPHANUM>", "<ALPHANUM>" });

            // Comma-separated mix of URL and non-URL items.
            string[] mixedTokens = { "example.com", "example.ph", "index.php", "index2", "php", "example2.ph" };
            string[] mixedTypes = { "<URL>", "<URL>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<URL>" };
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "example.com,example.ph,index.php,index2.php,example2.ph", mixedTokens, mixedTypes);

            // Space-separated URLs, each carrying one optional component.
            string[] spacedTokens = { "example.com:8080", "example.com/path/here", "example.com?query=something", "example.com#fragment" };
            string[] spacedTypes = { "<URL>", "<URL>", "<URL>", "<URL>" };
            BaseTokenStreamTestCase.assertAnalyzesTo(a, "example.com:8080 example.com/path/here example.com?query=something example.com#fragment", spacedTokens, spacedTypes);

            // Each of these inputs is expected back as one <URL> token equal to the input.
            string[] wholeUrls =
            {
                "example.com:8080/path/here?query=something#fragment",
                "example.com:8080/path/here?query=something",
                "example.com:8080/path/here#fragment",
                "example.com:8080/path/here",
                "example.com:8080?query=something#fragment",
                "example.com:8080?query=something",
                "example.com:8080#fragment",
                "example.com/path/here?query=something#fragment",
                "example.com/path/here?query=something",
                "example.com/path/here#fragment",
                "example.com?query=something#fragment"
            };
            foreach (string url in wholeUrls)
            {
                BaseTokenStreamTestCase.assertAnalyzesTo(a, url, new[] { url }, new[] { "<URL>" });
            }
        }
Ejemplo n.º 11
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testJapanese() throws Exception
        /// <summary>
        /// Checks Japanese text: kanji and hiragana are expected one character per token,
        /// while the katakana run is expected as a single token, each with its own type.
        /// </summary>
        public virtual void testJapanese()
        {
            const string text = "仮名遣い カタカナ";
            string[] expectedTokens = { "仮", "名", "遣", "い", "カタカナ" };
            string[] expectedTypes = { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens, expectedTypes);
        }
Ejemplo n.º 12
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testSupplementary() throws Exception
        /// <summary>
        /// Checks a run of ideographs in which each character is expected as its own
        /// &lt;IDEOGRAPHIC&gt; token.
        /// </summary>
        public virtual void testSupplementary()
        {
            const string text = "𩬅艱鍟䇹愯瀛";
            string[] expectedTokens = { "𩬅", "艱", "鍟", "䇹", "愯", "瀛" };
            string[] expectedTypes = { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens, expectedTypes);
        }
Ejemplo n.º 13
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testKorean() throws Exception
        /// <summary>
        /// Checks that a Hangul word is expected as one token of type &lt;HANGUL&gt;.
        /// </summary>
        public virtual void testKorean()
        {
            const string text = "훈민정음";
            string[] expectedTokens = { "훈민정음" };
            string[] expectedTypes = { "<HANGUL>" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens, expectedTypes);
        }
Ejemplo n.º 14
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testOffsets() throws Exception
        /// <summary>
        /// Checks the start and end character offsets reported for each token of a simple sentence.
        /// </summary>
        public virtual void testOffsets()
        {
            const string sentence = "David has 5000 bones";
            string[] expectedTokens = { "David", "has", "5000", "bones" };

            // Offsets into the sentence: start is inclusive, end is one past the last character.
            int[] expectedStarts = { 0, 6, 10, 15 };
            int[] expectedEnds = { 5, 9, 14, 20 };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens, expectedStarts, expectedEnds);
        }
Ejemplo n.º 15
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testTypes() throws Exception
        /// <summary>
        /// Checks the token types for a simple sentence: words are expected as
        /// &lt;ALPHANUM&gt; and the number as &lt;NUM&gt;.
        /// </summary>
        public virtual void testTypes()
        {
            const string sentence = "David has 5000 bones";
            string[] expectedTokens = { "David", "has", "5000", "bones" };
            string[] expectedTypes = { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens, expectedTypes);
        }
Ejemplo n.º 16
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testArabic() throws Exception
        /// <summary>
        /// Checks an Arabic sentence with embedded English words: the expected tokens are
        /// the individual words, with quotes, parentheses, colons and other punctuation removed.
        /// </summary>
        public virtual void testArabic()
        {
            const string text = "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.";
            string[] expectedTokens = { "الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا", "بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 17
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testLao() throws Exception
        /// <summary>
        /// Checks Lao text: the expected tokens are the three space-separated runs, each kept whole.
        /// </summary>
        public virtual void testLao()
        {
            const string text = "ສາທາລະນະລັດ ປະຊາທິປະໄຕ ປະຊາຊົນລາວ";
            string[] expectedTokens = { "ສາທາລະນະລັດ", "ປະຊາທິປະໄຕ", "ປະຊາຊົນລາວ" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 18
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testBengali() throws Exception
        /// <summary>
        /// Checks a Bengali passage: the expected tokens are the individual words and
        /// numbers, with parentheses, commas and sentence-ending marks removed.
        /// </summary>
        public virtual void testBengali()
        {
            const string text = "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।";
            string[] expectedTokens = { "এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার", "শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 19
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testArmenian() throws Exception
        /// <summary>
        /// Checks an Armenian sentence: the expected tokens are the individual words, with
        /// "4,600" kept as one token and the surrounding punctuation removed.
        /// </summary>
        public virtual void testArmenian()
        {
            const string text = "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։";
            string[] expectedTokens = { "Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից", "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 20
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testTibetan() throws Exception
        /// <summary>
        /// Checks a Tibetan sentence: the expected tokens are the syllables, with the
        /// separator and closing punctuation marks not appearing in any token.
        /// </summary>
        public virtual void testTibetan()
        {
            const string text = "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །";
            string[] expectedTokens = { "སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག", "མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར", "ཧ", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 21
0
        /* Tests from StandardAnalyzer, just to show behavior is similar */
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testAlphanumericSA() throws Exception
        /// <summary>
        /// Checks that words mixing letters and digits are each kept as one token.
        /// </summary>
        public virtual void testAlphanumericSA()
        {
            string[] alphanumericWords = { "B2B", "2B" };

            // Each word is expected back unchanged as the only token.
            foreach (string word in alphanumericWords)
            {
                BaseTokenStreamTestCase.assertAnalyzesTo(a, word, new string[] { word });
            }
        }
Ejemplo n.º 22
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testGreek() throws Exception
        /// <summary>
        /// Checks a Greek sentence containing the Latin word "wiki": the expected tokens
        /// are the individual words, with the comma and final period removed.
        /// </summary>
        public virtual void testGreek()
        {
            const string text = "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.";
            string[] expectedTokens = { "Γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που", "σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 23
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testEmpty() throws Exception
        /// <summary>
        /// Checks that an empty string, a lone period and a lone space all produce no tokens.
        /// </summary>
        public virtual void testEmpty()
        {
            string[] tokenlessInputs = { "", ".", " " };

            foreach (string input in tokenlessInputs)
            {
                // A fresh empty array per call, as the expected token list.
                BaseTokenStreamTestCase.assertAnalyzesTo(a, input, new string[0]);
            }
        }
Ejemplo n.º 24
0
        /*
         * For chinese, tokenize as char (these can later form bigrams or whatever)
         */
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testChinese() throws Exception
        /// <summary>
        /// Checks Chinese text mixed with a number and a Latin word: each Chinese character
        /// is expected as its own token, while "1234" and "Tests" are kept whole.
        /// </summary>
        public virtual void testChinese()
        {
            const string text = "我是中国人。 1234 Tests ";
            string[] expectedTokens = { "我", "是", "中", "国", "人", "1234", "Tests" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 25
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testAmharic() throws Exception
        /// <summary>
        /// Checks an Amharic sentence: the expected tokens are the individual words, with
        /// parentheses and the sentence-ending mark removed.
        /// </summary>
        public virtual void testAmharic()
        {
            const string text = "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም";
            string[] expectedTokens = { "ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 26
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testKoreanSA() throws Exception
        /// <summary>
        /// Checks that two space-separated Korean words are expected as two whole tokens.
        /// </summary>
        public virtual void testKoreanSA()
        {
            const string text = "안녕하세요 한글입니다";
            string[] expectedTokens = { "안녕하세요", "한글입니다" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 27
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testAramaic() throws Exception
        /// <summary>
        /// Checks a Syriac-script sentence containing the Latin word "Wikipedia": the expected
        /// tokens are the individual words, with text adjacent to a double quote split off
        /// as its own token.
        /// </summary>
        public virtual void testAramaic()
        {
            const string text = "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀";
            string[] expectedTokens = { "ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ", "ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 28
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testThai() throws Exception
        /// <summary>
        /// Checks Thai text: each run between punctuation or spaces is expected as one
        /// whole token, including the run of Thai digits.
        /// </summary>
        public virtual void testThai()
        {
            const string text = "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔";
            string[] expectedTokens = { "การที่ได้ต้องแสดงว่างานดี", "แล้วเธอจะไปไหน", "๑๒๓๔" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 29
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testFarsi() throws Exception
        /// <summary>
        /// Checks a Farsi sentence: the expected tokens are the individual words and
        /// numbers, with the final period removed.
        /// </summary>
        public virtual void testFarsi()
        {
            const string text = "ویکی پدیای انگلیسی در تاریخ ۲۵ دی ۱۳۷۹ به صورت مکملی برای دانشنامهٔ تخصصی نوپدیا نوشته شد.";
            string[] expectedTokens = { "ویکی", "پدیای", "انگلیسی", "در", "تاریخ", "۲۵", "دی", "۱۳۷۹", "به", "صورت", "مکملی", "برای", "دانشنامهٔ", "تخصصی", "نوپدیا", "نوشته", "شد" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTokens);
        }
Ejemplo n.º 30
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testTextWithNumbersSA() throws Exception
        /// <summary>
        /// Checks that a number inside ordinary text is expected as a token of its own,
        /// alongside the surrounding words.
        /// </summary>
        public virtual void testTextWithNumbersSA()
        {
            const string sentence = "David has 5000 bones";
            string[] expectedTokens = { "David", "has", "5000", "bones" };

            BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
        }