示例#1
0
        /*
         * Runs a single ThaiAnalyzer built with Version.LUCENE_30 over three inputs in a row
         * (empty text, Thai-only text, mixed Thai/Latin text) and checks the emitted terms.
         */
        public virtual void testReusableTokenStream30()
        {
            ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);

            // Empty input is expected to produce no terms.
            assertAnalyzesTo(analyzer, "", new string[] {});

            // Unsegmented Thai text is expected to come back as eight separate words.
            assertAnalyzesTo(analyzer, "การที่ได้ต้องแสดงว่างานดี", new string[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" });

            // Mixed input: the company name and the e-mail address are each expected back as
            // one lower-cased term. The address has to be written out literally in both the
            // input and the expected terms; a masked placeholder would not exercise this case.
            assertAnalyzesTo(analyzer, "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", new string[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
        }
示例#2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testReusableTokenStream() throws Exception
        /*
         * Runs a single ThaiAnalyzer built with TEST_VERSION_CURRENT and CharArraySet.EMPTY_SET
         * over three inputs in a row (empty text, Thai-only text, mixed Thai/Latin text) and
         * checks the emitted terms.
         */
        public virtual void testReusableTokenStream()
        {
            // NOTE(review): EMPTY_SET is presumably the stopword set, i.e. nothing is filtered - confirm.
            ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);

            // Empty input is expected to produce no terms.
            assertAnalyzesTo(analyzer, "", new string[] {});

            // Unsegmented Thai text is expected to come back as eight separate words.
            assertAnalyzesTo(analyzer, "การที่ได้ต้องแสดงว่างานดี", new string[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" });

            // Mixed input: here the company name is expected to break at '&' ("xy", "z") and the
            // e-mail address at '@' ("xyz", "demo.com"), so the input must contain the literal address.
            assertAnalyzesTo(analyzer, "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", new string[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz", "demo.com" });
        }
示例#3
0
        /*
         * Verifies the position increments reported when a stopword is dropped from the
         * middle of Thai text.
         *
         * The analyzer is constructed with StopAnalyzer.ENGLISH_STOP_WORDS_SET. In both inputs
         * the expected terms omit "the", and the term that follows it carries a position
         * increment of 2 instead of 1.
         */
        public virtual void testPositionIncrements()
        {
            var thaiWithEnglishStops = new ThaiAnalyzer(TEST_VERSION_CURRENT, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

            // Case 1: the stopword has whitespace on both sides.
            string[] spacedTerms = { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" };
            int[] spacedStartOffsets = { 0, 3, 6, 9, 18, 22, 25, 28 };
            int[] spacedEndOffsets = { 3, 6, 9, 13, 22, 25, 28, 30 };
            int[] spacedIncrements = { 1, 1, 1, 1, 2, 1, 1, 1 };
            assertAnalyzesTo(thaiWithEnglishStops, "การที่ได้ต้อง the แสดงว่างานดี", spacedTerms, spacedStartOffsets, spacedEndOffsets, spacedIncrements);

            // Case 2: the stopword is adjacent to the preceding Thai text, with no whitespace,
            // so every offset after it is one smaller than in case 1.
            string[] adjacentTerms = { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" };
            int[] adjacentStartOffsets = { 0, 3, 6, 9, 17, 21, 24, 27 };
            int[] adjacentEndOffsets = { 3, 6, 9, 13, 21, 24, 27, 29 };
            int[] adjacentIncrements = { 1, 1, 1, 1, 2, 1, 1, 1 };
            assertAnalyzesTo(thaiWithEnglishStops, "การที่ได้ต้องthe แสดงว่างานดี", adjacentTerms, adjacentStartOffsets, adjacentEndOffsets, adjacentIncrements);
        }
示例#4
0
 /*
  * LUCENE-3044: analyzes the same text twice with one Version.LUCENE_30 ThaiAnalyzer,
  * the second time after a FlagsAttribute has been added to the stream, and expects the
  * same two terms on both passes.
  */
 public virtual void testAttributeReuse()
 {
     const string field = "dummy";
     const string thaiText = "ภาษาไทย";
     var thaiAnalyzer = new ThaiAnalyzer(Version.LUCENE_30);

     // First pass: a consumer that only reads the stream.
     TokenStream plainStream = thaiAnalyzer.tokenStream(field, thaiText);
     assertTokenStreamContents(plainStream, new[] { "ภาษา", "ไทย" });

     // Second pass: this consumer adds a flags attribute, which this analyzer does not use.
     TokenStream flaggedStream = thaiAnalyzer.tokenStream(field, thaiText);
     flaggedStream.addAttribute(typeof(FlagsAttribute));
     assertTokenStreamContents(flaggedStream, new[] { "ภาษา", "ไทย" });
 }
示例#5
0
        /*
         * LUCENE-3044: analyzes the same text twice with one Version.LUCENE_30 ThaiAnalyzer,
         * the second time after a FlagsAttribute has been added to the stream, and expects
         * the same two terms on both passes.
         */
        public virtual void testAttributeReuse()
        {
            const string field = "dummy";
            const string thaiText = "ภาษาไทย";
            var thaiAnalyzer = new ThaiAnalyzer(Version.LUCENE_30);

            // First pass: a consumer that only reads the stream.
            TokenStream plainStream = thaiAnalyzer.tokenStream(field, thaiText);
            assertTokenStreamContents(plainStream, new[] { "ภาษา", "ไทย" });

            // Second pass: this consumer adds a flags attribute, which this analyzer does not use.
            TokenStream flaggedStream = thaiAnalyzer.tokenStream(field, thaiText);
            flaggedStream.addAttribute(typeof(FlagsAttribute));
            assertTokenStreamContents(flaggedStream, new[] { "ภาษา", "ไทย" });
        }
示例#6
0
        /*
         * Checks the terms produced by a ThaiAnalyzer built with Version.LUCENE_30 for
         * empty text, Thai-only text, mixed Thai/Latin text, and Thai text followed by an
         * English sentence.
         */
        public virtual void testAnalyzer30()
        {
            ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);

            // Empty input is expected to produce no terms.
            assertAnalyzesTo(analyzer, "", new string[] {});

            // Unsegmented Thai text is expected to come back as eight separate words.
            assertAnalyzesTo(analyzer, "การที่ได้ต้องแสดงว่างานดี", new string[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" });

            // Mixed input: the company name and the e-mail address are each expected back as
            // one lower-cased term. The address has to be written out literally in both the
            // input and the expected terms; a masked placeholder would not exercise this case.
            assertAnalyzesTo(analyzer, "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", new string[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });

            // English stop words: "The" and "the" are absent from the expected terms.
            assertAnalyzesTo(analyzer, "ประโยคว่า The quick brown fox jumped over the lazy dogs", new string[] { "ประโยค", "ว่า", "quick", "brown", "fox", "jumped", "over", "lazy", "dogs" });
        }
示例#7
0
        /*
         * Checks the terms produced by a ThaiAnalyzer built with Version.LUCENE_30 for
         * empty text, Thai-only text, mixed Thai/Latin text, and Thai text followed by an
         * English sentence.
         */
        public virtual void testAnalyzer30()
        {
            ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);

            // Empty input is expected to produce no terms.
            assertAnalyzesTo(analyzer, "", new string[] {});

            // Unsegmented Thai text is expected to come back as eight separate words.
            assertAnalyzesTo(analyzer, "การที่ได้ต้องแสดงว่างานดี", new string[] {"การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});

            // Mixed input: the company name and the e-mail address are each expected back as
            // one lower-cased term. The address has to be written out literally in both the
            // input and the expected terms; a masked placeholder would not exercise this case.
            assertAnalyzesTo(analyzer, "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", new string[] {"บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com"});

            // English stop words: "The" and "the" are absent from the expected terms.
            assertAnalyzesTo(analyzer, "ประโยคว่า The quick brown fox jumped over the lazy dogs", new string[] {"ประโยค", "ว่า", "quick", "brown", "fox", "jumped", "over", "lazy", "dogs"});
        }
示例#8
0
        /*
         * Runs a single ThaiAnalyzer built with Version.LUCENE_30 over three inputs in a row
         * (empty text, Thai-only text, mixed Thai/Latin text) and checks the emitted terms.
         */
        public virtual void testReusableTokenStream30()
        {
            ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);

            // Empty input is expected to produce no terms.
            assertAnalyzesTo(analyzer, "", new string[] {});

            // Unsegmented Thai text is expected to come back as eight separate words.
            assertAnalyzesTo(analyzer, "การที่ได้ต้องแสดงว่างานดี", new string[] {"การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});

            // Mixed input: the company name and the e-mail address are each expected back as
            // one lower-cased term. The address has to be written out literally in both the
            // input and the expected terms; a masked placeholder would not exercise this case.
            assertAnalyzesTo(analyzer, "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", new string[] {"บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com"});
        }
示例#9
0
        /*
         * Runs a single ThaiAnalyzer built with TEST_VERSION_CURRENT and CharArraySet.EMPTY_SET
         * over three inputs in a row (empty text, Thai-only text, mixed Thai/Latin text) and
         * checks the emitted terms.
         *
         * Converted from Java: public void testReusableTokenStream() throws Exception
         */
        public virtual void testReusableTokenStream()
        {
            // NOTE(review): EMPTY_SET is presumably the stopword set, i.e. nothing is filtered - confirm.
            ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);

            // Empty input is expected to produce no terms.
            assertAnalyzesTo(analyzer, "", new string[] {});

            // Unsegmented Thai text is expected to come back as eight separate words.
            assertAnalyzesTo(analyzer, "การที่ได้ต้องแสดงว่างานดี", new string[] {"การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});

            // Mixed input: here the company name is expected to break at '&' ("xy", "z") and the
            // e-mail address at '@' ("xyz", "demo.com"), so the input must contain the literal address.
            assertAnalyzesTo(analyzer, "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", new string[] {"บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz", "demo.com"});
        }
示例#10
0
        /*
         * Verifies the position increments reported when a stopword is dropped from the
         * middle of Thai text.
         *
         * The analyzer is constructed with StopAnalyzer.ENGLISH_STOP_WORDS_SET. In both inputs
         * the expected terms omit "the", and the term that follows it carries a position
         * increment of 2 instead of 1.
         */
        public virtual void testPositionIncrements()
        {
            var thaiWithEnglishStops = new ThaiAnalyzer(TEST_VERSION_CURRENT, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

            // Case 1: the stopword has whitespace on both sides.
            string[] spacedTerms = {"การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"};
            int[] spacedStartOffsets = {0, 3, 6, 9, 18, 22, 25, 28};
            int[] spacedEndOffsets = {3, 6, 9, 13, 22, 25, 28, 30};
            int[] spacedIncrements = {1, 1, 1, 1, 2, 1, 1, 1};
            assertAnalyzesTo(thaiWithEnglishStops, "การที่ได้ต้อง the แสดงว่างานดี", spacedTerms, spacedStartOffsets, spacedEndOffsets, spacedIncrements);

            // Case 2: the stopword is adjacent to the preceding Thai text, with no whitespace,
            // so every offset after it is one smaller than in case 1.
            string[] adjacentTerms = {"การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"};
            int[] adjacentStartOffsets = {0, 3, 6, 9, 17, 21, 24, 27};
            int[] adjacentEndOffsets = {3, 6, 9, 13, 21, 24, 27, 29};
            int[] adjacentIncrements = {1, 1, 1, 1, 2, 1, 1, 1};
            assertAnalyzesTo(thaiWithEnglishStops, "การที่ได้ต้องthe แสดงว่างานดี", adjacentTerms, adjacentStartOffsets, adjacentEndOffsets, adjacentIncrements);
        }