/// <summary>
/// Checks the terms, offsets and position increments produced when an English
/// stop word ("the") sits inside Thai text: the stop word is absent from the
/// expected output and the term that follows it is expected to carry an
/// increment of 2.
/// </summary>
public virtual void TestPositionIncrements()
{
    ThaiAnalyzer thai = new ThaiAnalyzer(TEST_VERSION_CURRENT, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // Both inputs are expected to yield the same terms and increments;
    // only the offsets differ, because the second input has one character less.
    string[] expectedTerms = { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" };
    int[] expectedIncrements = { 1, 1, 1, 1, 2, 1, 1, 1 };

    // Stop word separated from the Thai text by whitespace on both sides.
    AssertAnalyzesTo(
        thai,
        "การที่ได้ต้อง the แสดงว่างานดี",
        expectedTerms,
        new int[] { 0, 3, 6, 9, 18, 22, 25, 28 },
        new int[] { 3, 6, 9, 13, 22, 25, 28, 30 },
        expectedIncrements);

    // case that a stopword is adjacent to thai text, with no whitespace
    AssertAnalyzesTo(
        thai,
        "การที่ได้ต้องthe แสดงว่างานดี",
        expectedTerms,
        new int[] { 0, 3, 6, 9, 17, 21, 24, 27 },
        new int[] { 3, 6, 9, 13, 21, 24, 27, 29 },
        expectedIncrements);
}
/// <summary>
/// Runs three inputs (empty, pure Thai, mixed Thai/Latin) through a single
/// <see cref="ThaiAnalyzer"/> instance created for <c>LuceneVersion.LUCENE_30</c>,
/// so the same analyzer is reused between calls.
/// </summary>
public virtual void TestReusableTokenStream30()
{
    // LUCENE_30 is referenced on purpose; suppress the obsolete-member warnings
    // the same way TestAttributeReuse does.
#pragma warning disable 612, 618
    ThaiAnalyzer analyzer = new ThaiAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

    AssertAnalyzesTo(analyzer, "", new string[] { });

    AssertAnalyzesTo(
        analyzer,
        "การที่ได้ต้องแสดงว่างานดี",
        new string[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" });

    // The input must contain a literal e-mail address. With this version the
    // company name and the address are each expected back as one lower-cased token.
    AssertAnalyzesTo(
        analyzer,
        "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
        new string[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
}
/// <summary>
/// Runs three inputs (empty, pure Thai, mixed Thai/Latin) through a single
/// <see cref="ThaiAnalyzer"/> instance built with <c>TEST_VERSION_CURRENT</c>
/// and an empty stop set, so the same analyzer is reused between calls.
/// </summary>
public virtual void TestReusableTokenStream()
{
    ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);

    AssertAnalyzesTo(analyzer, "", new string[] { });

    AssertAnalyzesTo(
        analyzer,
        "การที่ได้ต้องแสดงว่างานดี",
        new string[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" });

    // The input must contain a literal e-mail address: the expected output
    // splits it into "xyz" and "demo.com" (and the company name into "xy", "z").
    AssertAnalyzesTo(
        analyzer,
        "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
        new string[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz", "demo.com" });
}
/// <summary>
/// Requests a token stream from the same analyzer twice, the second time
/// after the consumer adds an attribute the analyzer itself does not use,
/// and expects the same two terms both times.
/// </summary>
public virtual void TestAttributeReuse()
{
    // LUCENE_30 is referenced on purpose; suppress the obsolete-member warnings.
#pragma warning disable 612, 618
    ThaiAnalyzer analyzer = new ThaiAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

    // just consume
    TokenStream ts = analyzer.TokenStream("dummy", "ภาษาไทย");
    AssertTokenStreamContents(ts, new string[] { "ภาษา", "ไทย" });

    // this consumer adds flagsAtt, which this analyzer does not use.
    ts = analyzer.TokenStream("dummy", "ภาษาไทย");
    ts.AddAttribute<IFlagsAttribute>();
    AssertTokenStreamContents(ts, new string[] { "ภาษา", "ไทย" });
}
/// <summary>
/// Checks the terms produced by a <see cref="ThaiAnalyzer"/> created for
/// <c>LuceneVersion.LUCENE_30</c> on empty, pure Thai, mixed Thai/Latin and
/// Thai-plus-English-sentence input.
/// </summary>
public virtual void TestAnalyzer30()
{
    // LUCENE_30 is referenced on purpose; suppress the obsolete-member warnings
    // the same way TestAttributeReuse does.
#pragma warning disable 612, 618
    ThaiAnalyzer analyzer = new ThaiAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

    AssertAnalyzesTo(analyzer, "", new string[] { });

    AssertAnalyzesTo(
        analyzer,
        "การที่ได้ต้องแสดงว่างานดี",
        new string[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" });

    // The input must contain a literal e-mail address. With this version the
    // company name and the address are each expected back as one lower-cased token.
    AssertAnalyzesTo(
        analyzer,
        "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
        new string[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });

    // English stop words: "The" and "the" are absent from the expected terms.
    AssertAnalyzesTo(
        analyzer,
        "ประโยคว่า The quick brown fox jumped over the lazy dogs",
        new string[] { "ประโยค", "ว่า", "quick", "brown", "fox", "jumped", "over", "lazy", "dogs" });
}
/// <summary>
/// Analyzes a fixed string once on the calling thread, then hands the same
/// <see cref="ThaiAnalyzer"/> instance and string to four worker threads and
/// waits for all of them to finish.
/// </summary>
public virtual void TestConcurrency()
{
    ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);

    char[] chars = new char[] {
        (char)4160,
        (char)4124,
        (char)4097,
        (char)4177,
        (char)4113,
        (char)32,
        (char)10671,
    };
    string contents = new string(chars);

    // Single-threaded pass before any worker is started.
    AssertAnalyzer(analyzer, contents);

    int numThreads = 4;
    // Initial count of 1: the single Signal() below brings it to zero.
    // NOTE(review): presumably each ThaiAnalyzerThread waits on this event
    // before analyzing - confirm against ThaiAnalyzerThread.
    var startingGun = new CountdownEvent(1);
    var threads = new ThaiAnalyzerThread[numThreads];
    for (int i = 0; i < threads.Length; i++)
    {
        threads[i] = new ThaiAnalyzerThread(startingGun, analyzer, contents);
    }

    foreach (var thread in threads)
    {
        thread.Start();
    }

    startingGun.Signal();

    foreach (var t in threads)
    {
        try
        {
            t.Join();
        }
        catch (ThreadInterruptedException)
        {
            // The exception object is not needed, so no variable is declared
            // (and no CS0168 suppression is required).
            fail("Thread interrupted");
        }
    }
}
/// <summary>
/// Obtains a token stream from one analyzer twice - once consumed as-is, once
/// after the consumer registers an extra attribute - and expects identical
/// terms from both.
/// </summary>
public virtual void TestAttributeReuse()
{
#pragma warning disable 612, 618
    ThaiAnalyzer analyzer = new ThaiAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

    // just consume
    TokenStream plainStream = analyzer.TokenStream("dummy", "ภาษาไทย");
    AssertTokenStreamContents(plainStream, new string[] { "ภาษา", "ไทย" });

    // this consumer adds flagsAtt, which this analyzer does not use.
    TokenStream flaggedStream = analyzer.TokenStream("dummy", "ภาษาไทย");
    flaggedStream.AddAttribute<IFlagsAttribute>();
    AssertTokenStreamContents(flaggedStream, new string[] { "ภาษา", "ไทย" });
}
/// <summary>
/// Expects a run of Thai digits immediately followed by ASCII digits to come
/// back as one single term when no stop words are configured.
/// </summary>
public virtual void TestNumeralBreaking()
{
    ThaiAnalyzer thai = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);

    string mixedDigits = "๑๒๓456";
    string[] expectedTerms = new string[] { "๑๒๓456" };

    AssertAnalyzesTo(thai, mixedDigits, expectedTerms);
}