/// <summary>
/// Analyzes two copies of the same phrase joined by a space character (per the
/// test name, an ideographic space), the second copy ending with an ideographic
/// full stop, and expects the six word tokens of the phrase to be produced twice.
/// Neither the separator nor the full stop appears in the expected tokens.
/// </summary>
public void TestChineseStopWordsDefaultTwoPhrasesIdeoSpace()
{
    // LUCENE_CURRENT is flagged obsolete; silence CS0612/CS0618 just for this line.
#pragma warning disable 612, 618
    Analyzer analyzer = new SmartChineseAnalyzer(LuceneVersion.LUCENE_CURRENT); /* will load stopwords */
#pragma warning restore 612, 618

    string input = "我购买了道具和服装 我购买了道具和服装。";
    string[] expectedTokens =
    {
        "我", "购买", "了", "道具", "和", "服装",
        "我", "购买", "了", "道具", "和", "服装"
    };

    AssertAnalyzesTo(analyzer, input, expectedTokens);
}
/// <summary>
/// Analyzes a single sentence that ends with an ideographic full stop and expects
/// exactly six word tokens, with no token for the full stop.
/// </summary>
/// <remarks>
/// The analyzer is built with the two-argument constructor, passing <c>true</c>.
/// NOTE(review): presumably this flag turns on the default stop words; confirm
/// against the constructor's documentation.
/// </remarks>
public void TestChineseAnalyzer()
{
    // LUCENE_CURRENT is flagged obsolete; silence CS0612/CS0618 just for this line.
#pragma warning disable 612, 618
    Analyzer analyzer = new SmartChineseAnalyzer(LuceneVersion.LUCENE_CURRENT, true);
#pragma warning restore 612, 618

    string input = "我购买了道具和服装。";
    string[] expectedTokens = { "我", "购买", "了", "道具", "和", "服装" };

    AssertAnalyzesTo(analyzer, input, expectedTokens);
}
/// <summary>
/// Checks tokens, character offsets and position increments for the input
/// <c>"Title:San"</c>, where the colon is treated as a stopword.
/// </summary>
/// <remarks>
/// Expected output is <c>"titl"</c> at offsets 0-5 and <c>"san"</c> at offsets 6-9.
/// The second token carries a position increment of 2, which accounts for the
/// position of the removed colon.
/// </remarks>
public void TestChineseStopWords2()
{
    // LUCENE_CURRENT is flagged obsolete; silence CS0612/CS0618 just for this line.
#pragma warning disable 612, 618
    Analyzer analyzer = new SmartChineseAnalyzer(LuceneVersion.LUCENE_CURRENT); /* will load stopwords */
#pragma warning restore 612, 618

    // ':' is a stopword
    string input = "Title:San";

    string[] expectedTokens = { "titl", "san" };
    int[] expectedStartOffsets = { 0, 6 };
    int[] expectedEndOffsets = { 5, 9 };
    int[] expectedPositionIncrements = { 1, 2 };

    AssertAnalyzesTo(
        analyzer,
        input,
        expectedTokens,
        expectedStartOffsets,
        expectedEndOffsets,
        expectedPositionIncrements);
}
/// <summary>
/// Verifies that an analyzer built with the single-argument constructor and one
/// built from an explicitly supplied <c>SmartChineseAnalyzer.GetDefaultStopSet()</c>
/// produce the same six tokens for the same sentence.
/// </summary>
public void TestChineseStopWordsDefault()
{
    // LUCENE_CURRENT is flagged obsolete; silence CS0612/CS0618 around each use.
#pragma warning disable 612, 618
    Analyzer implicitStopSetAnalyzer = new SmartChineseAnalyzer(LuceneVersion.LUCENE_CURRENT); /* will load stopwords */
#pragma warning restore 612, 618

    string input = "我购买了道具和服装。";
    string[] expectedTokens = { "我", "购买", "了", "道具", "和", "服装" };
    AssertAnalyzesTo(implicitStopSetAnalyzer, input, expectedTokens);

    // Supplying the stop words from the outside must yield the same behavior.
#pragma warning disable 612, 618
    Analyzer explicitStopSetAnalyzer = new SmartChineseAnalyzer(
        LuceneVersion.LUCENE_CURRENT,
        SmartChineseAnalyzer.GetDefaultStopSet());
#pragma warning restore 612, 618
    AssertAnalyzesTo(explicitStopSetAnalyzer, input, expectedTokens);
}
/// <summary>
/// Runs one analyzer instance over two different inputs in succession and checks
/// the tokens together with their start and end offsets for each input.
/// </summary>
public void TestReusableTokenStream()
{
    // LUCENE_CURRENT is flagged obsolete; silence CS0612/CS0618 just for this line.
#pragma warning disable 612, 618
    Analyzer analyzer = new SmartChineseAnalyzer(LuceneVersion.LUCENE_CURRENT);
#pragma warning restore 612, 618

    // First pass: mixed Chinese/Latin text; the expected Latin token is "test".
    string[] mixedTokens = { "我", "购买", "test", "了", "道具", "和", "服装" };
    int[] mixedStartOffsets = { 0, 1, 4, 10, 11, 13, 14 };
    int[] mixedEndOffsets = { 1, 3, 9, 11, 13, 14, 16 };
    AssertAnalyzesTo(analyzer, "我购买 Tests 了道具和服装", mixedTokens, mixedStartOffsets, mixedEndOffsets);

    // Second pass on the same analyzer: a sentence ending with a full stop.
    string[] chineseTokens = { "我", "购买", "了", "道具", "和", "服装" };
    int[] chineseStartOffsets = { 0, 1, 3, 4, 6, 7 };
    int[] chineseEndOffsets = { 1, 3, 4, 6, 7, 9 };
    AssertAnalyzesTo(analyzer, "我购买了道具和服装。", chineseTokens, chineseStartOffsets, chineseEndOffsets);
}
/// <summary>
/// Feeds the analyzer one very long input, a phrase repeated 5000 times with no
/// sentence-ending punctuation, and drains the resulting token stream.
/// </summary>
/// <remarks>
/// The test makes no assertions about the tokens themselves; it passes when the
/// stream can be reset, fully consumed and ended without an exception.
/// </remarks>
public void TestLargeSentence()
{
    const int repetitions = 5000;
    const string phrase = "我购买了道具和服装";

    // Use the real StringBuilder API (Append / ToString) rather than Java-style
    // spellings, and pre-size the buffer since the final length is known.
    StringBuilder text = new StringBuilder(phrase.Length * repetitions);
    for (int i = 0; i < repetitions; i++)
    {
        text.Append(phrase);
    }

    Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
    TokenStream stream = analyzer.GetTokenStream("", text.ToString());
    try
    {
        stream.Reset();
        while (stream.IncrementToken())
        {
            // Intentionally empty: the tokens are only consumed, not inspected.
        }
        stream.End();
    }
    finally
    {
        // Dispose via the IOUtils helper, as the original did, rather than a using block.
        IOUtils.DisposeWhileHandlingException(stream);
    }
}