public virtual void TestReusableTokenStream() { Analyzer analyzer = new CJKAnalyzer(LuceneVersion.LUCENE_30); string str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053"; TestToken[] out_tokens = new TestToken[] { newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3046\u3048", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3048\u304a", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("abc", 5, 8, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("\u304b\u304d", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304d\u304f", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304f\u3051", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3051\u3053", 11, 13, CJKTokenizer.DOUBLE_TOKEN_TYPE) }; checkCJKTokenReusable(analyzer, str, out_tokens); str = "\u3042\u3044\u3046\u3048\u304aab\u3093c\u304b\u304d\u304f\u3051 \u3053"; TestToken[] out_tokens2 = new TestToken[] { newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3046\u3048", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3048\u304a", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("ab", 5, 7, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("\u3093", 7, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("c", 8, 9, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("\u304b\u304d", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304d\u304f", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304f\u3051", 11, 13, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3053", 14, 15, CJKTokenizer.DOUBLE_TOKEN_TYPE) }; checkCJKTokenReusable(analyzer, str, out_tokens2); }
internal virtual void checkCJKTokenReusable(Analyzer a, string str, TestToken[] out_tokens)
{
    // Unpack the expected tokens into the parallel arrays AssertAnalyzesTo expects.
    string[] terms = new string[out_tokens.Length];
    int[] startOffsets = new int[out_tokens.Length];
    int[] endOffsets = new int[out_tokens.Length];
    string[] types = new string[out_tokens.Length];
    for (int i = 0; i < out_tokens.Length; i++)
    {
        terms[i] = out_tokens[i].termText;
        startOffsets[i] = out_tokens[i].start;
        endOffsets[i] = out_tokens[i].end;
        types[i] = out_tokens[i].type;
    }
    // Analyze with the caller's analyzer, so that invoking this helper twice
    // from TestReusableTokenStream actually exercises token-stream reuse.
    AssertAnalyzesTo(a, str, terms, startOffsets, endOffsets, types, null);
}
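// TestToken and newToken are defined elsewhere in this fixture. A minimal
// sketch of what they are assumed to look like, inferred from how
// checkCJKTokenReusable reads them (field names are as used above; the
// exact shapes are assumptions):
internal sealed class TestToken
{
    internal string termText; // expected term text
    internal int start;       // expected start offset
    internal int end;         // expected end offset (exclusive)
    internal string type;     // expected token type, e.g. CJKTokenizer.DOUBLE_TOKEN_TYPE
}

internal static TestToken newToken(string termText, int start, int end, string type)
{
    return new TestToken { termText = termText, start = start, end = end, type = type };
}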
public virtual void TestTokenStream()
{
    Analyzer analyzer = new CJKAnalyzer(LuceneVersion.LUCENE_30);
    AssertAnalyzesTo(analyzer, "\u4e00\u4e01\u4e02",
        new string[] { "\u4e00\u4e01", "\u4e01\u4e02" });
}
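// For reference, a standalone sketch (assuming the Lucene.NET 4.8 API surface
// used by these tests) of how the bigrams asserted above can be inspected
// directly, outside the test harness:
using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Cjk;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

public static class CjkBigramDemo
{
    public static void Main()
    {
        Analyzer analyzer = new CJKAnalyzer(LuceneVersion.LUCENE_30);
        using (TokenStream ts = analyzer.GetTokenStream("f", new StringReader("\u4e00\u4e01\u4e02")))
        {
            ICharTermAttribute term = ts.AddAttribute<ICharTermAttribute>();
            IOffsetAttribute offset = ts.AddAttribute<IOffsetAttribute>();
            ITypeAttribute type = ts.AddAttribute<ITypeAttribute>();
            ts.Reset();
            while (ts.IncrementToken())
            {
                // Prints "\u4e00\u4e01" at [0,2), then "\u4e01\u4e02" at [1,3),
                // both typed "double".
                Console.WriteLine($"{term} [{offset.StartOffset},{offset.EndOffset}) {type.Type}");
            }
            ts.End();
        }
    }
}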