// Converted from the Java test "public void testReusableTokenStream() throws Exception";
// .NET has no 'throws' clause, so it does not appear in the signature.
//
// Feeds two different inputs through the same ChineseAnalyzer instance, one after the
// other, and expects each input to come out as one token per character.
// NOTE(review): the two int arrays are presumably the expected start and end offsets
// of each token -- confirm against the assertAnalyzesTo overload being called.
public virtual void testReusableTokenStream()
{
    Analyzer analyzer = new ChineseAnalyzer();

    // First pass over the analyzer.
    string[] firstTerms = { "中", "华", "人", "民", "共", "和", "国" };
    int[] firstStarts = { 0, 1, 2, 3, 4, 5, 6 };
    int[] firstEnds = { 1, 2, 3, 4, 5, 6, 7 };
    assertAnalyzesTo(analyzer, "中华人民共和国", firstTerms, firstStarts, firstEnds);

    // Second pass, same analyzer instance, shorter input.
    string[] secondTerms = { "北", "京", "市" };
    int[] secondStarts = { 0, 1, 2 };
    int[] secondEnds = { 1, 2, 3 };
    assertAnalyzesTo(analyzer, "北京市", secondTerms, secondStarts, secondEnds);
}
/*
 * ChineseTokenizer keeps a run of digits together as a single token;
 * ChineseFilter is what subsequently filters that token out.
 */
// Converted from the Java test "public void testNumerics() throws Exception";
// .NET has no 'throws' clause, so it does not appear in the signature.
public virtual void testNumerics()
{
    const string input = "中1234";

    // Tokenizer on its own: the digits are expected to survive as one "1234" token.
    Analyzer tokenizerOnly = new JustChineseTokenizerAnalyzer(this);
    string[] withNumber = { "中", "1234" };
    assertAnalyzesTo(tokenizerOnly, input, withNumber);

    // ChineseAnalyzer also applies ChineseFilter, so the numeric token is removed here.
    Analyzer fullAnalyzer = new ChineseAnalyzer();
    string[] withoutNumber = { "中" };
    assertAnalyzesTo(fullAnalyzer, input, withoutNumber);
}
/*
 * ChineseTokenizer handles English much like SimpleAnalyzer and lowercases
 * terms automatically.
 *
 * ChineseFilter carries an English stopword list and additionally removes every
 * single-character token. Its stopword list is case-sensitive.
 */
// Converted from the Java test "public void testEnglish() throws Exception";
// .NET has no 'throws' clause, so it does not appear in the signature.
public virtual void testEnglish()
{
    const string input = "This is a Test. b c d";

    // Tokenizer + filter together: only "test" is expected to remain.
    Analyzer fullAnalyzer = new ChineseAnalyzer();
    string[] fullExpected = { "test" };
    assertAnalyzesTo(fullAnalyzer, input, fullExpected);

    // Tokenizer alone: every word is kept, lowercased, with the punctuation gone.
    Analyzer tokenizerOnly = new JustChineseTokenizerAnalyzer(this);
    string[] tokenizerExpected = { "this", "is", "a", "test", "b", "c", "d" };
    assertAnalyzesTo(tokenizerOnly, input, tokenizerExpected);

    // Filter alone: original casing and punctuation are retained in what is left
    // ("This" is kept because the stopword list is case-sensitive).
    Analyzer filterOnly = new JustChineseFilterAnalyzer(this);
    string[] filterExpected = { "This", "Test." };
    assertAnalyzesTo(filterOnly, input, filterExpected);
}
/*
 * ChineseTokenizer produces one token for a whole number, but ChineseFilter
 * filters such tokens out.
 */
// Converted from the Java test "public void testNumerics() throws Exception";
// .NET has no 'throws' clause, so it does not appear in the signature.
public virtual void testNumerics()
{
    const string text = "中1234";

    // With only the tokenizer in play, "1234" is expected as a single token.
    string[] tokenizerTerms = { "中", "1234" };
    Analyzer bareTokenizer = new JustChineseTokenizerAnalyzer(this);
    assertAnalyzesTo(bareTokenizer, text, tokenizerTerms);

    // ChineseAnalyzer applies ChineseFilter too, which removes the numeric token.
    string[] analyzerTerms = { "中" };
    Analyzer chineseAnalyzer = new ChineseAnalyzer();
    assertAnalyzesTo(chineseAnalyzer, text, analyzerTerms);
}
/*
 * ChineseTokenizer tokenizes English in a way similar to SimpleAnalyzer,
 * lowercasing terms automatically.
 *
 * ChineseFilter has an English stopword list and also drops any token that is a
 * single character long. The stopword list is case-sensitive.
 */
// Converted from the Java test "public void testEnglish() throws Exception";
// .NET has no 'throws' clause, so it does not appear in the signature.
public virtual void testEnglish()
{
    const string text = "This is a Test. b c d";

    // Complete analyzer (tokenizer followed by filter): only "test" is expected.
    string[] analyzerTerms = { "test" };
    Analyzer chineseAnalyzer = new ChineseAnalyzer();
    assertAnalyzesTo(chineseAnalyzer, text, analyzerTerms);

    // Tokenizer by itself: all words are expected, lowercased and without punctuation.
    string[] tokenizerTerms = { "this", "is", "a", "test", "b", "c", "d" };
    Analyzer bareTokenizer = new JustChineseTokenizerAnalyzer(this);
    assertAnalyzesTo(bareTokenizer, text, tokenizerTerms);

    // Filter by itself: the surviving terms keep their original case and punctuation.
    string[] filterTerms = { "This", "Test." };
    Analyzer bareFilter = new JustChineseFilterAnalyzer(this);
    assertAnalyzesTo(bareFilter, text, filterTerms);
}
// Converted from the Java test "public void testReusableTokenStream() throws Exception";
// .NET has no 'throws' clause, so it does not appear in the signature.
//
// Analyzes two inputs back to back with a single ChineseAnalyzer and expects every
// character of each input to be returned as its own token.
// NOTE(review): the int arrays look like per-token start and end offsets -- confirm
// against the assertAnalyzesTo overload being called.
public virtual void testReusableTokenStream()
{
    Analyzer shared = new ChineseAnalyzer();

    // Longer input first.
    string[] longTerms = { "中", "华", "人", "民", "共", "和", "国" };
    int[] longStarts = { 0, 1, 2, 3, 4, 5, 6 };
    int[] longEnds = { 1, 2, 3, 4, 5, 6, 7 };
    assertAnalyzesTo(shared, "中华人民共和国", longTerms, longStarts, longEnds);

    // Then a shorter input through the very same analyzer.
    string[] shortTerms = { "北", "京", "市" };
    int[] shortStarts = { 0, 1, 2 };
    int[] shortEnds = { 1, 2, 3 };
    assertAnalyzesTo(shared, "北京市", shortTerms, shortStarts, shortEnds);
}