//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testStopList() throws java.io.IOException
public virtual void testStopList()
{
    // A StopAnalyzer built from a custom stop-word set must emit no token
    // that belongs to that set.
    CharArraySet stopWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
    StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, stopWords);

    TokenStream tokens = analyzer.tokenStream("test", "This is a good test of the english stop analyzer");
    try
    {
        assertNotNull(tokens);
        CharTermAttribute term = tokens.getAttribute(typeof(CharTermAttribute));

        tokens.reset();
        while (tokens.incrementToken())
        {
            // Every surviving term must lie outside the stop-word set.
            assertFalse(stopWords.contains(term.ToString()));
        }
        tokens.end();
    }
    finally
    {
        // Release the stream even when an assertion above throws.
        IOUtils.closeWhileHandlingException(tokens);
    }
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testStop() throws Exception
public virtual void testStop()
{
    // The default StopAnalyzer must drop standard English stop words
    // ("a", "such", "these") and lower-case everything it keeps.
    Analyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT);

    assertAnalyzesTo(analyzer, "foo bar FOO BAR",
        new string[] { "foo", "bar", "foo", "bar" });
    assertAnalyzesTo(analyzer, "foo a bar such FOO THESE BAR",
        new string[] { "foo", "bar", "foo", "bar" });
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testStopListPositions() throws java.io.IOException
public virtual void testStopListPositions()
{
    // Removed stop words must be reflected as gaps in the position
    // increments of the tokens that survive filtering.
    CharArraySet stopWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
    StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, stopWords);

    string input = "This is a good test of the english stop analyzer with positions";
    // Expected increment per emitted token; values > 1 mark holes left by
    // the removed stop words ("good", "test", "analyzer").
    int[] expectedIncrements = new int[] { 1, 1, 1, 3, 1, 1, 1, 2, 1 };

    TokenStream tokens = analyzer.tokenStream("test", input);
    try
    {
        assertNotNull(tokens);
        CharTermAttribute term = tokens.getAttribute(typeof(CharTermAttribute));
        PositionIncrementAttribute posIncr = tokens.addAttribute(typeof(PositionIncrementAttribute));

        tokens.reset();
        int index = 0;
        while (tokens.incrementToken())
        {
            assertFalse(stopWords.contains(term.ToString()));
            assertEquals(expectedIncrements[index++], posIncr.PositionIncrement);
        }
        tokens.end();
    }
    finally
    {
        // Release the stream even when an assertion above throws.
        IOUtils.closeWhileHandlingException(tokens);
    }
}
/// <summary>
/// Tokenizes <paramref name="words"/> with the analyzer selected by its
/// fully-qualified type name and returns the resulting terms in order.
/// Unknown analyzer names yield an empty list (original behavior preserved).
/// </summary>
/// <param name="words">The raw text to tokenize.</param>
/// <param name="analyzer">Fully-qualified analyzer type name selecting the tokenizer.</param>
/// <returns>The list of term texts produced by the chosen analyzer.</returns>
public static List<string> cutwords(string words, string analyzer)
{
    List<string> results = new List<string>();
    switch (analyzer)
    {
        case "Lucene.Net.Analysis.SimpleAnalyzer":
            CollectTokens(new SimpleAnalyzer(), words, results, true);
            break;
        case "Lucene.Net.Analysis.KeywordAnalyzer":
            CollectTokens(new KeywordAnalyzer(), words, results, true);
            break;
        case "Lucene.Net.Analysis.StopAnalyzer":
            CollectTokens(new StopAnalyzer(), words, results, true);
            break;
        case "Lucene.Net.Analysis.WhitespaceAnalyzer":
            CollectTokens(new WhitespaceAnalyzer(), words, results, true);
            break;
        case "Lucene.Net.Analysis.PanGu.PanGuAnalyzer":
            // NOTE(review): hard-coded absolute dictionary path — consider
            // moving to configuration; kept as-is to preserve behavior.
            PanGu.Segment.Init(@"G:\CProjects\xueba\DataMining\DataMining\PanGu\PanGu.xml");
            // PanGu used TokenStream(...) in the original, not ReusableTokenStream(...).
            CollectTokens(new PanGuAnalyzer(), words, results, false);
            break;
        case "Lucene.Net.Analysis.Standard.StandardAnalyzer":
            CollectTokens(new StandardAnalyzer(), words, results, true);
            break;
        case "Lucene.China.ChineseAnalyzer":
            CollectTokens(new ChineseAnalyzer(), words, results, true);
            break;
    }
    return results;
}

/// <summary>
/// Runs <paramref name="analyzerInstance"/> over <paramref name="words"/> and
/// appends every term text to <paramref name="results"/>, then closes both the
/// token stream and the analyzer. Extracted to remove seven copy-pasted loops.
/// </summary>
/// <param name="analyzerInstance">The analyzer to tokenize with; closed before returning.</param>
/// <param name="words">The raw text to tokenize.</param>
/// <param name="results">Destination list receiving each token's term text.</param>
/// <param name="reusable">
/// When true, obtains the stream via ReusableTokenStream; otherwise via TokenStream
/// (the PanGu path used the non-reusable call in the original code).
/// </param>
private static void CollectTokens(Analyzer analyzerInstance, string words, List<string> results, bool reusable)
{
    TokenStream ts = reusable
        ? analyzerInstance.ReusableTokenStream("", new StringReader(words))
        : analyzerInstance.TokenStream("", new StringReader(words));
    Lucene.Net.Analysis.Token token;
    while ((token = ts.Next()) != null)
    {
        results.Add(token.TermText());
    }
    ts.Close();
    analyzerInstance.Close();
}