Example #1
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testStopList() throws java.io.IOException
        public virtual void testStopList()
        {
            CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
            StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
            TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer");
            try
            {
              assertNotNull(stream);
              CharTermAttribute termAtt = stream.getAttribute(typeof(CharTermAttribute));

              stream.reset();
              while (stream.incrementToken())
              {
                // no term emitted by the stream should be in the custom stop set
                string text = termAtt.ToString();
                assertFalse(stopWordsSet.contains(text));
              }
              stream.end();
            }
            finally
            {
              IOUtils.closeWhileHandlingException(stream);
            }
        }
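
The converted test keeps the Java camelCase calls from the original Lucene source. Against the released Lucene.NET 4.8 packages the same consume-a-TokenStream pattern would look roughly like the sketch below. This is only an illustration under the assumption of the 4.8 API surface (the LuceneVersion.LUCENE_48 constant, the generic GetAttribute<ICharTermAttribute>() call, and the namespaces shown), which may differ in other releases.

        // Minimal sketch, assuming the Lucene.NET 4.8 API (LuceneVersion.LUCENE_48,
        // GetAttribute<ICharTermAttribute>(), and the namespaces imported below).
        using System;
        using Lucene.Net.Analysis;
        using Lucene.Net.Analysis.Core;            // StopAnalyzer
        using Lucene.Net.Analysis.TokenAttributes; // ICharTermAttribute
        using Lucene.Net.Analysis.Util;            // CharArraySet
        using Lucene.Net.Util;                     // LuceneVersion

        public static class StopAnalyzerSketch
        {
            public static void Run()
            {
                var stopWords = new CharArraySet(LuceneVersion.LUCENE_48,
                    new[] { "good", "test", "analyzer" }, false);
                using (var analyzer = new StopAnalyzer(LuceneVersion.LUCENE_48, stopWords))
                using (TokenStream stream = analyzer.GetTokenStream(
                    "test", "This is a good test of the english stop analyzer"))
                {
                    var termAtt = stream.GetAttribute<ICharTermAttribute>();
                    stream.Reset();
                    while (stream.IncrementToken())
                    {
                        // none of the custom stop words should survive filtering
                        Console.WriteLine(termAtt.ToString());
                    }
                    stream.End();
                }
            }
        }

Compared with the converted test, the using blocks take the place of the explicit IOUtils.closeWhileHandlingException call in the finally block.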
Example #2
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testStop() throws Exception
 public virtual void testStop()
 {
     Analyzer a = new StopAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "foo bar FOO BAR", new string[] {"foo", "bar", "foo", "bar"});
     assertAnalyzesTo(a, "foo a bar such FOO THESE BAR", new string[] {"foo", "bar", "foo", "bar"});
 }
Example #3
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testStopListPositions() throws java.io.IOException
        public virtual void testStopListPositions()
        {
            CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
            StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
            string s = "This is a good test of the english stop analyzer with positions";
            // expected position increments: an increment of N means N-1 stop words
            // were removed immediately before that token
            int[] expectedIncr = new int[] {1, 1, 1, 3, 1, 1, 1, 2, 1};
            TokenStream stream = newStop.tokenStream("test", s);
            try
            {
              assertNotNull(stream);
              int i = 0;
              CharTermAttribute termAtt = stream.getAttribute(typeof(CharTermAttribute));
              PositionIncrementAttribute posIncrAtt = stream.addAttribute(typeof(PositionIncrementAttribute));

              stream.reset();
              while (stream.incrementToken())
              {
                // every surviving term must not be a stop word, and its position
                // increment must account for the stop words removed before it
                string text = termAtt.ToString();
                assertFalse(stopWordsSet.contains(text));
                assertEquals(expectedIncr[i++], posIncrAtt.PositionIncrement);
              }
              stream.end();
            }
            finally
            {
              IOUtils.closeWhileHandlingException(stream);
            }
        }
        // Tokenizes "words" with the analyzer selected by its full type name and returns
        // the term texts, using the legacy Lucene.Net token API (Next()/TermText()).
        public static List<string> cutwords(string words, string analyzer)
        {
            List<string> results = new List<string>();

            switch (analyzer)
            {
            case "Lucene.Net.Analysis.SimpleAnalyzer":
                SimpleAnalyzer            analyzerInstance0 = new SimpleAnalyzer();
                TokenStream               ts0 = analyzerInstance0.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token0;
                while ((token0 = ts0.Next()) != null)
                {
                    results.Add(token0.TermText());
                }
                ts0.Close();
                analyzerInstance0.Close();
                break;

            case "Lucene.Net.Analysis.KeywordAnalyzer":
                KeywordAnalyzer           analyzerInstance1 = new KeywordAnalyzer();
                TokenStream               ts1 = analyzerInstance1.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token1;
                while ((token1 = ts1.Next()) != null)
                {
                    results.Add(token1.TermText());
                }
                ts1.Close();
                analyzerInstance1.Close();
                break;

            case "Lucene.Net.Analysis.StopAnalyzer":
                StopAnalyzer analyzerInstance2 = new StopAnalyzer();
                TokenStream  ts2 = analyzerInstance2.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token2;
                while ((token2 = ts2.Next()) != null)
                {
                    results.Add(token2.TermText());
                }
                ts2.Close();
                analyzerInstance2.Close();
                break;

            case "Lucene.Net.Analysis.WhitespaceAnalyzer":
                WhitespaceAnalyzer        analyzerInstance3 = new WhitespaceAnalyzer();
                TokenStream               ts3 = analyzerInstance3.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token3;
                while ((token3 = ts3.Next()) != null)
                {
                    results.Add(token3.TermText());
                }
                ts3.Close();
                analyzerInstance3.Close();
                break;

            case "Lucene.Net.Analysis.PanGu.PanGuAnalyzer":
                PanGu.Segment.Init(@"G:\CProjects\xueba\DataMining\DataMining\PanGu\PanGu.xml");
                PanGuAnalyzer             analyzerInstance4 = new PanGuAnalyzer();
                TokenStream               ts4 = analyzerInstance4.TokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token4;
                while ((token4 = ts4.Next()) != null)
                {
                    results.Add(token4.TermText());
                }
                ts4.Close();
                analyzerInstance4.Close();
                break;

            case "Lucene.Net.Analysis.Standard.StandardAnalyzer":
                StandardAnalyzer          analyzerInstance5 = new StandardAnalyzer();
                TokenStream               ts5 = analyzerInstance5.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token5;
                while ((token5 = ts5.Next()) != null)
                {
                    results.Add(token5.TermText());
                }
                ts5.Close();
                analyzerInstance5.Close();
                break;

            case "Lucene.China.ChineseAnalyzer":
                ChineseAnalyzer           analyzerInstance6 = new ChineseAnalyzer();
                TokenStream               ts6 = analyzerInstance6.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token6;
                while ((token6 = ts6.Next()) != null)
                {
                    results.Add(token6.TermText());
                }
                ts6.Close();
                analyzerInstance6.Close();
                break;
            }
            return results;
        }
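
A call site for the helper above might look like the following. The analyzer is picked by its full type name as a string, so any name the switch does not recognize falls through and yields an empty list; the sample sentence and the choice of the StopAnalyzer case are illustrative only.

            // Hypothetical call site for cutwords; assumes the legacy Lucene.Net 2.x/3.x
            // assemblies referenced above are available (PanGu is only needed for its case).
            List<string> tokens = cutwords(
                "This is a good test of the english stop analyzer",
                "Lucene.Net.Analysis.StopAnalyzer");

            // With the default English stop set, words such as "this", "is", "a", "of",
            // and "the" should be dropped, leaving e.g. "good test english stop analyzer".
            Console.WriteLine(string.Join(" ", tokens));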