Exemplo n.º 1
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testTokenStream() throws Exception
        /// <summary>
        /// Wraps a <c>MockAnalyzer</c> (configured with <c>MockTokenizer.WHITESPACE</c>) in a
        /// <c>QueryAutoStopWordAnalyzer</c> built from <c>reader</c> with the argument 10, and
        /// asserts that analyzing "this boring" on <c>repetitiveField</c> yields only "this".
        /// </summary>
        public virtual void testTokenStream()
        {
            var wrappedAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
            var stopWordAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, wrappedAnalyzer, reader, 10);
            string[] expectedTokens = { "this" };

            TokenStream stream = stopWordAnalyzer.tokenStream("repetitiveField", "this boring");
            assertTokenStreamContents(stream, expectedTokens);
        }
Exemplo n.º 2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testDefaultStopwordsAllFields() throws Exception
        /// <summary>
        /// Rebuilds <c>protectedAnalyzer</c> from <c>appAnalyzer</c> and <c>reader</c> using the
        /// three-argument constructor, then asserts that analyzing "boring" on
        /// <c>repetitiveField</c> produces no tokens.
        /// </summary>
        public virtual void testDefaultStopwordsAllFields()
        {
            string[] noTokens = new string[0];

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader);

            // Default stop word filtering will remove boring
            TokenStream stream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
            assertTokenStreamContents(stream, noTokens);
        }
Exemplo n.º 3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testNoStopwords() throws Exception
        /// <summary>
        /// Builds <c>protectedAnalyzer</c> with an empty field collection and asserts that
        /// "quick" on <c>variedField</c> and "boring" on <c>repetitiveField</c> both pass
        /// through unchanged.
        /// </summary>
        public virtual void testNoStopwords()
        {
            // Note: an empty list of fields passed in
            var noFields = System.Linq.Enumerable.Empty<string>();
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, noFields, 1);

            TokenStream variedStream = protectedAnalyzer.tokenStream("variedField", "quick");
            assertTokenStreamContents(variedStream, new[] { "quick" });

            TokenStream repetitiveStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
            assertTokenStreamContents(repetitiveStream, new[] { "boring" });
        }
Exemplo n.º 4
0
        /// <summary>
        /// Wraps a <c>NonreusableAnalyzer</c> in a <c>QueryAutoStopWordAnalyzer</c>, adds stop
        /// words from <c>reader</c> with the argument 10, and asserts that searching
        /// <c>repetitiveField</c> for "boring" and for "vaguelyboring" each returns zero hits.
        /// </summary>
        public void TestWrappingNonReusableAnalyzer()
        {
            QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new NonreusableAnalyzer());

            a.AddStopWords(reader, 10);

            // AreEqual (rather than True(x == 0)) reports the actual hit count when the check fails.
            int numHits = Search(a, "repetitiveField:boring");
            Assert.AreEqual(0, numHits, "Expected no hits for repetitiveField:boring");

            numHits = Search(a, "repetitiveField:vaguelyboring");
            Assert.AreEqual(0, numHits, "Expected no hits for repetitiveField:vaguelyboring");
        }
Exemplo n.º 5
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testStopwordsPerFieldMaxDocFreq() throws Exception
        /// <summary>
        /// Asserts that stop words are identified for <c>repetitiveField</c>, and that building
        /// the analyzer for both <c>repetitiveField</c> and <c>variedField</c> yields a larger
        /// combined stop-word count than building it for <c>repetitiveField</c> alone.
        /// </summary>
        public virtual void testStopwordsPerFieldMaxDocFreq()
        {
            // Java's Arrays.asList does not exist in C#; an array literal supplies the field names instead.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, new[] { "repetitiveField" }, 10);

            // NOTE(review): .Length assumes getStopWords returns an array - confirm against the ported API.
            int numStopWords = protectedAnalyzer.getStopWords("repetitiveField").Length;

            assertTrue("Should have identified stop words", numStopWords > 0);

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, new[] { "repetitiveField", "variedField" }, 10);
            int numNewStopWords = protectedAnalyzer.getStopWords("repetitiveField").Length + protectedAnalyzer.getStopWords("variedField").Length;

            assertTrue("Should have identified more stop words", numNewStopWords > numStopWords);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Wraps a <c>WhitespaceAnalyzer</c> in a <c>QueryAutoStopWordAnalyzer</c>, adds stop
        /// words from <c>reader</c> with the argument 10, and asserts that analyzing
        /// "this boring" on <c>repetitiveField</c> yields exactly one token, "this".
        /// </summary>
        public void TestTokenStream()
        {
            var analyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer());
            analyzer.AddStopWords(reader, 10);

            TokenStream stream = analyzer.TokenStream("repetitiveField", new StringReader("this boring"));
            ITermAttribute term = stream.GetAttribute<ITermAttribute>();

            // First token must exist and be "this".
            bool hasFirstToken = stream.IncrementToken();
            Assert.True(hasFirstToken);
            Assert.AreEqual("this", term.Term);

            // No further tokens are expected after the first one.
            bool hasSecondToken = stream.IncrementToken();
            Assert.False(hasSecondToken);
        }
Exemplo n.º 7
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testStopwordsPerFieldMaxPercentDocs() throws Exception
        /// <summary>
        /// Builds the analyzer for <c>variedField</c> only (with the argument 1f / 2f) and asserts
        /// that "boring" on <c>repetitiveField</c> passes through; then rebuilds it for both
        /// fields and asserts that "boring" on <c>repetitiveField</c> is removed.
        /// </summary>
        public virtual void testStopwordsPerFieldMaxPercentDocs()
        {
            // Java's Arrays.asList does not exist in C#; an array literal supplies the field names instead.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, new[] { "variedField" }, 1f / 2f);
            TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");

            // A filter on one Field should not affect queries on another
            assertTokenStreamContents(protectedTokenStream, new string[] { "boring" });

            protectedAnalyzer    = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, new[] { "variedField", "repetitiveField" }, 1f / 2f);
            protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
            // A filter on the right Field should affect queries on it
            assertTokenStreamContents(protectedTokenStream, new string[0]);
        }
Exemplo n.º 8
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testNoFieldNamePollution() throws Exception
        /// <summary>
        /// Builds the analyzer for <c>repetitiveField</c> only and asserts that "boring" is
        /// removed on <c>repetitiveField</c> but still emitted on <c>variedField</c>.
        /// </summary>
        public virtual void testNoFieldNamePollution()
        {
            // Java's Arrays.asList does not exist in C#; an array literal supplies the field name instead.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, new[] { "repetitiveField" }, 10);

            TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");

            // Check filter set up OK
            assertTokenStreamContents(protectedTokenStream, new string[0]);

            protectedTokenStream = protectedAnalyzer.tokenStream("variedField", "boring");
            // Filter should not prevent stopwords in one field being used in another
            assertTokenStreamContents(protectedTokenStream, new string[] { "boring" });
        }
Exemplo n.º 9
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testStopwordsAllFieldsMaxPercentDocs() throws Exception
        /// <summary>
        /// With the analyzer built using the argument 1f / 2f, asserts that "boring" is removed
        /// from <c>repetitiveField</c> while "vaguelyboring" is kept; with the argument
        /// 1f / 4f, asserts that "vaguelyboring" is removed as well.
        /// </summary>
        public virtual void testStopwordsAllFieldsMaxPercentDocs()
        {
            const float oneHalf = 1f / 2f;
            const float oneQuarter = 1f / 4f;

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, oneHalf);

            // A filter on terms in > one half of docs remove boring
            TokenStream boringStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
            assertTokenStreamContents(boringStream, new string[0]);

            // A filter on terms in > half of docs should not remove vaguelyBoring
            TokenStream vagueStream = protectedAnalyzer.tokenStream("repetitiveField", "vaguelyboring");
            assertTokenStreamContents(vagueStream, new[] { "vaguelyboring" });

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, oneQuarter);

            // A filter on terms in > quarter of docs should remove vaguelyBoring
            vagueStream = protectedAnalyzer.tokenStream("repetitiveField", "vaguelyboring");
            assertTokenStreamContents(vagueStream, new string[0]);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Prepares the fixture: creates a <c>RAMDirectory</c>, indexes 200 documents into it,
        /// opens <c>reader</c> on the result, creates <c>protectedAnalyzer</c> around
        /// <c>appAnalyzer</c>, and finally calls the base class set-up.
        /// </summary>
        /// <remarks>
        /// Each document gets a <c>variedField</c> and a <c>repetitiveField</c> value, taken
        /// cyclically from <c>variedFieldValues</c> and <c>repetitiveFieldValues</c>.
        /// </remarks>
        public override void SetUp()
        {
            dir         = new RAMDirectory();
            appAnalyzer = new WhitespaceAnalyzer();

            const int numDocs = 200;
            IndexWriter writer = new IndexWriter(dir, appAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

            try
            {
                for (int i = 0; i < numDocs; i++)
                {
                    Document doc = new Document();
                    string   variedFieldValue     = variedFieldValues[i % variedFieldValues.Length];
                    string   repetitiveFieldValue = repetitiveFieldValues[i % repetitiveFieldValues.Length];
                    doc.Add(new Field("variedField", variedFieldValue, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("repetitiveField", repetitiveFieldValue, Field.Store.YES, Field.Index.ANALYZED));
                    writer.AddDocument(doc);
                }
            }
            finally
            {
                // Close the writer even if indexing throws, so it is never left open.
                writer.Close();
            }

            reader            = IndexReader.Open(dir, true);
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, appAnalyzer);
            base.SetUp();
        }
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testDefaultStopwordsAllFields() throws Exception
 /// <summary>
 /// Rebuilds <c>protectedAnalyzer</c> from <c>appAnalyzer</c> and <c>reader</c> using the
 /// three-argument constructor, then asserts that analyzing "boring" on
 /// <c>repetitiveField</c> produces no tokens.
 /// </summary>
 public virtual void testDefaultStopwordsAllFields()
 {
     string[] expectedTokens = new string[0];

     protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader);

     // Default stop word filtering will remove boring
     TokenStream filtered = protectedAnalyzer.tokenStream("repetitiveField", "boring");
     assertTokenStreamContents(filtered, expectedTokens);
 }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testNoStopwords() throws Exception
        /// <summary>
        /// Builds <c>protectedAnalyzer</c> with an empty field collection and asserts that
        /// "quick" on <c>variedField</c> and "boring" on <c>repetitiveField</c> both pass
        /// through unchanged.
        /// </summary>
        public virtual void testNoStopwords()
        {
            // Note: an empty list of fields passed in
            var emptyFieldList = System.Linq.Enumerable.Empty<string>();
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, emptyFieldList, 1);

            TokenStream quickStream = protectedAnalyzer.tokenStream("variedField", "quick");
            assertTokenStreamContents(quickStream, new[] { "quick" });

            TokenStream boringStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
            assertTokenStreamContents(boringStream, new[] { "boring" });
        }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testStopwordsAllFieldsMaxPercentDocs() throws Exception
        /// <summary>
        /// With the analyzer built using the argument 1f / 2f, asserts that "boring" is removed
        /// from <c>repetitiveField</c> while "vaguelyboring" is kept; with the argument
        /// 1f / 4f, asserts that "vaguelyboring" is removed as well.
        /// </summary>
        public virtual void testStopwordsAllFieldsMaxPercentDocs()
        {
            const float half = 1f / 2f;
            const float quarter = 1f / 4f;
            string[] nothing = new string[0];

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, half);

            // A filter on terms in > one half of docs remove boring
            TokenStream boring = protectedAnalyzer.tokenStream("repetitiveField", "boring");
            assertTokenStreamContents(boring, nothing);

            // A filter on terms in > half of docs should not remove vaguelyBoring
            TokenStream vaguelyBoring = protectedAnalyzer.tokenStream("repetitiveField", "vaguelyboring");
            assertTokenStreamContents(vaguelyBoring, new[] { "vaguelyboring" });

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, quarter);

            // A filter on terms in > quarter of docs should remove vaguelyBoring
            vaguelyBoring = protectedAnalyzer.tokenStream("repetitiveField", "vaguelyboring");
            assertTokenStreamContents(vaguelyBoring, new string[0]);
        }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testStopwordsPerFieldMaxDocFreq() throws Exception
        /// <summary>
        /// Asserts that stop words are identified for <c>repetitiveField</c>, and that building
        /// the analyzer for both <c>repetitiveField</c> and <c>variedField</c> yields a larger
        /// combined stop-word count than building it for <c>repetitiveField</c> alone.
        /// </summary>
        public virtual void testStopwordsPerFieldMaxDocFreq()
        {
            // Java's Arrays.asList does not exist in C#; an array literal supplies the field names instead.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, new[] { "repetitiveField" }, 10);
            // NOTE(review): .Length assumes getStopWords returns an array - confirm against the ported API.
            int numStopWords = protectedAnalyzer.getStopWords("repetitiveField").Length;
            assertTrue("Should have identified stop words", numStopWords > 0);

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, new[] { "repetitiveField", "variedField" }, 10);
            int numNewStopWords = protectedAnalyzer.getStopWords("repetitiveField").Length + protectedAnalyzer.getStopWords("variedField").Length;
            assertTrue("Should have identified more stop words", numNewStopWords > numStopWords);
        }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testStopwordsPerFieldMaxPercentDocs() throws Exception
        /// <summary>
        /// Builds the analyzer for <c>variedField</c> only (with the argument 1f / 2f) and asserts
        /// that "boring" on <c>repetitiveField</c> passes through; then rebuilds it for both
        /// fields and asserts that "boring" on <c>repetitiveField</c> is removed.
        /// </summary>
        public virtual void testStopwordsPerFieldMaxPercentDocs()
        {
            // Java's Arrays.asList does not exist in C#; an array literal supplies the field names instead.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, new[] { "variedField" }, 1f / 2f);
            TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
            // A filter on one Field should not affect queries on another
            assertTokenStreamContents(protectedTokenStream, new string[]{"boring"});

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, new[] { "variedField", "repetitiveField" }, 1f / 2f);
            protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
            // A filter on the right Field should affect queries on it
            assertTokenStreamContents(protectedTokenStream, new string[0]);
        }
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testTokenStream() throws Exception
 /// <summary>
 /// Wraps a <c>MockAnalyzer</c> (configured with <c>MockTokenizer.WHITESPACE</c>) in a
 /// <c>QueryAutoStopWordAnalyzer</c> built from <c>reader</c> with the argument 10, and
 /// asserts that analyzing "this boring" on <c>repetitiveField</c> yields only "this".
 /// </summary>
 public virtual void testTokenStream()
 {
     var innerAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
     var autoStopAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, innerAnalyzer, reader, 10);

     TokenStream tokens = autoStopAnalyzer.tokenStream("repetitiveField", "this boring");
     string[] expected = { "this" };
     assertTokenStreamContents(tokens, expected);
 }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testNoFieldNamePollution() throws Exception
        /// <summary>
        /// Builds the analyzer for <c>repetitiveField</c> only and asserts that "boring" is
        /// removed on <c>repetitiveField</c> but still emitted on <c>variedField</c>.
        /// </summary>
        public virtual void testNoFieldNamePollution()
        {
            // Java's Arrays.asList does not exist in C#; an array literal supplies the field name instead.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, new[] { "repetitiveField" }, 10);

            TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
            // Check filter set up OK
            assertTokenStreamContents(protectedTokenStream, new string[0]);

            protectedTokenStream = protectedAnalyzer.tokenStream("variedField", "boring");
            // Filter should not prevent stopwords in one field being used in another
            assertTokenStreamContents(protectedTokenStream, new string[]{"boring"});
        }