Exemple #1
0
        /// <summary>
        /// Builds the protected analyzer with an empty list of fields and checks
        /// that the query terms for both test fields come through unfiltered.
        /// </summary>
        public virtual void TestNoStopwords()
        {
            // The field list handed to the constructor is deliberately empty.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT,
                appAnalyzer,
                reader,
                Collections.EmptyList<string>(),
                1);

            // "quick" on variedField is expected to survive analysis.
            AssertTokenStreamContents(
                protectedAnalyzer.GetTokenStream("variedField", "quick"),
                new string[] { "quick" });

            // "boring" on repetitiveField is expected to survive as well.
            AssertTokenStreamContents(
                protectedAnalyzer.GetTokenStream("repetitiveField", "boring"),
                new string[] { "boring" });
        }
Exemple #2
0
        /// <summary>
        /// Configures the protected analyzer for "repetitiveField" only and checks
        /// that a term filtered on that field is still emitted when the same term
        /// is analyzed against "variedField".
        /// </summary>
        public virtual void TestNoFieldNamePollution()
        {
            const string term = "boring";

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT,
                appAnalyzer,
                reader,
                new string[] { "repetitiveField" },
                10);

            // Sanity check: the term is filtered out on the configured field.
            AssertTokenStreamContents(
                protectedAnalyzer.GetTokenStream("repetitiveField", term),
                new string[0]);

            // Stop words gathered for one field must not be applied to another field.
            AssertTokenStreamContents(
                protectedAnalyzer.GetTokenStream("variedField", term),
                new string[] { term });
        }
Exemple #3
0
        /// <summary>
        /// Builds the protected analyzer with the constructor overload that takes
        /// no field list or threshold, and checks that "boring" is removed from a
        /// query on "repetitiveField".
        /// </summary>
        public virtual void TestDefaultStopwordsAllFields()
        {
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader);

            TokenStream filtered = protectedAnalyzer.GetTokenStream("repetitiveField", "boring");
            string[] noTokens = new string[0];

            // The default stop word filtering is expected to drop "boring".
            AssertTokenStreamContents(filtered, noTokens);
        }
Exemple #4
0
        /// <summary>
        /// Wraps a whitespace <c>MockAnalyzer</c> in a <c>QueryAutoStopWordAnalyzer</c>
        /// and checks that analyzing "this boring" against "repetitiveField" yields
        /// only the token "this".
        /// </summary>
        public virtual void TestTokenStream()
        {
            // The analyzer is created locally, so this test owns it. Scope it with
            // a using statement so it is disposed even when the assertion throws.
            using (QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false), reader, 10))
            {
                TokenStream ts = a.GetTokenStream("repetitiveField", "this boring");

                AssertTokenStreamContents(ts, new string[] { "this" });
            }
        }
Exemple #5
0
        /// <summary>
        /// Exercises the constructor overload that takes a fractional threshold
        /// (first 1/2, then 1/4) and checks which terms are removed from queries
        /// on "repetitiveField" at each threshold.
        /// </summary>
        public virtual void TestStopwordsAllFieldsMaxPercentDocs()
        {
            const string field = "repetitiveField";

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 2f);

            // With a threshold of one half of the docs, "boring" is removed...
            AssertTokenStreamContents(
                protectedAnalyzer.GetTokenStream(field, "boring"),
                new string[0]);

            // ...while "vaguelyboring" is kept.
            AssertTokenStreamContents(
                protectedAnalyzer.GetTokenStream(field, "vaguelyboring"),
                new string[] { "vaguelyboring" });

            // Lowering the threshold to a quarter of the docs removes "vaguelyboring" too.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 4f);
            AssertTokenStreamContents(
                protectedAnalyzer.GetTokenStream(field, "vaguelyboring"),
                new string[0]);
        }
Exemple #6
0
        /// <summary>
        /// Exercises the constructor overload that takes an explicit field list and
        /// a fractional threshold of 1/2, checking that "boring" is filtered on
        /// "repetitiveField" only when that field is in the list.
        /// </summary>
        public virtual void TestStopwordsPerFieldMaxPercentDocs()
        {
            const string queriedField = "repetitiveField";
            const string term = "boring";

            // Only "variedField" is configured, so a query on another field is untouched.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT,
                appAnalyzer,
                reader,
                new string[] { "variedField" },
                1f / 2f);
            AssertTokenStreamContents(
                protectedAnalyzer.GetTokenStream(queriedField, term),
                new string[] { term });

            // Once the queried field is configured too, the term is filtered out.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT,
                appAnalyzer,
                reader,
                new string[] { "variedField", "repetitiveField" },
                1f / 2f);
            AssertTokenStreamContents(
                protectedAnalyzer.GetTokenStream(queriedField, term),
                new string[0]);
        }