TokenStream() public method

public TokenStream ( String fieldName, TextReader reader ) : TokenStream
fieldName String
reader TextReader
return TokenStream
        /// <summary>
        /// Builds the analyzer with an empty field list and asserts that the query
        /// terms "quick" and "boring" are both emitted unchanged.
        /// </summary>
        public virtual void TestNoStopwords()
        {
            // An empty field list is handed to the constructor here.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT,
                appAnalyzer,
                reader,
                Enumerable.Empty<string>(),
                1);

            string[] expectedVaried = { "quick" };
            TokenStream variedStream = protectedAnalyzer.TokenStream("variedField", "quick");
            AssertTokenStreamContents(variedStream, expectedVaried);

            string[] expectedRepetitive = { "boring" };
            TokenStream repetitiveStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            AssertTokenStreamContents(repetitiveStream, expectedRepetitive);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// With an empty list of fields passed to the analyzer, asserts that
        /// "quick" (variedField) and "boring" (repetitiveField) each produce
        /// exactly one token equal to the input.
        /// </summary>
        public virtual void TestNoStopwords()
        {
            // The field list argument is deliberately empty.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT, appAnalyzer, reader, Enumerable.Empty<string>(), 1);

            TokenStream stream = protectedAnalyzer.TokenStream("variedField", "quick");
            string[] quickOnly = { "quick" };
            AssertTokenStreamContents(stream, quickOnly);

            stream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            string[] boringOnly = { "boring" };
            AssertTokenStreamContents(stream, boringOnly);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Configures the analyzer for "repetitiveField" only and asserts that
        /// "boring" is dropped on that field but still emitted on "variedField".
        /// </summary>
        public virtual void TestNoFieldNamePollution()
        {
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT,
                appAnalyzer,
                reader,
                Arrays.AsList("repetitiveField"),
                10);

            // Sanity check: the filter is active on the configured field.
            string[] nothing = new string[0];
            TokenStream filteredStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            AssertTokenStreamContents(filteredStream, nothing);

            // Stop words found for one field must not be applied to a different field.
            string[] boringOnly = { "boring" };
            TokenStream otherFieldStream = protectedAnalyzer.TokenStream("variedField", "boring");
            AssertTokenStreamContents(otherFieldStream, boringOnly);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Uses the three-argument constructor (no field list, no threshold) and
        /// asserts that "boring" on "repetitiveField" yields no tokens.
        /// </summary>
        public virtual void TestDefaultStopwordsAllFields()
        {
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader);

            // The default stop word filtering is expected to strip "boring".
            string[] noTokens = new string[0];
            TokenStream stream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            AssertTokenStreamContents(stream, noTokens);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Wraps a whitespace <c>MockAnalyzer</c> in a <c>QueryAutoStopWordAnalyzer</c>
        /// and asserts that analyzing "this boring" on "repetitiveField" emits only "this".
        /// </summary>
        public virtual void TestTokenStream()
        {
            QueryAutoStopWordAnalyzer analyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT,
                new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false),
                reader,
                10);

            string[] expected = { "this" };
            TokenStream stream = analyzer.TokenStream("repetitiveField", "this boring");
            AssertTokenStreamContents(stream, expected);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Exercises the constructor taking a fractional threshold: with 1/2,
        /// "boring" is removed and "vaguelyboring" is kept; with 1/4,
        /// "vaguelyboring" is removed as well.
        /// </summary>
        public virtual void TestStopwordsAllFieldsMaxPercentDocs()
        {
            string[] noTokens = new string[0];

            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 2f);

            // Terms present in more than half of the docs: "boring" must be filtered out.
            TokenStream stream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            AssertTokenStreamContents(stream, noTokens);

            // Same one-half threshold: "vaguelyboring" must survive.
            string[] vaguelyBoringOnly = { "vaguelyboring" };
            stream = protectedAnalyzer.TokenStream("repetitiveField", "vaguelyboring");
            AssertTokenStreamContents(stream, vaguelyBoringOnly);

            // Terms present in more than a quarter of the docs: "vaguelyboring" must now go too.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 4f);
            stream = protectedAnalyzer.TokenStream("repetitiveField", "vaguelyboring");
            AssertTokenStreamContents(stream, noTokens);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Checks per-field configuration with a 1/2 threshold: when only
        /// "variedField" is listed, "boring" on "repetitiveField" is kept; when
        /// "repetitiveField" is listed too, "boring" is removed.
        /// </summary>
        public virtual void TestStopwordsPerFieldMaxPercentDocs()
        {
            // Filtering configured for one field must leave queries on another field alone.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT,
                appAnalyzer,
                reader,
                Arrays.AsList("variedField"),
                1f / 2f);
            string[] boringOnly = { "boring" };
            TokenStream untouchedStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            AssertTokenStreamContents(untouchedStream, boringOnly);

            // Once the queried field itself is listed, the filter must apply to it.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT,
                appAnalyzer,
                reader,
                Arrays.AsList("variedField", "repetitiveField"),
                1f / 2f);
            string[] noTokens = new string[0];
            TokenStream filteredStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            AssertTokenStreamContents(filteredStream, noTokens);
        }
        /// <summary>
        /// Verifies the fractional-threshold constructor on "repetitiveField":
        /// at 1/2 "boring" is dropped while "vaguelyboring" passes; at 1/4
        /// "vaguelyboring" is dropped too.
        /// </summary>
        public virtual void TestStopwordsAllFieldsMaxPercentDocs()
        {
            string[] empty = new string[0];
            string[] keptTerm = { "vaguelyboring" };

            // Threshold one half: "boring" should be removed.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 2f);
            TokenStream halfBoring = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            AssertTokenStreamContents(halfBoring, empty);

            // Threshold one half: "vaguelyboring" should not be removed.
            TokenStream halfVaguely = protectedAnalyzer.TokenStream("repetitiveField", "vaguelyboring");
            AssertTokenStreamContents(halfVaguely, keptTerm);

            // Threshold one quarter: "vaguelyboring" should be removed.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 4f);
            TokenStream quarterVaguely = protectedAnalyzer.TokenStream("repetitiveField", "vaguelyboring");
            AssertTokenStreamContents(quarterVaguely, empty);
        }
 /// <summary>
 /// Constructs the analyzer without a field list or threshold and asserts
 /// that "boring" on "repetitiveField" produces an empty token stream.
 /// </summary>
 public virtual void TestDefaultStopwordsAllFields()
 {
     protectedAnalyzer = new QueryAutoStopWordAnalyzer(
         TEST_VERSION_CURRENT,
         appAnalyzer,
         reader);

     // "boring" is expected to be removed by the default stop word filtering.
     string[] expectedTokens = new string[0];
     TokenStream boringStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
     AssertTokenStreamContents(boringStream, expectedTokens);
 }
 /// <summary>
 /// Analyzes "this boring" on "repetitiveField" through a
 /// <c>QueryAutoStopWordAnalyzer</c> built over a whitespace <c>MockAnalyzer</c>
 /// and asserts that the only token emitted is "this".
 /// </summary>
 public virtual void TestTokenStream()
 {
     MockAnalyzer whitespaceAnalyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false);
     QueryAutoStopWordAnalyzer stopWordAnalyzer =
         new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, whitespaceAnalyzer, reader, 10);

     TokenStream tokens = stopWordAnalyzer.TokenStream("repetitiveField", "this boring");
     string[] expectedTokens = { "this" };
     AssertTokenStreamContents(tokens, expectedTokens);
 }
        /// <summary>
        /// With the analyzer configured for "repetitiveField" alone, asserts that
        /// "boring" is filtered on that field and left intact on "variedField".
        /// </summary>
        public virtual void TestNoFieldNamePollution()
        {
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.AsList("repetitiveField"), 10);

            // First confirm the filter was set up correctly on the listed field.
            TokenStream stream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            string[] expectedOnListedField = new string[0];
            AssertTokenStreamContents(stream, expectedOnListedField);

            // A stop word for one field must remain usable in a different field.
            stream = protectedAnalyzer.TokenStream("variedField", "boring");
            string[] expectedOnOtherField = { "boring" };
            AssertTokenStreamContents(stream, expectedOnOtherField);
        }
        /// <summary>
        /// Verifies field scoping with a 1/2 threshold: "boring" on
        /// "repetitiveField" is kept while only "variedField" is configured, and
        /// removed once "repetitiveField" is configured as well.
        /// </summary>
        public virtual void TestStopwordsPerFieldMaxPercentDocs()
        {
            // Only "variedField" is configured, so "repetitiveField" queries must be unaffected.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.AsList("variedField"), 1f / 2f);
            TokenStream stream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            string[] keptTokens = { "boring" };
            AssertTokenStreamContents(stream, keptTokens);

            // Both fields are configured, so the queried field must now be filtered.
            protectedAnalyzer = new QueryAutoStopWordAnalyzer(
                TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.AsList("variedField", "repetitiveField"), 1f / 2f);
            stream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
            string[] removedTokens = new string[0];
            AssertTokenStreamContents(stream, removedTokens);
        }
 /// <summary>
 /// Adds stop words from <c>reader</c> to an analyzer wrapping a
 /// <c>WhitespaceAnalyzer</c>, then asserts that "this boring" on
 /// "repetitiveField" yields exactly one token whose term is "this".
 /// </summary>
 public void TestTokenStream()
 {
     QueryAutoStopWordAnalyzer analyzer =
         new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer());
     analyzer.AddStopWords(reader, 10);

     StringReader queryText = new StringReader("this boring");
     TokenStream stream = analyzer.TokenStream("repetitiveField", queryText);
     ITermAttribute term = stream.GetAttribute<ITermAttribute>();

     // First advance must succeed and expose the term "this".
     bool hasFirstToken = stream.IncrementToken();
     Assert.True(hasFirstToken);
     Assert.AreEqual("this", term.Term);

     // Second advance must report that the stream is exhausted.
     bool hasSecondToken = stream.IncrementToken();
     Assert.False(hasSecondToken);
 }