public TokenStream ( String fieldName, TextReader reader ) : TokenStream | ||
fieldName | String | |
reader | TextReader | |
return | TokenStream |
/// <summary>
/// Builds the analyzer with an empty list of fields and checks that nothing is
/// filtered: "quick" on <c>variedField</c> and "boring" on <c>repetitiveField</c>
/// are both expected to come back unchanged.
/// </summary>
public virtual void TestNoStopwords()
{
    // Note: an empty list of fields passed in
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Enumerable.Empty<string>(), 1);

    TokenStream protectedTokenStream = protectedAnalyzer.TokenStream("variedField", "quick");
    AssertTokenStreamContents(protectedTokenStream, new string[] { "quick" });

    protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
    AssertTokenStreamContents(protectedTokenStream, new string[] { "boring" });
}
/// <summary>
/// Builds the analyzer with an empty list of fields and checks that nothing is
/// filtered: "quick" on <c>variedField</c> and "boring" on <c>repetitiveField</c>
/// are both expected to come back unchanged.
/// </summary>
public virtual void TestNoStopwords()
{
    // Note: an empty list of fields passed in
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Enumerable.Empty<string>(), 1);

    TokenStream protectedTokenStream = protectedAnalyzer.TokenStream("variedField", "quick");
    AssertTokenStreamContents(protectedTokenStream, new string[] { "quick" });

    protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
    AssertTokenStreamContents(protectedTokenStream, new string[] { "boring" });
}
/// <summary>
/// Builds the analyzer for <c>repetitiveField</c> only and checks that "boring" is
/// removed on that field but still passes through on <c>variedField</c>.
/// </summary>
public virtual void TestNoFieldNamePollution()
{
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.AsList("repetitiveField"), 10);

    TokenStream protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
    // Check filter set up OK
    AssertTokenStreamContents(protectedTokenStream, new string[0]);

    protectedTokenStream = protectedAnalyzer.TokenStream("variedField", "boring");
    // Filter should not prevent stopwords in one field being used in another
    AssertTokenStreamContents(protectedTokenStream, new string[] { "boring" });
}
/// <summary>
/// Builds the analyzer with no field list and no explicit threshold, then checks
/// that "boring" on <c>repetitiveField</c> yields no tokens.
/// </summary>
public virtual void TestDefaultStopwordsAllFields()
{
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader);

    TokenStream protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");

    // Default stop word filtering will remove boring
    AssertTokenStreamContents(protectedTokenStream, new string[0]);
}
/// <summary>
/// Wraps a whitespace <c>MockAnalyzer</c> in a <c>QueryAutoStopWordAnalyzer</c>,
/// analyzes "this boring" on <c>repetitiveField</c>, and expects only "this" to remain.
/// </summary>
public virtual void TestTokenStream()
{
    MockAnalyzer whitespaceAnalyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false);
    QueryAutoStopWordAnalyzer stopWordAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, whitespaceAnalyzer, reader, 10);

    TokenStream stream = stopWordAnalyzer.TokenStream("repetitiveField", "this boring");
    string[] expectedTerms = new string[] { "this" };

    AssertTokenStreamContents(stream, expectedTerms);
}
/// <summary>
/// Exercises the all-fields constructor with a fractional threshold. At 1/2,
/// "boring" is expected to be removed while "vaguelyboring" survives; at 1/4,
/// "vaguelyboring" is expected to be removed as well.
/// </summary>
public virtual void TestStopwordsAllFieldsMaxPercentDocs()
{
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 2f);

    TokenStream protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
    // A filter on terms in > one half of docs remove boring
    AssertTokenStreamContents(protectedTokenStream, new string[0]);

    protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "vaguelyboring");
    // A filter on terms in > half of docs should not remove vaguelyBoring
    AssertTokenStreamContents(protectedTokenStream, new string[] { "vaguelyboring" });

    // Rebuild with a lower threshold so the less frequent term is caught too.
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 4f);

    protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "vaguelyboring");
    // A filter on terms in > quarter of docs should remove vaguelyBoring
    AssertTokenStreamContents(protectedTokenStream, new string[0]);
}
/// <summary>
/// Exercises the per-field constructor with a 1/2 threshold. When only
/// <c>variedField</c> is listed, "boring" on <c>repetitiveField</c> is expected to
/// pass through; once <c>repetitiveField</c> is listed too, it is expected to be removed.
/// </summary>
public virtual void TestStopwordsPerFieldMaxPercentDocs()
{
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.AsList("variedField"), 1f / 2f);

    TokenStream protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
    // A filter on one Field should not affect queries on another
    AssertTokenStreamContents(protectedTokenStream, new string[] { "boring" });

    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.AsList("variedField", "repetitiveField"), 1f / 2f);

    protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
    // A filter on the right Field should affect queries on it
    AssertTokenStreamContents(protectedTokenStream, new string[0]);
}
/// <summary>
/// Exercises the all-fields constructor with a fractional threshold. At 1/2,
/// "boring" is expected to be removed while "vaguelyboring" survives; at 1/4,
/// "vaguelyboring" is expected to be removed as well.
/// </summary>
public virtual void TestStopwordsAllFieldsMaxPercentDocs()
{
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 2f);

    TokenStream protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
    // A filter on terms in > one half of docs remove boring
    AssertTokenStreamContents(protectedTokenStream, new string[0]);

    protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "vaguelyboring");
    // A filter on terms in > half of docs should not remove vaguelyBoring
    AssertTokenStreamContents(protectedTokenStream, new string[] { "vaguelyboring" });

    // Rebuild with a lower threshold so the less frequent term is caught too.
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 4f);

    protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "vaguelyboring");
    // A filter on terms in > quarter of docs should remove vaguelyBoring
    AssertTokenStreamContents(protectedTokenStream, new string[0]);
}
/// <summary>
/// Builds the analyzer with no field list and no explicit threshold, then checks
/// that "boring" on <c>repetitiveField</c> yields no tokens.
/// </summary>
public virtual void TestDefaultStopwordsAllFields()
{
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader);

    TokenStream protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");

    // Default stop word filtering will remove boring
    AssertTokenStreamContents(protectedTokenStream, new string[0]);
}
/// <summary>
/// Wraps a whitespace <c>MockAnalyzer</c> in a <c>QueryAutoStopWordAnalyzer</c>,
/// analyzes "this boring" on <c>repetitiveField</c>, and expects only "this" to remain.
/// </summary>
public virtual void TestTokenStream()
{
    MockAnalyzer whitespaceAnalyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false);
    QueryAutoStopWordAnalyzer stopWordAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, whitespaceAnalyzer, reader, 10);

    TokenStream stream = stopWordAnalyzer.TokenStream("repetitiveField", "this boring");
    string[] expectedTerms = new string[] { "this" };

    AssertTokenStreamContents(stream, expectedTerms);
}
/// <summary>
/// Builds the analyzer for <c>repetitiveField</c> only and checks that "boring" is
/// removed on that field but still passes through on <c>variedField</c>.
/// </summary>
public virtual void TestNoFieldNamePollution()
{
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.AsList("repetitiveField"), 10);

    TokenStream protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
    // Check filter set up OK
    AssertTokenStreamContents(protectedTokenStream, new string[0]);

    protectedTokenStream = protectedAnalyzer.TokenStream("variedField", "boring");
    // Filter should not prevent stopwords in one field being used in another
    AssertTokenStreamContents(protectedTokenStream, new string[] { "boring" });
}
/// <summary>
/// Exercises the per-field constructor with a 1/2 threshold. When only
/// <c>variedField</c> is listed, "boring" on <c>repetitiveField</c> is expected to
/// pass through; once <c>repetitiveField</c> is listed too, it is expected to be removed.
/// </summary>
public virtual void TestStopwordsPerFieldMaxPercentDocs()
{
    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.AsList("variedField"), 1f / 2f);

    TokenStream protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
    // A filter on one Field should not affect queries on another
    AssertTokenStreamContents(protectedTokenStream, new string[] { "boring" });

    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.AsList("variedField", "repetitiveField"), 1f / 2f);

    protectedTokenStream = protectedAnalyzer.TokenStream("repetitiveField", "boring");
    // A filter on the right Field should affect queries on it
    AssertTokenStreamContents(protectedTokenStream, new string[0]);
}
/// <summary>
/// Wraps a <c>WhitespaceAnalyzer</c>, registers stop words from the shared reader via
/// <c>AddStopWords(reader, 10)</c>, then analyzes "this boring" on <c>repetitiveField</c>
/// and expects exactly one token, "this".
/// </summary>
public void TestTokenStream()
{
    QueryAutoStopWordAnalyzer analyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer());
    analyzer.AddStopWords(reader, 10);

    StringReader input = new StringReader("this boring");
    TokenStream stream = analyzer.TokenStream("repetitiveField", input);
    ITermAttribute term = stream.GetAttribute<ITermAttribute>();

    // First token must be "this"; after it the stream must be exhausted.
    Assert.True(stream.IncrementToken());
    Assert.AreEqual("this", term.Term);
    Assert.False(stream.IncrementToken());
}