/// <summary>
/// Builds the analysis chain for a field: a <c>MockTokenizer</c> in
/// <c>WHITESPACE</c> mode wrapped by a <c>CommonGramsFilter</c> that uses
/// <c>commonWords</c>.
/// </summary>
/// <param name="fieldName"> Name of the field being analyzed (not used here). </param>
/// <param name="reader"> Source of the text to tokenize. </param>
/// <returns> Components pairing the tokenizer with the common-grams filter. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(
        tokenizer,
        new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords));
}
/// <summary>
/// Runs a mixed-case sentence through a whitespace <c>MockTokenizer</c> and a
/// <c>CommonGramsFilter</c> and checks the exact emitted token sequence.
/// In the expected output the upper-case "A" and "B" appear only as plain
/// unigrams, with no "A_B" or "B_thing?" bigram.
/// </summary>
public virtual void TestCaseSensitive()
{
    const string input = "How The s a brown s cow d like A B thing?";

    string[] expectedTokens =
    {
        "How", "The", "The_s", "s", "s_a", "a", "a_brown", "brown", "brown_s", "s",
        "s_cow", "cow", "cow_d", "d", "d_like", "like", "A", "B", "thing?"
    };

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    TokenFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);

    AssertTokenStreamContents(commonGrams, expectedTokens);
}
/// <summary>
/// Feeds the two-word input "the of" through a <c>CommonGramsFilter</c> followed
/// by a <c>CommonGramsQueryFilter</c> and checks that the only token produced
/// is the bigram "the_of".
/// </summary>
public virtual void TestFirstAndLastStopWord()
{
    const string input = "the of";
    string[] expectedTokens = { "the_of" };

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    CommonGramsFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    TokenFilter queryFilter = new CommonGramsQueryFilter(commonGrams);

    AssertTokenStreamContents(queryFilter, expectedTokens);
}
/// <summary>
/// Exercises two analyzers with <c>CheckRandomData</c>, each for
/// <c>1000 * RandomMultiplier</c> iterations: first a chain ending in
/// <c>CommonGramsFilter</c>, then the same chain wrapped in
/// <c>CommonGramsQueryFilter</c>.
/// </summary>
public virtual void TestRandomStrings()
{
    // Chain 1: whitespace tokenizer -> CommonGramsFilter.
    Analyzer commonGramsAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        CommonGramsFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
        return new TokenStreamComponents(tokenizer, commonGrams);
    });
    CheckRandomData(Random, commonGramsAnalyzer, 1000 * RandomMultiplier);

    // Chain 2: whitespace tokenizer -> CommonGramsFilter -> CommonGramsQueryFilter.
    Analyzer queryAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        CommonGramsFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
        return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(commonGrams));
    });
    CheckRandomData(Random, queryAnalyzer, 1000 * RandomMultiplier);
}
/// <summary>
/// Checks that a <c>CommonGramsQueryFilter</c> chain can be reused: after reading
/// two tokens, the chain is disposed, the tokenizer is given a fresh reader over
/// the same input, and the first token read after <c>Reset()</c> is "How_the" again.
/// </summary>
public virtual void TestQueryReset()
{
    const string input = "How the s a brown s cow d like A B thing?";

    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    CommonGramsQueryFilter queryFilter = new CommonGramsQueryFilter(commonGrams);
    ICharTermAttribute term = tokenizer.AddAttribute<ICharTermAttribute>();

    // First pass: consume the first two tokens.
    queryFilter.Reset();
    assertTrue(queryFilter.IncrementToken());
    assertEquals("How_the", term.ToString());
    assertTrue(queryFilter.IncrementToken());
    assertEquals("the_s", term.ToString());
    queryFilter.Dispose();

    // Second pass: new reader over the same text, then reset the chain.
    tokenizer.Reader = new StringReader(input);
    queryFilter.Reset();
    assertTrue(queryFilter.IncrementToken());
    assertEquals("How_the", term.ToString());
}
/// <summary>
/// Checks that a <c>CommonGramsQueryFilter</c> chain can be reused: after reading
/// two tokens, the chain is disposed, the tokenizer is handed a fresh reader via
/// <c>SetReader</c>, and the first token read after <c>Reset()</c> is "How_the" again.
/// </summary>
public virtual void TestQueryReset()
{
    const string input = "How the s a brown s cow d like A B thing?";

    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    CommonGramsQueryFilter queryFilter = new CommonGramsQueryFilter(commonGrams);
    ICharTermAttribute term = tokenizer.AddAttribute<ICharTermAttribute>();

    // First pass: consume the first two tokens.
    queryFilter.Reset();
    assertTrue(queryFilter.IncrementToken());
    assertEquals("How_the", term.ToString());
    assertTrue(queryFilter.IncrementToken());
    assertEquals("the_s", term.ToString());
    queryFilter.Dispose();

    // Second pass: new reader over the same text, then reset the chain.
    tokenizer.SetReader(new StringReader(input));
    queryFilter.Reset();
    assertTrue(queryFilter.IncrementToken());
    assertEquals("How_the", term.ToString());
}
/// <summary>
/// Wraps <paramref name="input"/> in a <c>CommonGramsFilter</c> built with
/// <c>m_luceneMatchVersion</c> and <c>commonWords</c>.
/// </summary>
/// <param name="input"> Token stream to wrap. </param>
/// <returns> The new <c>CommonGramsFilter</c>. </returns>
public override TokenStream Create(TokenStream input)
{
    return new CommonGramsFilter(m_luceneMatchVersion, input, commonWords);
}
/// <summary>
/// Creates a CommonGramsQueryFilter on top of the given CommonGramsFilter and
/// registers the type and position-increment attributes this filter stores
/// in <c>typeAttribute</c> and <c>posIncAttribute</c>.
/// </summary>
/// <param name="input"> The CommonGramsFilter whose output this query filter consumes. </param>
public CommonGramsQueryFilter(CommonGramsFilter input)
    : base(input)
{
    typeAttribute = AddAttribute<ITypeAttribute>();
    posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// Builds the analysis chain for a field: a <c>MockTokenizer</c> in
/// <c>WHITESPACE</c> mode, then a <c>CommonGramsFilter</c> using
/// <c>commonWords</c>, then a <c>CommonGramsQueryFilter</c> as the final stage.
/// </summary>
/// <param name="fieldName"> Name of the field being analyzed (not used here). </param>
/// <param name="reader"> Source of the text to tokenize. </param>
/// <returns> Components pairing the tokenizer with the query filter. </returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    CommonGramsFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    CommonGramsQueryFilter queryFilter = new CommonGramsQueryFilter(commonGrams);

    return new TokenStreamComponents(tokenizer, queryFilter);
}
/// <summary>
/// Creates a CommonGramsQueryFilter on top of the given CommonGramsFilter.
/// The constructor does nothing beyond passing <paramref name="input"/> to the
/// base class.
/// </summary>
/// <param name="input"> The CommonGramsFilter whose output this query filter consumes. </param>
public CommonGramsQueryFilter(CommonGramsFilter input)
    : base(input)
{
}
/// <summary>
/// Wraps <paramref name="input"/> in a <c>CommonGramsFilter</c> built with
/// <c>luceneMatchVersion</c> and <c>commonWords</c>.
/// </summary>
/// <param name="input"> Token stream to wrap. </param>
/// <returns> The new <c>CommonGramsFilter</c>. </returns>
public override TokenStream Create(TokenStream input)
{
    return new CommonGramsFilter(luceneMatchVersion, input, commonWords);
}
/// <summary>
/// Creates a CommonGramsQueryFilter on top of the given CommonGramsFilter and
/// registers the type and position-increment attributes this filter stores
/// in <c>typeAttribute</c> and <c>posIncAttribute</c>.
/// </summary>
/// <param name="input"> The CommonGramsFilter whose output this query filter consumes. </param>
public CommonGramsQueryFilter(CommonGramsFilter input)
    : base(input)
{
    typeAttribute = AddAttribute<ITypeAttribute>();
    posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// Builds the analysis chain for a field: a <c>MockTokenizer</c> reading from a
/// <c>CheckThatYouDidntReadAnythingReaderWrapper</c> around <paramref name="reader"/>,
/// followed by a <c>CommonGramsFilter</c> that uses <c>cas</c>.
/// </summary>
/// <param name="fieldName"> Name of the field being analyzed (not used here). </param>
/// <param name="reader"> Source of the text to tokenize. </param>
/// <returns> Components pairing the tokenizer with the common-grams filter. </returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    TextReader wrappedReader = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader);

    // NOTE(review): the tokenizer is given MockTokenFilter.ENGLISH_STOPSET and -65
    // where sibling code passes MockTokenizer.WHITESPACE; this looks like a
    // deliberately unusual configuration for a regression test — confirm before changing.
    Tokenizer tokenizer = new MockTokenizer(wrappedReader, MockTokenFilter.ENGLISH_STOPSET, false, -65);
    TokenFilter filter = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, cas);

    return new TokenStreamComponents(tokenizer, filter);
}