public virtual void TestMultipleSources()
{
    // Build the first tee over buffer1 with two sinks: one trapping "the"
    // tokens and one trapping "dog" tokens.
    TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.ToString()), MockTokenizer.WHITESPACE, false));
    TeeSinkTokenFilter.SinkTokenStream dogSink = tee1.NewSinkTokenStream(dogFilter);
    TeeSinkTokenFilter.SinkTokenStream theSink = tee1.NewSinkTokenStream(theFilter);
    tee1.Reset();
    TokenStream source1 = new CachingTokenFilter(tee1);

    tee1.AddAttribute<ICheckClearAttributesAttribute>();
    dogSink.AddAttribute<ICheckClearAttributesAttribute>();
    theSink.AddAttribute<ICheckClearAttributesAttribute>();

    // A second tee over buffer2 feeds the SAME sinks, so each sink ends up
    // accumulating matches from both sources.
    TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer2.ToString()), MockTokenizer.WHITESPACE, false));
    tee2.AddSinkTokenStream(dogSink);
    tee2.AddSinkTokenStream(theSink);
    TokenStream source2 = tee2;

    AssertTokenStreamContents(source1, tokens1);
    AssertTokenStreamContents(source2, tokens2);

    // Sinks should hold the filtered tokens from both input buffers.
    AssertTokenStreamContents(theSink, new string[] { "The", "the", "The", "the" });
    AssertTokenStreamContents(dogSink, new string[] { "Dogs", "Dogs" });

    // The cached source can be replayed: lower-case it and compare against a
    // lower-cased copy of the expected tokens (invariant culture, to keep the
    // comparison machine-stable).
    source1.Reset();
    TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1);
    string[] expectedLower = new string[tokens1.Length];
    for (int idx = 0; idx < tokens1.Length; idx++)
    {
        expectedLower[idx] = CultureInfo.InvariantCulture.TextInfo.ToLower(tokens1[idx]);
    }
    AssertTokenStreamContents(lowerCasing, expectedLower);
}
public virtual void Test()
{
    // Sink retains only tokens whose positions fall in the range [2, 4).
    TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4);
    string test = "The quick red fox jumped over the lazy brown dogs";
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
    TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.NewSinkTokenStream(sinkFilter);

    // Drain the main stream, counting every whitespace token it produces.
    int count = 0;
    tee.Reset();
    while (tee.IncrementToken())
    {
        count++;
    }

    // Drain the sink, counting how many tokens the range filter kept.
    int sinkCount = 0;
    rangeToks.Reset();
    while (rangeToks.IncrementToken())
    {
        sinkCount++;
    }

    // Ten words in the test sentence; two of them fall inside [2, 4).
    assertTrue(count + " does not equal: " + 10, count == 10);
    assertTrue("rangeToks Size: " + sinkCount + " is not: " + 2, sinkCount == 2);
}
public virtual void TestLooseDateFormat()
{
    // Sink retains only tokens that parse as dates under the invariant culture.
    DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(CultureInfo.InvariantCulture);
    string test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/2/2006";
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
    TeeSinkTokenFilter.SinkTokenStream sink = tee.NewSinkTokenStream(sinkFilter);

    // Drain the main stream first; all 18 whitespace tokens must come through.
    int count = 0;
    tee.Reset();
    while (tee.IncrementToken())
    {
        count++;
    }
    assertTrue(count + " does not equal: " + 18, count == 18);

    // The sink should have captured exactly the two date-like tokens.
    int sinkCount = 0;
    sink.Reset();
    while (sink.IncrementToken())
    {
        sinkCount++;
    }
    assertTrue("sink Size: " + sinkCount + " is not: " + 2, sinkCount == 2);
}
public virtual void Test()
{
    // Sink captures only tokens whose type attribute is "D" (assigned by
    // WordTokenFilter to the "dogs" token in this sentence).
    TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
    string test = "The quick red fox jumped over the lazy brown dogs";
    TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
    TeeSinkTokenFilter.SinkTokenStream sink = ttf.NewSinkTokenStream(sinkFilter);

    bool seenDogs = false;
    ICharTermAttribute termAtt = ttf.AddAttribute<ICharTermAttribute>();
    ITypeAttribute typeAtt = ttf.AddAttribute<ITypeAttribute>();
    ttf.Reset();
    while (ttf.IncrementToken())
    {
        if (termAtt.ToString().Equals("dogs", StringComparison.Ordinal))
        {
            // The "dogs" token must carry the special "D" type.
            seenDogs = true;
            assertTrue(typeAtt.Type + " is not equal to " + "D", typeAtt.Type.Equals("D", StringComparison.Ordinal));
        }
        else
        {
            // BUGFIX: the previous message claimed a null check ("is not null
            // and it should be") but the condition actually verifies the type
            // is the default "word"; the message now matches the assertion.
            assertTrue(typeAtt.Type + " is not equal to " + "word", typeAtt.Type.Equals("word", StringComparison.Ordinal));
        }
    }
    assertTrue(seenDogs + " does not equal: " + true, seenDogs);

    // The sink should have retained exactly the single "D"-typed token.
    int sinkCount = 0;
    sink.Reset();
    while (sink.IncrementToken())
    {
        sinkCount++;
    }
    assertTrue("sink Size: " + sinkCount + " is not: " + 1, sinkCount == 1);
}