This is an abstract class; subclasses must override IncrementToken().
NOTE: Subclasses that override IncrementToken() must call AttributeSource.ClearAttributes() before setting any attributes.
/// <summary>
/// Verifies that <c>HTMLStripCharFilter.CorrectOffset</c> maps token offsets
/// in the stripped stream back to their positions in the original markup,
/// and that token lengths are reported against the source text.
/// </summary>
public void IncrementsOffsetCorrectlyWithAnotherReader2()
{
    const string input = @"test1 <a href=""foo"">testlink</a> test2 test3";
    CharFilter filter = new HTMLStripCharFilter(CharReader.Get(new StringReader(input)));
    Tokenizer t = new Tokenizer(filter);

    // Corrected start offsets / lengths of each token in the ORIGINAL input:
    // "test1" @0, "testlink" @20 (after the <a href="foo"> tag), "test2" @33, "test3" @39.
    int[] expectedOffsets = { 0, 20, 33, 39 };
    int[] expectedLengths = { 5, 8, 5, 5 };

    string token;
    for (int i = 0; i < expectedOffsets.Length; i++)
    {
        t.NextToken(out token);
        Assert.Equal(expectedOffsets[i], filter.CorrectOffset(t.Offset));
        Assert.Equal(expectedLengths[i], t.LengthInSource);
    }
}
/// <summary>
/// Verifies token offsets reported by the tokenizer when reading through an
/// <c>HTMLStripCharFilter</c>: four "test" tokens at offsets 0, 5, 10, 15,
/// each 4 characters long in the source.
/// </summary>
public void IncrementsOffsetCorrectlyWithAnotherReader()
{
    int[] expectedOffsets = { 0, 5, 10, 15 };
    Tokenizer tokenizer = new Tokenizer(
        new HTMLStripCharFilter(
            CharReader.Get(new System.IO.StringReader(
                @"test<a href=""foo"">test</a>test test"))));

    string token = string.Empty;
    int index = 0;
    // Consume tokens until the tokenizer signals end-of-stream (type 0).
    for (Tokenizer.TokenType type = tokenizer.NextToken(out token);
         type != 0;
         type = tokenizer.NextToken(out token))
    {
        Assert.Equal(expectedOffsets[index++], tokenizer.Offset);
        Assert.Equal(4, tokenizer.LengthInSource);
    }
}
/// <summary>
/// Creates a new <seealso cref="TokenStreamComponents"/> instance in which the
/// tokenizer itself is also the resulting token stream (no filters applied).
/// </summary>
/// <param name="source"> the analyzer's tokenizer </param>
public TokenStreamComponents(Tokenizer source)
{
    // With no filter chain, the source doubles as the sink.
    Source = source;
    Sink = source;
}
/// <summary>
/// Creates a new <seealso cref="TokenStreamComponents"/> instance from a
/// tokenizer and the token stream produced by the analyzer's filter chain.
/// </summary>
/// <param name="source"> the analyzer's tokenizer </param>
/// <param name="result"> the analyzer's resulting token stream </param>
public TokenStreamComponents(Tokenizer source, TokenStream result)
{
    Source = source;
    Sink = result;
}
/// <summary>
/// Emits the wrapped stream's tokens and, for any token that has an entry in
/// <c>synonyms</c>, injects each comma-separated expansion as an extra token
/// at the same position (position increment 0) and with the same offsets.
/// </summary>
/// <returns><c>true</c> if a token was produced; <c>false</c> at end of stream.</returns>
public override bool IncrementToken()
{
    if (currentRealToken == null)
    {
        // Not replaying expansions: advance the underlying real stream.
        bool next = realStream.IncrementToken();
        if (!next)
        {
            return false;
        }
        //Token nextRealToken = new Token(, offsetAtt.startOffset(), offsetAtt.endOffset());
        // Per the TokenStream contract, clear before setting attributes.
        ClearAttributes();
        termAtt.SetTermBuffer(realTermAtt.Term);
        offsetAtt.SetOffset(realOffsetAtt.StartOffset, realOffsetAtt.EndOffset);
        posIncrAtt.PositionIncrement = realPosIncrAtt.PositionIncrement;
        // NOTE(review): assumes the indexer returns null for a missing key
        // (Java Map semantics); a plain Dictionary<,> indexer would throw —
        // confirm the declared type of `synonyms`.
        String expansions = synonyms[realTermAtt.Term];
        if (expansions == null)
        {
            return true;
        }
        // Queue the comma-separated expansions to be emitted on later calls.
        st = new Tokenizer(expansions, ",");
        if (st.HasMoreTokens())
        {
            // Capture the real token's offsets/term so each expansion can
            // be stacked at the same position.
            currentRealToken = new Token(realOffsetAtt.StartOffset, realOffsetAtt.EndOffset);
            currentRealToken.SetTermBuffer(realTermAtt.Term);
        }
        return true;
    }
    else
    {
        // Replay mode: emit the next queued expansion as a stacked token.
        String tok = st.NextToken();
        ClearAttributes();
        termAtt.SetTermBuffer(tok);
        offsetAtt.SetOffset(currentRealToken.StartOffset, currentRealToken.EndOffset);
        // Increment 0 places the synonym at the same position as the original.
        posIncrAtt.PositionIncrement = 0;
        if (!st.HasMoreTokens())
        {
            // Expansions exhausted; resume reading real tokens on the next call.
            currentRealToken = null;
            st = null;
        }
        return true;
    }
}
/// <summary>
/// Creates the filter over <paramref name="tokenizer"/> and registers the
/// attributes this filter reads and writes.
/// </summary>
/// <param name="outerInstance"> the enclosing anonymous analyzer instance </param>
/// <param name="tokenizer"> the upstream tokenizer to wrap </param>
public TokenFilterAnonymousInnerClassHelper(AnalyzerAnonymousInnerClassHelper2 outerInstance, Tokenizer tokenizer)
    : base(tokenizer)
{
    OuterInstance = outerInstance;
    first = true;

    // Register/cache attribute references once, in declaration order.
    termAtt = AddAttribute<ICharTermAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// Creates a new <see cref="TokenStreamComponents"/> instance in which the
/// tokenizer itself is also the resulting token stream (no filters applied).
/// </summary>
/// <param name="source"> the analyzer's tokenizer </param>
public TokenStreamComponents(Tokenizer source)
{
    // With no filter chain, the source doubles as the sink.
    this.m_source = source;
    this.m_sink = this.m_source;
}
/// <summary>
/// Creates a new <see cref="TokenStreamComponents"/> instance from a
/// tokenizer and the token stream produced by the analyzer's filter chain.
/// </summary>
/// <param name="source"> the analyzer's tokenizer </param>
/// <param name="result"> the analyzer's resulting token stream </param>
public TokenStreamComponents(Tokenizer source, TokenStream result)
{
    m_source = source;
    m_sink = result;
}