/// <summary>
/// Creates the filter over <paramref name="input"/>, registering the payload
/// and term attributes on it and starting the position counter at zero.
/// </summary>
/// <param name="input">The upstream token stream handed to the base class.</param>
public SimplePayloadFilter(TokenStream input)
    : base(input)
{
    // Attributes are registered in the same order as before: payload, then term.
    PayloadAttr = input.AddAttribute<IPayloadAttribute>();
    TermAttr = input.AddAttribute<ICharTermAttribute>();

    Pos = 0;
}
/// <summary>
/// Creates the filter over <paramref name="input"/> for the given field,
/// registering the position-increment, payload and term attributes on the
/// input stream and zeroing the two counters.
/// </summary>
/// <param name="input">The upstream token stream handed to the base class.</param>
/// <param name="fieldName">Name of the field; stored for later use by the filter.</param>
public PayloadFilter(TokenStream input, System.String fieldName)
    : base(input)
{
    // Registration order is unchanged: position increment, payload, term.
    posIncrAttr = input.AddAttribute<IPositionIncrementAttribute>();
    payloadAttr = input.AddAttribute<IPayloadAttribute>();
    termAttr = input.AddAttribute<ITermAttribute>();

    this.fieldName = fieldName;
    pos = 0;
    i = 0;
}
/// <summary>
/// Creates the mock filter over <paramref name="input"/> for the given field,
/// registering the position-increment, payload and term attributes on the
/// input stream and zeroing the two counters.
/// </summary>
/// <param name="input">The upstream token stream handed to the base class.</param>
/// <param name="fieldName">Name of the field; stored on the filter.</param>
public MockPayloadFilter(TokenStream input, string fieldName)
    : base(input)
{
    // Registration order is unchanged: position increment, payload, term.
    PosIncrAttr = input.AddAttribute<IPositionIncrementAttribute>();
    PayloadAttr = input.AddAttribute<IPayloadAttribute>();
    TermAttr = input.AddAttribute<ICharTermAttribute>();

    this.FieldName = fieldName;
    Pos = 0;
    i = 0;
}
/// <summary>
/// Tokenizes the text supplied by <paramref name="r"/> and records, per
/// non-noise term, how often it occurred in <paramref name="termFreqMap"/>.
/// </summary>
/// <param name="r">Source of the text to be tokenized.</param>
/// <param name="termFreqMap">
/// Map from term text to an <c>Int</c> counter. A term seen for the first time
/// gets a freshly constructed <c>Int</c>; later sightings increment its <c>x</c> field.
/// </param>
/// <param name="fieldName">Passed to the analyzer so it can apply per-field analysis.</param>
protected void AddTermFrequencies(System.IO.TextReader r, System.Collections.IDictionary termFreqMap, System.String fieldName)
{
    TokenStream stream = analyzer.TokenStream(fieldName, r);
    var termAttribute = stream.AddAttribute<ITermAttribute>();

    // "seen" is the 1-based index of the current token, noise words included.
    for (int seen = 1; stream.IncrementToken(); seen++)
    {
        System.String word = termAttribute.Term;

        // Stop once the configured parse budget is exceeded.
        if (seen > maxNumTokensParsed)
        {
            break;
        }

        if (IsNoiseWord(word))
        {
            continue;
        }

        // Bump the existing counter, or start one for a new term.
        Int counter = (Int)termFreqMap[word];
        if (counter != null)
        {
            counter.x++;
        }
        else
        {
            termFreqMap[word] = new Int();
        }
    }
}
/// <summary>
/// Consumes <paramref name="ts"/> and asserts that it produces exactly the
/// terms in <paramref name="output"/>, optionally also checking offsets,
/// types, position increments and the final offset reported after <c>End()</c>.
/// The stream is closed at the end of a successful run.
/// </summary>
/// <param name="ts">The stream under test; it is reset, fully consumed, ended and closed.</param>
/// <param name="output">Expected term texts, in order. Must not be null.</param>
/// <param name="startOffsets">Expected start offsets per token, or null to skip the check.</param>
/// <param name="endOffsets">Expected end offsets per token, or null to skip the check.</param>
/// <param name="types">Expected token types per token, or null to skip the check.</param>
/// <param name="posIncrements">Expected position increments per token, or null to skip the check.</param>
/// <param name="finalOffset">Expected end offset after <c>End()</c>, or null to skip the check.</param>
public static void AssertTokenStreamContents(TokenStream ts, System.String[] output, int[] startOffsets, int[] endOffsets, System.String[] types, int[] posIncrements, int? finalOffset)
{
    Assert.IsNotNull(output);
    ICheckClearAttributesAttribute clearProbe = ts.AddAttribute<ICheckClearAttributesAttribute>();

    // The term attribute is mandatory; the others are looked up only when needed.
    Assert.IsTrue(ts.HasAttribute<ITermAttribute>(), "has no TermAttribute");
    ITermAttribute termAttribute = ts.GetAttribute<ITermAttribute>();

    bool wantsOffsets = startOffsets != null || endOffsets != null || finalOffset != null;
    IOffsetAttribute offsetAttribute = null;
    if (wantsOffsets)
    {
        Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute");
        offsetAttribute = ts.GetAttribute<IOffsetAttribute>();
    }

    ITypeAttribute typeAttribute = null;
    if (types != null)
    {
        Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute");
        typeAttribute = ts.GetAttribute<ITypeAttribute>();
    }

    IPositionIncrementAttribute posIncrAttribute = null;
    if (posIncrements != null)
    {
        Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
        posIncrAttribute = ts.GetAttribute<IPositionIncrementAttribute>();
    }

    ts.Reset();
    for (int tokenIndex = 0; tokenIndex < output.Length; tokenIndex++)
    {
        // Poison every attribute with bogus values so that state leaking from a
        // previous token cannot accidentally satisfy the assertions below.
        ts.ClearAttributes();
        termAttribute.SetTermBuffer("bogusTerm");
        if (offsetAttribute != null)
        {
            offsetAttribute.SetOffset(14584724, 24683243);
        }
        if (typeAttribute != null)
        {
            typeAttribute.Type = "bogusType";
        }
        if (posIncrAttribute != null)
        {
            posIncrAttribute.PositionIncrement = 45987657;
        }

        // Our own ClearAttributes() call above set the flag; clear it before advancing.
        clearProbe.GetAndResetClearCalled();
        Assert.IsTrue(ts.IncrementToken(), "token " + tokenIndex + " does not exist");
        Assert.IsTrue(clearProbe.GetAndResetClearCalled(), "clearAttributes() was not called correctly in TokenStream chain");

        Assert.AreEqual(output[tokenIndex], termAttribute.Term, "term " + tokenIndex);
        if (startOffsets != null)
        {
            Assert.AreEqual(startOffsets[tokenIndex], offsetAttribute.StartOffset, "startOffset " + tokenIndex);
        }
        if (endOffsets != null)
        {
            Assert.AreEqual(endOffsets[tokenIndex], offsetAttribute.EndOffset, "endOffset " + tokenIndex);
        }
        if (types != null)
        {
            Assert.AreEqual(types[tokenIndex], typeAttribute.Type, "type " + tokenIndex);
        }
        if (posIncrements != null)
        {
            Assert.AreEqual(posIncrements[tokenIndex], posIncrAttribute.PositionIncrement, "posIncrement " + tokenIndex);
        }
    }

    Assert.IsFalse(ts.IncrementToken(), "end of stream");
    ts.End();
    if (finalOffset.HasValue)
    {
        Assert.AreEqual(finalOffset, offsetAttribute.EndOffset, "finalOffset ");
    }
    ts.Close();
}
/// <summary>
/// Creates the converter. If inputText is non-null, and the TokenStream has
/// offsets, the surface form is included in each arc's label.
/// </summary>
/// <param name="inputText">Original text the stream was produced from; may be null.</param>
/// <param name="in">Token stream to read from.</param>
/// <param name="out">Writer that receives the output.</param>
public TokenStreamToDot(string inputText, TokenStream @in, TextWriter @out)
{
    this.InputText = inputText;
    this.@in = @in;
    this.@out = @out;

    TermAtt = @in.AddAttribute<ICharTermAttribute>();
    PosIncAtt = @in.AddAttribute<IPositionIncrementAttribute>();
    PosLengthAtt = @in.AddAttribute<IPositionLengthAttribute>();

    // Only pick up the offset attribute when the stream already carries one;
    // otherwise leave it null.
    OffsetAtt = @in.HasAttribute<IOffsetAttribute>()
        ? @in.AddAttribute<IOffsetAttribute>()
        : null;
}
/// <summary>
/// Writes every term produced by <paramref name="stream"/> to the console,
/// one per line, formatted as <c>[term] </c>.
/// </summary>
/// <param name="stream">The token stream to consume.</param>
private static void DisplayTokens(TokenStream stream)
{
    var termAttribute = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));

    for (bool hasToken = stream.IncrementToken(); hasToken; hasToken = stream.IncrementToken())
    {
        Console.WriteLine("[{0}] ", termAttribute.Term());
    }
}
/// <summary>
/// Consumes <paramref name="stream"/> and, when the <c>LuceneV303</c> symbol is
/// defined, traces every term as <c>[term] </c>.
/// </summary>
/// <remarks>
/// Adapted from "Lucene in Action". The attribute is requested through the
/// <c>ITermAttribute</c> interface, as the other Lucene.Net 3.0.3-style code
/// does; the original asked for the concrete <c>TermAttribute</c> class, which
/// is presumably the run-time error the earlier comment here was puzzling over.
/// </remarks>
/// <param name="stream">The token stream to consume.</param>
public static void DisplayTokens(TokenStream stream)
{
    var term = stream.AddAttribute<ITermAttribute>();

    while (stream.IncrementToken())
    {
#if LuceneV303
        Trace.WriteLine("[" + term.Term + "] ");
#endif
    }
}
/// <summary>
/// Verifies that a <c>StopAnalyzer</c> built from a custom stop-word table
/// drops those words and reports a position increment of 1 for every
/// remaining token.
/// </summary>
public virtual void TestStopList()
{
    System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
    foreach (string stopWord in new string[] { "good", "test", "analyzer" })
    {
        // Each word is stored as both key and value.
        stopWordsSet.Add(stopWord, stopWord);
    }

    StopAnalyzer customAnalyzer = new StopAnalyzer(stopWordsSet);
    System.IO.StringReader input = new System.IO.StringReader("This is a good test of the english stop analyzer");
    TokenStream tokens = customAnalyzer.TokenStream("test", input);
    Assert.IsNotNull(tokens);

    TermAttribute termAttribute = (TermAttribute)tokens.GetAttribute(typeof(TermAttribute));
    PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute)tokens.AddAttribute(typeof(PositionIncrementAttribute));

    while (tokens.IncrementToken())
    {
        System.String text = termAttribute.Term();
        Assert.IsFalse(stopWordsSet.Contains(text));
        // by default stop tokenizer does not apply increments.
        Assert.AreEqual(1, posIncrAttribute.GetPositionIncrement());
    }
}
/// <summary>
/// Verifies that a <c>StopAnalyzer</c> created for <c>Version.LUCENE_24</c>
/// with a custom stop-word set drops those words and reports a position
/// increment of 1 for every remaining token.
/// </summary>
public virtual void TestStopList()
{
    var stopWordsSet = Support.Compatibility.SetFactory.CreateHashSet<string>();
    foreach (string stopWord in new string[] { "good", "test", "analyzer" })
    {
        stopWordsSet.Add(stopWord);
    }

    StopAnalyzer customAnalyzer = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
    System.IO.StringReader input = new System.IO.StringReader("This is a good test of the english stop analyzer");
    TokenStream tokens = customAnalyzer.TokenStream("test", input);
    Assert.IsNotNull(tokens);

    ITermAttribute termAttribute = tokens.GetAttribute<ITermAttribute>();
    IPositionIncrementAttribute posIncrAttribute = tokens.AddAttribute<IPositionIncrementAttribute>();

    while (tokens.IncrementToken())
    {
        System.String text = termAttribute.Term;
        Assert.IsFalse(stopWordsSet.Contains(text));
        // in 2.4 stop tokenizer does not apply increments.
        Assert.AreEqual(1, posIncrAttribute.PositionIncrement);
    }
}
/// <summary>
/// For every (term, expected bytes) pair in <c>Map</c>, analyzes the term text
/// and asserts that the stream yields exactly one token whose bytes equal the
/// expected value.
/// </summary>
/// <remarks>
/// The term text itself is fed to the analyzer through a <see cref="StringReader"/>.
/// The previous <c>new StreamReader(term)</c> interpreted the term as a file
/// path and tried to open that file instead of reading the text.
/// An <see cref="IOException"/> still propagates to the caller unchanged; the
/// former outer <c>catch</c> only re-threw the same object through a no-op
/// cast (which reset its stack trace), so it has been removed.
/// </remarks>
public override void Run()
{
    foreach (KeyValuePair<string, BytesRef> mapping in Map)
    {
        string term = mapping.Key;
        BytesRef expected = mapping.Value;
        IOException priorException = null;
        TokenStream ts = Analyzer.TokenStream("fake", new StringReader(term));
        try
        {
            ITermToBytesRefAttribute termAtt = ts.AddAttribute<ITermToBytesRefAttribute>();
            BytesRef bytes = termAtt.BytesRef;
            ts.Reset();
            Assert.IsTrue(ts.IncrementToken());
            termAtt.FillBytesRef();
            Assert.AreEqual(expected, bytes);
            Assert.IsFalse(ts.IncrementToken());
            ts.End();
        }
        catch (IOException e)
        {
            // Remember the failure so the close helper can take it into account.
            priorException = e;
        }
        finally
        {
            IOUtils.CloseWhileHandlingException(priorException, ts);
        }
    }
}
/// <summary>
/// For every (term, expected bytes) pair in the map, analyzes the term text
/// and asserts that the stream yields exactly one token whose bytes equal the
/// expected value. I/O failures are re-thrown via <c>RuntimeException.Create</c>.
/// </summary>
public override void Run()
{
    try
    {
        foreach (var entry in this.map)
        {
            string text = entry.Key;
            BytesRef expectedBytes = entry.Value;
            Exception pendingFailure = null; // LUCENENET: No need to cast to IOExcpetion
            TokenStream stream = this.analyzer.GetTokenStream("fake", new StringReader(text));
            try
            {
                ITermToBytesRefAttribute bytesAttribute = stream.AddAttribute<ITermToBytesRefAttribute>();
                BytesRef actualBytes = bytesAttribute.BytesRef;

                stream.Reset();
                Assert.IsTrue(stream.IncrementToken());
                bytesAttribute.FillBytesRef();
                Assert.AreEqual(expectedBytes, actualBytes);
                Assert.IsFalse(stream.IncrementToken());
                stream.End();
            }
            catch (Exception ioFailure) when (ioFailure.IsIOException())
            {
                // Remember the failure so the dispose helper can take it into account.
                pendingFailure = ioFailure;
            }
            finally
            {
                IOUtils.DisposeWhileHandlingException(pendingFailure, stream);
            }
        }
    }
    catch (Exception ioFailure) when (ioFailure.IsIOException())
    {
        throw RuntimeException.Create(ioFailure);
    }
}
/// <summary>
/// Creates a filter over two streams. <paramref name="suffix"/> is passed to
/// the base class as the filter's input; <paramref name="prefix"/> is kept
/// separately. The same six attributes (term, position increment, payload,
/// offset, type, flags) are registered on this filter and on the prefix stream.
/// </summary>
/// <param name="prefix">Stream stored as <c>Prefix</c>.</param>
/// <param name="suffix">Stream stored as <c>Suffix</c> and used as the base input.</param>
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
    : base(suffix)
{
    _prefixExhausted = false;
    Prefix = prefix;
    Suffix = suffix;

    // Attributes of this filter.
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    _termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    _posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    _payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
    _offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    _typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
    _flagsAtt = (FlagsAttribute)AddAttribute(typeof(FlagsAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor

    // Matching attributes on the prefix stream.
    _pTermAtt = (TermAttribute)prefix.AddAttribute(typeof(TermAttribute));
    _pPosIncrAtt = (PositionIncrementAttribute)prefix.AddAttribute(typeof(PositionIncrementAttribute));
    _pPayloadAtt = (PayloadAttribute)prefix.AddAttribute(typeof(PayloadAttribute));
    _pOffsetAtt = (OffsetAttribute)prefix.AddAttribute(typeof(OffsetAttribute));
    _pTypeAtt = (TypeAttribute)prefix.AddAttribute(typeof(TypeAttribute));
    _pFlagsAtt = (FlagsAttribute)prefix.AddAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Advances <paramref name="ts"/> by one token and asserts its term text,
/// position increment, boost (decoded from the payload, or 1 when there is
/// no payload) and offsets.
/// </summary>
/// <param name="ts">Stream to advance.</param>
/// <param name="text">Expected term text.</param>
/// <param name="positionIncrement">Expected position increment.</param>
/// <param name="boost">Expected boost value.</param>
/// <param name="startOffset">Expected start offset.</param>
/// <param name="endOffset">Expected end offset.</param>
private static void AssertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset)
{
    var term = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));
    var posIncr = (PositionIncrementAttribute)ts.AddAttribute(typeof(PositionIncrementAttribute));
    var payloadAttribute = (PayloadAttribute)ts.AddAttribute(typeof(PayloadAttribute));
    var offsets = (OffsetAttribute)ts.AddAttribute(typeof(OffsetAttribute));

    Assert.IsTrue(ts.IncrementToken());
    Assert.AreEqual(text, term.Term());
    Assert.AreEqual(positionIncrement, posIncr.GetPositionIncrement());

    // A token without payload counts as boost 1.
    var payload = payloadAttribute.GetPayload();
    float actualBoost;
    if (payload == null)
    {
        actualBoost = 1f;
    }
    else
    {
        actualBoost = PayloadHelper.DecodeFloat(payload.GetData());
    }
    Assert.AreEqual(boost, actualBoost, 0);

    Assert.AreEqual(startOffset, offsets.StartOffset());
    Assert.AreEqual(endOffset, offsets.EndOffset());
}
/// <summary>
/// Consumes <paramref name="ts"/> and asserts that it produces exactly the
/// terms in <paramref name="output"/>, optionally also checking offsets,
/// types, position increments and the final offset reported after <c>End()</c>.
/// The stream is closed at the end of a successful run.
/// </summary>
/// <param name="ts">The stream under test; it is reset, fully consumed, ended and closed.</param>
/// <param name="output">Expected term texts, in order. Must not be null.</param>
/// <param name="startOffsets">Expected start offsets per token, or null to skip the check.</param>
/// <param name="endOffsets">Expected end offsets per token, or null to skip the check.</param>
/// <param name="types">Expected token types per token, or null to skip the check.</param>
/// <param name="posIncrements">Expected position increments per token, or null to skip the check.</param>
/// <param name="finalOffset">Expected end offset after <c>End()</c>, or null to skip the check.</param>
public static void AssertTokenStreamContents(TokenStream ts, System.String[] output, int[] startOffsets, int[] endOffsets, System.String[] types, int[] posIncrements, int? finalOffset)
{
    Assert.IsNotNull(output);
    CheckClearAttributesAttribute clearProbe = (CheckClearAttributesAttribute)ts.AddAttribute(typeof(CheckClearAttributesAttribute));

    // The term attribute is mandatory; the others are looked up only when needed.
    Assert.IsTrue(ts.HasAttribute(typeof(TermAttribute)), "has no TermAttribute");
    TermAttribute termAttribute = (TermAttribute)ts.GetAttribute(typeof(TermAttribute));

    bool wantsOffsets = startOffsets != null || endOffsets != null || finalOffset != null;
    OffsetAttribute offsetAttribute = null;
    if (wantsOffsets)
    {
        Assert.IsTrue(ts.HasAttribute(typeof(OffsetAttribute)), "has no OffsetAttribute");
        offsetAttribute = (OffsetAttribute)ts.GetAttribute(typeof(OffsetAttribute));
    }

    TypeAttribute typeAttribute = null;
    if (types != null)
    {
        Assert.IsTrue(ts.HasAttribute(typeof(TypeAttribute)), "has no TypeAttribute");
        typeAttribute = (TypeAttribute)ts.GetAttribute(typeof(TypeAttribute));
    }

    PositionIncrementAttribute posIncrAttribute = null;
    if (posIncrements != null)
    {
        Assert.IsTrue(ts.HasAttribute(typeof(PositionIncrementAttribute)), "has no PositionIncrementAttribute");
        posIncrAttribute = (PositionIncrementAttribute)ts.GetAttribute(typeof(PositionIncrementAttribute));
    }

    ts.Reset();
    for (int tokenIndex = 0; tokenIndex < output.Length; tokenIndex++)
    {
        // Poison every attribute with bogus values so that state leaking from a
        // previous token cannot accidentally satisfy the assertions below.
        ts.ClearAttributes();
        termAttribute.SetTermBuffer("bogusTerm");
        if (offsetAttribute != null)
        {
            offsetAttribute.SetOffset(14584724, 24683243);
        }
        if (typeAttribute != null)
        {
            typeAttribute.SetType("bogusType");
        }
        if (posIncrAttribute != null)
        {
            posIncrAttribute.SetPositionIncrement(45987657);
        }

        // Our own ClearAttributes() call above set the flag; clear it before advancing.
        clearProbe.GetAndResetClearCalled();
        Assert.IsTrue(ts.IncrementToken(), "token " + tokenIndex + " does not exist");
        Assert.IsTrue(clearProbe.GetAndResetClearCalled(), "clearAttributes() was not called correctly in TokenStream chain");

        Assert.AreEqual(output[tokenIndex], termAttribute.Term(), "term " + tokenIndex);
        if (startOffsets != null)
        {
            Assert.AreEqual(startOffsets[tokenIndex], offsetAttribute.StartOffset(), "startOffset " + tokenIndex);
        }
        if (endOffsets != null)
        {
            Assert.AreEqual(endOffsets[tokenIndex], offsetAttribute.EndOffset(), "endOffset " + tokenIndex);
        }
        if (types != null)
        {
            Assert.AreEqual(types[tokenIndex], typeAttribute.Type(), "type " + tokenIndex);
        }
        if (posIncrements != null)
        {
            Assert.AreEqual(posIncrements[tokenIndex], posIncrAttribute.GetPositionIncrement(), "posIncrement " + tokenIndex);
        }
    }

    Assert.IsFalse(ts.IncrementToken(), "end of stream");
    ts.End();
    if (finalOffset.HasValue)
    {
        Assert.AreEqual(finalOffset, offsetAttribute.EndOffset(), "finalOffset ");
    }
    ts.Close();
}
/// <summary>
/// Consumes <paramref name="ts"/> and asserts that it produces exactly the
/// terms in <paramref name="output"/>, optionally checking offsets, types,
/// position increments, position lengths, keyword flags, and the values
/// reported after <c>End()</c>. The stream is disposed on success and on failure.
/// </summary>
/// <remarks>
/// offsetsAreCorrect also validates:
/// - graph offsets are correct (all tokens leaving from pos X have the same
///   startOffset; all tokens arriving to pos Y have the same endOffset)
/// - offsets only move forwards (startOffset >= lastStartOffset)
/// </remarks>
/// <param name="ts">The stream under test.</param>
/// <param name="output">Expected term texts, in order. Must not be null.</param>
/// <param name="startOffsets">Expected start offsets per token, or null to skip.</param>
/// <param name="endOffsets">Expected end offsets per token, or null to skip.</param>
/// <param name="types">Expected token types per token, or null to skip.</param>
/// <param name="posIncrements">Expected position increments per token, or null to skip.</param>
/// <param name="posLengths">Expected position lengths per token, or null to skip.</param>
/// <param name="finalOffset">Expected end offset after <c>End()</c>, or null to skip.</param>
/// <param name="finalPosInc">Expected position increment after <c>End()</c>, or null to skip.</param>
/// <param name="keywordAtts">Expected keyword flags per token, or null to skip.</param>
/// <param name="offsetsAreCorrect">Enables the additional offset validations described in the remarks.</param>
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect)
{
    // LUCENENET: Bug fix: NUnit throws an exception when something fails.
    // This causes Dispose() to be skipped and it pollutes other tests indicating false negatives.
    // Added this try-catch block to fix this.
    try
    {
        Assert.IsNotNull(output);
        var checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>();

        ICharTermAttribute termAtt = null;
        if (output.Length > 0)
        {
            Assert.IsTrue(ts.HasAttribute<ICharTermAttribute>(), "has no CharTermAttribute");
            termAtt = ts.GetAttribute<ICharTermAttribute>();
        }

        IOffsetAttribute offsetAtt = null;
        if (startOffsets != null || endOffsets != null || finalOffset != null)
        {
            Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute");
            offsetAtt = ts.GetAttribute<IOffsetAttribute>();
        }

        ITypeAttribute typeAtt = null;
        if (types != null)
        {
            Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute");
            typeAtt = ts.GetAttribute<ITypeAttribute>();
        }

        IPositionIncrementAttribute posIncrAtt = null;
        if (posIncrements != null || finalPosInc != null)
        {
            Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
            posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>();
        }

        IPositionLengthAttribute posLengthAtt = null;
        if (posLengths != null)
        {
            Assert.IsTrue(ts.HasAttribute<IPositionLengthAttribute>(), "has no PositionLengthAttribute");
            posLengthAtt = ts.GetAttribute<IPositionLengthAttribute>();
        }

        IKeywordAttribute keywordAtt = null;
        if (keywordAtts != null)
        {
            Assert.IsTrue(ts.HasAttribute<IKeywordAttribute>(), "has no KeywordAttribute");
            keywordAtt = ts.GetAttribute<IKeywordAttribute>();
        }

        // Maps position to the start/end offset:
        IDictionary<int?, int?> posToStartOffset = new Dictionary<int?, int?>();
        IDictionary<int?, int?> posToEndOffset = new Dictionary<int?, int?>();

        ts.Reset();
        int pos = -1;
        int lastStartOffset = 0;
        for (int i = 0; i < output.Length; i++)
        {
            // extra safety to enforce, that the state is not preserved and also assign bogus values
            ts.ClearAttributes();
            termAtt.SetEmpty().Append("bogusTerm");
            if (offsetAtt != null)
            {
                offsetAtt.SetOffset(14584724, 24683243);
            }
            if (typeAtt != null)
            {
                typeAtt.Type = "bogusType";
            }
            if (posIncrAtt != null)
            {
                posIncrAtt.PositionIncrement = 45987657;
            }
            if (posLengthAtt != null)
            {
                posLengthAtt.PositionLength = 45987653;
            }
            if (keywordAtt != null)
            {
                keywordAtt.Keyword = (i & 1) == 0;
            }

            // Read once to reset the flag, because we called ClearAttributes() ourselves above.
            bool clearedByHarness = checkClearAtt.AndResetClearCalled;

            Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");

            // The flag must be read AFTER IncrementToken(): asserting the value captured
            // before the call (as this method used to) always passes and never detects a
            // chain that forgot to call ClearAttributes(). This mirrors the End() check below.
            Assert.IsTrue(checkClearAtt.AndResetClearCalled, "ClearAttributes() was not called correctly in TokenStream chain");

            Assert.AreEqual(output[i], termAtt.ToString(), "term " + i + ", output[i] = " + output[i] + ", termAtt = " + termAtt.ToString());
            if (startOffsets != null)
            {
                Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset(), "startOffset " + i);
            }
            if (endOffsets != null)
            {
                Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset(), "endOffset " + i);
            }
            if (types != null)
            {
                Assert.AreEqual(types[i], typeAtt.Type, "type " + i);
            }
            if (posIncrements != null)
            {
                Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i);
            }
            if (posLengths != null)
            {
                Assert.AreEqual(posLengths[i], posLengthAtt.PositionLength, "posLength " + i);
            }
            if (keywordAtts != null)
            {
                Assert.AreEqual(keywordAtts[i], keywordAtt.Keyword, "keywordAtt " + i);
            }

            // we can enforce some basic things about a few attributes even if the caller doesn't check:
            if (offsetAtt != null)
            {
                int startOffset = offsetAtt.StartOffset();
                int endOffset = offsetAtt.EndOffset();
                if (finalOffset != null)
                {
                    Assert.IsTrue(startOffset <= (int)finalOffset, "startOffset must be <= finalOffset");
                    Assert.IsTrue(endOffset <= (int)finalOffset, "endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + (int)finalOffset);
                }

                if (offsetsAreCorrect)
                {
                    Assert.IsTrue(offsetAtt.StartOffset() >= lastStartOffset, "offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
                    lastStartOffset = offsetAtt.StartOffset();
                }

                if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null)
                {
                    // Validate offset consistency in the graph, ie
                    // all tokens leaving from a certain pos have the
                    // same startOffset, and all tokens arriving to a
                    // certain pos have the same endOffset:
                    int posInc = posIncrAtt.PositionIncrement;
                    pos += posInc;

                    int posLength = posLengthAtt.PositionLength;

                    if (!posToStartOffset.ContainsKey(pos))
                    {
                        // First time we've seen a token leaving from this position:
                        posToStartOffset[pos] = startOffset;
                    }
                    else
                    {
                        // We've seen a token leaving from this position
                        // before; verify the startOffset is the same:
                        Assert.AreEqual((int)posToStartOffset[pos], startOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                    }

                    int endPos = pos + posLength;

                    if (!posToEndOffset.ContainsKey(endPos))
                    {
                        // First time we've seen a token arriving to this position:
                        posToEndOffset[endPos] = endOffset;
                    }
                    else
                    {
                        // We've seen a token arriving to this position
                        // before; verify the endOffset is the same:
                        Assert.AreEqual((int)posToEndOffset[endPos], endOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                    }
                }
            }

            if (posIncrAtt != null)
            {
                if (i == 0)
                {
                    Assert.IsTrue(posIncrAtt.PositionIncrement >= 1, "first posIncrement must be >= 1");
                }
                else
                {
                    Assert.IsTrue(posIncrAtt.PositionIncrement >= 0, "posIncrement must be >= 0");
                }
            }
            if (posLengthAtt != null)
            {
                Assert.IsTrue(posLengthAtt.PositionLength >= 1, "posLength must be >= 1");
            }
        }

        if (ts.IncrementToken())
        {
            Assert.Fail("TokenStream has more tokens than expected (expected count=" + output.Length + "); extra token=" + termAtt);
        }

        // repeat our extra safety checks for End()
        ts.ClearAttributes();
        if (termAtt != null)
        {
            termAtt.SetEmpty().Append("bogusTerm");
        }
        if (offsetAtt != null)
        {
            offsetAtt.SetOffset(14584724, 24683243);
        }
        if (typeAtt != null)
        {
            typeAtt.Type = "bogusType";
        }
        if (posIncrAtt != null)
        {
            posIncrAtt.PositionIncrement = 45987657;
        }
        if (posLengthAtt != null)
        {
            posLengthAtt.PositionLength = 45987653;
        }

        // Read once to reset the flag, because we called ClearAttributes() ourselves above.
        var clearedBeforeEnd = checkClearAtt.AndResetClearCalled;

        ts.End();
        Assert.IsTrue(checkClearAtt.AndResetClearCalled, "super.End()/ClearAttributes() was not called correctly in End()");

        if (finalOffset != null)
        {
            Assert.AreEqual((int)finalOffset, offsetAtt.EndOffset(), "finalOffset");
        }
        if (offsetAtt != null)
        {
            Assert.IsTrue(offsetAtt.EndOffset() >= 0, "finalOffset must be >= 0");
        }
        if (finalPosInc != null)
        {
            Assert.AreEqual((int)finalPosInc, posIncrAtt.PositionIncrement, "finalPosInc");
        }

        ts.Dispose();
    }
    catch (Exception)
    {
        // Bring the stream to a finished state and release it before
        // letting the original failure propagate.
        //ts.Reset();
        ts.ClearAttributes();
        ts.End();
        ts.Dispose();
        throw;
    }
}
/// <summary>
/// Consumes <paramref name="ts"/> and asserts that it produces exactly the
/// terms in <paramref name="output"/>, optionally checking offsets, types,
/// position increments, position lengths, keyword flags, and the values
/// reported after <c>End()</c>. The stream is disposed at the end of a
/// successful run.
/// </summary>
/// <remarks>
/// offsetsAreCorrect also validates:
/// - graph offsets are correct (all tokens leaving from pos X have the same
///   startOffset; all tokens arriving to pos Y have the same endOffset)
/// - offsets only move forwards (startOffset >= lastStartOffset)
/// </remarks>
/// <param name="ts">The stream under test.</param>
/// <param name="output">Expected term texts, in order. Must not be null.</param>
/// <param name="startOffsets">Expected start offsets per token, or null to skip.</param>
/// <param name="endOffsets">Expected end offsets per token, or null to skip.</param>
/// <param name="types">Expected token types per token, or null to skip.</param>
/// <param name="posIncrements">Expected position increments per token, or null to skip.</param>
/// <param name="posLengths">Expected position lengths per token, or null to skip.</param>
/// <param name="finalOffset">Expected end offset after <c>End()</c>, or null to skip.</param>
/// <param name="finalPosInc">Expected position increment after <c>End()</c>, or null to skip.</param>
/// <param name="keywordAtts">Expected keyword flags per token, or null to skip.</param>
/// <param name="offsetsAreCorrect">Enables the additional offset validations described in the remarks.</param>
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect)
{
    Assert.IsNotNull(output);
    var checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>();

    ICharTermAttribute termAtt = null;
    if (output.Length > 0)
    {
        Assert.IsTrue(ts.HasAttribute<ICharTermAttribute>(), "has no CharTermAttribute");
        termAtt = ts.GetAttribute<ICharTermAttribute>();
    }

    IOffsetAttribute offsetAtt = null;
    if (startOffsets != null || endOffsets != null || finalOffset != null)
    {
        Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute");
        offsetAtt = ts.GetAttribute<IOffsetAttribute>();
    }

    ITypeAttribute typeAtt = null;
    if (types != null)
    {
        Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute");
        typeAtt = ts.GetAttribute<ITypeAttribute>();
    }

    IPositionIncrementAttribute posIncrAtt = null;
    if (posIncrements != null || finalPosInc != null)
    {
        Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
        posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>();
    }

    IPositionLengthAttribute posLengthAtt = null;
    if (posLengths != null)
    {
        Assert.IsTrue(ts.HasAttribute<IPositionLengthAttribute>(), "has no PositionLengthAttribute");
        posLengthAtt = ts.GetAttribute<IPositionLengthAttribute>();
    }

    IKeywordAttribute keywordAtt = null;
    if (keywordAtts != null)
    {
        Assert.IsTrue(ts.HasAttribute<IKeywordAttribute>(), "has no KeywordAttribute");
        keywordAtt = ts.GetAttribute<IKeywordAttribute>();
    }

    // Maps position to the start/end offset:
    IDictionary<int?, int?> posToStartOffset = new Dictionary<int?, int?>();
    IDictionary<int?, int?> posToEndOffset = new Dictionary<int?, int?>();

    ts.Reset();
    int pos = -1;
    int lastStartOffset = 0;
    for (int i = 0; i < output.Length; i++)
    {
        // extra safety to enforce, that the state is not preserved and also assign bogus values
        ts.ClearAttributes();
        termAtt.SetEmpty().Append("bogusTerm");
        if (offsetAtt != null)
        {
            offsetAtt.SetOffset(14584724, 24683243);
        }
        if (typeAtt != null)
        {
            typeAtt.Type = "bogusType";
        }
        if (posIncrAtt != null)
        {
            posIncrAtt.PositionIncrement = 45987657;
        }
        if (posLengthAtt != null)
        {
            posLengthAtt.PositionLength = 45987653;
        }
        if (keywordAtt != null)
        {
            keywordAtt.Keyword = (i & 1) == 0;
        }

        // Read once to reset the flag, because we called ClearAttributes() ourselves above.
        bool clearedByHarness = checkClearAtt.AndResetClearCalled;

        Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");

        // The flag must be read AFTER IncrementToken(): asserting the value captured
        // before the call (as this method used to) always passes and never detects a
        // chain that forgot to call ClearAttributes(). This mirrors the End() check below.
        Assert.IsTrue(checkClearAtt.AndResetClearCalled, "ClearAttributes() was not called correctly in TokenStream chain");

        Assert.AreEqual(output[i], termAtt.ToString(), "term " + i + ", output[i] = " + output[i] + ", termAtt = " + termAtt.ToString());
        if (startOffsets != null)
        {
            Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset(), "startOffset " + i);
        }
        if (endOffsets != null)
        {
            Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset(), "endOffset " + i);
        }
        if (types != null)
        {
            Assert.AreEqual(types[i], typeAtt.Type, "type " + i);
        }
        if (posIncrements != null)
        {
            Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i);
        }
        if (posLengths != null)
        {
            Assert.AreEqual(posLengths[i], posLengthAtt.PositionLength, "posLength " + i);
        }
        if (keywordAtts != null)
        {
            Assert.AreEqual(keywordAtts[i], keywordAtt.Keyword, "keywordAtt " + i);
        }

        // we can enforce some basic things about a few attributes even if the caller doesn't check:
        if (offsetAtt != null)
        {
            int startOffset = offsetAtt.StartOffset();
            int endOffset = offsetAtt.EndOffset();
            if (finalOffset != null)
            {
                Assert.IsTrue(startOffset <= (int)finalOffset, "startOffset must be <= finalOffset");
                Assert.IsTrue(endOffset <= (int)finalOffset, "endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + (int)finalOffset);
            }

            if (offsetsAreCorrect)
            {
                Assert.IsTrue(offsetAtt.StartOffset() >= lastStartOffset, "offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
                lastStartOffset = offsetAtt.StartOffset();
            }

            if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null)
            {
                // Validate offset consistency in the graph, ie
                // all tokens leaving from a certain pos have the
                // same startOffset, and all tokens arriving to a
                // certain pos have the same endOffset:
                int posInc = posIncrAtt.PositionIncrement;
                pos += posInc;

                int posLength = posLengthAtt.PositionLength;

                if (!posToStartOffset.ContainsKey(pos))
                {
                    // First time we've seen a token leaving from this position:
                    posToStartOffset[pos] = startOffset;
                }
                else
                {
                    // We've seen a token leaving from this position
                    // before; verify the startOffset is the same:
                    Assert.AreEqual((int)posToStartOffset[pos], startOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                }

                int endPos = pos + posLength;

                if (!posToEndOffset.ContainsKey(endPos))
                {
                    // First time we've seen a token arriving to this position:
                    posToEndOffset[endPos] = endOffset;
                }
                else
                {
                    // We've seen a token arriving to this position
                    // before; verify the endOffset is the same:
                    Assert.AreEqual((int)posToEndOffset[endPos], endOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                }
            }
        }

        if (posIncrAtt != null)
        {
            if (i == 0)
            {
                Assert.IsTrue(posIncrAtt.PositionIncrement >= 1, "first posIncrement must be >= 1");
            }
            else
            {
                Assert.IsTrue(posIncrAtt.PositionIncrement >= 0, "posIncrement must be >= 0");
            }
        }
        if (posLengthAtt != null)
        {
            Assert.IsTrue(posLengthAtt.PositionLength >= 1, "posLength must be >= 1");
        }
    }

    if (ts.IncrementToken())
    {
        Assert.Fail("TokenStream has more tokens than expected (expected count=" + output.Length + "); extra token=" + termAtt);
    }

    // repeat our extra safety checks for End()
    ts.ClearAttributes();
    if (termAtt != null)
    {
        termAtt.SetEmpty().Append("bogusTerm");
    }
    if (offsetAtt != null)
    {
        offsetAtt.SetOffset(14584724, 24683243);
    }
    if (typeAtt != null)
    {
        typeAtt.Type = "bogusType";
    }
    if (posIncrAtt != null)
    {
        posIncrAtt.PositionIncrement = 45987657;
    }
    if (posLengthAtt != null)
    {
        posLengthAtt.PositionLength = 45987653;
    }

    // Read once to reset the flag, because we called ClearAttributes() ourselves above.
    var clearedBeforeEnd = checkClearAtt.AndResetClearCalled;

    ts.End();
    Assert.IsTrue(checkClearAtt.AndResetClearCalled, "super.End()/ClearAttributes() was not called correctly in End()");

    if (finalOffset != null)
    {
        Assert.AreEqual((int)finalOffset, offsetAtt.EndOffset(), "finalOffset");
    }
    if (offsetAtt != null)
    {
        Assert.IsTrue(offsetAtt.EndOffset() >= 0, "finalOffset must be >= 0");
    }
    if (finalPosInc != null)
    {
        Assert.AreEqual((int)finalPosInc, posIncrAtt.PositionIncrement, "finalPosInc");
    }

    ts.Dispose();
}
/// <summary>
/// Advances <paramref name="ts"/> by one token and asserts its term text and offsets.
/// </summary>
/// <param name="ts">Stream to advance.</param>
/// <param name="text">Expected term text.</param>
/// <param name="startOffset">Expected start offset.</param>
/// <param name="endOffset">Expected end offset.</param>
private static void AssertNext(TokenStream ts, String text, int startOffset, int endOffset)
{
    var term = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));
    var offsets = (OffsetAttribute)ts.AddAttribute(typeof(OffsetAttribute));

    bool advanced = ts.IncrementToken();
    Assert.IsTrue(advanced);

    Assert.AreEqual(text, term.Term());
    Assert.AreEqual(startOffset, offsets.StartOffset());
    Assert.AreEqual(endOffset, offsets.EndOffset());
}
/// <summary>
/// Prepares the scorer for a new token stream: resets the position, grabs the
/// term and position-increment attributes, and, unless extractor
/// initialization is being skipped, clears any existing weighted span terms
/// and delegates to <c>InitExtractor</c>.
/// </summary>
/// <param name="tokenStream">The stream about to be scored.</param>
/// <returns>
/// The result of <c>InitExtractor</c>, or null when extractor initialization is skipped.
/// </returns>
/// <seealso cref="IScorer.Init"/>
public TokenStream Init(TokenStream tokenStream)
{
    position = -1;
    termAtt = tokenStream.AddAttribute<ITermAttribute>();
    posIncAtt = tokenStream.AddAttribute<IPositionIncrementAttribute>();

    if (skipInitExtractor)
    {
        return null;
    }

    if (fieldWeightedSpanTerms != null)
    {
        fieldWeightedSpanTerms.Clear();
    }
    return InitExtractor(tokenStream);
}
/// <summary>
/// Resets and consumes <paramref name="stream"/>, asserting that each term
/// matches the corresponding entry of <c>results</c> and that only the term
/// "tokenstream" carries the single-byte proper-noun payload.
/// </summary>
/// <param name="stream">The stream to consume.</param>
private static void ConsumeStreamNewAPI(TokenStream stream)
{
    stream.Reset();
    PayloadAttribute payloadAttribute = (PayloadAttribute)stream.AddAttribute(typeof(PayloadAttribute));
    TermAttribute termAttribute = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));

    for (int index = 0; stream.IncrementToken(); index++)
    {
        System.String term = termAttribute.Term();
        Payload payload = payloadAttribute.GetPayload();

        // A proper noun is marked by a payload of exactly one byte holding the annotation value.
        bool markedProperNoun = payload != null
            && payload.GetData().Length == 1
            && payload.GetData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION;

        if (!markedProperNoun)
        {
            Assert.IsFalse("tokenstream".Equals(term), "all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)");
        }
        else
        {
            Assert.IsTrue("tokenstream".Equals(term), "only TokenStream is a proper noun");
        }

        Assert.AreEqual(results[index], term);
    }
}
/// <summary>
/// Creates a shingle filter based on a user defined matrix.
///
/// The filter /will/ delete columns from the input matrix! You will not be able
/// to reset the filter if you used this constructor.
/// todo: don't touch the matrix! use a bool, set the input stream to null or
/// something, and keep track of where in the matrix we are at.
/// </summary>
/// <param name="matrix">
/// The input base for creating shingles. Does not need to contain any
/// information until ShingleMatrixFilter.Next(Token) is called the first time.
/// </param>
/// <param name="minimumShingleSize">Minimum number of tokens in any shingle.</param>
/// <param name="maximumShingleSize">Maximum number of tokens in any shingle.</param>
/// <param name="spacerCharacter">
/// Character to use between the texts of the token parts in a shingle.
/// NOTE(review): earlier documentation said "null for none", but the parameter
/// is declared as a non-nullable <c>Char</c> here; confirm the intended type.
/// </param>
/// <param name="ignoringSinglePrefixOrSuffixShingle">
/// If true, shingles that only contain a permutation of the first or the last
/// column will not be produced as shingles. Useful when adding boundary marker
/// tokens such as '^' and '$'.
/// </param>
/// <param name="settingsCodec">Codec used to read input token weight and matrix positioning.</param>
public ShingleMatrixFilter(Matrix.Matrix matrix, int minimumShingleSize, int maximumShingleSize, Char spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec)
{
    _settingsCodec = settingsCodec;
    Matrix = matrix;
    MinimumShingleSize = minimumShingleSize;
    MaximumShingleSize = maximumShingleSize;
    SpacerCharacter = spacerCharacter;
    IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;

    // Attributes of this filter.
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    _termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    _posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    _payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
    _offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    _typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
    _flagsAtt = (FlagsAttribute)AddAttribute(typeof(FlagsAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor

    // set the input to be an empty token stream, we already have the data.
    _input = new EmptyTokenStream();

    // Matching attributes on the (empty) input stream.
    _inTermAtt = (TermAttribute)_input.AddAttribute(typeof(TermAttribute));
    _inPosIncrAtt = (PositionIncrementAttribute)_input.AddAttribute(typeof(PositionIncrementAttribute));
    _inPayloadAtt = (PayloadAttribute)_input.AddAttribute(typeof(PayloadAttribute));
    _inOffsetAtt = (OffsetAttribute)_input.AddAttribute(typeof(OffsetAttribute));
    _inTypeAtt = (TypeAttribute)_input.AddAttribute(typeof(TypeAttribute));
    _inFlagsAtt = (FlagsAttribute)_input.AddAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Resets the per-text state of this fragmenter and captures the term,
/// position-increment and offset attributes of the supplied token stream.
/// </summary>
/// <param name="originalText">the text about to be fragmented; only its length is recorded here.</param>
/// <param name="tokenStream">the stream whose attributes are registered and remembered.</param>
/// <seealso cref="IFragmenter.Start(string, TokenStream)"/>
public void Start(String originalText, TokenStream tokenStream)
{
    // Reset counters for the new text.
    position = -1;
    currentNumFrags = 1;
    textSize = originalText.Length;

    // Register (or look up) the attributes on the stream, in this order.
    termAtt = tokenStream.AddAttribute<ITermAttribute>();
    posIncAtt = tokenStream.AddAttribute<IPositionIncrementAttribute>();
    offsetAtt = tokenStream.AddAttribute<IOffsetAttribute>();
}
/// <summary>
/// Advances <paramref name="ts"/> by one token and asserts its term text,
/// position increment, payload-encoded boost and start/end offsets.
/// </summary>
/// <param name="ts">the stream to advance.</param>
/// <param name="text">expected term text.</param>
/// <param name="positionIncrement">expected position increment.</param>
/// <param name="boost">expected boost; a token without payload is compared against 1.</param>
/// <param name="startOffset">expected start offset.</param>
/// <param name="endOffset">expected end offset.</param>
private static void AssertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset)
{
    var termAtt = ts.AddAttribute<ITermAttribute>();
    var posIncrAtt = ts.AddAttribute<IPositionIncrementAttribute>();
    var payloadAtt = ts.AddAttribute<IPayloadAttribute>();
    var offsetAtt = ts.AddAttribute<IOffsetAttribute>();

    bool hasToken = ts.IncrementToken();
    Assert.IsTrue(hasToken);

    Assert.AreEqual(text, termAtt.Term);
    Assert.AreEqual(positionIncrement, posIncrAtt.PositionIncrement);

    // The boost is stored in the payload as an encoded float; no payload means 1.
    var actualBoost = payloadAtt.Payload != null
        ? PayloadHelper.DecodeFloat(payloadAtt.Payload.GetData())
        : 1f;
    Assert.AreEqual(boost, actualBoost, 0);

    Assert.AreEqual(startOffset, offsetAtt.StartOffset);
    Assert.AreEqual(endOffset, offsetAtt.EndOffset);
}
/// <summary>
/// Wraps <paramref name="realStream"/> together with a synonym lookup table.
/// Term, position-increment and offset attributes are registered both on the
/// wrapped stream and on this tokenizer itself.
/// </summary>
/// <param name="realStream">the underlying token stream.</param>
/// <param name="synonyms">the synonym dictionary stored for later use by this tokenizer.</param>
public SynonymTokenizer(TokenStream realStream, IDictionary<string, string> synonyms)
{
    this.realStream = realStream;
    this.synonyms = synonyms;

    // Attributes of the wrapped stream.
    realTermAtt = realStream.AddAttribute<ITermAttribute>();
    realPosIncrAtt = realStream.AddAttribute<IPositionIncrementAttribute>();
    realOffsetAtt = realStream.AddAttribute<IOffsetAttribute>();

    // Attributes of this tokenizer.
    termAtt = AddAttribute<ITermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
}
/// <summary>
/// Captures the offset attribute of <paramref name="stream"/> and resets the
/// fragment counter to 1.
/// Counterpart of <c>org.apache.lucene.search.highlight.Fragmenter#start(String, TokenStream)</c>.
/// </summary>
/// <param name="originalText">not used by this implementation.</param>
/// <param name="stream">the stream whose offset attribute is registered and remembered.</param>
public void Start(String originalText, TokenStream stream)
{
    offsetAtt = stream.AddAttribute<IOffsetAttribute>();
    currentNumFrags = 1;
}
/// <summary>
/// Creates an empty group (<c>NumTokens</c> is 0) bound to the offset and term
/// attributes of <paramref name="tokenStream"/>.
/// </summary>
/// <param name="tokenStream">the stream whose attributes are registered and remembered.</param>
public TokenGroup(TokenStream tokenStream)
{
    NumTokens = 0;

    offsetAtt = tokenStream.AddAttribute<IOffsetAttribute>();
    termAtt = tokenStream.AddAttribute<ITermAttribute>();
}
/// <summary>
/// Creates a payload filter over <paramref name="input"/> for the given field.
/// Both counters start at 0, and the position-increment, payload and term
/// attributes are registered on the input stream.
/// </summary>
/// <param name="input">the token stream being filtered.</param>
/// <param name="fieldName">name of the field this filter is attached to.</param>
public PayloadFilter(TokenStream input, System.String fieldName)
    : base(input)
{
    this.fieldName = fieldName;

    // Counters start from zero.
    pos = 0;
    i = 0;

    // Attribute registration order is kept as in the original.
    posIncrAttr = input.AddAttribute<IPositionIncrementAttribute>();
    payloadAttr = input.AddAttribute<IPayloadAttribute>();
    termAttr = input.AddAttribute<ITermAttribute>();
}
/// <summary>
/// Creates a lemmatization filter over <paramref name="input"/> with a fresh
/// <c>PersianLemmatizer</c> and captures the term attribute of the input stream.
/// </summary>
/// <param name="input">the token stream being filtered.</param>
public PersianLemmatizationFilter(TokenStream input)
    : base(input)
{
    lemmatizer = new PersianLemmatizer();

    // NOTE(review): the attribute is requested through the ITermAttribute interface but
    // stored as the concrete TermAttribute, so this cast throws if the stream supplies a
    // different implementation.
    _termAtt = (TermAttribute)input.AddAttribute<ITermAttribute>();
}
/// <summary>
/// Captures the term attribute of <paramref name="tokenStream"/>.
/// Counterpart of <c>org.apache.lucene.search.highlight.Scorer#init(TokenStream)</c>.
/// </summary>
/// <param name="tokenStream">the stream whose term attribute is registered and remembered.</param>
/// <returns>
/// Always <c>null</c>.
/// NOTE(review): presumably null tells the caller to keep using the stream it passed in - confirm against the scorer contract.
/// </returns>
public TokenStream Init(TokenStream tokenStream)
{
    termAtt = tokenStream.AddAttribute<ITermAttribute>();
    return null;
}
/// <summary>
/// Not an explicit test, just useful to print out some info on performance.
///
/// For each token count, builds a text of upper-cased English numbers, checks that a
/// tee/sink pair yields the same tokens as a directly filtered stream, and then, for each
/// modulo count, times 20 rounds of "two separately analyzed fields" against 20 rounds of
/// "one field with one sink". The summed position increments of both variants must match.
/// Elapsed times are measured with <see cref="System.Diagnostics.Stopwatch"/> (a monotonic timer)
/// instead of wall-clock <c>DateTime.Now</c>, and are written to the console in milliseconds.
/// </summary>
/// <throws> Exception </throws>
public virtual void Performance()
{
    int[] tokCount = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
    int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
    for (int k = 0; k < tokCount.Length; k++)
    {
        System.Text.StringBuilder buffer = new System.Text.StringBuilder();
        System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----");
        for (int i = 0; i < tokCount[k]; i++)
        {
            buffer.Append(English.IntToEnglish(i).ToUpper()).Append(' ');
        }

        //make sure we produce the same tokens
        TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))));
        TokenStream sink = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, 100));
        teeStream.ConsumeAllTokens();
        TokenStream stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))), 100);
        ITermAttribute tfTok = stream.AddAttribute<ITermAttribute>();
        ITermAttribute sinkTok = sink.AddAttribute<ITermAttribute>();
        for (int i = 0; stream.IncrementToken(); i++)
        {
            Assert.IsTrue(sink.IncrementToken());
            Assert.IsTrue(tfTok.Equals(sinkTok), tfTok + " is not equal to " + sinkTok + " at token: " + i);
        }

        //simulate two fields, each being analyzed once, for 20 documents
        for (int j = 0; j < modCounts.Length; j++)
        {
            int tfPos = 0;
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < 20; i++)
            {
                stream = new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString())));
                tfPos += SumPositionIncrements(stream);

                stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))), modCounts[j]);
                tfPos += SumPositionIncrements(stream);
            }
            timer.Stop();
            System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + timer.ElapsedMilliseconds + " ms");

            int sinkPos = 0;
            //simulate one field with one sink
            timer = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < 20; i++)
            {
                teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))));
                // The sink must be created before the tee is consumed so that it receives the tokens.
                sink = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, modCounts[j]));
                sinkPos += SumPositionIncrements(teeStream);
                //System.out.println("Modulo--------");
                sinkPos += SumPositionIncrements(sink);
            }
            timer.Stop();
            System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + timer.ElapsedMilliseconds + " ms");
            Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos);
        }
        System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----");
    }
}

/// <summary>
/// Consumes <paramref name="stream"/> to its end and returns the sum of the position
/// increments of all tokens it produced.
/// </summary>
private static int SumPositionIncrements(TokenStream stream)
{
    IPositionIncrementAttribute posIncrAtt = stream.GetAttribute<IPositionIncrementAttribute>();
    int total = 0;
    while (stream.IncrementToken())
    {
        total += posIncrAtt.PositionIncrement;
    }
    return total;
}
/// <summary>
/// Pulls the graph (including <see cref="IPositionLengthAttribute"/>)
/// from the provided <see cref="TokenStream"/>, and creates the corresponding
/// automaton where arcs are bytes (or Unicode code points
/// if unicodeArcs = true) from each term.
/// </summary>
/// <param name="in">the token stream to consume; it is reset, read to the end and ended here.</param>
/// <returns>the automaton built from the stream's tokens.</returns>
public virtual Automaton ToAutomaton(TokenStream @in)
{
    var automaton = new Automaton();
    bool isDeterministic = true;

    var posIncAtt = @in.AddAttribute<IPositionIncrementAttribute>();
    var posLengthAtt = @in.AddAttribute<IPositionLengthAttribute>();
    var offsetAtt = @in.AddAttribute<IOffsetAttribute>();
    var termBytesAtt = @in.AddAttribute<ITermToBytesRefAttribute>();

    BytesRef term = termBytesAtt.BytesRef;

    @in.Reset();

    // Only temporarily holds states ahead of our current
    // position:
    RollingBuffer<Position> positions = new Positions();

    int pos = -1;
    Position posData = null;
    int maxOffset = 0;

    while (@in.IncrementToken())
    {
        int posInc = posIncAtt.PositionIncrement;
        if (!preservePositionIncrements && posInc > 1)
        {
            posInc = 1;
        }
        Debug.Assert(pos > -1 || posInc > 0);

        if (posInc <= 0)
        {
            // note: this isn't necessarily true. its just that we aren't surely det.
            // we could optimize this further (e.g. buffer and sort synonyms at a position)
            // but thats probably overkill. this is cheap and dirty
            isDeterministic = false;
        }
        else
        {
            // New node:
            pos += posInc;

            posData = positions.Get(pos);
            Debug.Assert(posData.leaving == null);

            if (posData.arriving != null)
            {
                // Some earlier token ends here: connect it to a fresh leaving state.
                posData.leaving = new State();
                posData.arriving.AddTransition(new Transition(POS_SEP, posData.leaving));
                if (posInc > 1)
                {
                    // A token spanned over a hole; add holes
                    // "under" it:
                    AddHoles(automaton.GetInitialState(), positions, pos);
                }
            }
            else if (pos == 0)
            {
                // No token ever arrived to this position.
                // OK: this is the first token
                posData.leaving = automaton.GetInitialState();
            }
            else
            {
                // No token ever arrived to this position.
                // this means there's a hole (eg, StopFilter
                // does this):
                posData.leaving = new State();
                AddHoles(automaton.GetInitialState(), positions, pos);
            }

            positions.FreeBefore(pos);
        }

        int endPos = pos + posLengthAtt.PositionLength;

        termBytesAtt.FillBytesRef();
        BytesRef termUTF8 = ChangeToken(term);
        int[] termUnicode = null;

        Position endPosData = positions.Get(endPos);
        if (endPosData.arriving == null)
        {
            endPosData.arriving = new State();
        }

        State state = posData.leaving;

        // Number of arcs for this term: UTF-8 bytes by default, code points when unicodeArcs is set.
        int termLen = termUTF8.Length;
        if (unicodeArcs)
        {
            string utf16 = termUTF8.Utf8ToString();
            termUnicode = new int[utf16.CodePointCount(0, utf16.Length)];
            termLen = termUnicode.Length;

            int charIndex = 0;
            int codePointIndex = 0;
            while (charIndex < utf16.Length)
            {
                int codePoint = Character.CodePointAt(utf16, charIndex);
                termUnicode[codePointIndex++] = codePoint;
                charIndex += Character.CharCount(codePoint);
            }
        }

        for (int arcIndex = 0; arcIndex < termLen; arcIndex++)
        {
            // The last arc of the term lands on the shared arriving state of the end position.
            State nextState;
            if (arcIndex == termLen - 1)
            {
                nextState = endPosData.arriving;
            }
            else
            {
                nextState = new State();
            }

            int label = unicodeArcs
                ? termUnicode[arcIndex]
                : (termUTF8.Bytes[termUTF8.Offset + arcIndex] & 0xff);

            state.AddTransition(new Transition(label, nextState));
            state = nextState;
        }

        maxOffset = Math.Max(maxOffset, offsetAtt.EndOffset);
    }

    @in.End();

    // If the final offset lies beyond the last token, route the end through an extra accept state.
    State endState = null;
    if (offsetAtt.EndOffset > maxOffset)
    {
        endState = new State { Accept = true };
    }

    for (pos++; pos <= positions.MaxPos; pos++)
    {
        posData = positions.Get(pos);
        if (posData.arriving == null)
        {
            continue;
        }

        if (endState != null)
        {
            posData.arriving.AddTransition(new Transition(POS_SEP, endState));
        }
        else
        {
            posData.arriving.Accept = true;
        }
    }

    //toDot(a);
    automaton.IsDeterministic = isDeterministic;
    return automaton;
}
/// <summary>
/// Creates a shingle filter with ad hoc parameter settings.
/// </summary>
/// <param name="input">stream from which to construct the matrix</param>
/// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
/// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
/// <param name="spacerCharacter">character to use between texts of the token parts in a shingle. null for none.</param>
/// <param name="ignoringSinglePrefixOrSuffixShingle">if true, shingles that only contains permutation of the first or the last column will not be produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.</param>
/// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize, Char? spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec)
{
    _input = input;

    // Shingle configuration.
    MinimumShingleSize = minimumShingleSize;
    MaximumShingleSize = maximumShingleSize;
    SpacerCharacter = spacerCharacter;
    IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
    _settingsCodec = settingsCodec;

    // Attributes exposed by this filter.
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
    _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
    _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
    _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
    _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
    _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor

    // The same attribute set, registered on the input stream.
    _inTermAtt = (TermAttribute) input.AddAttribute(typeof (TermAttribute));
    _inPosIncrAtt = (PositionIncrementAttribute) input.AddAttribute(typeof (PositionIncrementAttribute));
    _inPayloadAtt = (PayloadAttribute) input.AddAttribute(typeof (PayloadAttribute));
    _inOffsetAtt = (OffsetAttribute) input.AddAttribute(typeof (OffsetAttribute));
    _inTypeAtt = (TypeAttribute) input.AddAttribute(typeof (TypeAttribute));
    _inFlagsAtt = (FlagsAttribute) input.AddAttribute(typeof (FlagsAttribute));
}
// assert-methods start here

/// <summary>
/// Advances <paramref name="ts"/> by one token and asserts that its term text
/// equals <paramref name="text"/>.
/// </summary>
/// <param name="ts">the stream to advance.</param>
/// <param name="text">expected term text.</param>
private static void AssertNext(TokenStream ts, String text)
{
    var termAtt = (TermAttribute) ts.AddAttribute(typeof (TermAttribute));

    bool hasToken = ts.IncrementToken();
    Assert.IsTrue(hasToken);

    Assert.AreEqual(text, termAtt.Term());
}