/// <summary>
/// Verifies CaptureState()/RestoreState() round-tripping: a captured state
/// restores attribute values and hash codes, can be restored into an
/// identically configured AttributeSource, leaves extra attributes in the
/// target untouched, and throws when the target is missing an attribute.
/// </summary>
public virtual void TestCaptureState()
{
    // init a first instance
    AttributeSource src = new AttributeSource();
    ICharTermAttribute termAtt = src.AddAttribute<ICharTermAttribute>();
    ITypeAttribute typeAtt = src.AddAttribute<ITypeAttribute>();
    termAtt.Append("TestTerm");
    typeAtt.Type = "TestType";
    int hashCode = src.GetHashCode();
    AttributeSource.State state = src.CaptureState();

    // modify the attributes
    termAtt.SetEmpty().Append("AnotherTestTerm");
    typeAtt.Type = "AnotherTestType";
    Assert.IsTrue(hashCode != src.GetHashCode(), "Hash code should be different");

    src.RestoreState(state);
    // NUnit's Assert.AreEqual takes (expected, actual) — expected value first,
    // otherwise failure messages report the values backwards.
    Assert.AreEqual("TestTerm", termAtt.ToString());
    Assert.AreEqual("TestType", typeAtt.Type);
    Assert.AreEqual(hashCode, src.GetHashCode(), "Hash code should be equal after restore");

    // restore into an exact configured copy
    AttributeSource copy = new AttributeSource();
    copy.AddAttribute<ICharTermAttribute>();
    copy.AddAttribute<ITypeAttribute>();
    copy.RestoreState(state);
    Assert.AreEqual(src.GetHashCode(), copy.GetHashCode(), "Both AttributeSources should have same hashCode after restore");
    Assert.AreEqual(src, copy, "Both AttributeSources should be equal after restore");

    // init a second instance (with attributes in different order and one additional attribute)
    AttributeSource src2 = new AttributeSource();
    typeAtt = src2.AddAttribute<ITypeAttribute>();
    IFlagsAttribute flagsAtt = src2.AddAttribute<IFlagsAttribute>();
    termAtt = src2.AddAttribute<ICharTermAttribute>();
    flagsAtt.Flags = 12345;

    src2.RestoreState(state);
    Assert.AreEqual("TestTerm", termAtt.ToString());
    Assert.AreEqual("TestType", typeAtt.Type);
    // The restored state only covers the attributes it was captured from.
    Assert.AreEqual(12345, flagsAtt.Flags, "FlagsAttribute should not be touched");

    // init a third instance missing one Attribute
    AttributeSource src3 = new AttributeSource();
    termAtt = src3.AddAttribute<ICharTermAttribute>();
    try
    {
        src3.RestoreState(state);
        Assert.Fail("The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException");
    }
    catch (Exception iae) when (iae.IsIllegalArgumentException())
    {
        // pass
    }
}
/// <summary>
/// Returns buffered synonyms first — each restored onto the attributes of the
/// token that produced them and marked with a position increment of 0 — then
/// advances the wrapped stream, buffering any aliases found for the new token.
/// </summary>
/// <returns>true if a token was produced; false at end of the input stream.</returns>
public override bool IncrementToken() // 'bool' keyword alias, not 'Boolean' (IDE0049)
{
    if (synonymStack.Count > 0)
    {
        string syn = synonymStack.Pop();
        // Re-apply the attributes captured for the original token so the
        // synonym shares its offsets/type.
        RestoreState(current);
        termAtt.SetTermBuffer(syn);
        // This ensures the new word is treated as a synonym
        posIncrAtt.SetPositionIncrement(0);
        return true;
    }

    if (!input.IncrementToken())
    {
        return false;
    }

    // Push synonyms to stack
    if (AddAliasesToStack())
    {
        // Save current token
        current = CaptureState();
    }
    return true;
}
/// <summary>
/// Verifies CaptureState()/RestoreState() round-tripping on the legacy
/// (typeof-based) attribute API: values and hash codes are restored, a
/// matching copy accepts the state, extra attributes in the target are left
/// untouched, and a target missing an attribute triggers an ArgumentException.
/// </summary>
public virtual void TestCaptureState()
{
    // init a first instance
    AttributeSource src = new AttributeSource();
    TermAttribute termAtt = (TermAttribute)src.AddAttribute(typeof(TermAttribute));
    TypeAttribute typeAtt = (TypeAttribute)src.AddAttribute(typeof(TypeAttribute));
    termAtt.SetTermBuffer("TestTerm");
    typeAtt.SetType("TestType");
    int hashCode = src.GetHashCode();
    AttributeSource.State state = src.CaptureState();

    // modify the attributes
    termAtt.SetTermBuffer("AnotherTestTerm");
    typeAtt.SetType("AnotherTestType");
    Assert.IsTrue(hashCode != src.GetHashCode(), "Hash code should be different");

    src.RestoreState(state);
    Assert.AreEqual("TestTerm", termAtt.Term());
    Assert.AreEqual("TestType", typeAtt.Type());
    Assert.AreEqual(hashCode, src.GetHashCode(), "Hash code should be equal after restore");

    // restore into an exact configured copy
    AttributeSource copy = new AttributeSource();
    copy.AddAttribute(typeof(TermAttribute));
    copy.AddAttribute(typeof(TypeAttribute));
    copy.RestoreState(state);
    Assert.AreEqual(src.GetHashCode(), copy.GetHashCode(), "Both AttributeSources should have same hashCode after restore");
    Assert.AreEqual(src, copy, "Both AttributeSources should be equal after restore");

    // init a second instance (with attributes in different order and one additional attribute)
    AttributeSource src2 = new AttributeSource();
    typeAtt = (TypeAttribute)src2.AddAttribute(typeof(TypeAttribute));
    Lucene.Net.Analysis.Tokenattributes.FlagsAttribute flagsAtt = (Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)src2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute));
    termAtt = (TermAttribute)src2.AddAttribute(typeof(TermAttribute));
    flagsAtt.SetFlags(12345);

    src2.RestoreState(state);
    Assert.AreEqual("TestTerm", termAtt.Term());
    Assert.AreEqual("TestType", typeAtt.Type());
    // The state only covers the attributes it was captured from.
    Assert.AreEqual(12345, flagsAtt.GetFlags(), "FlagsAttribute should not be touched");

    // init a third instance missing one Attribute
    AttributeSource src3 = new AttributeSource();
    termAtt = (TermAttribute)src3.AddAttribute(typeof(TermAttribute));
    try
    {
        src3.RestoreState(state);
        Assert.Fail("The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException");
    }
    catch (System.ArgumentException) // no variable: the exception object is not used (avoids CS0168)
    {
        // pass
    }
}
/// <summary>
/// Emits queued synonyms for the previously returned token first (restored
/// onto that token's captured attributes, position increment 0), then pulls
/// the next token from the wrapped stream and buffers its aliases if any.
/// </summary>
public override bool IncrementToken()
{
    // Drain synonyms queued for the token returned on a previous call.
    if (synonymStack.Count > 0)
    {
        var synonym = synonymStack.Pop();
        RestoreState(current);
        termAtt.SetTermBuffer(synonym);
        posIncrAtt.PositionIncrement = 0;
        return true;
    }

    // No pending synonyms: advance the underlying stream.
    if (!input.IncrementToken())
    {
        return false;
    }

    // Snapshot this token's attributes only when it actually has aliases.
    if (addAliasesToStack())
    {
        current = CaptureState();
    }

    return true;
}
/// <summary>
/// Resets the filter to its initial state so the stream can be consumed again.
/// </summary>
public override void Reset()
{
    base.Reset();

    // Clear all buffered shingle bookkeeping.
    shingleBuf.Clear();
    nextToken = null;
    currentToken = null;
    hasCurrentToken = false;
    shingleBufferPosition = 0;
    numFillerTokensToInsert = 0;
}
/*
 * Get the next token from the input stream and make it current.
 * If we encounter a token with position increment > 1, filler tokens are
 * emitted for the skipped positions before the buffered token is released.
 * <p/>
 * Returns false when the end of the input stream is reached.
 * @return true if a token was made current, false at end of input stream
 * @throws IOException if the input stream has a problem
 */
private bool GetNextToken()
{
    while (true)
    {
        if (numFillerTokensToInsert > 0)
        {
            // First filler: snapshot the pending token's attributes; later
            // fillers re-apply the same snapshot so type/flags stay consistent.
            if (currentToken == null)
            {
                currentToken = CaptureState();
            }
            else
            {
                RestoreState(currentToken);
            }
            numFillerTokensToInsert--;
            // A filler token occupies no space
            offsetAtt.SetOffset(offsetAtt.StartOffset, offsetAtt.StartOffset);
            termAtt.SetTermBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.Length);
            return true;
        }
        if (hasCurrentToken)
        {
            // Release the token that was buffered while fillers were emitted.
            if (currentToken != null)
            {
                RestoreState(currentToken);
                currentToken = null;
            }
            hasCurrentToken = false;
            return true;
        }
        if (!input.IncrementToken())
            return false;
        hasCurrentToken = true;
        // A position increment of n means n-1 positions were skipped:
        // schedule that many filler tokens before this one is released.
        if (posIncrAtt.PositionIncrement > 1)
        {
            numFillerTokensToInsert = posIncrAtt.PositionIncrement - 1;
        }
    }
}
/*
 * Produces the next token: the plain unigram first (when outputUnigrams is
 * enabled), then each shingle built in the shingle buffer, refilling the
 * buffer from the input stream as needed.
 * @return true if a token was produced, false at end of input stream
 */
public sealed override bool IncrementToken()
{
    while (true)
    {
        if (nextToken == null)
        {
            // Buffer exhausted: pull more tokens from the input stream.
            if (!FillShingleBuffer())
            {
                return false;
            }
        }

        nextToken = shingleBuf.First.Value;

        if (outputUnigrams)
        {
            if (shingleBufferPosition == 0)
            {
                // Emit the unmodified unigram before its shingles.
                RestoreState(nextToken);
                posIncrAtt.PositionIncrement = 1;
                shingleBufferPosition++;
                return true;
            }
        }
        else if (shingleBufferPosition % this.maxShingleSize == 0)
        {
            // Unigrams suppressed: skip over the unigram slot.
            shingleBufferPosition++;
        }

        if (shingleBufferPosition < shingleBuf.Count)
        {
            RestoreState(nextToken);
            typeAtt.Type = tokenType;
            offsetAtt.SetOffset(offsetAtt.StartOffset, endOffsets[shingleBufferPosition]);
            StringBuilder buf = shingles[shingleBufferPosition];
            int termLength = buf.Length;
            // camelCase local (was PascalCase 'TermBuffer', which read like a property).
            char[] termBuffer = termAtt.TermBuffer();
            if (termBuffer.Length < termLength)
                termBuffer = termAtt.ResizeTermBuffer(termLength);
            buf.CopyTo(0, termBuffer, 0, termLength);
            termAtt.SetTermLength(termLength);
            if ((!outputUnigrams) && shingleBufferPosition % this.maxShingleSize == 1)
            {
                posIncrAtt.PositionIncrement = 1;
            }
            else
            {
                posIncrAtt.PositionIncrement = 0;
            }
            shingleBufferPosition++;
            if (shingleBufferPosition == shingleBuf.Count)
            {
                // All shingles for this buffer emitted: start a fresh cycle.
                nextToken = null;
                shingleBufferPosition = 0;
            }
            return true;
        }
        else
        {
            nextToken = null;
            shingleBufferPosition = 0;
        }
    }
}
// Stores the supplied captured attribute state for later retrieval.
internal /*private*/ void SetFinalState(AttributeSource.State finalState) => this.finalState = finalState;
/// <summary>Starts the iterator at the supplied captured state.</summary>
public IteratorAnonymousInnerClassHelper(AttributeSource.State initState)
{
    this.state = initState;
}
/// <summary>
/// Resets the filter: discards any pending decompounded tokens and the
/// captured state, then resets the wrapped stream.
/// </summary>
public override void Reset()
{
    base.Reset();
    current = null;
    tokens.Clear();
}
/// <summary>
/// Emits pending decompounded sub-tokens (position increment 0, original
/// attributes restored) before advancing the wrapped stream; tokens at least
/// minWordSize long are decomposed and their parts queued for later calls.
/// </summary>
public override bool IncrementToken()
{
    // Pending sub-tokens from a previous decomposition come out first.
    if (tokens.Count > 0)
    {
        Debug.Assert(current != null);
        CompoundToken pending = tokens.First.Value;
        tokens.RemoveFirst();
        RestoreState(current); // keep all other attributes untouched
        termAtt.SetEmpty().Append(pending.txt);
        offsetAtt.SetOffset(pending.startOffset, pending.endOffset);
        posIncAtt.PositionIncrement = 0;
        return true;
    }

    current = null; // not really needed, but for safety

    if (!input.IncrementToken())
    {
        return false;
    }

    // Only words longer than minWordSize get processed
    if (termAtt.Length >= this.minWordSize)
    {
        Decompose();
        // only capture the state if we really need it for producing new tokens
        if (tokens.Count > 0)
        {
            current = CaptureState();
        }
    }

    // return original token:
    return true;
}
/// <summary>
/// Creates the helper bound to its outer AttributeSource; iteration starts
/// at initState, which is also kept separately as the starting point.
/// </summary>
public IteratorAnonymousInnerClassHelper(AttributeSource outerInstance, AttributeSource.State initState)
{
    this.outerInstance = outerInstance;
    this.initState = initState;
    // The cursor begins at the same state it was initialized with.
    this.state = initState;
}
/// <summary>Returns this holder to its pristine, unmatched condition.</summary>
public void Reset()
{
    consumed = true;
    keepOrig = false;
    matched = false;
    state = null;
}
/// <summary>Begins iteration at the supplied captured state.</summary>
public IteratorAnonymousClass(AttributeSource.State initState)
{
    this.state = initState;
}