Ejemplo n.º 1
0
        /// <summary>
        /// Exercises <c>CaptureState</c>/<c>RestoreState</c> on <see cref="AttributeSource"/>:
        /// restoring into the originating source, into an identically configured copy,
        /// into a source that holds the same attributes in a different order plus one
        /// additional attribute, and finally into a source that lacks one of the
        /// captured attributes (which is expected to throw).
        /// </summary>
        public virtual void TestCaptureState()
        {
            // init a first instance
            AttributeSource    src     = new AttributeSource();
            ICharTermAttribute termAtt = src.AddAttribute<ICharTermAttribute>();
            ITypeAttribute     typeAtt = src.AddAttribute<ITypeAttribute>();

            termAtt.Append("TestTerm");
            typeAtt.Type = "TestType";
            int hashCode = src.GetHashCode();

            AttributeSource.State state = src.CaptureState();

            // modify the attributes
            termAtt.SetEmpty().Append("AnotherTestTerm");
            typeAtt.Type = "AnotherTestType";
            Assert.IsTrue(hashCode != src.GetHashCode(), "Hash code should be different");

            // Expected value goes first so that a failure reports expected/actual the right way round.
            src.RestoreState(state);
            Assert.AreEqual("TestTerm", termAtt.ToString());
            Assert.AreEqual("TestType", typeAtt.Type);
            Assert.AreEqual(hashCode, src.GetHashCode(), "Hash code should be equal after restore");

            // restore into an exact configured copy
            AttributeSource copy = new AttributeSource();

            copy.AddAttribute<ICharTermAttribute>();
            copy.AddAttribute<ITypeAttribute>();
            copy.RestoreState(state);
            Assert.AreEqual(src.GetHashCode(), copy.GetHashCode(), "Both AttributeSources should have same hashCode after restore");
            Assert.AreEqual(src, copy, "Both AttributeSources should be equal after restore");

            // init a second instance (with attributes in different order and one additional attribute)
            AttributeSource src2 = new AttributeSource();

            typeAtt = src2.AddAttribute<ITypeAttribute>();
            IFlagsAttribute flagsAtt = src2.AddAttribute<IFlagsAttribute>();

            termAtt        = src2.AddAttribute<ICharTermAttribute>();
            flagsAtt.Flags = 12345;

            src2.RestoreState(state);
            Assert.AreEqual("TestTerm", termAtt.ToString());
            Assert.AreEqual("TestType", typeAtt.Type);
            Assert.AreEqual(12345, flagsAtt.Flags, "FlagsAttribute should not be touched");

            // init a third instance missing one Attribute
            AttributeSource src3 = new AttributeSource();

            // Only the term attribute is registered; the returned instance is not needed here.
            src3.AddAttribute<ICharTermAttribute>();
            try
            {
                src3.RestoreState(state);
                Assert.Fail("The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException");
            }
            catch (Exception iae) when (iae.IsIllegalArgumentException())
            {
                // pass
            }
        }
    /// <summary>
    /// Advances to the next token. While <c>synonymStack</c> holds entries, each call
    /// pops one, restores the state saved in <c>current</c> and emits the popped text
    /// with a position increment of 0. Otherwise the next token is pulled from
    /// <c>input</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a token (original or synonym) is available;
    /// <c>false</c> when <c>input</c> has no more tokens.
    /// </returns>
    public override Boolean IncrementToken()
    {
        if (synonymStack.Count <= 0)
        {
            // Nothing buffered: move the wrapped stream forward.
            if (!input.IncrementToken())
            {
                return false;
            }

            // Push synonyms to stack; the token's state is saved only when that call reports true.
            if (AddAliasesToStack())
            {
                current = CaptureState();
            }

            return true;
        }

        // Emit a buffered synonym on top of the saved token state.
        string synonym = synonymStack.Pop();
        RestoreState(current);
        termAtt.SetTermBuffer(synonym);

        // This ensures the new word is treated as a synonym
        posIncrAtt.SetPositionIncrement(0);
        return true;
    }
Ejemplo n.º 3
0
        /// <summary>
        /// Exercises <c>CaptureState</c>/<c>RestoreState</c> on <see cref="AttributeSource"/>
        /// using the <c>typeof</c>-based attribute API: restoring into the originating
        /// source, into an identically configured copy, into a source with the same
        /// attributes in a different order plus one additional attribute, and into a
        /// source lacking one of the captured attributes (expected to throw
        /// <see cref="System.ArgumentException"/>).
        /// </summary>
        public virtual void TestCaptureState()
        {
            // init a first instance
            AttributeSource src     = new AttributeSource();
            TermAttribute   termAtt = (TermAttribute)src.AddAttribute(typeof(TermAttribute));
            TypeAttribute   typeAtt = (TypeAttribute)src.AddAttribute(typeof(TypeAttribute));

            termAtt.SetTermBuffer("TestTerm");
            typeAtt.SetType("TestType");
            int hashCode = src.GetHashCode();

            AttributeSource.State state = src.CaptureState();

            // modify the attributes
            termAtt.SetTermBuffer("AnotherTestTerm");
            typeAtt.SetType("AnotherTestType");
            Assert.IsTrue(hashCode != src.GetHashCode(), "Hash code should be different");

            src.RestoreState(state);
            Assert.AreEqual("TestTerm", termAtt.Term());
            Assert.AreEqual("TestType", typeAtt.Type());
            Assert.AreEqual(hashCode, src.GetHashCode(), "Hash code should be equal after restore");

            // restore into an exact configured copy
            AttributeSource copy = new AttributeSource();

            copy.AddAttribute(typeof(TermAttribute));
            copy.AddAttribute(typeof(TypeAttribute));
            copy.RestoreState(state);
            Assert.AreEqual(src.GetHashCode(), copy.GetHashCode(), "Both AttributeSources should have same hashCode after restore");
            Assert.AreEqual(src, copy, "Both AttributeSources should be equal after restore");

            // init a second instance (with attributes in different order and one additional attribute)
            AttributeSource src2 = new AttributeSource();

            typeAtt = (TypeAttribute)src2.AddAttribute(typeof(TypeAttribute));
            Lucene.Net.Analysis.Tokenattributes.FlagsAttribute flagsAtt = (Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)src2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute));
            termAtt = (TermAttribute)src2.AddAttribute(typeof(TermAttribute));
            flagsAtt.SetFlags(12345);

            src2.RestoreState(state);
            Assert.AreEqual("TestTerm", termAtt.Term());
            Assert.AreEqual("TestType", typeAtt.Type());
            Assert.AreEqual(12345, flagsAtt.GetFlags(), "FlagsAttribute should not be touched");

            // init a third instance missing one Attribute
            AttributeSource src3 = new AttributeSource();

            // Only the term attribute is registered; the returned instance is not needed here.
            src3.AddAttribute(typeof(TermAttribute));
            try
            {
                src3.RestoreState(state);
                Assert.Fail("The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException");
            }
            catch (System.ArgumentException)
            {
                // pass
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Advances to the next token. While <c>synonymStack</c> holds entries, each call
        /// pops one, restores the state saved in <c>current</c> and emits the popped
        /// value as the term with a position increment of 0. Otherwise the next token
        /// is pulled from <c>input</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a token is available; <c>false</c> when <c>input</c> has
        /// no more tokens.
        /// </returns>
        public override bool IncrementToken()
        {
            if (synonymStack.Count <= 0)
            {
                // Nothing buffered: move the wrapped stream forward.
                if (!input.IncrementToken())
                {
                    return false;
                }

                // Save the token's state only when addAliasesToStack() reports true.
                if (addAliasesToStack())
                {
                    current = CaptureState();
                }

                return true;
            }

            // Emit a buffered entry on top of the saved token state.
            var pending = synonymStack.Pop();
            RestoreState(current);
            termAtt.SetTermBuffer(pending);
            posIncrAtt.PositionIncrement = 0;
            return true;
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Resets the base stream, then clears the shingle buffer and returns all
 /// buffering fields to their initial values.
 /// </summary>
 public override void Reset()
 {
     base.Reset();

     // Shingle buffer and the read position within it.
     shingleBuf.Clear();
     shingleBufferPosition = 0;

     // Saved states.
     nextToken = null;
     currentToken = null;

     // Filler-token bookkeeping.
     hasCurrentToken = false;
     numFillerTokensToInsert = 0;
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Makes the next token available in the attributes: either a filler token or
        /// a token read from the input stream. When an input token carries a position
        /// increment greater than 1, one filler token is produced per skipped position
        /// before the token itself is returned.
        /// </summary>
        /// <returns>
        /// <c>true</c> if a token (real or filler) is available; <c>false</c> once
        /// <c>input</c> has no more tokens.
        /// </returns>
        private bool GetNextToken()
        {
            for (;;)
            {
                if (numFillerTokensToInsert > 0)
                {
                    // The first filler saves the pending token's state; later fillers
                    // (and eventually the token itself) start again from that saved state.
                    if (currentToken != null)
                    {
                        RestoreState(currentToken);
                    }
                    else
                    {
                        currentToken = CaptureState();
                    }

                    numFillerTokensToInsert--;

                    // A filler token occupies no space
                    offsetAtt.SetOffset(offsetAtt.StartOffset, offsetAtt.StartOffset);
                    termAtt.SetTermBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.Length);
                    return true;
                }

                if (hasCurrentToken)
                {
                    // Fillers (if any) overwrote the attributes; bring the real token back.
                    if (currentToken != null)
                    {
                        RestoreState(currentToken);
                        currentToken = null;
                    }

                    hasCurrentToken = false;
                    return true;
                }

                if (!input.IncrementToken())
                {
                    return false;
                }

                hasCurrentToken = true;

                // Schedule one filler per skipped position; they are emitted on the next loop pass.
                int increment = posIncrAtt.PositionIncrement;
                if (increment > 1)
                {
                    numFillerTokensToInsert = increment - 1;
                }
            }
        }
Ejemplo n.º 7
0
        /*
         * Produces the next output token from the shingle buffer, calling
         * FillShingleBuffer() whenever no token is pending (nextToken == null).
         * Returns false as soon as FillShingleBuffer() returns false; otherwise
         * returns true with the attributes set to either the head token itself
         * (unigram output) or a shingle built from shingles[shingleBufferPosition].
         *
         * (non-Javadoc)
         * @see org.apache.lucene.analysis.TokenStream#next()
         */
        public sealed override bool IncrementToken()
        {
            while (true)
            {
                // No token pending: refill the buffer, and stop when that fails.
                if (nextToken == null)
                {
                    if (!FillShingleBuffer())
                    {
                        return false;
                    }
                }

                // Every emitted token starts from the state stored at the head of the buffer.
                nextToken = shingleBuf.First.Value;

                if (outputUnigrams)
                {
                    // Position 0 is the head token on its own, emitted with a position increment of 1.
                    if (shingleBufferPosition == 0)
                    {
                        RestoreState(nextToken);
                        posIncrAtt.PositionIncrement = 1;
                        shingleBufferPosition++;
                        return true;
                    }
                }
                else if (shingleBufferPosition % this.maxShingleSize == 0)
                {
                    // Without unigram output, positions that are multiples of maxShingleSize are skipped.
                    shingleBufferPosition++;
                }

                if (shingleBufferPosition < shingleBuf.Count)
                {
                    // Build the shingle on top of the head token's state: the start offset is
                    // kept, the end offset and text come from the per-position arrays.
                    RestoreState(nextToken);
                    typeAtt.Type = tokenType;
                    offsetAtt.SetOffset(offsetAtt.StartOffset, endOffsets[shingleBufferPosition]);
                    StringBuilder buf = shingles[shingleBufferPosition];
                    int termLength = buf.Length;
                    // Copy the shingle text into the term buffer, growing the buffer first if it is too small.
                    char[] TermBuffer = termAtt.TermBuffer();
                    if (TermBuffer.Length < termLength)
                        TermBuffer = termAtt.ResizeTermBuffer(termLength);
                    buf.CopyTo(0, TermBuffer, 0, termLength);
                    termAtt.SetTermLength(termLength);
                    // Without unigram output, the shingle at position % maxShingleSize == 1 gets
                    // a position increment of 1; every other shingle gets 0.
                    if ((!outputUnigrams) && shingleBufferPosition % this.maxShingleSize == 1)
                    {
                        posIncrAtt.PositionIncrement = 1;
                    }
                    else
                    {
                        posIncrAtt.PositionIncrement = 0;
                    }
                    shingleBufferPosition++;
                    // Last position emitted: clear the pending token so the next call refills the buffer.
                    if (shingleBufferPosition == shingleBuf.Count)
                    {
                        nextToken = null;
                        shingleBufferPosition = 0;
                    }
                    return true;
                }
                else
                {
                    // Nothing left at this position: reset and loop around to refill.
                    nextToken = null;
                    shingleBufferPosition = 0;
                }
            }
        }
			/// <summary>
			/// Stores <paramref name="finalState"/> in this instance's <c>finalState</c> field,
			/// replacing any previously stored value.
			/// </summary>
			/// <param name="finalState">The captured state to remember; stored as-is without validation.</param>
			internal /*private*/ void  SetFinalState(AttributeSource.State finalState)
			{
				this.finalState = finalState;
			}
Ejemplo n.º 9
0
 /// <summary>
 /// Creates the helper with <paramref name="initState"/> as the initial value of
 /// its <c>state</c> field.
 /// </summary>
 /// <param name="initState">The state to start from; stored without validation.</param>
 public IteratorAnonymousInnerClassHelper(AttributeSource.State initState)
 {
     this.state = initState;
 }
 /// <summary>
 /// Resets the base stream and discards buffered work: the saved state in
 /// <c>current</c> is dropped and the <c>tokens</c> collection is emptied.
 /// </summary>
 public override void Reset()
 {
     base.Reset();

     // Drop the saved state together with the tokens that would have been emitted from it.
     current = null;
     tokens.Clear();
 }
        /// <summary>
        /// Advances to the next token. Buffered entries in <c>tokens</c> are emitted
        /// first, each on top of the state saved in <c>current</c> and with a position
        /// increment of 0. When the buffer is empty the next token is read from
        /// <c>input</c>, passed to <c>Decompose()</c> if its term length is at least
        /// <c>minWordSize</c>, and returned unchanged.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a token is available; <c>false</c> when <c>input</c> has
        /// no more tokens.
        /// </returns>
        public override bool IncrementToken()
        {
            if (tokens.Count > 0)
            {
                Debug.Assert(current != null);

                CompoundToken part = tokens.First.Value;
                tokens.RemoveFirst();

                // keep all other attributes untouched
                RestoreState(current);
                termAtt.SetEmpty().Append(part.txt);
                offsetAtt.SetOffset(part.startOffset, part.endOffset);
                posIncAtt.PositionIncrement = 0;
                return true;
            }

            // not really needed, but for safety
            current = null;

            if (!input.IncrementToken())
            {
                return false;
            }

            // Only words longer than minWordSize get processed
            if (termAtt.Length >= this.minWordSize)
            {
                Decompose();

                // only capture the state if we really need it for producing new tokens
                if (tokens.Count > 0)
                {
                    current = CaptureState();
                }
            }

            // return original token:
            return true;
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Creates the helper, remembering the owning <see cref="AttributeSource"/> and
 /// using <paramref name="initState"/> both as the stored initial state and as the
 /// starting value of the <c>state</c> field.
 /// </summary>
 /// <param name="outerInstance">The attribute source stored in <c>outerInstance</c>.</param>
 /// <param name="initState">The state stored in both <c>initState</c> and <c>state</c>.</param>
 public IteratorAnonymousInnerClassHelper(AttributeSource outerInstance, AttributeSource.State initState)
 {
     this.initState     = initState;
     this.state         = initState;
     this.outerInstance = outerInstance;
 }
Ejemplo n.º 13
0
 /// <summary>
 /// Returns this instance to its initial condition: no saved state, marked as
 /// consumed, and with the <c>keepOrig</c> and <c>matched</c> flags cleared.
 /// </summary>
 public void Reset()
 {
     // Flags that are cleared.
     matched = false;
     keepOrig = false;

     // Marked as consumed, with no saved state.
     consumed = true;
     state = null;
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Creates the iterator with <paramref name="initState"/> as the initial value of
 /// its <c>state</c> field.
 /// </summary>
 /// <param name="initState">The state to start from; stored without validation.</param>
 public IteratorAnonymousClass(AttributeSource.State initState)
 {
     this.state = initState;
 }