Ejemplo n.º 1
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        /// <summary>
        /// Advances the wrapped input by one token and hands the resulting state to every
        /// sink that is still reachable and accepts it.
        /// </summary>
        /// <returns><c>true</c> if the input produced a token; otherwise <c>false</c>.</returns>
        public override bool incrementToken()
        {
            if (!input.incrementToken())
            {
                return false;
            }

            // Capture state lazily - maybe no sink accepts this token.
            AttributeSource.State state = null;
            foreach (WeakReference<SinkTokenStream> @ref in sinks)
            {
                // System.WeakReference<T> has no Java-style get(); TryGetTarget reports
                // whether the target is still available and hands it back.
                SinkTokenStream sink;
                if (@ref.TryGetTarget(out sink) && sink.accept(this))
                {
                    if (state == null)
                    {
                        state = this.captureState();
                    }
                    sink.addState(state);
                }
            }
            return true;
        }
        /// <summary>
        /// Emits the next queued token from <c>m_tokens</c> if there is one; otherwise pulls
        /// the next token from the input and, when it is long enough, runs
        /// <c>Decompose()</c> on it.
        /// </summary>
        /// <returns><c>true</c> if a token was produced; <c>false</c> once the input is exhausted.</returns>
        public override sealed bool IncrementToken()
        {
            bool hasQueuedToken = m_tokens.Count > 0;
            if (hasQueuedToken)
            {
                Debug.Assert(current != null);
                CompoundToken queued = m_tokens.First.Value;
                m_tokens.Remove(queued);

                // Start from the captured original so all other attributes stay untouched,
                // then overwrite term, offsets and position increment.
                RestoreState(current);
                m_termAtt.SetEmpty().Append(queued.Text);
                m_offsetAtt.SetOffset(queued.StartOffset, queued.EndOffset);
                posIncAtt.PositionIncrement = 0;
                return true;
            }

            // Not strictly required, but avoids replaying a stale state.
            current = null;

            if (!m_input.IncrementToken())
            {
                return false;
            }

            // Terms shorter than m_minWordSize are passed through without decomposition.
            bool longEnough = m_termAtt.Length >= this.m_minWordSize;
            if (longEnough)
            {
                Decompose();

                // Capturing is only worthwhile when Decompose() queued something.
                if (m_tokens.Count > 0)
                {
                    current = CaptureState();
                }
            }

            // The original token is always returned first.
            return true;
        }
Ejemplo n.º 3
0
                /// <summary>
                /// Emits each input token and then, on the following call, an extra token with
                /// term "b" and position increment 0 built from the captured state.
                /// </summary>
                /// <returns><c>true</c> if a token was produced; <c>false</c> once the input is exhausted.</returns>
                public sealed override bool IncrementToken()
                {
                    bool extraTokenPending = state != null;
                    if (extraTokenPending)
                    {
                        // Replay the captured token and rewrite it into the stacked "b" token.
                        RestoreState(state);
                        payloadAtt.Payload = null;
                        posIncrAtt.PositionIncrement = 0;
                        termAtt.SetEmpty().Append("b");
                        state = null;
                        return true;
                    }

                    if (!m_input.IncrementToken())
                    {
                        return false;
                    }

                    // A leading digit character is turned into the position increment.
                    char leading = termAtt.Buffer[0];
                    if (char.IsDigit(leading))
                    {
                        posIncrAtt.PositionIncrement = leading - '0';
                    }

                    // The payload is attached only while the 'first' flag is still set.
                    if (first)
                    {
                        payloadAtt.Payload = new BytesRef(new byte[] { 100 });
                        first = false;
                    }

                    // Remember this token so the next call can emit its stacked companion.
                    state = CaptureState();
                    return true;
                }
Ejemplo n.º 4
0
 /// <summary>
 /// Drops the captured state, marks the object as consumed and clears the
 /// <c>keepOrig</c> and <c>matched</c> flags.
 /// </summary>
 public void Reset()
 {
     matched = false;
     keepOrig = false;
     consumed = true;
     state = null;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Advances the input by one token and passes the captured state to every sink
        /// that is still reachable and accepts this stream.
        /// </summary>
        /// <returns><c>true</c> if the input produced a token; otherwise <c>false</c>.</returns>
        public override bool IncrementToken()
        {
            if (!m_input.IncrementToken())
            {
                return false;
            }

            // Captured on demand: if no sink accepts the token, nothing is captured at all.
            AttributeSource.State captured = null;
            foreach (WeakReference<SinkTokenStream> weakSink in sinks)
            {
                SinkTokenStream target;
                if (!weakSink.TryGetTarget(out target) || !target.Accept(this))
                {
                    continue;
                }

                if (captured == null)
                {
                    captured = CaptureState();
                }
                target.AddState(captured);
            }

            return true;
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Appends a captured state to the end of <c>cachedStates</c>.
 /// </summary>
 /// <param name="state">The state to cache.</param>
 /// <exception cref="System.InvalidOperationException">
 /// The iterator <c>it</c> has already been created.
 /// </exception>
 internal /*private*/ void AddState(AttributeSource.State state)
 {
     if (it != null)
     {
         // InvalidOperationException is the specific standard type for "wrong object state".
         // It derives from SystemException, so existing catch blocks keep working.
         throw new System.InvalidOperationException("The tee must be consumed before sinks are consumed.");
     }
     cachedStates.AddLast(state);
 }
Ejemplo n.º 7
0
 /// <summary>
 /// Adds a captured state to <c>cachedStates</c>.
 /// </summary>
 /// <param name="state">The state to cache.</param>
 /// <exception cref="InvalidOperationException">
 /// The iterator <c>it</c> has already been created.
 /// </exception>
 internal void AddState(AttributeSource.State state)
 {
     bool iterationStarted = it != null;
     if (iterationStarted)
     {
         throw new InvalidOperationException("The tee must be consumed before sinks are consumed.");
     }

     cachedStates.Add(state);
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Adds a captured state to <c>cachedStates</c>, provided the iterator <c>it</c>
 /// has not been created yet.
 /// </summary>
 /// <param name="state">The state to cache.</param>
 internal void AddState(AttributeSource.State state)
 {
     if (it == null)
     {
         cachedStates.Add(state);
         return;
     }

     // Reaching this point means the iterator already exists.
     throw IllegalStateException.Create("The tee must be consumed before sinks are consumed.");
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Drains the input, storing the captured state of every token in <c>cache</c>, then
 /// calls <c>End()</c> on the input and records the resulting state in <c>finalState</c>.
 /// </summary>
 private void FillCache()
 {
     while (true)
     {
         if (!m_input.IncrementToken())
         {
             break;
         }
         cache.Add(CaptureState());
     }

     // End() must run before the final capture so the end-of-stream state is recorded.
     m_input.End();
     finalState = CaptureState();
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Resets the base stream, empties <c>shingleBuf</c> and clears all per-stream
 /// bookkeeping fields of this filter.
 /// </summary>
 public override void Reset()
 {
     base.Reset();

     // Shingle buffer and the cursor into it.
     shingleBuf.Clear();
     shingleBufferPosition = 0;
     nextToken = null;

     // Current-token and filler-token bookkeeping.
     hasCurrentToken = false;
     currentToken = null;
     numFillerTokensToInsert = 0;
 }
Ejemplo n.º 11
0
 /// <summary>
 /// Drains <c>Input</c>, appending the captured state of every token to <c>Cache</c>,
 /// then calls <c>End()</c> on the input and stores the resulting state in <c>FinalState</c>.
 /// </summary>
 private void FillCache()
 {
     bool hasToken = Input.IncrementToken();
     while (hasToken)
     {
         Cache.AddLast(CaptureState());
         hasToken = Input.IncrementToken();
     }

     // End() must run before the final capture so the end-of-stream state is recorded.
     Input.End();
     FinalState = CaptureState();
 }
Ejemplo n.º 12
0
 /// <summary>
 /// Ends the base stream, captures the resulting state and passes it to every sink
 /// that is still reachable via <c>SetFinalState</c>.
 /// </summary>
 public override sealed void End()
 {
     base.End();

     AttributeSource.State endState = CaptureState();
     foreach (WeakReference<SinkTokenStream> weakSink in sinks)
     {
         SinkTokenStream target;
         if (!weakSink.TryGetTarget(out target))
         {
             // Target no longer available; nothing to notify.
             continue;
         }
         target.SetFinalState(endState);
     }
 }
Ejemplo n.º 13
0
            /// <summary>
            /// Exchanges entries <paramref name="i"/> and <paramref name="j"/> in the three
            /// parallel collections of the outer instance (<c>buffered</c>, <c>startOff</c>,
            /// <c>posInc</c>) so they stay aligned.
            /// </summary>
            /// <param name="i">Index of the first entry.</param>
            /// <param name="j">Index of the second entry.</param>
            protected override void Swap(int i, int j)
            {
                AttributeSource.State heldState = outerInstance.buffered[i];
                outerInstance.buffered[i] = outerInstance.buffered[j];
                outerInstance.buffered[j] = heldState;

                int heldStartOffset = outerInstance.startOff[i];
                outerInstance.startOff[i] = outerInstance.startOff[j];
                outerInstance.startOff[j] = heldStartOffset;

                int heldPosInc = outerInstance.posInc[i];
                outerInstance.posInc[i] = outerInstance.posInc[j];
                outerInstance.posInc[j] = heldPosInc;
            }
Ejemplo n.º 14
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
        /// <summary>
        /// Ends the base stream, captures the resulting state and assigns it as the
        /// <c>FinalState</c> of every sink that is still reachable.
        /// </summary>
        public override void end()
        {
            base.end();
            AttributeSource.State finalState = captureState();
            foreach (WeakReference<SinkTokenStream> @ref in sinks)
            {
                // System.WeakReference<T> has no Java-style get(); TryGetTarget reports
                // whether the target is still available and hands it back.
                SinkTokenStream sink;
                if (@ref.TryGetTarget(out sink))
                {
                    sink.FinalState = finalState;
                }
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Produces the next token: first any state still pending in <c>tokens</c>,
        /// otherwise the next token type reported by the scanner, handled according to
        /// <c>tokenOutput</c>.
        /// </summary>
        /// <returns><c>true</c> if a token was produced; <c>false</c> when the scanner reports YYEOF.</returns>
        // NOTE(review): hasNext()/next() are Java iterator calls left in place by the
        // converter; confirm that the declared type of 'tokens' actually exposes them.
        public override bool incrementToken()
        {
            bool replayPending = tokens != null && tokens.hasNext();
            if (replayPending)
            {
                AttributeSource.State pending = tokens.next();
                restoreState(pending);
                return true;
            }

            clearAttributes();

            int tokenType = scanner.NextToken;
            if (tokenType == WikipediaTokenizerImpl.YYEOF)
            {
                return false;
            }

            string type = WikipediaTokenizerImpl.TOKEN_TYPES[tokenType];
            if (tokenOutput == TOKENS_ONLY || !untokenizedTypes.Contains(type))
            {
                setupToken();
            }
            else if (tokenOutput == UNTOKENIZED_ONLY && untokenizedTypes.Contains(type))
            {
                collapseTokens(tokenType);
            }
            else if (tokenOutput == BOTH)
            {
                // Collapse into a single token, add it to tokens AND output the individual
                // tokens; the untokenized token is output first.
                collapseAndSaveTokens(tokenType, type);
            }

            // Never emit a position increment of 0 for the very first token.
            int increment = scanner.PositionIncrement;
            if (first && increment == 0)
            {
                increment = 1;
            }

            posIncrAtt.PositionIncrement = increment;
            typeAtt.Type = type;
            first = false;
            return true;
        }
Ejemplo n.º 16
0
        /*
         * Advances to the next token to be placed on the token buffer, leaving it in
         * this stream's attributes.
         * If an input token has a position increment > 1, (increment - 1) filler
         * tokens are produced first, one per call, before the input token itself.
         *
         * Returns true when a token (filler or real) is available in the attributes,
         * and false when the input stream is exhausted.
         */
        private bool GetNextToken()
        {
            while (true)
            {
                if (numFillerTokensToInsert > 0)
                {
                    // First filler for this input token: remember the real token's state.
                    // Later fillers start again from that remembered state.
                    if (currentToken == null)
                    {
                        currentToken = CaptureState();
                    }
                    else
                    {
                        RestoreState(currentToken);
                    }
                    numFillerTokensToInsert--;
                    // A filler token occupies no space
                    offsetAtt.SetOffset(offsetAtt.StartOffset, offsetAtt.StartOffset);
                    termAtt.SetTermBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.Length);
                    return(true);
                }

                if (hasCurrentToken)
                {
                    // If fillers were emitted, the attributes were overwritten; put the
                    // real token back before returning it.
                    if (currentToken != null)
                    {
                        RestoreState(currentToken);
                        currentToken = null;
                    }
                    hasCurrentToken = false;
                    return(true);
                }

                if (!input.IncrementToken())
                {
                    return(false);
                }
                hasCurrentToken = true;

                // Schedule fillers for the skipped positions; the loop then goes around
                // again to emit either a filler or the token just read.
                if (posIncrAtt.PositionIncrement > 1)
                {
                    numFillerTokensToInsert = posIncrAtt.PositionIncrement - 1;
                }
            }
        }
Ejemplo n.º 17
0
            /// <summary>
            /// Replays the next cached state, creating the enumerator over
            /// <c>cachedStates</c> on first use.
            /// </summary>
            /// <returns><c>true</c> if a cached state was restored; <c>false</c> when none are left.</returns>
            public override bool incrementToken()
            {
                // Lazily create the enumerator.
                if (it == null)
                {
                    it = cachedStates.GetEnumerator();
                }

                // .NET enumerators expose MoveNext()/Current rather than Java's
                // hasNext()/next(); MoveNext() both tests for and advances to the next item.
                if (!it.MoveNext())
                {
                    return false;
                }

                AttributeSource.State state = it.Current;
                restoreState(state);
                return true;
            }
Ejemplo n.º 18
0
        /// <summary>
        /// Produces the next token: first any state still pending in <c>tokens</c>,
        /// otherwise the next token type reported by the scanner, handled according to
        /// <c>tokenOutput</c>.
        /// </summary>
        /// <returns><c>true</c> if a token was produced; <c>false</c> when the scanner reports YYEOF.</returns>
        public override bool IncrementToken()
        {
            bool replayPending = tokens != null && tokens.MoveNext();
            if (replayPending)
            {
                AttributeSource.State pending = tokens.Current;
                RestoreState(pending);
                return true;
            }

            ClearAttributes();

            int tokenType = scanner.GetNextToken();
            if (tokenType == WikipediaTokenizerImpl.YYEOF)
            {
                return false;
            }

            string type = WikipediaTokenizerImpl.TOKEN_TYPES[tokenType];
            if (tokenOutput == TOKENS_ONLY || !untokenizedTypes.Contains(type))
            {
                setupToken();
            }
            else if (tokenOutput == UNTOKENIZED_ONLY && untokenizedTypes.Contains(type))
            {
                collapseTokens(tokenType);
            }
            else if (tokenOutput == BOTH)
            {
                // Collapse into a single token, add it to tokens AND output the individual
                // tokens; the untokenized token is output first.
                collapseAndSaveTokens(tokenType, type);
            }

            // Never emit a position increment of 0 for the very first token.
            int increment = scanner.PositionIncrement;
            if (first && increment == 0)
            {
                increment = 1;
            }

            posIncrAtt.PositionIncrement = increment;
            typeAtt.Type = type;
            first = false;
            return true;
        }
Ejemplo n.º 19
0
 /// <summary>
 /// Emits the next entry popped from <c>permuterms</c> at position increment 0 if one
 /// is pending; otherwise advances the input and lets <c>addPermuterms()</c> decide
 /// whether the new token's state must be captured.
 /// </summary>
 /// <returns><c>true</c> if a token was produced; <c>false</c> once the input is exhausted.</returns>
 public sealed override bool IncrementToken()
 {
     if (permuterms.Count > 0)
     {
         char[] pending = permuterms.Pop();

         // Start from the captured token, then replace its term and stack it.
         RestoreState(current);
         termAttr.CopyBuffer(pending, 0, pending.Length);
         posIncAttr.PositionIncrement = 0;
         return true;
     }

     bool advanced = input.IncrementToken();
     if (advanced && addPermuterms())
     {
         current = CaptureState();
     }
     return advanced;
 }
Ejemplo n.º 20
0
        /// <summary>
        /// Emits the next entry popped from <c>synonymStack</c> at position increment 0 if
        /// one is pending; otherwise advances the input and lets
        /// <c>addAliasesToStack()</c> decide whether the new token's state must be captured.
        /// </summary>
        /// <returns><c>true</c> if a token was produced; <c>false</c> once the input is exhausted.</returns>
        public override bool IncrementToken()
        {
            if (synonymStack.Count > 0)
            {
                var alias = synonymStack.Pop();

                // Start from the captured token, then replace its term and stack it.
                RestoreState(current);
                termAtt.SetTermBuffer(alias);
                posIncrAtt.PositionIncrement = 0;
                return true;
            }

            bool advanced = input.IncrementToken();
            if (advanced && addAliasesToStack())
            {
                current = CaptureState();
            }
            return advanced;
        }
Ejemplo n.º 21
0
                /// <summary>
                /// Emits each input token and then, on the following call, an extra token with
                /// term "b" and position increment 0 built from the captured state.
                /// </summary>
                /// <returns><c>true</c> if a token was produced; <c>false</c> once the input is exhausted.</returns>
                public override bool IncrementToken()
                {
                    bool extraTokenPending = state != null;
                    if (extraTokenPending)
                    {
                        // Replay the captured token and rewrite it into the stacked "b" token.
                        RestoreState(state);
                        payloadAtt.Payload = null;
                        posIncrAtt.PositionIncrement = 0;
                        termAtt.SetEmpty().Append("b");
                        state = null;
                        return true;
                    }

                    if (!input.IncrementToken())
                    {
                        return false;
                    }

                    // A leading digit character is turned into the position increment.
                    char leading = termAtt.Buffer()[0];
                    if (char.IsDigit(leading))
                    {
                        posIncrAtt.PositionIncrement = leading - '0';
                    }

                    // The payload is attached only while the 'first' flag is still set.
                    if (first)
                    {
                        payloadAtt.Payload = new BytesRef(new byte[] { 100 });
                        first = false;
                    }

                    // Remember this token so the next call can emit its stacked companion.
                    state = CaptureState();
                    return true;
                }
Ejemplo n.º 22
0
 /// <summary>
 /// Resets the base stream, drops the captured state and sets the <c>first</c> flag again.
 /// </summary>
 public sealed override void Reset()
 {
     base.Reset();

     state = null;
     first = true;
 }
Ejemplo n.º 23
0
		/// <summary>
		/// Drops the captured state, marks the object as consumed and clears the
		/// <c>keepOrig</c> and <c>matched</c> flags.
		/// </summary>
		public virtual void reset()
		{
		  matched = false;
		  keepOrig = false;
		  consumed = true;
		  state = null;
		}
Ejemplo n.º 24
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
        /// <summary>
        /// Resets the base stream, forgets the captured state and empties <c>tokens</c>.
        /// </summary>
        public override void reset()
        {
            base.reset();

            current = null;
            tokens.Clear();
        }
Ejemplo n.º 25
0
        /*
         * Produces the next output token from the shingle buffer.
         *
         * Loops until a token can be emitted or FillShingleBuffer() returns false.
         * With outputUnigrams set, buffer position 0 emits the restored first token
         * with position increment 1. Other positions emit the text held in
         * shingles[shingleBufferPosition], typed as tokenType and ending at
         * endOffsets[shingleBufferPosition].
         *
         * Returns true when a token was emitted, false when FillShingleBuffer()
         * reports that nothing more is available.
         */
        public sealed override bool IncrementToken()
        {
            while (true)
            {
                // nextToken == null means the current buffer has been fully emitted.
                if (nextToken == null)
                {
                    if (!FillShingleBuffer())
                    {
                        return(false);
                    }
                }

                nextToken = shingleBuf.First.Value;

                if (outputUnigrams)
                {
                    // Position 0 is the unigram itself.
                    if (shingleBufferPosition == 0)
                    {
                        RestoreState(nextToken);
                        posIncrAtt.PositionIncrement = 1;
                        shingleBufferPosition++;
                        return(true);
                    }
                }
                else if (shingleBufferPosition % this.maxShingleSize == 0)
                {
                    // Unigrams are suppressed: step over the unigram slot.
                    shingleBufferPosition++;
                }

                if (shingleBufferPosition < shingleBuf.Count)
                {
                    // Start from the first buffered token, then overwrite type, end offset and term.
                    RestoreState(nextToken);
                    typeAtt.Type = tokenType;
                    offsetAtt.SetOffset(offsetAtt.StartOffset, endOffsets[shingleBufferPosition]);
                    StringBuilder buf        = shingles[shingleBufferPosition];
                    int           termLength = buf.Length;
                    char[]        TermBuffer = termAtt.TermBuffer();
                    // Grow the term buffer if the shingle text does not fit.
                    if (TermBuffer.Length < termLength)
                    {
                        TermBuffer = termAtt.ResizeTermBuffer(termLength);
                    }
                    buf.CopyTo(0, TermBuffer, 0, termLength);
                    termAtt.SetTermLength(termLength);
                    // Without unigrams, the shingle at position % maxShingleSize == 1 gets
                    // position increment 1; every other shingle gets 0.
                    if ((!outputUnigrams) && shingleBufferPosition % this.maxShingleSize == 1)
                    {
                        posIncrAtt.PositionIncrement = 1;
                    }
                    else
                    {
                        posIncrAtt.PositionIncrement = 0;
                    }
                    shingleBufferPosition++;
                    // Buffer exhausted: force a refill on the next call.
                    if (shingleBufferPosition == shingleBuf.Count)
                    {
                        nextToken             = null;
                        shingleBufferPosition = 0;
                    }
                    return(true);
                }
                else
                {
                    // Nothing left at this position: reset and loop to refill.
                    nextToken             = null;
                    shingleBufferPosition = 0;
                }
            }
        }
 /// <summary>
 /// Resets the base stream, forgets the captured state and empties <c>m_tokens</c>.
 /// </summary>
 public override void Reset()
 {
     base.Reset();

     current = null;
     m_tokens.Clear();
 }
Ejemplo n.º 27
0
 /// <summary>
 /// Resets the base stream, drops the captured state and sets the <c>first</c> flag again.
 /// </summary>
 public override void Reset()
 {
     base.Reset();

     state = null;
     first = true;
 }
Ejemplo n.º 28
0
 /// <summary>
 /// Adds the given state to <c>InputTokens</c>.
 /// </summary>
 /// <param name="state">The state to add.</param>
 public virtual void Add(AttributeSource.State state)
 {
     InputTokens.Add(state);
 }
Ejemplo n.º 29
0
 /// <summary>
 /// Stores the given state in the <c>finalState</c> field.
 /// </summary>
 /// <param name="finalState">The state to store.</param>
 internal void SetFinalState(AttributeSource.State finalState)
 {
     this.finalState = finalState;
 }