示例#1
0
        public override bool IncrementToken()
        {
            ClearAttributes();
            position++;

            // All segmented tokens consumed: signal end-of-stream.
            if (position >= tokens.Count)
            {
                End();
                return(false);
            }

            // Publish the current Jieba token's text, offsets and type.
            var token = tokens[position];
            termAtt.SetTermBuffer(token.Word);
            offsetAtt.SetOffset(token.StartIndex, token.EndIndex);
            typeAtt.Type = "Jieba";
            return(true);
        }
示例#2
0
 /*
  * Advances to the next token, replacing its text with the stemmed form
  * when the stemmer yields a different, non-null result.
  */
 public sealed override bool IncrementToken()
 {
     if (!input.IncrementToken())
     {
         return(false);
     }

     String original = termAtt.Term;
     String stemmed  = stemmer.Stem(original);

     // Only rewrite the buffer when stemming actually changed the term.
     if (stemmed != null && !stemmed.Equals(original))
     {
         termAtt.SetTermBuffer(stemmed);
     }

     return(true);
 }
        /// <summary>
        /// Emits the next buffered token, or ends the stream when the
        /// tokenizer produces no further tokens.
        /// </summary>
        public override bool IncrementToken()
        {
            ClearAttributes();
            Token token = NextToken(reusableToken);

            // BUG FIX: the end-of-stream check must inspect the token just
            // produced, not the queue object (which stays non-null even when
            // no tokens remain). The old `tokenQueue != null` test caused a
            // NullReferenceException on token.TermBuffer() at EOS.
            if (token != null)
            {
                termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
                offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
                typeAtt.Type = token.Type;
                return(true);
            }
            else
            {
                End();
                return(false);
            }
        }
示例#4
0
            public override bool IncrementToken()
            {
                ClearAttributes();

                // End of the canned token array means end of the stream.
                if (_index >= _testToken.Length)
                {
                    return(false);
                }

                Token current = _testToken[_index];
                _index++;

                // Mirror the test token's attributes onto this stream.
                _termAtt.SetTermBuffer(current.TermBuffer(), 0, current.TermLength());
                _offsetAtt.SetOffset(current.StartOffset, current.EndOffset);
                _posIncrAtt.PositionIncrement = current.PositionIncrement;
                _typeAtt.Type = TypeAttribute.DEFAULT_TYPE;

                return(true);
            }
示例#5
0
 /// <summary>
 /// Stems the current term in place; empty terms pass through untouched.
 /// </summary>
 public override bool IncrementToken()
 {
     if (!input.IncrementToken())
     {
         return(false);
     }

     // PERF: the previous version allocated a string from the term buffer
     // solely to test for emptiness; checking the length directly avoids a
     // per-token allocation and is behaviorally identical (the built string
     // was empty exactly when the length was zero).
     if (termAtt.TermLength() > 0)
     {
         stemmer.Stem(termAtt.TermBuffer(), termAtt.TermLength(), out char[] newTerm, out var newLength);
         termAtt.SetTermBuffer(newTerm, 0, newLength);
         termAtt.SetTermLength(newLength);
     }
     return(true);
 }
示例#6
0
        public override bool IncrementToken()
        {
            ClearAttributes();
            Word word = mmSeg.Next();

            // A null word from the segmenter marks the end of the stream.
            if (word == null)
            {
                End();
                return(false);
            }

            // Publish the segmented word's text slice, offsets and type.
            termAtt.SetTermBuffer(word.Sen, word.WordOffset, word.Length);
            offsetAtt.SetOffset(word.StartOffset, word.EndOffset);
            typeAtt.Type = word.Type;
            return(true);
        }
示例#7
0
        /*
         * Get the next token from the input stream and push it on the token buffer.
         * If we encounter a token with position increment > 1, we put filler tokens
         * on the token buffer.
         * <p/>
         * Returns null when the end of the input stream is reached.
         * @return the next token, or null if at end of input stream
         * @throws IOException if the input stream has a problem
         */
        private bool GetNextToken()
        {
            while (true)
            {
                // Pending fillers take priority: emit exactly one filler per
                // call until the position gap has been fully bridged.
                if (numFillerTokensToInsert > 0)
                {
                    // First filler after a gap: capture the real token's state
                    // so it can be replayed later; subsequent fillers re-apply
                    // that captured state before being rewritten below.
                    if (currentToken == null)
                    {
                        currentToken = CaptureState();
                    }
                    else
                    {
                        RestoreState(currentToken);
                    }
                    numFillerTokensToInsert--;
                    // A filler token occupies no space
                    offsetAtt.SetOffset(offsetAtt.StartOffset, offsetAtt.StartOffset);
                    termAtt.SetTermBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.Length);
                    return(true);
                }

                // A real token was buffered while fillers were emitted: replay
                // its captured state (if any) and return it now.
                if (hasCurrentToken)
                {
                    if (currentToken != null)
                    {
                        RestoreState(currentToken);
                        currentToken = null;
                    }
                    hasCurrentToken = false;
                    return(true);
                }

                // Pull the next token from the input; false means end of stream.
                if (!input.IncrementToken())
                {
                    return(false);
                }
                hasCurrentToken = true;

                // A position increment of N > 1 means N-1 positions were
                // skipped; schedule that many filler tokens ahead of this one.
                if (posIncrAtt.PositionIncrement > 1)
                {
                    numFillerTokensToInsert = posIncrAtt.PositionIncrement - 1;
                }
            }
        }
示例#8
0
 /// <summary>Returns the next input token, stemmed in place when the stem differs.</summary>
 public sealed override bool IncrementToken()
 {
     if (!input.IncrementToken())
     {
         return(false);
     }

     String before = termAtt.Term;
     stemmer.SetCurrent(before);
     stemmer.Stem();
     String after = stemmer.GetCurrent();

     // Skip the buffer write when stemming left the term unchanged.
     if (!before.Equals(after))
     {
         termAtt.SetTermBuffer(after);
     }
     return(true);
 }
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return(false);
            }

            string version = _termAttribute.Term;

            // When the token parses as a NuGet version, index its normalized
            // form; otherwise keep the raw text unchanged.
            if (NuGetVersion.TryParse(version, out NuGetVersion parsed))
            {
                version = parsed.ToNormalizedString();
            }

            _termAttribute.SetTermBuffer(version);
            return(true);
        }
示例#10
0
 /// <summary>
 /// Emits any queued synonyms for the previous token before advancing the
 /// underlying stream; synonyms are emitted with a position increment of
 /// zero so they stack on the original term's position.
 /// </summary>
 /// <returns>true while tokens remain; false at end of stream</returns>
 public override bool IncrementToken()
 {
     // Drain pending synonyms first, restoring the captured state of the
     // token they alias.
     if (synonymStack.Count > 0)
     {
         String popped = synonymStack.Pop();
         RestoreState(current);
         termAtt.SetTermBuffer(popped);
         posIncrAtt.PositionIncrement = 0;
         return(true);
     }

     if (!input.IncrementToken())
     {
         return(false);
     }

     // Capture state only when this token actually has aliases queued.
     if (addAliasesToStack())
     {
         current = CaptureState();
     }

     return(true);
 }
示例#11
0
 /*
  * <returns>Returns the next token in the stream, or null at EOS.</returns>
  */
 public override bool IncrementToken()
 {
     if (!input.IncrementToken())
     {
         return(false);
     }

     string term = termAtt.Term;

     // Terms on the exclusion list pass through unstemmed.
     if (exclusions != null && exclusions.Contains(term))
     {
         return(true);
     }

     string stemmed = stemmer.Stem(term);

     // If not stemmed, don't waste the time adjusting the token.
     if (stemmed != null && !stemmed.Equals(term))
     {
         termAtt.SetTermBuffer(stemmed);
     }
     return(true);
 }
示例#12
0
        // Produces the next shingle from the token matrix. The matrix is
        // built lazily on the first call; ProduceNextToken is driven in a
        // loop (rather than recursively) because large matrices would
        // otherwise blow the stack.
        public override sealed bool IncrementToken()
        {
            if (Matrix == null)
            {
                Matrix = new Matrix.Matrix();

                // fill matrix with maximumShingleSize columns
                while (Matrix.Columns.Count < MaximumShingleSize && ReadColumn())
                {
                    // this loop looks ugly
                }
            }

            // This loop exists in order to avoid recursive calls to the next method
            // as the complexity of a large matrix
            // then would require a multi gigabyte sized stack.
            Token token;

            do
            {
                // _requestNextToken is a sentinel meaning "call me again";
                // keep looping until a real token (or null for EOS) appears.
                token = ProduceNextToken(_reusableToken);
            } while (token == _requestNextToken);

            // A null token means the matrix is exhausted: end of stream.
            if (token == null)
            {
                return(false);
            }

            ClearAttributes();

            // Copy every attribute of the produced token onto this stream.
            _termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
            _posIncrAtt.PositionIncrement = token.PositionIncrement;
            _flagsAtt.Flags = token.Flags;
            _offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
            _typeAtt.Type       = token.Type;
            _payloadAtt.Payload = token.Payload;

            return(true);
        }
 public override bool IncrementToken()
 {
     if (!input.IncrementToken())
     {
         return(false);
     }

     char[] term = termAtt.TermBuffer();
     int    len  = termAtt.TermLength();

     // Scan for the first character in the accent-bearing range; tokens
     // containing none pass through untouched.
     for (int idx = 0; idx < len; idx++)
     {
         char ch = term[idx];
         if (ch >= '\u00c0' && ch <= '\uFB06')
         {
             RemoveAccents(term, len);
             termAtt.SetTermBuffer(output, 0, outputPos);
             break;
         }
     }
     return(true);
 }
示例#14
0
            public override bool IncrementToken()
            {
                // Stop when the keyword enumerator is exhausted.
                if (!iter.MoveNext())
                {
                    return(false);
                }

                T obj = iter.Current;
                if (obj == null)
                {
                    throw new ArgumentException("keyword must not be null");
                }

                String term = obj.ToString();

                ClearAttributes();
                termAtt.SetTermBuffer(term);
                offsetAtt.SetOffset(start, start + termAtt.TermLength());
                // Advance past this word plus one (blank) separator character.
                start += term.Length + 1;
                return(true);
            }
示例#15
0
 public override bool IncrementToken()
 {
     // Emit pending synthetic "multi<n>" tokens stacked on the previous
     // token's position before pulling anything new from the input.
     if (Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken > 0)
     {
         termAtt.SetTermBuffer("multi" + (Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken + 1));
         offsetAtt.SetOffset(prevStartOffset, prevEndOffset);
         typeAtt.Type = prevType;
         posIncrAtt.PositionIncrement = 0;
         Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken--;
         return(true);
     }

     if (!input.IncrementToken())
     {
         return(false);
     }

     // Remember this token's attributes so the stacked tokens above can
     // reuse them on later calls.
     prevType        = typeAtt.Type;
     prevStartOffset = offsetAtt.StartOffset;
     prevEndOffset   = offsetAtt.EndOffset;

     // "triplemulti" triggers two stacked tokens, "multi" one; every token
     // itself is always returned.
     System.String text = termAtt.Term;
     if (text.Equals("triplemulti"))
     {
         Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken = 2;
     }
     else if (text.Equals("multi"))
     {
         Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken = 1;
     }
     return(true);
 }
示例#16
0
            /// <summary>
            /// Splits the captured string on regex matches, returning the text
            /// between matches as tokens (optionally lower-cased).
            /// </summary>
            public sealed override bool IncrementToken()
            {
                if (matcher == null)
                {
                    return(false);
                }
                ClearAttributes();
                while (true)
                { // loop takes care of leading and trailing boundary cases
                    int  start = pos;
                    int  end;
                    bool isMatch = matcher.Success;
                    if (isMatch)
                    {
                        end     = matcher.Index;
                        pos     = matcher.Index + matcher.Length;
                        matcher = matcher.NextMatch();
                    }
                    else
                    {
                        end     = str.Length;
                        matcher = null; // we're finished
                    }

                    if (start != end)
                    { // non-empty match (header/trailer)
                        String text = str.Substring(start, end - start);
                        if (toLowerCase)
                        {
                            text = text.ToLower(locale);
                        }
                        termAtt.SetTermBuffer(text);
                        offsetAtt.SetOffset(start, end);
                        return(true);
                    }

                    // BUG FIX: only give up when the input is exhausted. The
                    // previous unconditional "return false" defeated the
                    // boundary-case loop: an empty leading span (start == end,
                    // e.g. a delimiter at position 0) ended the stream instead
                    // of skipping ahead to the next match.
                    if (!isMatch)
                    {
                        return(false);
                    }
                }
            }
示例#17
0
 public override bool IncrementToken()
 {
     if (tokenUpto >= Enclosing_Instance.tokens.Length)
     {
         return(false);
     }

     TestToken testToken = Enclosing_Instance.tokens[tokenUpto++];
     ClearAttributes();
     termAtt.SetTermBuffer(testToken.text);
     offsetAtt.SetOffset(testToken.startOffset, testToken.endOffset);

     // First token: increment is absolute (pos + 1); afterwards it is the
     // delta from the previous token's position.
     posIncrAtt.PositionIncrement = tokenUpto > 1
         ? testToken.pos - Enclosing_Instance.tokens[tokenUpto - 2].pos
         : testToken.pos + 1;

     return(true);
 }
示例#18
0
 // Produces edge n-grams (front or back) for each input token, emitting
 // sizes minGram..maxGram in increasing order; pulls the next source token
 // once the current one is exhausted.
 public override bool IncrementToken()
 {
     while (true)
     {
         // No cached token: fetch one and reset the gram size to minGram.
         if (curTermBuffer == null)
         {
             if (!input.IncrementToken())
             {
                 return(false);
             }
             else
             {
                 // Clone the buffer — ClearAttributes() below would otherwise
                 // stomp on the shared term buffer.
                 curTermBuffer = (char[])termAtt.TermBuffer().Clone();
                 curTermLength = termAtt.TermLength();
                 curGramSize   = minGram;
                 tokStart      = offsetAtt.StartOffset;
             }
         }
         if (curGramSize <= maxGram)
         {
             if (!(curGramSize > curTermLength || // if the remaining input is too short, we can't generate any n-grams
                   curGramSize > maxGram))
             {                                    // if we have hit the end of our n-gram size range, quit
                 // grab gramSize chars from front or back
                 int start = side == Side.FRONT ? 0 : curTermLength - curGramSize;
                 int end   = start + curGramSize;
                 ClearAttributes();
                 offsetAtt.SetOffset(tokStart + start, tokStart + end);
                 termAtt.SetTermBuffer(curTermBuffer, start, curGramSize);
                 curGramSize++;
                 return(true);
             }
         }
         // Current token exhausted (or too short for any gram): drop the
         // cache so the next iteration fetches a fresh source token.
         curTermBuffer = null;
     }
 }
示例#19
0
        /*
         * Increments the {@link TokenStream} with a {@link TermAttribute} without elisioned start
         */
        public override sealed bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return(false);
            }

            char[] termBuffer = termAtt.TermBuffer();
            int    termLength = termAtt.TermLength();

            // Locate the earliest occurrence of any configured apostrophe
            // variant in the term; int.MaxValue means none was found.
            int firstApos = int.MaxValue;
            foreach (char apos in apostrophes)
            {
                // The equivalent of String.indexOf(ch)
                for (int i = 0; i < termLength; i++)
                {
                    if (termBuffer[i] == apos)
                    {
                        firstApos = Math.Min(i, firstApos);
                        break;
                    }
                }
            }

            // An apostrophe has been found. If the prefix is an article
            // (l', d', qu', ...) strip it off, apostrophe included.
            if (firstApos != int.MaxValue &&
                articles.Contains(termAtt.TermBuffer(), 0, firstApos))
            {
                termAtt.SetTermBuffer(termAtt.TermBuffer(), firstApos + 1, termAtt.TermLength() - (firstApos + 1));
            }

            return(true);
        }
 // Edge n-gram filter (underscore-field variant): emits grams of sizes
 // _minGram.._maxGram from the front or back of each input token.
 public override bool IncrementToken()
 {
     while (true)
     {
         // No cached token: fetch one and reset the gram size to the minimum.
         if (_curTermBuffer == null)
         {
             if (!input.IncrementToken())
             {
                 return(false);
             }
             else
             {
                 // Clone the buffer — ClearAttributes() below would otherwise
                 // stomp on the shared term buffer.
                 _curTermBuffer = (char[])_termAtt.TermBuffer().Clone();
                 _curTermLength = _termAtt.TermLength();
                 _curGramSize   = _minGram;
                 _tokStart      = _offsetAtt.StartOffset;
             }
         }
         if (_curGramSize <= _maxGram)
         {
             if (!(_curGramSize > _curTermLength || // if the remaining input is too short, we can't generate any n-grams
                   _curGramSize > _maxGram))
             {                                      // if we have hit the end of our n-gram size range, quit
                 // grab gramSize chars from front or back
                 int start = _side == Side.Front ? 0 : _curTermLength - _curGramSize;
                 int end   = start + _curGramSize;
                 ClearAttributes();
                 _offsetAtt.SetOffset(_tokStart + start, _tokStart + end);
                 _termAtt.SetTermBuffer(_curTermBuffer, start, _curGramSize);
                 _curGramSize++;
                 return(true);
             }
         }
         // Current token exhausted (or too short): drop the cache so the
         // next iteration fetches a fresh source token.
         _curTermBuffer = null;
     }
 }
示例#21
0
        /// <summary>
        /// Returns the input tokens unchanged while queueing their synonyms,
        /// which are then emitted at the same position (increment 0).
        /// </summary>
        public override bool IncrementToken()
        {
            // Emit queued synonyms first, restoring the state of the token
            // they were generated for.
            if (splittedQueue.Count > 0)
            {
                string splitted = splittedQueue.Dequeue();
                RestoreState(currentState);
                termAtt.SetTermBuffer(splitted);
                posAtt.PositionIncrement = 0;
                return(true);
            }

            if (!input.IncrementToken())
            {
                return(false);
            }

            string currentTerm = termAtt.Term;

            if (currentTerm != null)
            {
                // FIX: removed an unused StringBuilder that was allocated here
                // on every token but never written to or read.
                var synonyms = SynonymEngine.GetSynonyms(currentTerm);

                if (synonyms == null || synonyms.Any() == false)
                {
                    return(true);
                }
                foreach (var synonym in synonyms)
                {
                    splittedQueue.Enqueue(synonym.ToLower());
                }
            }

            // Capture state only when synonyms were queued for this token.
            currentState = CaptureState();
            return(true);
        }
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return(false);
            }

            char[] termChars = termAtt.TermBuffer();
            int    termLen   = termAtt.TermLength();

            // Pure-ASCII tokens (every char below U+0080) pass through as-is;
            // the first non-ASCII character triggers folding of the whole
            // buffer.
            for (int pos = 0; pos < termLen; ++pos)
            {
                if (termChars[pos] >= '\u0080')
                {
                    FoldToASCII(termChars, termLen);
                    termAtt.SetTermBuffer(output, 0, outputPos);
                    break;
                }
            }
            return(true);
        }
示例#23
0
        // CJK-style tokenizer: runs of ASCII letters/digits (plus _ + #) are
        // emitted as single tokens, while CJK letters are emitted as
        // overlapping bigrams (preIsTokened tracks the carried-over character
        // between consecutive bigrams). NOTE(review): the exact bigram
        // overlap behavior depends on the enclosing class's preIsTokened /
        // tokenType state machine — confirm against callers before changing.
        public override bool IncrementToken()
        {
            ClearAttributes();
            /* how many character(s) has been stored in buffer */

            while (true)
            {
                // loop until we find a non-empty token

                int length = 0;

                /* the position used to create Token */
                int start = offset;

                while (true)
                {
                    // loop until we've found a full token
                    /* current character */
                    char c;

                    offset++;

                    // Refill the I/O buffer when it has been consumed.
                    if (bufferIndex >= dataLen)
                    {
                        dataLen     = input.Read(ioBuffer, 0, ioBuffer.Length);
                        bufferIndex = 0;
                    }

                    if (dataLen == 0) // input.Read returns 0 when its empty, not -1, as in java
                    {
                        // End of input: flush whatever is in the buffer, or
                        // report end-of-stream if the buffer is empty.
                        if (length > 0)
                        {
                            if (preIsTokened == true)
                            {
                                length       = 0;
                                preIsTokened = false;
                            }
                            else
                            {
                                offset--;
                            }

                            break;
                        }
                        else
                        {
                            offset--;
                            return(false);
                        }
                    }
                    else
                    {
                        //get current character
                        c = ioBuffer[bufferIndex++];
                    }

                    //TODO: Using a Regex to determine the UnicodeCategory is probably slower than
                    //      If we just created a small class that would look it up for us, which
                    //      would likely be trivial, however time-consuming.  I can't imagine a Regex
                    //      being fast for this, considering we have to pull a char from the buffer,
                    //      and convert it to a string before we run a regex on it. - cc
                    bool isHalfFullForm = isHalfWidthAndFullWidthForms.Match(c.ToString()).Success;
                    //if the current character is ASCII or Extend ASCII
                    if ((isBasicLatin.Match(c.ToString()).Success) || (isHalfFullForm))
                    {
                        if (isHalfFullForm)
                        {
                            int i = (int)c;
                            if (i >= 65281 && i <= 65374)
                            {
                                // convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN
                                i = i - 65248;
                                c = (char)i;
                            }
                        }

                        // if the current character is a letter or "_" "+" "#"
                        if (char.IsLetterOrDigit(c) ||
                            ((c == '_') || (c == '+') || (c == '#'))
                            )
                        {
                            if (length == 0)
                            {
                                // "javaC1C2C3C4linux" <br>
                                //      ^--: the current character begin to token the ASCII
                                // letter
                                start = offset - 1;
                            }
                            else if (tokenType == DOUBLE_TOKEN_TYPE)
                            {
                                // "javaC1C2C3C4linux" <br>
                                //              ^--: the previous non-ASCII
                                // : the current character
                                // Transition from CJK to ASCII: push the char
                                // back and emit the buffered CJK token first.
                                offset--;
                                bufferIndex--;

                                if (preIsTokened == true)
                                {
                                    // there is only one non-ASCII has been stored
                                    length       = 0;
                                    preIsTokened = false;
                                    break;
                                }
                                else
                                {
                                    break;
                                }
                            }

                            // store the LowerCase(c) in the buffer
                            buffer[length++] = char.ToLower(c); // TODO: is java invariant?  If so, this should be ToLowerInvariant()
                            tokenType        = SINGLE_TOKEN_TYPE;

                            // break the procedure if buffer overflowed!
                            if (length == MAX_WORD_LEN)
                            {
                                break;
                            }
                        }
                        else if (length > 0)
                        {
                            // Non-token ASCII character ends the current token.
                            if (preIsTokened)
                            {
                                length       = 0;
                                preIsTokened = false;
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                    else
                    {
                        // non-ASCII letter, e.g."C1C2C3C4"
                        if (char.IsLetter(c))
                        {
                            if (length == 0)
                            {
                                start            = offset - 1;
                                buffer[length++] = c;
                                tokenType        = DOUBLE_TOKEN_TYPE;
                            }
                            else
                            {
                                if (tokenType == SINGLE_TOKEN_TYPE)
                                {
                                    // Transition from ASCII to CJK: push the
                                    // char back and emit the ASCII token first.
                                    offset--;
                                    bufferIndex--;

                                    //return the previous ASCII characters
                                    break;
                                }
                                else
                                {
                                    buffer[length++] = c;
                                    tokenType        = DOUBLE_TOKEN_TYPE;

                                    // A full bigram: push the second char back
                                    // so it becomes the first char of the next
                                    // (overlapping) bigram.
                                    if (length == 2)
                                    {
                                        offset--;
                                        bufferIndex--;
                                        preIsTokened = true;

                                        break;
                                    }
                                }
                            }
                        }
                        else if (length > 0)
                        {
                            // Non-letter character ends the current token.
                            if (preIsTokened == true)
                            {
                                // empty the buffer
                                length       = 0;
                                preIsTokened = false;
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                }

                if (length > 0)
                {
                    // Emit the buffered token with corrected offsets.
                    termAtt.SetTermBuffer(buffer, 0, length);
                    offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length));
                    typeAtt.Type = TOKEN_TYPE_NAMES[tokenType];
                    return(true);
                }
                else if (dataLen == 0)
                {
                    offset--;
                    return(false);
                }

                // Cycle back and try for the next token (don't
                // return an empty string)
            }
        }
示例#24
0
 /// <summary>Appends a single character to the attribute's term text.</summary>
 public static void Append(this ITermAttribute termAtt, char ch)
 {
     // PERF: concatenating the char directly avoids the intermediate
     // one-element char[] and string the previous version allocated.
     termAtt.SetTermBuffer(termAtt.Term + ch);             // TODO: Not optimal, but works
 }
示例#25
0
 /// <summary>Appends <paramref name="str"/> to the attribute's term text.</summary>
 public static void Append(this ITermAttribute termAtt, string str)
 {
     // TODO: Not optimal (rebuilds the whole term string), but works.
     string combined = termAtt.Term + str;
     termAtt.SetTermBuffer(combined);
 }
示例#26
0
 /// <summary>Resets this token to the given text and character offsets.</summary>
 /// <param name="stringValue">new term text</param>
 /// <param name="startOffset">start character offset in the source text</param>
 /// <param name="endOffset">end character offset in the source text</param>
 public void  Reinit(string stringValue, int startOffset, int endOffset)
 {
     termAttribute.SetTermBuffer(stringValue);
     offsetAttribute.SetOffset(startOffset, endOffset);
 }
 /// <summary> Fills TermAttribute with the current token text.</summary>
 /// <remarks>Copies the scanner buffer span [zzStartRead, zzMarkedPos)
 /// directly into the attribute, avoiding an intermediate string.</remarks>
 internal void  GetText(ITermAttribute t)
 {
     t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
 }
        /// <summary>
        /// Asserts that <paramref name="ts"/> produces exactly the expected
        /// terms — and, when the corresponding array is non-null, the expected
        /// offsets, types and position increments — that each IncrementToken()
        /// call clears attributes first, and that the stream ends (reporting
        /// <paramref name="finalOffset"/>) correctly.
        /// </summary>
        public static void AssertTokenStreamContents(TokenStream ts, System.String[] output, int[] startOffsets, int[] endOffsets, System.String[] types, int[] posIncrements, int?finalOffset)
        {
            Assert.IsNotNull(output);
            ICheckClearAttributesAttribute checkClearAtt = ts.AddAttribute <ICheckClearAttributesAttribute>();

            Assert.IsTrue(ts.HasAttribute <ITermAttribute>(), "has no TermAttribute");
            ITermAttribute termAtt = ts.GetAttribute <ITermAttribute>();

            // Optional attributes are fetched only when a caller actually
            // supplied expectations for them.
            IOffsetAttribute offsetAtt = null;

            if (startOffsets != null || endOffsets != null || finalOffset != null)
            {
                Assert.IsTrue(ts.HasAttribute <IOffsetAttribute>(), "has no OffsetAttribute");
                offsetAtt = ts.GetAttribute <IOffsetAttribute>();
            }

            ITypeAttribute typeAtt = null;

            if (types != null)
            {
                Assert.IsTrue(ts.HasAttribute <ITypeAttribute>(), "has no TypeAttribute");
                typeAtt = ts.GetAttribute <ITypeAttribute>();
            }

            IPositionIncrementAttribute posIncrAtt = null;

            if (posIncrements != null)
            {
                Assert.IsTrue(ts.HasAttribute <IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
                posIncrAtt = ts.GetAttribute <IPositionIncrementAttribute>();
            }

            ts.Reset();
            for (int i = 0; i < output.Length; i++)
            {
                // extra safety to enforce, that the state is not preserved and also assign bogus values
                ts.ClearAttributes();
                termAtt.SetTermBuffer("bogusTerm");
                if (offsetAtt != null)
                {
                    offsetAtt.SetOffset(14584724, 24683243);
                }
                if (typeAtt != null)
                {
                    typeAtt.Type = "bogusType";
                }
                if (posIncrAtt != null)
                {
                    posIncrAtt.PositionIncrement = 45987657;
                }

                checkClearAtt.GetAndResetClearCalled(); // reset it, because we called clearAttribute() before
                Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
                Assert.IsTrue(checkClearAtt.GetAndResetClearCalled(), "clearAttributes() was not called correctly in TokenStream chain");

                // The bogus values above must all have been replaced by the
                // stream; any survivor indicates a missing ClearAttributes().
                Assert.AreEqual(output[i], termAtt.Term, "term " + i);
                if (startOffsets != null)
                {
                    Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset, "startOffset " + i);
                }
                if (endOffsets != null)
                {
                    Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset, "endOffset " + i);
                }
                if (types != null)
                {
                    Assert.AreEqual(types[i], typeAtt.Type, "type " + i);
                }
                if (posIncrements != null)
                {
                    Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i);
                }
            }
            // The stream must be exhausted exactly at output.Length tokens.
            Assert.IsFalse(ts.IncrementToken(), "end of stream");
            ts.End();
            if (finalOffset.HasValue)
            {
                Assert.AreEqual(finalOffset, offsetAtt.EndOffset, "finalOffset ");
            }
            ts.Close();
        }
		/// <summary> Fills TermAttribute with the current token text.</summary>
		/// <remarks>Copies the scanner buffer span [zzStartRead, zzMarkedPos)
		/// directly into the attribute, avoiding an intermediate string.</remarks>
		internal void  GetText(ITermAttribute t)
		{
			t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
		}
示例#30
0
        /// <summary>
        /// Produces the next Hebrew token: skips bare construct prefixes,
        /// strips prefixes from quoted acronyms, then publishes the term,
        /// offsets and a type derived from the token-type flags.
        /// </summary>
        public override bool IncrementToken()
        {
            ClearAttributes();

            string nextToken;

            HebMorph.Tokenizer.TokenType tokenType;

            // Used to loop over certain noise cases
            while (true)
            {
                tokenType = hebMorphTokenizer.NextToken(out nextToken);
                if (tokenType == 0)
                {
                    return(false); // EOS
                }
                // Ignore "words" which are actually only prefixes in a single word.
                // This first case is easy to spot, since the prefix and the following word will be
                // separated by a dash marked as a construct (סמיכות) by the Tokenizer
                if ((tokenType & HebMorph.Tokenizer.TokenType.Construct) > 0)
                {
                    if (IsLegalPrefix(nextToken))
                    {
                        continue;
                    }
                }

                // This second case is a bit more complex. We take a risk of splitting a valid acronym or
                // abbrevated word into two, so we send it to an external function to analyze the word, and
                // get a possibly corrected word. Examples for words we expect to simplify by this operation
                // are ה"שטיח", ש"המידע.
                if ((tokenType & HebMorph.Tokenizer.TokenType.Acronym) > 0)
                {
                    nextToken = TryStrippingPrefix(nextToken);

                    // Re-detect acronym, in case it was a false positive
                    if (nextToken.IndexOf('"') == -1)
                    {
                        // BUG FIX: use &= to CLEAR the Acronym flag. The old
                        // code used |= ~Acronym, which instead SET every bit
                        // except Acronym, corrupting the token type.
                        tokenType &= ~HebMorph.Tokenizer.TokenType.Acronym;
                    }
                }

                break;
            }

            // Record the term string
            if (termAtt.TermLength() < nextToken.Length)
            {
                termAtt.SetTermBuffer(nextToken);
            }
            else // Perform a copy to save on memory operations
            {
                char[] buf = termAtt.TermBuffer();
                nextToken.CopyTo(0, buf, 0, nextToken.Length);
            }
            termAtt.SetTermLength(nextToken.Length);

            offsetAtt.SetOffset(CorrectOffset(hebMorphTokenizer.Offset), CorrectOffset(hebMorphTokenizer.Offset + hebMorphTokenizer.LengthInSource));

            if ((tokenType & HebMorph.Tokenizer.TokenType.Hebrew) > 0)
            {
                if ((tokenType & HebMorph.Tokenizer.TokenType.Acronym) > 0)
                {
                    typeAtt.Type = TokenTypeSignature(TOKEN_TYPES.Acronym);
                }
                // BUG FIX: 'else if' so an acronym keeps its Acronym type; the
                // previous independent 'if'/'else' pair unconditionally
                // overwrote it with Construct or Hebrew.
                else if ((tokenType & HebMorph.Tokenizer.TokenType.Construct) > 0)
                {
                    typeAtt.Type = TokenTypeSignature(TOKEN_TYPES.Construct);
                }
                else
                {
                    typeAtt.Type = TokenTypeSignature(TOKEN_TYPES.Hebrew);
                }
            }
            else if ((tokenType & HebMorph.Tokenizer.TokenType.Numeric) > 0)
            {
                typeAtt.Type = TokenTypeSignature(TOKEN_TYPES.Numeric);
            }
            else
            {
                typeAtt.Type = TokenTypeSignature(TOKEN_TYPES.NonHebrew);
            }

            return(true);
        }
 /// <summary>Copies (term, startOffset, endOffset, positionIncrement)
 /// from the tuple onto the stream's attributes.</summary>
 private void SetAttributes(Tuple <string, int, int, int> next)
 {
     string term = next.Item1;
     _termAttribute.SetTermBuffer(term);
     _offsetAttribute.SetOffset(next.Item2, next.Item3);
     _positionIncrementAttribute.PositionIncrement = next.Item4;
 }