Exemple #1
0
        /// <summary>
        /// Reads the next token from the buffer, skipping over spans that the break
        /// iterator reports with rule status 0 (non-tokens), and publishes it through
        /// the term, offset, type and script attributes.
        /// </summary>
        /// <returns><c>true</c> when a token was produced; <c>false</c> when the break iterator has no more boundaries.</returns>
        private bool IncrementTokenBuffer()
        {
            int boundary = breaker.Current;
            if (boundary == BreakIterator.Done)
            {
                return false; // nothing left to iterate
            }

            // Walk boundary to boundary: each pass remembers the previous boundary as
            // the candidate token start and asks for the next one. Keep going while the
            // span just crossed is a non-token (rule status 0) and boundaries remain.
            int tokenStart;
            do
            {
                tokenStart = boundary;
                boundary = breaker.Next();
            }
            while (boundary != BreakIterator.Done && breaker.RuleStatus == 0);

            if (boundary == BreakIterator.Done)
            {
                return false; // ran out of boundaries before finding a token
            }

            // [tokenStart, boundary) is the token; offsets are relative to the buffer,
            // so the running offset is added before correction.
            int tokenLength = boundary - tokenStart;
            termAtt.CopyBuffer(buffer, tokenStart, tokenLength);
            offsetAtt.SetOffset(CorrectOffset(offset + tokenStart), CorrectOffset(offset + boundary));
            typeAtt.Type = config.GetType(breaker.ScriptCode, breaker.RuleStatus);
            scriptAtt.Code = breaker.ScriptCode;
            return true;
        }
Exemple #2
0
        /// <summary>
        /// Advances the break iterator to the next token in the buffer and, when one is
        /// found, populates the term, offset, type and script attributes from it.
        /// </summary>
        /// <returns><c>true</c> if there is a token from the buffer, or <c>false</c> if it is exhausted.</returns>
        private bool IncrementTokenBuffer()
        {
            int start = breaker.Current;

            if (start == BreakIterator.Done)
            {
                return false; // BreakIterator exhausted
            }

            // find the next set of boundaries, skipping over non-tokens (rule status 0)
            int end = breaker.Next();

            // LUCENENET specific - ICU 60.1 does not set the rule status back to 0
            // when the iterator runs out, so exhaustion is detected from the boundary
            // returned by Next() rather than from the rule status. Guarding on `end`
            // also stops the loop as soon as Done is seen, without calling Next() again.
            while (end != BreakIterator.Done && breaker.RuleStatus == 0)
            {
                start = end;
                end = breaker.Next();
            }

            if (end == BreakIterator.Done)
            {
                // BreakIterator exhausted; never compute a token span that ends at the Done sentinel
                return false;
            }

            // [start, end) is the token within the buffer; attribute offsets are shifted
            // by the running offset before correction.
            termAtt.CopyBuffer(buffer, start, end - start);
            offsetAtt.SetOffset(CorrectOffset(offset + start), CorrectOffset(offset + end));
            typeAtt.Type = config.GetType(breaker.ScriptCode, breaker.RuleStatus);
            scriptAtt.Code = breaker.ScriptCode;

            return true;
        }