示例#1
0
        /// <summary>
        /// Emits the next word of the current sentence. Segments reported by
        /// <c>wordBreaker</c> are skipped while the code point at the segment start is
        /// neither a letter nor a digit.
        /// </summary>
        /// <returns>
        /// true when a term and its offsets were written to the attributes; false when
        /// <c>wordBreaker</c> has no further boundary to offer.
        /// </returns>
        protected internal override bool incrementWord()
        {
            int wordStart = wordBreaker.current();
            if (wordStart == BreakIterator.DONE)
            {
                // Nothing left in this sentence.
                return false;
            }

            // Walk forward one segment at a time until a segment that begins with a
            // letter or digit is found, or the breaker runs out of boundaries.
            int wordEnd = wordBreaker.next();
            for (;;)
            {
                if (wordEnd == BreakIterator.DONE)
                {
                    // Ran off the end while skipping non-token segments.
                    return false;
                }

                if (char.IsLetterOrDigit(char.codePointAt(buffer, sentenceStart + wordStart, sentenceEnd)))
                {
                    break;
                }

                wordStart = wordEnd;
                wordEnd   = wordBreaker.next();
            }

            // Boundaries are relative to the sentence; shift them into buffer/stream space.
            int bufferPos = sentenceStart + wordStart;
            int termLen   = wordEnd - wordStart;

            clearAttributes();
            termAtt.copyBuffer(buffer, bufferPos, termLen);
            offsetAtt.setOffset(correctOffset(offset + sentenceStart + wordStart), correctOffset(offset + sentenceStart + wordEnd));
            return true;
        }
示例#2
0
        /// <summary>
        /// Moves through the sentences reported by <c>iterator</c> until one of them
        /// yields a word.
        /// </summary>
        /// <returns>
        /// true if <c>incrementWord()</c> produced a token; false if the buffer is empty
        /// (<c>length == 0</c>) or <c>iterator</c> is exhausted.
        /// </returns>
        private bool IncrementSentence()
        {
            // An empty buffer has to be refilled by the caller before anything can be read.
            if (length == 0)
            {
                return false;
            }

            bool tokenFound = false;
            while (!tokenFound)
            {
                int sentenceBegin = iterator.Current();
                if (sentenceBegin == BreakIterator.DONE)
                {
                    break;    // no current boundary: iterator exhausted
                }

                int sentenceFinish = iterator.next();
                if (sentenceFinish == BreakIterator.DONE)
                {
                    break;    // no following boundary: iterator exhausted
                }

                // Hand the sentence span over, then try to pull a word out of it.
                // A sentence that yields no word sends us round the loop again.
                setNextSentence(sentenceBegin, sentenceFinish);
                tokenFound = incrementWord();
            }

            return tokenFound;
        }
 /// <summary>
 /// Assigns <paramref name="ci"/> as the text of <paramref name="bi"/> and then
 /// advances <paramref name="bi"/> until it reports <c>BreakIterator.DONE</c>.
 /// The boundaries themselves are discarded.
 /// </summary>
 private void consume(BreakIterator bi, CharacterIterator ci)
 {
     bi.Text = ci;

     int boundary;
     do
     {
         boundary = bi.next();
     }
     while (boundary != BreakIterator.DONE);
 }
示例#4
0
        /// <summary>
        /// Probes the sentence <c>BreakIterator</c> for <c>Locale.US</c> once, at type
        /// initialisation, and records in <c>HAS_BUGGY_BREAKITERATORS</c> whether the
        /// probe threw.
        /// </summary>
        static CharArrayIterator()
        {
            bool probeThrew = false;

            try
            {
                // Feed the iterator a single surrogate pair and ask for one boundary.
                BreakIterator probe = BreakIterator.getSentenceInstance(Locale.US);
                probe.Text = "\udb40\udc53";
                probe.next();
            }
            catch (Exception)
            {
                // Any exception from the probe marks the iterators as buggy.
                probeThrew = true;
            }

            HAS_BUGGY_BREAKITERATORS = probeThrew;
        }
	  /// <summary>
	  /// Points <paramref name="bi"/> at <paramref name="ci"/> and steps through every
	  /// boundary until <c>BreakIterator.DONE</c> is returned, ignoring the positions.
	  /// </summary>
	  private void consume(BreakIterator bi, CharacterIterator ci)
	  {
		bi.Text = ci;

		// The loop header does all the work; the body is intentionally empty.
		for (int boundary = bi.next(); boundary != BreakIterator.DONE; boundary = bi.next())
		{
		}
	  }
示例#6
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        /// <summary>
        /// Advances to the next token.
        /// <para>
        /// A token from <c>input</c> that is empty or whose first character lies outside
        /// the Thai Unicode block (U+0E00..U+0E7F) is passed through unchanged. A Thai
        /// token is copied into <c>clonedToken</c> and then re-emitted piece by piece,
        /// one piece per call, using the boundaries reported by <c>breaker</c>.
        /// </para>
        /// </summary>
        /// <returns>
        /// true if a token is available in the attributes; false if <c>input</c> is
        /// exhausted, or if <c>breaker</c> reports no boundary at all for a freshly
        /// read Thai token.
        /// </returns>
        public override bool incrementToken()
        {
            // Still splitting a previously cloned Thai token: emit its next piece.
            if (hasMoreTokensInClone)
            {
                int start = breaker.current();
                int end   = breaker.next();
                if (end != BreakIterator.DONE)
                {
                    clonedToken.copyTo(this);
                    termAtt.copyBuffer(clonedTermAtt.buffer(), start, end - start);
                    if (hasIllegalOffsets)
                    {
                        // Offsets do not line up with the term text; keep the original span.
                        offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
                    }
                    else
                    {
                        offsetAtt.setOffset(clonedOffsetAtt.startOffset() + start, clonedOffsetAtt.startOffset() + end);
                    }
                    if (handlePosIncr)
                    {
                        posAtt.PositionIncrement = 1;
                    }
                    return(true);
                }
                // The cloned token is used up; fall through and read from input.
                hasMoreTokensInClone = false;
            }

            if (!input.incrementToken())
            {
                return(false);
            }

            // Empty terms pass through untouched.
            if (termAtt.length() == 0)
            {
                return(true);
            }

            // Java's Character.UnicodeBlock has no member on System.Char, so the block
            // test is spelled out: the Thai block is the range U+0E00..U+0E7F.
            var firstChar = termAtt.charAt(0);
            if (firstChar < '\u0E00' || firstChar > '\u0E7F')
            {
                return(true);
            }

            hasMoreTokensInClone = true;

            // if length by start + end offsets doesn't match the term text then assume
            // this is a synonym and don't adjust the offsets.
            hasIllegalOffsets = offsetAtt.endOffset() - offsetAtt.startOffset() != termAtt.length();

            // we lazy init the cloned token, as in ctor not all attributes may be added
            if (clonedToken == null)
            {
                clonedToken     = cloneAttributes();
                clonedTermAtt   = clonedToken.getAttribute(typeof(CharTermAttribute));
                clonedOffsetAtt = clonedToken.getAttribute(typeof(OffsetAttribute));
            }
            else
            {
                this.copyTo(clonedToken);
            }

            // reinit CharacterIterator
            charIterator.setText(clonedTermAtt.buffer(), 0, clonedTermAtt.length());
            breaker.Text = charIterator;

            // Named differently from the 'end' local in the block above: C# rejects a
            // method-scope local that reuses a name declared in a nested scope (CS0136).
            int firstEnd = breaker.next();

            if (firstEnd != BreakIterator.DONE)
            {
                // Truncate the current term to its first piece.
                termAtt.Length = firstEnd;
                if (hasIllegalOffsets)
                {
                    offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
                }
                else
                {
                    offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.startOffset() + firstEnd);
                }
                // position increment keeps as it is for first token
                return(true);
            }
            return(false);
        }