/// <summary>
/// Advances to the next word of the current sentence and publishes it through the
/// term and offset attributes.
/// <para>
/// Segments reported by <c>wordBreaker</c> are skipped while the code point found at
/// the segment's start (relative to <c>sentenceStart</c>) is neither a letter nor a digit.
/// </para>
/// </summary>
/// <returns>
/// <c>true</c> when a word was written to the attributes; <c>false</c> when
/// <c>wordBreaker</c> has no further boundaries.
/// </returns>
protected internal override bool incrementWord()
{
    int wordBegin = wordBreaker.current();
    if (wordBegin == BreakIterator.DONE)
    {
        // The word iterator has nothing left for this sentence.
        return false;
    }

    // Walk forward one boundary at a time; stop at the first segment that
    // begins with a letter or digit, dropping everything before it.
    int wordFinish = wordBreaker.next();
    for (; wordFinish != BreakIterator.DONE; wordFinish = wordBreaker.next())
    {
        if (char.IsLetterOrDigit(char.codePointAt(buffer, sentenceStart + wordBegin, sentenceEnd)))
        {
            break;
        }
        wordBegin = wordFinish;
    }

    if (wordFinish == BreakIterator.DONE)
    {
        // Ran off the end while skipping non-token segments.
        return false;
    }

    clearAttributes();
    termAtt.copyBuffer(buffer, sentenceStart + wordBegin, wordFinish - wordBegin);

    // Offsets are expressed relative to the whole input: buffer offset plus
    // sentence start plus the position inside the sentence.
    var correctedStart = correctOffset(offset + sentenceStart + wordBegin);
    var correctedEnd = correctOffset(offset + sentenceStart + wordFinish);
    offsetAtt.setOffset(correctedStart, correctedEnd);
    return true;
}
/// <summary>
/// Tries to produce a token from the text currently held in the buffer.
/// <para>
/// Sentences are taken from <c>iterator</c> one after another; each is handed to
/// <c>setNextSentence</c> and then <c>incrementWord</c> is asked for a word. The first
/// sentence that yields a word ends the search.
/// </para>
/// </summary>
/// <returns>
/// <c>true</c> if a token was produced; <c>false</c> if the buffer is empty
/// (<c>length == 0</c>, so it has to be refilled) or the sentence iterator is exhausted.
/// </returns>
private bool IncrementSentence()
{
    if (length == 0)
    {
        // Nothing buffered: the caller has to refill before we can do anything.
        return false;
    }

    for (;;)
    {
        int boundaryLo = iterator.Current();
        if (boundaryLo == BreakIterator.DONE)
        {
            break; // sentence iterator exhausted
        }

        // Locate the end of this sentence.
        int boundaryHi = iterator.next();
        if (boundaryHi == BreakIterator.DONE)
        {
            break; // sentence iterator exhausted
        }

        setNextSentence(boundaryLo, boundaryHi);
        if (incrementWord())
        {
            return true;
        }
        // This sentence produced no word; move on to the next one.
    }

    return false;
}
/// <summary>
/// Assigns <paramref name="ci"/> as the text of <paramref name="bi"/> and then steps the
/// iterator forward until it reports <c>BreakIterator.DONE</c>. The boundaries that are
/// visited along the way are discarded.
/// </summary>
/// <param name="bi">Break iterator to drive to completion.</param>
/// <param name="ci">Character source to install on the iterator.</param>
private void consume(BreakIterator bi, CharacterIterator ci)
{
    bi.Text = ci;

    int boundary;
    do
    {
        boundary = bi.next();
    } while (boundary != BreakIterator.DONE);
}
/// <summary>
/// Type initializer: probes the US-locale sentence <c>BreakIterator</c> with a text made of
/// a single surrogate pair and records in <c>HAS_BUGGY_BREAKITERATORS</c> whether the
/// probe threw.
/// </summary>
static CharArrayIterator()
{
    bool probeThrew = false;
    try
    {
        BreakIterator probe = BreakIterator.getSentenceInstance(Locale.US);
        probe.Text = "\udb40\udc53";
        probe.next();
    }
    catch (Exception)
    {
        // Any exception raised while setting the text or advancing marks the
        // break iterator implementation as buggy.
        probeThrew = true;
    }

    HAS_BUGGY_BREAKITERATORS = probeThrew;
}
/// <summary>
/// Produces the next token, splitting terms from the wrapped stream into pieces with
/// <c>breaker</c> when the term's first character belongs to the Thai Unicode block.
/// <para>
/// While pieces of a previously buffered term remain (<c>hasMoreTokensInClone</c>), the
/// next piece is emitted from the cloned token state. Otherwise the next token is pulled
/// from <c>input</c>; empty terms and terms whose first character is not Thai are passed
/// through unchanged.
/// </para>
/// <para>
/// When the span between start and end offset does not equal the term length, the token
/// is assumed to be a synonym and every piece keeps the original token's offsets.
/// </para>
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no
/// <c>throws</c> clauses, so the signature carries no such declaration.
/// </remarks>
/// <returns>
/// <c>true</c> if a token is available in the attributes; <c>false</c> when <c>input</c>
/// is exhausted or the break iterator reports no boundary for a freshly buffered term.
/// </returns>
public override bool incrementToken()
{
    // Locals in this method use distinct names on purpose: C# does not allow a
    // name declared in a nested block to be declared again later in the enclosing
    // method scope (compiler error CS0136), which the converted code did with `end`.
    if (hasMoreTokensInClone)
    {
        int pieceStart = breaker.current();
        int pieceEnd = breaker.next();
        if (pieceEnd != BreakIterator.DONE)
        {
            clonedToken.copyTo(this);
            termAtt.copyBuffer(clonedTermAtt.buffer(), pieceStart, pieceEnd - pieceStart);
            if (hasIllegalOffsets)
            {
                offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
            }
            else
            {
                offsetAtt.setOffset(clonedOffsetAtt.startOffset() + pieceStart, clonedOffsetAtt.startOffset() + pieceEnd);
            }
            if (handlePosIncr)
            {
                posAtt.PositionIncrement = 1;
            }
            return true;
        }

        // The buffered term is fully consumed; fall through to fetch a new token.
        hasMoreTokensInClone = false;
    }

    if (!input.incrementToken())
    {
        return false;
    }

    // Only the first character decides whether the term is handed to the breaker.
    if (termAtt.length() == 0 || char.UnicodeBlock.of(termAtt.charAt(0)) != char.UnicodeBlock.THAI)
    {
        return true;
    }

    hasMoreTokensInClone = true;

    // if length by start + end offsets doesn't match the term text then assume
    // this is a synonym and don't adjust the offsets.
    hasIllegalOffsets = offsetAtt.endOffset() - offsetAtt.startOffset() != termAtt.length();

    // we lazy init the cloned token, as in ctor not all attributes may be added
    if (clonedToken == null)
    {
        clonedToken = cloneAttributes();
        clonedTermAtt = clonedToken.getAttribute(typeof(CharTermAttribute));
        clonedOffsetAtt = clonedToken.getAttribute(typeof(OffsetAttribute));
    }
    else
    {
        this.copyTo(clonedToken);
    }

    // reinit CharacterIterator
    charIterator.setText(clonedTermAtt.buffer(), 0, clonedTermAtt.length());
    breaker.Text = charIterator;

    int firstPieceEnd = breaker.next();
    if (firstPieceEnd != BreakIterator.DONE)
    {
        // The first piece starts at index 0, so truncating the term is enough.
        termAtt.Length = firstPieceEnd;
        if (hasIllegalOffsets)
        {
            offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
        }
        else
        {
            offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.startOffset() + firstPieceEnd);
        }
        // position increment keeps as it is for first token
        return true;
    }
    return false;
}