/// <summary>
/// Produces the next token by scanning characters from <c>input</c> through
/// <c>ioBuffer</c>. Consecutive characters accepted by <c>IsTokenChar</c> are
/// passed through <c>Normalize</c> and collected in the term buffer.
/// </summary>
/// <returns>
/// <c>true</c> when a token was written to the term and offset attributes;
/// <c>false</c> when the input is exhausted and no characters were collected.
/// </returns>
public override bool IncrementToken()
{
    ClearAttributes();

    char[] termChars = termAtt.TermBuffer();
    int tokenStart = bufferIndex;
    int count = 0;
    bool tokenComplete = false;

    while (!tokenComplete)
    {
        // Refill the I/O buffer once every buffered character has been consumed.
        if (bufferIndex >= dataLen)
        {
            offset += dataLen;
            dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
            if (dataLen <= 0)
            {
                // Reset so the next "offset += dataLen" cannot decrement offset.
                dataLen = 0;
                if (count == 0)
                {
                    return false;
                }
                // End of input with pending characters: emit them as the last token.
                break;
            }
            bufferIndex = 0;
        }

        char current = ioBuffer[bufferIndex];
        bufferIndex++;

        if (!IsTokenChar(current))
        {
            // A non-token char ends the token only if one has been started;
            // otherwise it is skipped.
            tokenComplete = count > 0;
            continue;
        }

        if (count == 0)
        {
            // First character of the token: remember its position in the input.
            tokenStart = offset + bufferIndex - 1;
        }
        else if (count == termChars.Length)
        {
            termChars = termAtt.ResizeTermBuffer(count + 1);
        }

        termChars[count] = Normalize(current);
        count++;

        // Cut the token off once it reaches the maximum word length.
        tokenComplete = count == MAX_WORD_LEN;
    }

    termAtt.SetTermLength(count);
    offsetAtt.SetOffset(CorrectOffset(tokenStart), CorrectOffset(tokenStart + count));
    return true;
}
/// <summary>
/// Resets the <c>done</c> flag, registers the term and offset attributes,
/// and asks the term attribute to size its buffer to <paramref name="bufferSize"/>.
/// </summary>
/// <param name="bufferSize">Size passed to <c>ResizeTermBuffer</c>.</param>
private void Init(int bufferSize)
{
    done = false;

    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();

    termAtt.ResizeTermBuffer(bufferSize);
}
/// <summary>
/// Emits the entire content of <c>input</c> as a single token on the first call,
/// and reports no further tokens on later calls (until <c>done</c> is reset).
/// </summary>
/// <returns>
/// <c>true</c> on the first call after <c>done</c> was cleared, once the term and
/// offset attributes have been filled; <c>false</c> on every subsequent call.
/// </returns>
public override bool IncrementToken()
{
    if (done)
    {
        return false;
    }

    ClearAttributes();
    done = true;

    int upto = 0;
    char[] buffer = termAtt.TermBuffer();
    while (true)
    {
        int length = input.Read(buffer, upto, buffer.Length - upto);

        // Treat any non-positive count as end of input. A reader that signals
        // EOF with a negative value would otherwise move "upto" backwards.
        if (length <= 0)
        {
            break;
        }

        upto += length;
        if (upto == buffer.Length)
        {
            // Buffer is full: grow it before the next read.
            buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
        }
    }

    termAtt.SetTermLength(upto);
    finalOffset = CorrectOffset(upto);
    offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
    return true;
}
/// <summary>
/// Clears the <c>done</c> flag, registers the term and offset attributes,
/// and requests a term buffer of <paramref name="bufferSize"/> characters.
/// </summary>
/// <param name="bufferSize">Size passed to <c>ResizeTermBuffer</c>.</param>
private void Init(int bufferSize)
{
    done = false;

    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();

    termAtt.ResizeTermBuffer(bufferSize);
}
/// <summary>
/// Advances the wrapped stream and, when <c>suffixByTokenType</c> holds an entry
/// for the current token's type, appends that suffix to the term text.
/// </summary>
/// <returns>
/// <c>false</c> when the wrapped stream has no more tokens; otherwise <c>true</c>,
/// whether or not a suffix was appended.
/// </returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        // Reached end of stream.
        return false;
    }

    if (suffixByTokenType == null)
    {
        return true;
    }

    char[] suffix;
    if (!suffixByTokenType.TryGetValue(typeAtt.Type, out suffix))
    {
        // No suffix configured for this token type: pass the token through.
        return true;
    }

    char[] buffer = termAtt.TermBuffer();
    int length = termAtt.TermLength();
    int newLength = length + suffix.Length;

    // Grow whenever the suffix does not fit. Checking only
    // "buffer.Length <= length" missed the case where the buffer has some
    // spare room but less than suffix.Length, which made Array.Copy throw.
    if (buffer.Length < newLength)
    {
        buffer = termAtt.ResizeTermBuffer(newLength);
    }

    Array.Copy(suffix, 0, buffer, length, suffix.Length);
    termAtt.SetTermLength(newLength);
    return true;
}
/// <summary>
/// Produces the next token by scanning characters from <c>input</c> through
/// <c>_ioBuffer</c>. Consecutive characters accepted by <c>Helper.IsTokenChar</c>
/// are passed through <c>Helper.Normalize</c> and collected in the term buffer.
/// </summary>
/// <returns>
/// <c>true</c> when a token was written to the term and offset attributes;
/// <c>false</c> when the input is exhausted and no characters were collected.
/// </returns>
public override bool IncrementToken()
{
    ClearAttributes();

    int collected = 0;
    int tokenStart = _bufferIndex;
    char[] term = _termAtt.TermBuffer();

    for (;;)
    {
        bool needsRefill = _bufferIndex >= _dataLen;
        if (needsRefill)
        {
            _offset += _dataLen;
            _dataLen = input.Read(_ioBuffer, 0, _ioBuffer.Length);

            if (_dataLen <= 0)
            {
                // Reset so the next "_offset += _dataLen" cannot decrement the offset.
                _dataLen = 0;
                if (collected == 0)
                {
                    return false;
                }
                // Input ended while a token was in progress: emit it.
                break;
            }

            _bufferIndex = 0;
        }

        char next = _ioBuffer[_bufferIndex];
        _bufferIndex += 1;

        if (Helper.IsTokenChar(next))
        {
            if (collected == 0)
            {
                // Start of a token: record its position in the input.
                tokenStart = _offset + _bufferIndex - 1;
            }
            else if (collected == term.Length)
            {
                term = _termAtt.ResizeTermBuffer(collected + 1);
            }

            term[collected] = Helper.Normalize(next);
            collected += 1;
            continue;
        }

        // Non-token character: it ends the token only if one has been started.
        if (collected > 0)
        {
            break;
        }
    }

    _termAtt.SetTermLength(collected);
    _offsetAtt.SetOffset(CorrectOffset(tokenStart), CorrectOffset(tokenStart + collected));
    return true;
}
/// <summary>
/// Advances the wrapped stream and reverses the current term in place via
/// <c>Reverse</c>. When <c>marker</c> differs from <c>NOMARKER</c>, the marker
/// character is appended to the term before the reversal.
/// </summary>
/// <returns>
/// <c>true</c> when a token was available and has been reversed;
/// <c>false</c> when the wrapped stream has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    int reversedLength = termAtt.TermLength();

    bool appendMarker = marker != NOMARKER;
    if (appendMarker)
    {
        // Make room for one extra character and place the marker at the end.
        reversedLength += 1;
        termAtt.ResizeTermBuffer(reversedLength);
        termAtt.TermBuffer()[reversedLength - 1] = marker;
    }

    Reverse(termAtt.TermBuffer(), reversedLength);
    termAtt.SetTermLength(reversedLength);
    return true;
}