/// <summary>
/// Advances this filter to the next n-gram.
/// For every gram size from <c>minGram</c> up to <c>maxGram</c>, grams of the
/// cached upstream term are emitted left to right; once all sizes have been
/// produced, the next term is pulled from <c>input</c>.
/// </summary>
/// <returns>
/// <c>true</c> when a gram was written to the term and offset attributes;
/// <c>false</c> once <c>input.IncrementToken()</c> reports no further tokens.
/// </returns>
public override bool IncrementToken()
{
    for (;;)
    {
        if (curTermBuffer == null)
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            // Keep a private copy of the term: termAtt is overwritten for every gram emitted below.
            curTermBuffer = (char[])termAtt.TermBuffer().Clone();
            curTermLength = termAtt.TermLength();
            curGramSize = minGram;
            curPos = 0;
            tokStart = offsetAtt.StartOffset;
        }

        while (curGramSize <= maxGram)
        {
            bool gramFits = curPos + curGramSize <= curTermLength;
            if (gramFits)
            {
                int gramStart = tokStart + curPos;
                int gramEnd = gramStart + curGramSize;

                ClearAttributes();
                termAtt.SetTermBuffer(curTermBuffer, curPos, curGramSize);
                offsetAtt.SetOffset(gramStart, gramEnd);
                curPos++;
                return true;
            }

            // No more grams of this size: move to the next size and restart at the term's beginning.
            curGramSize++;
            curPos = 0;
        }

        // Every gram size has been emitted for this term; fetch a new one on the next pass.
        curTermBuffer = null;
    }
}
/// <summary>
/// Emits one single-character token per call, walking through the characters
/// of each upstream term in order.
/// </summary>
/// <remarks>
/// The position increment is 2 for the first character of a freshly fetched
/// upstream term and 1 for every other character. The running stream offset is
/// likewise advanced one extra step whenever a new upstream term is fetched.
/// </remarks>
/// <returns>
/// <c>true</c> when a character token was produced; <c>false</c> once
/// <c>_input.IncrementToken()</c> reports no further tokens.
/// </returns>
public override bool IncrementToken()
{
    int step = 1;

    bool termConsumed = _buffer == null || _offset >= _length;
    if (termConsumed)
    {
        if (!_input.IncrementToken())
        {
            return false;
        }

        // NOTE(review): _buffer is not cloned here, while SetTermBuffer below writes to the
        // same attribute - presumably safe because only index 0 is rewritten; confirm.
        _buffer = _termAttribute.TermBuffer();
        _length = _termAttribute.TermLength();
        _offset = 0;

        // A new upstream term costs one extra position and one extra offset step.
        step += 1;
        _offsetInStream += 1;
    }

    int start = _offsetInStream;
    _offsetAttribute.SetOffset(start, start + 1);
    _offsetInStream = start + 1;

    _positionIncrementAttribute.SetPositionIncrement(step);

    // NOTE(review): a zero-length upstream term is not skipped; index 0 is still read - confirm intended.
    _termAttribute.SetTermLength(1);
    int index = _offset++;
    char current = _buffer[index];
    _termAttribute.SetTermBuffer(current.ToString());
    return true;
}
/// <summary>
/// Moves to the next n-gram of the current upstream term, fetching a new term
/// from <c>input</c> when the current one has been fully processed.
/// Gram sizes run from <c>_minGram</c> to <c>_maxGram</c>; within one size the
/// grams are produced from the start of the term towards its end.
/// </summary>
/// <returns>
/// <c>true</c> if the term and offset attributes now describe a new gram;
/// <c>false</c> when <c>input.IncrementToken()</c> has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    while (true)
    {
        bool needsNewTerm = _curTermBuffer == null;
        if (needsNewTerm)
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            // Snapshot the upstream term; _termAtt is reused for each gram written below.
            _curTermBuffer = (char[])_termAtt.TermBuffer().Clone();
            _curTermLength = _termAtt.TermLength();
            _curGramSize = _minGram;
            _curPos = 0;
            _tokStart = _offsetAtt.StartOffset;
        }

        // When the current size has no gram left, step to the next size and rewind to the term start.
        for (; _curGramSize <= _maxGram; _curGramSize++, _curPos = 0)
        {
            if (_curPos + _curGramSize > _curTermLength)
            {
                continue;
            }

            int from = _tokStart + _curPos;

            ClearAttributes();
            _termAtt.SetTermBuffer(_curTermBuffer, _curPos, _curGramSize);
            _offsetAtt.SetOffset(from, from + _curGramSize);
            _curPos++;
            return true;
        }

        // All sizes done for this term; the next loop pass pulls a fresh one.
        _curTermBuffer = null;
    }
}
/// <summary>
/// Pulls the next token from <c>input</c> and passes its term buffer and length
/// to <c>_stemmer.Stem</c>, then sets the term length to the value returned.
/// </summary>
/// <returns>
/// <c>true</c> when a token was processed; <c>false</c> when <c>input</c> has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    // Stem is handed the attribute's buffer and reports the new length of the term.
    _termAttr.SetTermLength(_stemmer.Stem(_termAttr.TermBuffer(), _termAttr.TermLength()));
    return true;
}
/// <summary>
/// Pulls the next token from <c>input</c> and replaces its term text with the
/// string returned by <c>_normalizer.Normalize</c>.
/// </summary>
/// <returns>
/// <c>true</c> when a token was normalized; <c>false</c> when <c>input</c> has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    _termAtt.SetTermBuffer(_normalizer.Normalize(_termAtt.Term, _termAtt.TermLength()));
    return true;
}
/// <summary>
/// Pulls the next token from <c>input</c>, passes its term buffer and length to
/// <c>normalizer.Normalize</c>, and sets the term length to the value returned.
/// </summary>
/// <returns>
/// <c>true</c> when a token was normalized; <c>false</c> when <c>input</c> has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    int normalizedLength = normalizer.Normalize(termAtt.TermBuffer(), termAtt.TermLength());
    termAtt.SetTermLength(normalizedLength);
    return true;
}
/// <summary>
/// Pulls the next token from <c>input</c> and replaces its term with the encoded
/// form of the term's sort key, as computed by <c>cultureInfo.CompareInfo</c>.
/// The term buffer is enlarged first when the encoded key does not fit.
/// </summary>
/// <returns>
/// <c>true</c> when a token was converted; <c>false</c> when <c>input</c> has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    char[] buffer = termAtt.TermBuffer();
    string text = new string(buffer, 0, termAtt.TermLength());
    byte[] sortKey = cultureInfo.CompareInfo.GetSortKey(text).KeyData;

    int required = IndexableBinaryStringTools_UsingArrays.GetEncodedLength(sortKey);

    // Only grow the buffer when the encoded key is longer than what is currently allocated.
    char[] target = required > buffer.Length
        ? termAtt.ResizeTermBuffer(required)
        : buffer;

    termAtt.SetTermLength(required);
    IndexableBinaryStringTools_UsingArrays.Encode(sortKey, target);
    return true;
}
/// <summary>
/// Pulls the next token from <c>input</c> and reverses its term via <c>Reverse</c>.
/// When <c>marker</c> differs from <c>NOMARKER</c>, the marker character is
/// appended to the term before reversing, and the term grows by one.
/// </summary>
/// <returns>
/// <c>true</c> when a token was reversed; <c>false</c> when <c>input</c> has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    int length = termAtt.TermLength();
    char[] chars;

    if (marker == NOMARKER)
    {
        chars = termAtt.TermBuffer();
    }
    else
    {
        // Make room for one extra character and place the marker at the end.
        length++;
        termAtt.ResizeTermBuffer(length);
        chars = termAtt.TermBuffer();
        chars[length - 1] = marker;
    }

    Reverse(chars, length);
    termAtt.SetTermLength(length);
    return true;
}
/// <summary>
/// Pulls the next token from <c>input</c> and replaces its term with the encoded
/// form of the key returned by <c>GetCollationKey</c> for the term text.
/// The term buffer is enlarged first when the encoded key does not fit.
/// </summary>
/// <returns>
/// <c>true</c> when a token was converted; <c>false</c> when <c>input</c> has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    if (input.IncrementToken())
    {
        var buffer = _termAtt.TermBuffer();
        var text = new string(buffer, 0, _termAtt.TermLength());
        var key = GetCollationKey(text);

        var required = IndexableBinaryStringTools_UsingArrays.GetEncodedLength(key);

        // Grow the buffer only if the encoded key would overflow the current allocation.
        var target = required > buffer.Length
            ? _termAtt.ResizeTermBuffer(required)
            : buffer;

        _termAtt.SetTermLength(required);
        IndexableBinaryStringTools_UsingArrays.Encode(key, target);
        return true;
    }

    return false;
}
/// <summary>
/// Runs <paramref name="text"/> through <paramref name="analyzer"/> (field name
/// <c>"contents"</c>) and collects the term text of every token produced.
/// Each token is also written to the console for diagnostics.
/// </summary>
/// <param name="analyzer">Analyzer used to create the token stream.</param>
/// <param name="text">Text to analyze.</param>
/// <returns>The token terms, in the order the stream produced them.</returns>
public static IEnumerable<string> TokensFromAnalysis(Analyzer analyzer, String text)
{
    TokenStream stream = analyzer.TokenStream("contents", new StringReader(text));
    List<string> result = new List<string>();

    try
    {
        TermAttribute tokenAttr = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));

        while (stream.IncrementToken())
        {
            string term = tokenAttr.Term();

            // NOTE(review): the first argument is a char[]; depending on how FormatWith formats it,
            // this may print the type name rather than the characters - confirm intended.
            Console.WriteLine("Buffer:={0}, Length:={1}, Term:={2}".FormatWith(tokenAttr.TermBuffer(), tokenAttr.TermLength(), term));
            result.Add(term);
        }

        // End() is only meaningful after the stream has been fully consumed.
        stream.End();
    }
    finally
    {
        // Always release the stream, even if analysis or the diagnostic print throws.
        stream.Close();
    }

    return result;
}