/// <summary>
/// Advances the wrapped stream by one token and overwrites that token's term
/// text with its culture-specific sort key, encoded into chars by
/// <c>IndexableBinaryStringTools_UsingArrays.Encode</c>.
/// </summary>
/// <returns>
/// <c>true</c> if a token was produced and rewritten; <c>false</c> once the
/// wrapped stream has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    char[] termBuffer = termAtt.TermBuffer();
    var termText = new String(termBuffer, 0, termAtt.TermLength());
    byte[] collationKey = cultureInfo.CompareInfo.GetSortKey(termText).KeyData;
    int encodedLength = IndexableBinaryStringTools_UsingArrays.GetEncodedLength(collationKey);

    if (encodedLength > termBuffer.Length)
    {
        // Keep the array returned by the resize: the local reference taken
        // above is too small for the encoded key, so encoding must target
        // the buffer handed back by the attribute.
        termBuffer = termAtt.ResizeTermBuffer(encodedLength);
    }

    termAtt.SetTermLength(encodedLength);
    IndexableBinaryStringTools_UsingArrays.Encode(collationKey, termBuffer);
    return true;
}
/// <summary>
/// Analyzes <paramref name="text"/> with <paramref name="analyzer"/> under the
/// field name "contents" and collects the term text of every token produced,
/// in stream order. Each token is also written to the console as a diagnostic line.
/// </summary>
/// <param name="analyzer">Analyzer used to build the token stream.</param>
/// <param name="text">Text to tokenize.</param>
/// <returns>The term text of each token, in the order the stream emitted them.</returns>
public static IEnumerable<string> TokensFromAnalysis(Analyzer analyzer, String text)
{
    TokenStream stream = analyzer.TokenStream("contents", new StringReader(text));
    try
    {
        List<string> result = new List<string>();
        TermAttribute tokenAttr = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));

        while (stream.IncrementToken())
        {
            Console.WriteLine("Buffer:={0}, Length:={1}, Term:={2}".FormatWith(tokenAttr.TermBuffer(), tokenAttr.TermLength(), tokenAttr.Term()));
            result.Add(tokenAttr.Term());
        }

        stream.End();
        return result;
    }
    finally
    {
        // Close the stream even when attribute lookup or tokenization throws,
        // so a failure part-way through does not leave it open.
        stream.Close();
    }
}
/// <summary>
/// Pulls the next token from the wrapped stream and swaps its term text for
/// the collation key of that text (obtained from <c>GetCollationKey</c>),
/// encoded into the term buffer by <c>IndexableBinaryStringTools_UsingArrays.Encode</c>.
/// </summary>
/// <returns>
/// <c>false</c> when the wrapped stream is exhausted; otherwise <c>true</c>
/// after the term attribute has been rewritten.
/// </returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    var chars = _termAtt.TermBuffer();
    var sortKey = GetCollationKey(new string(chars, 0, _termAtt.TermLength()));
    var requiredLength = IndexableBinaryStringTools_UsingArrays.GetEncodedLength(sortKey);

    // Grow the buffer when the encoded key will not fit, and continue with
    // the array the attribute hands back.
    if (chars.Length < requiredLength)
    {
        chars = _termAtt.ResizeTermBuffer(requiredLength);
    }

    _termAtt.SetTermLength(requiredLength);
    IndexableBinaryStringTools_UsingArrays.Encode(sortKey, chars);

    return true;
}
/// <summary>
/// Emits the next n-gram of the current input term. For each term read from
/// the wrapped stream, grams are produced for every size from
/// <c>_minGram</c> up to <c>_maxGram</c>, and within one size from left to
/// right; when a term is used up the next input token is fetched.
/// </summary>
/// <returns>
/// <c>true</c> when a gram has been written to the term and offset
/// attributes; <c>false</c> once the wrapped stream has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    for (;;)
    {
        if (_curTermBuffer == null)
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            // Snapshot the term: the attribute is overwritten each time a
            // gram is emitted, so work from a private copy.
            _curTermBuffer = (char[])_termAtt.TermBuffer().Clone();
            _curTermLength = _termAtt.TermLength();
            _curGramSize = _minGram;
            _curPos = 0;
            _tokStart = _offsetAtt.StartOffset;
        }

        // Each pass of the loop header moves to the next gram size and
        // restarts at the beginning of the term.
        for (; _curGramSize <= _maxGram; _curGramSize++, _curPos = 0)
        {
            bool gramFits = _curPos + _curGramSize <= _curTermLength;
            if (!gramFits)
            {
                continue;
            }

            ClearAttributes();
            _termAtt.SetTermBuffer(_curTermBuffer, _curPos, _curGramSize);

            int gramStart = _tokStart + _curPos;
            _offsetAtt.SetOffset(gramStart, gramStart + _curGramSize);

            _curPos++;
            return true;
        }

        // Every gram size has been tried for this term; read a new one.
        _curTermBuffer = null;
    }
}