Beispiel #1
0
 /// <summary>
 /// Advances the wrapped stream by one token and overwrites the term text with the
 /// culture-specific sort key of that text, encoded into chars by
 /// <c>IndexableBinaryStringTools_UsingArrays</c>.
 /// </summary>
 /// <returns>
 /// <c>true</c> when the wrapped stream produced a token (now holding the encoded key);
 /// <c>false</c> when the wrapped stream's <c>IncrementToken</c> returned <c>false</c>.
 /// </returns>
 public override bool IncrementToken()
 {
     if (!input.IncrementToken())
     {
         return false;
     }

     char[] termBuffer    = termAtt.TermBuffer();
     string termText      = new string(termBuffer, 0, termAtt.TermLength());
     byte[] collationKey  = cultureInfo.CompareInfo.GetSortKey(termText).KeyData;
     int    encodedLength = IndexableBinaryStringTools_UsingArrays.GetEncodedLength(collationKey);

     if (encodedLength > termBuffer.Length)
     {
         // Keep the array returned by the resize: the previous reference is too small
         // for the encoded key, and encoding into it would not update the attribute.
         termBuffer = termAtt.ResizeTermBuffer(encodedLength);
     }

     termAtt.SetTermLength(encodedLength);
     IndexableBinaryStringTools_UsingArrays.Encode(collationKey, termBuffer);
     return true;
 }
Beispiel #2
0
        /// <summary>
        /// Runs <paramref name="text"/> through <paramref name="analyzer"/> (field name
        /// "contents") and collects the term text of every token produced.
        /// Each token's buffer, length and term are also written to the console.
        /// </summary>
        /// <param name="analyzer">Analyzer used to create the token stream.</param>
        /// <param name="text">Text to tokenize.</param>
        /// <returns>The term text of each token, in the order the stream produced them.</returns>
        public static IEnumerable <string> TokensFromAnalysis(Analyzer analyzer, String text)
        {
            TokenStream   stream = analyzer.TokenStream("contents", new StringReader(text));
            List <string> result = new List <string>();

            try
            {
                TermAttribute tokenAttr = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));

                while (stream.IncrementToken())
                {
                    // Materialize the term once; it is used for both logging and the result.
                    string term = tokenAttr.Term();
                    Console.WriteLine("Buffer:={0}, Length:={1}, Term:={2}".FormatWith(tokenAttr.TermBuffer(), tokenAttr.TermLength(), term));
                    result.Add(term);
                }

                stream.End();
            }
            finally
            {
                // Close the stream even when tokenization throws part-way through.
                stream.Close();
            }

            return result;
        }
        /// <summary>
        /// Pulls the next token from the wrapped stream and replaces its term text with the
        /// collation key obtained from <c>GetCollationKey</c>, encoded into chars by
        /// <c>IndexableBinaryStringTools_UsingArrays</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> if a token was available and has been rewritten; <c>false</c> when the
        /// wrapped stream's <c>IncrementToken</c> returned <c>false</c>.
        /// </returns>
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            char[] buffer         = _termAtt.TermBuffer();
            int    originalLength = _termAtt.TermLength();
            var    sortKey        = GetCollationKey(new string(buffer, 0, originalLength));
            int    requiredLength = IndexableBinaryStringTools_UsingArrays.GetEncodedLength(sortKey);

            // Grow the term buffer when the encoded key does not fit, and continue with
            // the array handed back by the resize call.
            if (buffer.Length < requiredLength)
            {
                buffer = _termAtt.ResizeTermBuffer(requiredLength);
            }

            _termAtt.SetTermLength(requiredLength);
            IndexableBinaryStringTools_UsingArrays.Encode(sortKey, buffer);

            return true;
        }
Beispiel #4
0
        /// <summary>
        /// Emits the next n-gram of the current input token, pulling a new token from the
        /// wrapped stream once all grams of sizes <c>_minGram</c> through <c>_maxGram</c>
        /// have been produced for the current one. All grams of one size are emitted
        /// (left to right) before the size is increased.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a gram was written to the term and offset attributes;
        /// <c>false</c> when the wrapped stream has no further tokens.
        /// </returns>
        public override bool IncrementToken()
        {
            for (;;)
            {
                if (_curTermBuffer == null)
                {
                    // No token in progress: fetch the next one or signal end of stream.
                    if (!input.IncrementToken())
                    {
                        return false;
                    }

                    // Snapshot the term, because the attribute is overwritten by each gram.
                    _curTermBuffer = (char[])_termAtt.TermBuffer().Clone();
                    _curTermLength = _termAtt.TermLength();
                    _curGramSize   = _minGram;
                    _curPos        = 0;
                    _tokStart      = _offsetAtt.StartOffset;
                }

                while (_curGramSize <= _maxGram)
                {
                    if (_curPos + _curGramSize > _curTermLength)
                    {
                        // This size no longer fits at the current position:
                        // move on to the next gram size and restart from the left.
                        _curGramSize++;
                        _curPos = 0;
                        continue;
                    }

                    var gramStart = _curPos;

                    ClearAttributes();
                    _termAtt.SetTermBuffer(_curTermBuffer, gramStart, _curGramSize);
                    _offsetAtt.SetOffset(_tokStart + gramStart, _tokStart + gramStart + _curGramSize);
                    _curPos = gramStart + 1;
                    return true;
                }

                // Every gram size is exhausted for this token; request a new one.
                _curTermBuffer = null;
            }
        }