UniqueStems() 공개 메소드

Find the unique stem(s) of the provided word.
public UniqueStems ( String word ) : IEnumerable
word String Word to find the stems for.
리턴 IEnumerable
예제 #1
0
        public override Boolean IncrementToken()
        {
            if (_buffer.Any())
            {
                var nextStem = _buffer.Dequeue();

                RestoreState(_savedState);
                _posIncAtt.PositionIncrement = 0;
                _termAtt.SetTermBuffer(nextStem.Stem, 0, nextStem.StemLength);
                return(true);
            }

            if (!input.IncrementToken())
            {
                return(false);
            }

            var newTerms = _dedup
                               ? _stemmer.UniqueStems(_termAtt.Term)
                               : _stemmer.Stem(_termAtt.Term);

            foreach (var newTerm in newTerms)
            {
                _buffer.Enqueue(newTerm);
            }

            if (_buffer.Count == 0)
            {
                // originaly: we do not know this word, return it unchanged
                // changed: apply SlovakStemmer on words not found in dictionary (possible named entities)
                var currentTerm = new string(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
                if (!string.IsNullOrEmpty(currentTerm))
                {
                    _slovakStemmer.Stem(_termAtt.TermBuffer(), _termAtt.TermLength(), out char[] newTerm, out var newLength);
                    _termAtt.SetTermBuffer(newTerm, 0, newLength);
                    _termAtt.SetTermLength(newLength);
                }
                return(true);
            }

            var stem = _buffer.Dequeue();

            _termAtt.SetTermBuffer(stem.Stem, 0, stem.StemLength);

            if (_buffer.Count > 0)
            {
                _savedState = CaptureState();
            }

            return(true);
        }
        public override Boolean IncrementToken()
        {
            if (_buffer.Any())
            {
                var nextStem = _buffer.Dequeue();

                RestoreState(_savedState);
                _posIncAtt.SetPositionIncrement(0);
                _termAtt.SetTermBuffer(nextStem.Stem, 0, nextStem.StemLength);
                return(true);
            }

            if (!input.IncrementToken())
            {
                return(false);
            }

            var newTerms = _dedup
                               ? _stemmer.UniqueStems(_termAtt.Term())
                               : _stemmer.Stem(_termAtt.Term());

            foreach (var newTerm in newTerms)
            {
                _buffer.Enqueue(newTerm);
            }

            if (_buffer.Count == 0)
            {
                // we do not know this word, return it unchanged
                return(true);
            }

            var stem = _buffer.Dequeue();

            _termAtt.SetTermBuffer(stem.Stem, 0, stem.StemLength);

            if (_buffer.Count > 0)
            {
                _savedState = CaptureState();
            }

            return(true);
        }