Stem() public method

Find the stem(s) of the provided word.
public Stem ( String word ) : IEnumerable
Parameter: word (String) - the word to find the stems for.
Returns: IEnumerable
        // Stems "fietsen" and "fiets" with the nl_NL dictionary and checks the
        // number, order and text of the stems that come back.
        public void TestStem_fietsenFiets_NlNL() {
            var stemmer = new HunspellStemmer(HunspellDictionaryLoader.Dictionary("nl_NL"));

            // "fietsen" is expected to produce itself first, followed by "fiets".
            var fietsenStems = stemmer.Stem("fietsen").ToList();
            Assert.AreEqual(2, fietsenStems.Count);
            Assert.AreEqual("fietsen", fietsenStems[0].Stem);
            Assert.AreEqual("fiets", fietsenStems[1].Stem);

            // "fiets" is expected to produce only itself.
            var fietsStems = stemmer.Stem("fiets").ToList();
            Assert.AreEqual(1, fietsStems.Count);
            Assert.AreEqual("fiets", fietsStems[0].Stem);
        }
        // Stems "remove" with the en_US dictionary and expects exactly one stem, "move".
        public void TestStem_SimplePrefix_EnUS() {
            var stemmer = new HunspellStemmer(HunspellDictionaryLoader.Dictionary("en_US"));

            var removeStems = stemmer.Stem("remove").ToList();

            Assert.AreEqual(1, removeStems.Count);
            Assert.AreEqual("move", removeStems[0].Stem);
        }
        // Stems "drinkables" with the en_US dictionary and expects exactly one stem, "drink".
        public void TestStem_RecursiveSuffix_EnUS() {
            var stemmer = new HunspellStemmer(HunspellDictionaryLoader.Dictionary("en_US"));

            var drinkablesStems = stemmer.Stem("drinkables").ToList();

            Assert.AreEqual(1, drinkablesStems.Count);
            Assert.AreEqual("drink", drinkablesStems[0].Stem);
        }
Example #4
0
        // Produces the next token of the stream.
        //
        // Each input token is stemmed with the Hunspell stemmer (de-duplicated when
        // _dedup is set). The first stem replaces the term text; any further stems are
        // queued in _buffer and emitted by subsequent calls with a position increment
        // of 0. Tokens for which the dictionary yields no stem are passed through the
        // Slovak stemmer instead.
        //
        // Returns true when a token was produced, false when the input is exhausted.
        public override Boolean IncrementToken()
        {
            // Stems left over from the previous input token are emitted first. The
            // captured state is restored before the term text is overwritten.
            if (_buffer.Count > 0)
            {
                var nextStem = _buffer.Dequeue();

                RestoreState(_savedState);
                _posIncAtt.PositionIncrement = 0;
                _termAtt.SetTermBuffer(nextStem.Stem, 0, nextStem.StemLength);
                return true;
            }

            if (!input.IncrementToken())
            {
                return false;
            }

            var newTerms = _dedup
                               ? _stemmer.UniqueStems(_termAtt.Term)
                               : _stemmer.Stem(_termAtt.Term);

            foreach (var newTerm in newTerms)
            {
                _buffer.Enqueue(newTerm);
            }

            if (_buffer.Count == 0)
            {
                // Originally: we do not know this word, return it unchanged.
                // Changed: apply SlovakStemmer to words not found in the dictionary
                // (possible named entities).
                //
                // The term length is tested directly; materialising the buffer as a
                // string just to check it for emptiness would allocate once per
                // unknown token.
                var termLength = _termAtt.TermLength();
                if (termLength > 0)
                {
                    _slovakStemmer.Stem(_termAtt.TermBuffer(), termLength, out char[] stemmedTerm, out var stemmedLength);
                    _termAtt.SetTermBuffer(stemmedTerm, 0, stemmedLength);
                    _termAtt.SetTermLength(stemmedLength);
                }
                return true;
            }

            var stem = _buffer.Dequeue();

            _termAtt.SetTermBuffer(stem.Stem, 0, stem.StemLength);

            // State is captured only when more stems remain to be emitted for this token.
            if (_buffer.Count > 0)
            {
                _savedState = CaptureState();
            }

            return true;
        }
        // Produces the next token of the stream.
        //
        // Each input token is stemmed (de-duplicated when _dedup is set). The first
        // stem replaces the term text; remaining stems wait in _buffer and are emitted
        // by later calls with a position increment of 0. A token with no stems is
        // returned unchanged.
        //
        // Returns true when a token was produced, false when the input is exhausted.
        public override Boolean IncrementToken()
        {
            // Drain stems queued by an earlier call before pulling new input.
            if (_buffer.Count > 0)
            {
                var queuedStem = _buffer.Dequeue();

                RestoreState(_savedState);
                _posIncAtt.SetPositionIncrement(0);
                _termAtt.SetTermBuffer(queuedStem.Stem, 0, queuedStem.StemLength);
                return true;
            }

            if (!input.IncrementToken())
            {
                return false;
            }

            var candidates = _dedup
                                 ? _stemmer.UniqueStems(_termAtt.Term())
                                 : _stemmer.Stem(_termAtt.Term());

            foreach (var candidate in candidates)
            {
                _buffer.Enqueue(candidate);
            }

            // With no stems the word is unknown and the token goes out unchanged;
            // otherwise the first stem replaces the term text.
            if (_buffer.Count != 0)
            {
                var firstStem = _buffer.Dequeue();

                _termAtt.SetTermBuffer(firstStem.Stem, 0, firstStem.StemLength);

                // State is captured only when further stems are still queued.
                if (_buffer.Count > 0)
                {
                    _savedState = CaptureState();
                }
            }

            return true;
        }