HunspellStemmer uses the affix rules declared in a HunspellDictionary to generate one or more stems for a word. It follows the original hunspell algorithm, including recursive suffix stripping.
        /// <summary>
        ///   Initializes a HunspellStemFilter over <paramref name="input"/>, backed by a
        ///   HunspellStemmer that is built from <paramref name="dictionary"/>.
        /// </summary>
        /// <param name="input">Upstream TokenStream whose tokens are to be stemmed; forwarded to the base constructor.</param>
        /// <param name="dictionary">HunspellDictionary (affix rules and words) handed to the HunspellStemmer created here.</param>
        /// <param name="dedup">true if only unique terms should be output; the flag is stored for later use.</param>
        public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, Boolean dedup = true)
            : base(input) {
            // Register the position-increment attribute first, then the term attribute.
            var positionIncrement = AddAttribute(typeof(PositionIncrementAttribute));
            this._posIncAtt = (PositionIncrementAttribute)positionIncrement;

            var term = AddAttribute(typeof(TermAttribute));
            this._termAtt = (TermAttribute)term;

            // Remaining state does not touch the attribute source.
            this._stemmer = new HunspellStemmer(dictionary);
            this._dedup = dedup;
        }
        /// <summary>
        ///   Builds a HunspellStemFilter that stems the tokens produced by <paramref name="input"/>
        ///   with a HunspellStemmer constructed from <paramref name="dictionary"/>.
        /// </summary>
        /// <param name="input">Source TokenStream; passed straight through to the base constructor.</param>
        /// <param name="dictionary">HunspellDictionary supplying the affix rules and words used for stemming.</param>
        /// <param name="dedup">true if only unique terms should be output; kept in a field for later use.</param>
        public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, Boolean dedup = true)
            : base(input)
        {
            // Position-increment attribute is registered before the term attribute.
            var posIncAttribute = AddAttribute(typeof(PositionIncrementAttribute));
            this._posIncAtt = (PositionIncrementAttribute)posIncAttribute;

            var termAttribute = AddAttribute(typeof(TermAttribute));
            this._termAtt = (TermAttribute)termAttribute;

            // Stemmer and de-duplication flag are independent of the attributes above.
            this._stemmer = new HunspellStemmer(dictionary);
            this._dedup = dedup;
        }
Ejemplo n.º 3
0
        // Checks that stemming "drinkables" with the en_US dictionary yields exactly
        // one stem, "drink".
        public void TestStem_RecursiveSuffix_EnUS() {
            var enUsStemmer = new HunspellStemmer(HunspellDictionaryLoader.Dictionary("en_US"));

            var results = enUsStemmer.Stem("drinkables").ToList();

            Assert.AreEqual(1, results.Count);
            Assert.AreEqual("drink", results[0].Stem);
        }
Ejemplo n.º 4
0
        // Checks that stemming "remove" with the en_US dictionary yields exactly
        // one stem, "move".
        public void TestStem_SimplePrefix_EnUS() {
            var enUsStemmer = new HunspellStemmer(HunspellDictionaryLoader.Dictionary("en_US"));

            var results = enUsStemmer.Stem("remove").ToList();

            Assert.AreEqual(1, results.Count);
            Assert.AreEqual("move", results[0].Stem);
        }
Ejemplo n.º 5
0
        /// <summary>
        ///   Initializes a HunspellStemFilter over <paramref name="input"/>. Stemming is backed by a
        ///   HunspellStemmer built from <paramref name="dictionary"/>; a SlovakStemmer instance is
        ///   created alongside it.
        /// </summary>
        /// <param name="input">Upstream TokenStream whose tokens are to be stemmed; forwarded to the base constructor.</param>
        /// <param name="dictionary">HunspellDictionary (affix rules and words) handed to the HunspellStemmer created here.</param>
        /// <param name="dedup">true if only unique terms should be output; the flag is stored for later use.</param>
        public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, Boolean dedup = true)
            : base(input)
        {
            // Register the position-increment attribute first, then the term attribute.
            this._posIncAtt = this.AddAttribute<IPositionIncrementAttribute>();
            this._termAtt = this.AddAttribute<ITermAttribute>();

            // The Hunspell stemmer is constructed before the Slovak one.
            this._stemmer = new HunspellStemmer(dictionary);
            this._slovakStemmer = new SlovakStemmer();

            this._dedup = dedup;
        }
Ejemplo n.º 6
0
        // Checks the nl_NL dictionary: "fietsen" yields two stems ("fietsen", then "fiets"),
        // while "fiets" yields only itself.
        public void TestStem_fietsenFiets_NlNL() {
            var dutchStemmer = new HunspellStemmer(HunspellDictionaryLoader.Dictionary("nl_NL"));

            // Inflected form: both the word itself and its base are expected, in that order.
            var inflectedStems = dutchStemmer.Stem("fietsen").ToList();
            Assert.AreEqual(2, inflectedStems.Count);
            Assert.AreEqual("fietsen", inflectedStems[0].Stem);
            Assert.AreEqual("fiets", inflectedStems[1].Stem);

            // Base form: a single stem equal to the input.
            var baseStems = dutchStemmer.Stem("fiets").ToList();
            Assert.AreEqual(1, baseStems.Count);
            Assert.AreEqual("fiets", baseStems[0].Stem);
        }