Пример #1
0
        /// <param name="s"> buffer, oversized to at least <code>len+1</code> </param>
        /// <param name="len"> initial valid length of buffer </param>
        /// <returns> new valid length, stemmed </returns>
        public virtual int Stem(char[] s, int len)
        {
            // The buffer must have at least one spare slot past the valid length.
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(s.Length >= len + 1, "this stemmer requires an oversized array of at least 1");
            }

            // Single-pass steps, run in a fixed order; each call yields the new valid length.
            len = plural.Apply(s, len);
            len = unification.Apply(s, len);
            len = adverb.Apply(s, len);

            // Keep re-running the augmentative step until it leaves the length unchanged.
            while (true)
            {
                int next = augmentative.Apply(s, len);
                if (next == len)
                {
                    break;
                }

                len = next;
            }

            // The verb step only runs when the noun step did not change the length.
            int afterNoun = noun.Apply(s, len);
            len = (afterNoun == len) ? verb.Apply(s, len) : afterNoun;

            // The vowel step always runs, whatever the previous steps did.
            len = vowel.Apply(s, len);

            // RSLG accent removal: fold the accented vowels in the valid region, in place.
            for (int pos = 0; pos < len; pos++)
            {
                char current = s[pos];

                if (current == 'á')
                {
                    s[pos] = 'a';
                }
                else if (current == 'é' || current == 'ê')
                {
                    s[pos] = 'e';
                }
                else if (current == 'í')
                {
                    s[pos] = 'i';
                }
                else if (current == 'ó')
                {
                    s[pos] = 'o';
                }
                else if (current == 'ú')
                {
                    s[pos] = 'u';
                }
            }

            return len;
        }
Пример #2
0
 public virtual int Stem(char[] s, int len)
 {
     // The whole stemmer is the single pluralStep; its result is the new valid length.
     int stemmedLength = pluralStep.Apply(s, len);
     return stemmedLength;
 }
Пример #3
0
        /// <param name="s"> buffer, oversized to at least <code>len+1</code> </param>
        /// <param name="len"> initial valid length of buffer </param>
        /// <returns> new valid length, stemmed </returns>
        public virtual int Stem(char[] s, int len)
        {
            // The buffer must have at least one spare slot past the valid length.
            Debug.Assert(s.Length >= len + 1, "this stemmer requires an oversized array of at least 1");

            // Steps that always run, in a fixed order; each call yields the new valid length.
            len = plural.Apply(s, len);
            len = adverb.Apply(s, len);
            len = feminine.Apply(s, len);
            len = augmentative.Apply(s, len);

            // Fallback chain: noun first, then verb if the noun step left the length
            // unchanged, then vowel if the verb step left it unchanged as well.
            int afterNoun = noun.Apply(s, len);
            if (afterNoun != len)
            {
                len = afterNoun;
            }
            else
            {
                int afterVerb = verb.Apply(s, len);
                len = (afterVerb != len) ? afterVerb : vowel.Apply(s, len);
            }

            // rslp accent removal: fold accented lowercase letters in the valid region, in place.
            // Every handled character lies between 'à' and 'ÿ', and each group of variants
            // is contiguous in code-point order, so range checks cover exactly the same set.
            for (int pos = 0; pos < len; pos++)
            {
                char current = s[pos];

                if (current < 'à' || current > 'ÿ')
                {
                    continue; // outside the handled range, nothing to fold
                }

                if (current <= 'å')
                {
                    s[pos] = 'a'; // à á â ã ä å
                }
                else if (current == 'ç')
                {
                    s[pos] = 'c';
                }
                else if (current >= 'è' && current <= 'ë')
                {
                    s[pos] = 'e'; // è é ê ë
                }
                else if (current >= 'ì' && current <= 'ï')
                {
                    s[pos] = 'i'; // ì í î ï
                }
                else if (current == 'ñ')
                {
                    s[pos] = 'n';
                }
                else if (current >= 'ò' && current <= 'ö')
                {
                    s[pos] = 'o'; // ò ó ô õ ö
                }
                else if (current >= 'ù' && current <= 'ü')
                {
                    s[pos] = 'u'; // ù ú û ü
                }
                else if (current == 'ý' || current == 'ÿ')
                {
                    s[pos] = 'y';
                }
            }

            return len;
        }