/// <summary>
/// Strips the endings -ησου, -ησε and -ησα. When the remaining stem is found in
/// <c>exc16</c>, the leading -ησ of the ending is kept.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Rule16(char[] s, int len)
{
    int cut;
    if (len > 4 && StemmerUtil.EndsWith(s, len, "ησου"))
    {
        cut = 4;
    }
    else if (len > 3
        && (StemmerUtil.EndsWith(s, len, "ησε") || StemmerUtil.EndsWith(s, len, "ησα")))
    {
        cut = 3;
    }
    else
    {
        return len; // none of the endings is present
    }

    int stemLen = len - cut;

    // exception stems get -ησ added back
    return exc16.Contains(s, 0, stemLen) ? stemLen + 2 : stemLen;
}
/// <summary>
/// Strips the endings -ια, -ιου and -ιων. If what is left ends with a vowel
/// (per <c>EndsWithVowel</c>), the -ι of the ending is kept.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Rule5(char[] s, int len)
{
    int cut = 0;
    if (len > 2 && StemmerUtil.EndsWith(s, len, "ια"))
    {
        cut = 2;
    }
    else if (len > 3
        && (StemmerUtil.EndsWith(s, len, "ιου") || StemmerUtil.EndsWith(s, len, "ιων")))
    {
        cut = 3;
    }

    if (cut == 0)
    {
        return len;
    }

    int stemLen = len - cut;

    // add back -ι when the stem would otherwise end in a vowel
    return EndsWithVowel(s, stemLen) ? stemLen + 1 : stemLen;
}
/// <summary>
/// Removes one of the suffixes -kan, -an or -i, provided none of the
/// <c>REMOVED_*</c> bits that block that suffix is set in <c>flags</c>.
/// A removal also decrements <c>numSyllables</c>.
/// </summary>
/// <param name="text"> input buffer </param>
/// <param name="length"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int RemoveSuffix(char[] text, int length)
{
    int removed = 0;

    if (StemmerUtil.EndsWith(text, length, "kan")
        && (flags & (REMOVED_KE | REMOVED_PENG | REMOVED_PE)) == 0)
    {
        removed = 3;
    }
    else if (StemmerUtil.EndsWith(text, length, "an")
        && (flags & (REMOVED_DI | REMOVED_MENG | REMOVED_TER)) == 0)
    {
        removed = 2;
    }
    else if (StemmerUtil.EndsWith(text, length, "i")
        && !StemmerUtil.EndsWith(text, length, "si")
        && (flags & (REMOVED_BER | REMOVED_KE | REMOVED_PENG)) == 0)
    {
        removed = 1;
    }

    if (removed == 0)
    {
        return length;
    }

    numSyllables--;
    return length - removed;
}
#pragma warning restore 612, 618
/// <summary>
/// Strips -ιεστε and then -εστε. Each ending is handled in turn on the current
/// length; stems listed in <c>exc12a</c> keep -ιεστ and stems listed in
/// <c>exc12b</c> keep -εστ.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Rule12(char[] s, int len)
{
    if (len > 5 && StemmerUtil.EndsWith(s, len, "ιεστε"))
    {
        int stemA = len - 5;
        // add back -ιεστ for exception stems
        len = exc12a.Contains(s, 0, stemA) ? stemA + 4 : stemA;
    }

    // deliberately not an else: runs on the length produced above
    if (len > 4 && StemmerUtil.EndsWith(s, len, "εστε"))
    {
        int stemB = len - 4;
        // add back -εστ for exception stems
        len = exc12b.Contains(s, 0, stemB) ? stemB + 3 : stemB;
    }

    return len;
}
/// <summary>
/// Handles the endings -αδεσ and -αδων. The whole ending is removed only when the
/// remaining stem ends with one of a fixed set of stems; otherwise -αδ is kept.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Rule1(char[] s, int len)
{
    bool hasEnding = len > 4
        && (StemmerUtil.EndsWith(s, len, "αδεσ") || StemmerUtil.EndsWith(s, len, "αδων"));
    if (!hasEnding)
    {
        return len;
    }

    int stemLen = len - 4;
    bool stripWholeEnding =
        StemmerUtil.EndsWith(s, stemLen, "οκ")
        || StemmerUtil.EndsWith(s, stemLen, "μαμ")
        || StemmerUtil.EndsWith(s, stemLen, "μαν")
        || StemmerUtil.EndsWith(s, stemLen, "μπαμπ")
        || StemmerUtil.EndsWith(s, stemLen, "πατερ")
        || StemmerUtil.EndsWith(s, stemLen, "γιαγι")
        || StemmerUtil.EndsWith(s, stemLen, "νταντ")
        || StemmerUtil.EndsWith(s, stemLen, "κυρ")
        || StemmerUtil.EndsWith(s, stemLen, "θει")
        || StemmerUtil.EndsWith(s, stemLen, "πεθερ");

    // every other stem gets -αδ added back
    return stripWholeEnding ? stemLen : stemLen + 2;
}
/// <summary>
/// Mainly removes the definite article: -ият (three characters) or one of the
/// two-character endings -ът, -то, -те, -та, -ия, -ят. Each ending has its own
/// minimum word length.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> length of input buffer </param>
/// <returns> new stemmed length </returns>
private int RemoveArticle(char[] s, int len)
{
    if (len > 6 && StemmerUtil.EndsWith(s, len, "ият"))
    {
        return len - 3;
    }

    bool longWordArticle = len > 5
        && (StemmerUtil.EndsWith(s, len, "ът")
            || StemmerUtil.EndsWith(s, len, "то")
            || StemmerUtil.EndsWith(s, len, "те")
            || StemmerUtil.EndsWith(s, len, "та")
            || StemmerUtil.EndsWith(s, len, "ия"));

    // -ят is accepted on words one character shorter than the others
    if (longWordArticle || (len > 4 && StemmerUtil.EndsWith(s, len, "ят")))
    {
        return len - 2;
    }

    return len;
}
/// <summary>
/// For words longer than five characters, removes -lla, -tse, -sti or -ni, or
/// shortens a trailing -aa to -a.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Step2(char[] s, int len)
{
    if (len <= 5)
    {
        return len; // too short for any of the endings
    }

    bool threeCharEnding = StemmerUtil.EndsWith(s, len, "lla")
        || StemmerUtil.EndsWith(s, len, "tse")
        || StemmerUtil.EndsWith(s, len, "sti");
    if (threeCharEnding)
    {
        return len - 3;
    }

    if (StemmerUtil.EndsWith(s, len, "ni"))
    {
        return len - 2;
    }

    // aa -> a
    return StemmerUtil.EndsWith(s, len, "aa") ? len - 1 : len;
}
/// <summary>
/// Strips the endings -αγεσ, -αγα and -αγε. The -αγ part is kept when the stem is
/// in <c>exc15a</c> or ends with one of a fixed set of stems, unless the stem is
/// also in <c>exc15b</c> or ends with κολλ.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Rule15(char[] s, int len)
{
    int cut;
    if (len > 4 && StemmerUtil.EndsWith(s, len, "αγεσ"))
    {
        cut = 4;
    }
    else if (len > 3
        && (StemmerUtil.EndsWith(s, len, "αγα") || StemmerUtil.EndsWith(s, len, "αγε")))
    {
        cut = 3;
    }
    else
    {
        return len;
    }

    int stemLen = len - cut;

    bool keepCandidate = exc15a.Contains(s, 0, stemLen)
        || StemmerUtil.EndsWith(s, stemLen, "οφ")
        || StemmerUtil.EndsWith(s, stemLen, "πελ")
        || StemmerUtil.EndsWith(s, stemLen, "χορτ")
        || StemmerUtil.EndsWith(s, stemLen, "λλ")
        || StemmerUtil.EndsWith(s, stemLen, "σφ")
        || StemmerUtil.EndsWith(s, stemLen, "ρπ")
        || StemmerUtil.EndsWith(s, stemLen, "φρ")
        || StemmerUtil.EndsWith(s, stemLen, "πρ")
        || StemmerUtil.EndsWith(s, stemLen, "λοχ")
        || StemmerUtil.EndsWith(s, stemLen, "σμην");

    bool vetoed = exc15b.Contains(s, 0, stemLen)
        || StemmerUtil.EndsWith(s, stemLen, "κολλ");

    if (keepCandidate && !vetoed)
    {
        return stemLen + 2; // add back -αγ
    }

    return stemLen;
}
/// <summary>
/// Two-stage rule. First strips -ηθηκεσ, -ηθηκα or -ηθηκε. Then, on the resulting
/// length, strips -ηκεσ, -ηκα or -ηκε; after this second removal the -ηκ part is
/// kept if the stem is in <c>exc13</c> or ends with one of a fixed set of stems.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Rule13(char[] s, int len)
{
    // stage 1: longer endings, removed unconditionally
    if (len > 6 && StemmerUtil.EndsWith(s, len, "ηθηκεσ"))
    {
        len -= 6;
    }
    else if (len > 5
        && (StemmerUtil.EndsWith(s, len, "ηθηκα") || StemmerUtil.EndsWith(s, len, "ηθηκε")))
    {
        len -= 5;
    }

    // stage 2: shorter endings, subject to the exception check
    int cut;
    if (len > 4 && StemmerUtil.EndsWith(s, len, "ηκεσ"))
    {
        cut = 4;
    }
    else if (len > 3
        && (StemmerUtil.EndsWith(s, len, "ηκα") || StemmerUtil.EndsWith(s, len, "ηκε")))
    {
        cut = 3;
    }
    else
    {
        return len;
    }

    int stemLen = len - cut;
    bool keepPart = exc13.Contains(s, 0, stemLen)
        || StemmerUtil.EndsWith(s, stemLen, "σκωλ")
        || StemmerUtil.EndsWith(s, stemLen, "σκουλ")
        || StemmerUtil.EndsWith(s, stemLen, "ναρθ")
        || StemmerUtil.EndsWith(s, stemLen, "σφ")
        || StemmerUtil.EndsWith(s, stemLen, "οθ")
        || StemmerUtil.EndsWith(s, stemLen, "πιθ");

    // add back the -ηκ
    return keepPart ? stemLen + 2 : stemLen;
}
/// <summary>
/// Strips the endings -ησουμε, -ηθουμε and -ουμε. When the remaining stem is in
/// <c>exc19</c>, the three characters ουμ are written after the stem and kept.
/// </summary>
/// <param name="s"> input buffer; may be modified in place </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Rule19(char[] s, int len)
{
    int cut;
    if (len > 6
        && (StemmerUtil.EndsWith(s, len, "ησουμε") || StemmerUtil.EndsWith(s, len, "ηθουμε")))
    {
        cut = 6;
    }
    else if (len > 4 && StemmerUtil.EndsWith(s, len, "ουμε"))
    {
        cut = 4;
    }
    else
    {
        return len;
    }

    len -= cut;

    if (exc19.Contains(s, 0, len))
    {
        // the removed ending was at least four characters, so these slots exist
        s[len] = 'ο';
        s[len + 1] = 'υ';
        s[len + 2] = 'μ';
        len += 3;
    }

    return len;
}
/// <summary>
/// Removes a single case ending, trying four-character endings first, then three,
/// then two, and finally one trailing character from a fixed set. Each group has
/// its own minimum word length.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int RemoveCase(char[] s, int len)
{
    if (len > 6)
    {
        if (StemmerUtil.EndsWith(s, len, "иями") || StemmerUtil.EndsWith(s, len, "оями"))
        {
            return len - 4;
        }
    }

    if (len > 5)
    {
        bool threeChars =
            StemmerUtil.EndsWith(s, len, "иям")
            || StemmerUtil.EndsWith(s, len, "иях")
            || StemmerUtil.EndsWith(s, len, "оях")
            || StemmerUtil.EndsWith(s, len, "ями")
            || StemmerUtil.EndsWith(s, len, "оям")
            || StemmerUtil.EndsWith(s, len, "оьв")
            || StemmerUtil.EndsWith(s, len, "ами")
            || StemmerUtil.EndsWith(s, len, "его")
            || StemmerUtil.EndsWith(s, len, "ему")
            || StemmerUtil.EndsWith(s, len, "ери")
            || StemmerUtil.EndsWith(s, len, "ими")
            || StemmerUtil.EndsWith(s, len, "ого")
            || StemmerUtil.EndsWith(s, len, "ому")
            || StemmerUtil.EndsWith(s, len, "ыми")
            || StemmerUtil.EndsWith(s, len, "оев");
        if (threeChars)
        {
            return len - 3;
        }
    }

    if (len > 4)
    {
        bool twoChars =
            StemmerUtil.EndsWith(s, len, "ая")
            || StemmerUtil.EndsWith(s, len, "яя")
            || StemmerUtil.EndsWith(s, len, "ях")
            || StemmerUtil.EndsWith(s, len, "юю")
            || StemmerUtil.EndsWith(s, len, "ах")
            || StemmerUtil.EndsWith(s, len, "ею")
            || StemmerUtil.EndsWith(s, len, "их")
            || StemmerUtil.EndsWith(s, len, "ия")
            || StemmerUtil.EndsWith(s, len, "ию")
            || StemmerUtil.EndsWith(s, len, "ьв")
            || StemmerUtil.EndsWith(s, len, "ою")
            || StemmerUtil.EndsWith(s, len, "ую")
            || StemmerUtil.EndsWith(s, len, "ям")
            || StemmerUtil.EndsWith(s, len, "ых")
            || StemmerUtil.EndsWith(s, len, "ея")
            || StemmerUtil.EndsWith(s, len, "ам")
            || StemmerUtil.EndsWith(s, len, "ем")
            || StemmerUtil.EndsWith(s, len, "ей")
            || StemmerUtil.EndsWith(s, len, "ём")
            || StemmerUtil.EndsWith(s, len, "ев")
            || StemmerUtil.EndsWith(s, len, "ий")
            || StemmerUtil.EndsWith(s, len, "им")
            || StemmerUtil.EndsWith(s, len, "ое")
            || StemmerUtil.EndsWith(s, len, "ой")
            || StemmerUtil.EndsWith(s, len, "ом")
            || StemmerUtil.EndsWith(s, len, "ов")
            || StemmerUtil.EndsWith(s, len, "ые")
            || StemmerUtil.EndsWith(s, len, "ый")
            || StemmerUtil.EndsWith(s, len, "ым")
            || StemmerUtil.EndsWith(s, len, "ми");
        if (twoChars)
        {
            return len - 2;
        }
    }

    // last resort: drop one trailing character from this set
    if (len > 3 && "аеиоуйыяь".IndexOf(s[len - 1]) >= 0)
    {
        return len - 1;
    }

    return len;
}
/// <summary>
/// Strips the endings -ουσεσ, -ουσα and -ουσε. The -ουσ part is kept when the stem
/// is in <c>exc14</c>, ends with a vowel (per <c>EndsWithVowel</c>), or ends with
/// one of a fixed set of stems.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Rule14(char[] s, int len)
{
    int cut;
    if (len > 5 && StemmerUtil.EndsWith(s, len, "ουσεσ"))
    {
        cut = 5;
    }
    else if (len > 4
        && (StemmerUtil.EndsWith(s, len, "ουσα") || StemmerUtil.EndsWith(s, len, "ουσε")))
    {
        cut = 4;
    }
    else
    {
        return len;
    }

    int stemLen = len - cut;
    bool keepPart = exc14.Contains(s, 0, stemLen)
        || EndsWithVowel(s, stemLen)
        || StemmerUtil.EndsWith(s, stemLen, "ποδαρ")
        || StemmerUtil.EndsWith(s, stemLen, "βλεπ")
        || StemmerUtil.EndsWith(s, stemLen, "πανταχ")
        || StemmerUtil.EndsWith(s, stemLen, "φρυδ")
        || StemmerUtil.EndsWith(s, stemLen, "μαντιλ")
        || StemmerUtil.EndsWith(s, stemLen, "μαλλ")
        || StemmerUtil.EndsWith(s, stemLen, "κυματ")
        || StemmerUtil.EndsWith(s, stemLen, "λαχ")
        || StemmerUtil.EndsWith(s, stemLen, "ληγ")
        || StemmerUtil.EndsWith(s, stemLen, "φαγ")
        || StemmerUtil.EndsWith(s, stemLen, "ομ")
        || StemmerUtil.EndsWith(s, stemLen, "πρωτ");

    // add back -ουσ
    return keepPart ? stemLen + 3 : stemLen;
}
/// <summary>
/// Applies this step to the word. Nothing happens if the word is shorter than
/// <c>m_min</c>, or if <c>m_suffixes</c> is set and the word ends with none of
/// them. Otherwise the first entry of <c>m_rules</c> that matches performs the
/// replacement.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length of the string after applying the entire step. </returns>
public virtual int Apply(char[] s, int len)
{
    if (len < m_min)
    {
        return len;
    }

    if (m_suffixes != null)
    {
        // cheap pre-filter before the rules are tried
        bool anySuffix = false;
        foreach (var suffix in m_suffixes)
        {
            if (StemmerUtil.EndsWith(s, len, suffix))
            {
                anySuffix = true;
                break;
            }
        }

        if (!anySuffix)
        {
            return len;
        }
    }

    foreach (var rule in m_rules)
    {
        if (rule.Matches(s, len))
        {
            return rule.Replace(s, len); // only the first matching rule runs
        }
    }

    return len;
}
/// <summary>
/// Tests whether the word ends with <c>m_suffix</c> and would still be at least
/// <c>m_min</c> characters long with that suffix taken off.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> true if the word matches this rule. </returns>
public virtual bool Matches(char[] s, int len)
{
    int remaining = len - m_suffix.Length;
    if (remaining < m_min)
    {
        return false;
    }

    return StemmerUtil.EndsWith(s, len, m_suffix);
}
/// <summary>
/// Removes one possessive ending. Endings are tried from four characters down to
/// one; several of them apply only when the character in front of the ending is,
/// or is not, a vowel according to <c>IsVowel</c>.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int RemovePossessive(char[] s, int len)
{
    if (len > 6)
    {
        // -atok / -otok / -etek need a non-vowel in front
        if (!IsVowel(s[len - 5])
            && (StemmerUtil.EndsWith(s, len, "atok")
                || StemmerUtil.EndsWith(s, len, "otok")
                || StemmerUtil.EndsWith(s, len, "etek")))
        {
            return len - 4;
        }

        if (StemmerUtil.EndsWith(s, len, "itek") || StemmerUtil.EndsWith(s, len, "itok"))
        {
            return len - 4;
        }
    }

    if (len > 5)
    {
        if (!IsVowel(s[len - 4])
            && (StemmerUtil.EndsWith(s, len, "unk")
                || StemmerUtil.EndsWith(s, len, "tok")
                || StemmerUtil.EndsWith(s, len, "tek")))
        {
            return len - 3;
        }

        // -juk needs a vowel in front
        if (IsVowel(s[len - 4]) && StemmerUtil.EndsWith(s, len, "juk"))
        {
            return len - 3;
        }

        if (StemmerUtil.EndsWith(s, len, "ink"))
        {
            return len - 3;
        }
    }

    if (len > 4)
    {
        if (!IsVowel(s[len - 3])
            && (StemmerUtil.EndsWith(s, len, "am")
                || StemmerUtil.EndsWith(s, len, "em")
                || StemmerUtil.EndsWith(s, len, "om")
                || StemmerUtil.EndsWith(s, len, "ad")
                || StemmerUtil.EndsWith(s, len, "ed")
                || StemmerUtil.EndsWith(s, len, "od")
                || StemmerUtil.EndsWith(s, len, "uk")))
        {
            return len - 2;
        }

        if (IsVowel(s[len - 3])
            && (StemmerUtil.EndsWith(s, len, "nk")
                || StemmerUtil.EndsWith(s, len, "ja")
                || StemmerUtil.EndsWith(s, len, "je")))
        {
            return len - 2;
        }

        if (StemmerUtil.EndsWith(s, len, "im")
            || StemmerUtil.EndsWith(s, len, "id")
            || StemmerUtil.EndsWith(s, len, "ik"))
        {
            return len - 2;
        }
    }

    if (len > 3)
    {
        char last = s[len - 1];
        if (last == 'a' || last == 'e')
        {
            // removed only after a non-vowel
            if (!IsVowel(s[len - 2]))
            {
                return len - 1;
            }
        }
        else if (last == 'm' || last == 'd')
        {
            // removed only after a vowel
            if (IsVowel(s[len - 2]))
            {
                return len - 1;
            }
        }
        else if (last == 'i')
        {
            return len - 1;
        }
    }

    return len;
}
/// <summary>
/// Removes one case ending: -kent, one of a list of three-character endings, -al/-el
/// after a doubled non-vowel, one of a list of two-character endings, -on/-en after
/// a non-vowel, a trailing t or n, or a trailing a/e after a doubled non-vowel.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int RemoveCase(char[] s, int len)
{
    if (len > 6 && StemmerUtil.EndsWith(s, len, "kent"))
    {
        return len - 4;
    }

    if (len > 5)
    {
        bool threeChars =
            StemmerUtil.EndsWith(s, len, "nak")
            || StemmerUtil.EndsWith(s, len, "nek")
            || StemmerUtil.EndsWith(s, len, "val")
            || StemmerUtil.EndsWith(s, len, "vel")
            || StemmerUtil.EndsWith(s, len, "ert")
            || StemmerUtil.EndsWith(s, len, "rol")
            || StemmerUtil.EndsWith(s, len, "ban")
            || StemmerUtil.EndsWith(s, len, "ben")
            || StemmerUtil.EndsWith(s, len, "bol")
            || StemmerUtil.EndsWith(s, len, "nal")
            || StemmerUtil.EndsWith(s, len, "nel")
            || StemmerUtil.EndsWith(s, len, "hoz")
            || StemmerUtil.EndsWith(s, len, "hez")
            || StemmerUtil.EndsWith(s, len, "tol");
        if (threeChars)
        {
            return len - 3;
        }

        // -al / -el after a doubled non-vowel: drop the ending plus one of the pair
        if ((StemmerUtil.EndsWith(s, len, "al") || StemmerUtil.EndsWith(s, len, "el"))
            && !IsVowel(s[len - 3])
            && s[len - 3] == s[len - 4])
        {
            return len - 3;
        }
    }

    if (len > 4)
    {
        bool twoChars =
            StemmerUtil.EndsWith(s, len, "at")
            || StemmerUtil.EndsWith(s, len, "et")
            || StemmerUtil.EndsWith(s, len, "ot")
            || StemmerUtil.EndsWith(s, len, "va")
            || StemmerUtil.EndsWith(s, len, "ve")
            || StemmerUtil.EndsWith(s, len, "ra")
            || StemmerUtil.EndsWith(s, len, "re")
            || StemmerUtil.EndsWith(s, len, "ba")
            || StemmerUtil.EndsWith(s, len, "be")
            || StemmerUtil.EndsWith(s, len, "ul")
            || StemmerUtil.EndsWith(s, len, "ig");
        if (twoChars)
        {
            return len - 2;
        }

        if ((StemmerUtil.EndsWith(s, len, "on") || StemmerUtil.EndsWith(s, len, "en"))
            && !IsVowel(s[len - 3]))
        {
            return len - 2;
        }

        char last = s[len - 1];
        if (last == 't' || last == 'n')
        {
            return len - 1;
        }

        if (last == 'a' || last == 'e')
        {
            // after a doubled non-vowel: drop the vowel plus one of the pair
            if (s[len - 2] == s[len - 3] && !IsVowel(s[len - 2]))
            {
                return len - 2;
            }
        }
    }

    return len;
}
/// <summary>
/// Removes the two-character endings -ov, -in and -ův from words longer than five
/// characters.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int RemovePossessives(char[] s, int len)
{
    if (len <= 5)
    {
        return len;
    }

    bool possessive = StemmerUtil.EndsWith(s, len, "ov")
        || StemmerUtil.EndsWith(s, len, "in")
        || StemmerUtil.EndsWith(s, len, "ův");

    return possessive ? len - 2 : len;
}
/// <summary>
/// Removes one ending from a long fixed list, trying the longest (nine characters)
/// first and working down to two; as a last step a single trailing vowel (per
/// <c>EndsWithVowel</c>) is removed. Only the first match is applied.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Rule21(char[] s, int len)
{
    if (len > 9 && StemmerUtil.EndsWith(s, len, "ιοντουσαν"))
    {
        return len - 9;
    }

    if (len > 8)
    {
        bool eight =
            StemmerUtil.EndsWith(s, len, "ιομασταν")
            || StemmerUtil.EndsWith(s, len, "ιοσασταν")
            || StemmerUtil.EndsWith(s, len, "ιουμαστε")
            || StemmerUtil.EndsWith(s, len, "οντουσαν");
        if (eight)
        {
            return len - 8;
        }
    }

    if (len > 7)
    {
        bool seven =
            StemmerUtil.EndsWith(s, len, "ιεμαστε")
            || StemmerUtil.EndsWith(s, len, "ιεσαστε")
            || StemmerUtil.EndsWith(s, len, "ιομουνα")
            || StemmerUtil.EndsWith(s, len, "ιοσαστε")
            || StemmerUtil.EndsWith(s, len, "ιοσουνα")
            || StemmerUtil.EndsWith(s, len, "ιουνται")
            || StemmerUtil.EndsWith(s, len, "ιουνταν")
            || StemmerUtil.EndsWith(s, len, "ηθηκατε")
            || StemmerUtil.EndsWith(s, len, "ομασταν")
            || StemmerUtil.EndsWith(s, len, "οσασταν")
            || StemmerUtil.EndsWith(s, len, "ουμαστε");
        if (seven)
        {
            return len - 7;
        }
    }

    if (len > 6)
    {
        bool six =
            StemmerUtil.EndsWith(s, len, "ιομουν")
            || StemmerUtil.EndsWith(s, len, "ιονταν")
            || StemmerUtil.EndsWith(s, len, "ιοσουν")
            || StemmerUtil.EndsWith(s, len, "ηθειτε")
            || StemmerUtil.EndsWith(s, len, "ηθηκαν")
            || StemmerUtil.EndsWith(s, len, "ομουνα")
            || StemmerUtil.EndsWith(s, len, "οσαστε")
            || StemmerUtil.EndsWith(s, len, "οσουνα")
            || StemmerUtil.EndsWith(s, len, "ουνται")
            || StemmerUtil.EndsWith(s, len, "ουνταν")
            || StemmerUtil.EndsWith(s, len, "ουσατε");
        if (six)
        {
            return len - 6;
        }
    }

    if (len > 5)
    {
        bool five =
            StemmerUtil.EndsWith(s, len, "αγατε")
            || StemmerUtil.EndsWith(s, len, "ιεμαι")
            || StemmerUtil.EndsWith(s, len, "ιεται")
            || StemmerUtil.EndsWith(s, len, "ιεσαι")
            || StemmerUtil.EndsWith(s, len, "ιοταν")
            || StemmerUtil.EndsWith(s, len, "ιουμα")
            || StemmerUtil.EndsWith(s, len, "ηθεισ")
            || StemmerUtil.EndsWith(s, len, "ηθουν")
            || StemmerUtil.EndsWith(s, len, "ηκατε")
            || StemmerUtil.EndsWith(s, len, "ησατε")
            || StemmerUtil.EndsWith(s, len, "ησουν")
            || StemmerUtil.EndsWith(s, len, "ομουν")
            || StemmerUtil.EndsWith(s, len, "ονται")
            || StemmerUtil.EndsWith(s, len, "ονταν")
            || StemmerUtil.EndsWith(s, len, "οσουν")
            || StemmerUtil.EndsWith(s, len, "ουμαι")
            || StemmerUtil.EndsWith(s, len, "ουσαν");
        if (five)
        {
            return len - 5;
        }
    }

    if (len > 4)
    {
        bool four =
            StemmerUtil.EndsWith(s, len, "αγαν")
            || StemmerUtil.EndsWith(s, len, "αμαι")
            || StemmerUtil.EndsWith(s, len, "ασαι")
            || StemmerUtil.EndsWith(s, len, "αται")
            || StemmerUtil.EndsWith(s, len, "ειτε")
            || StemmerUtil.EndsWith(s, len, "εσαι")
            || StemmerUtil.EndsWith(s, len, "εται")
            || StemmerUtil.EndsWith(s, len, "ηδεσ")
            || StemmerUtil.EndsWith(s, len, "ηδων")
            || StemmerUtil.EndsWith(s, len, "ηθει")
            || StemmerUtil.EndsWith(s, len, "ηκαν")
            || StemmerUtil.EndsWith(s, len, "ησαν")
            || StemmerUtil.EndsWith(s, len, "ησει")
            || StemmerUtil.EndsWith(s, len, "ησεσ")
            || StemmerUtil.EndsWith(s, len, "ομαι")
            || StemmerUtil.EndsWith(s, len, "οταν");
        if (four)
        {
            return len - 4;
        }
    }

    if (len > 3)
    {
        bool three =
            StemmerUtil.EndsWith(s, len, "αει")
            || StemmerUtil.EndsWith(s, len, "εισ")
            || StemmerUtil.EndsWith(s, len, "ηθω")
            || StemmerUtil.EndsWith(s, len, "ησω")
            || StemmerUtil.EndsWith(s, len, "ουν")
            || StemmerUtil.EndsWith(s, len, "ουσ");
        if (three)
        {
            return len - 3;
        }
    }

    if (len > 2)
    {
        bool two =
            StemmerUtil.EndsWith(s, len, "αν")
            || StemmerUtil.EndsWith(s, len, "ασ")
            || StemmerUtil.EndsWith(s, len, "αω")
            || StemmerUtil.EndsWith(s, len, "ει")
            || StemmerUtil.EndsWith(s, len, "εσ")
            || StemmerUtil.EndsWith(s, len, "ησ")
            || StemmerUtil.EndsWith(s, len, "οι")
            || StemmerUtil.EndsWith(s, len, "οσ")
            || StemmerUtil.EndsWith(s, len, "ου")
            || StemmerUtil.EndsWith(s, len, "υσ")
            || StemmerUtil.EndsWith(s, len, "ων");
        if (two)
        {
            return len - 2;
        }
    }

    // nothing from the list matched: drop one final vowel, if any
    if (len > 1 && EndsWithVowel(s, len))
    {
        return len - 1;
    }

    return len;
}
/// <summary>
/// Shortens words that end with one of a fixed list of whole-word endings, removing
/// between one and four trailing characters depending on the ending. Only the first
/// matching ending is applied.
/// </summary>
/// <remarks>
/// In the seven-character group the <c>len &gt; 6</c> guard is parenthesised so that
/// it covers every alternative. Previously a misplaced parenthesis bound it to
/// "σκαγιου" only, leaving the other nine suffixes to be tested without the length
/// guard.
/// NOTE(review): if <c>StemmerUtil.EndsWith</c> already rejects suffixes longer than
/// <c>len</c>, results are unchanged by this regrouping — confirm against its
/// implementation.
/// </remarks>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Rule0(char[] s, int len)
{
    if (len > 9
        && (StemmerUtil.EndsWith(s, len, "καθεστωτοσ")
            || StemmerUtil.EndsWith(s, len, "καθεστωτων")))
    {
        return len - 4;
    }

    if (len > 8
        && (StemmerUtil.EndsWith(s, len, "γεγονοτοσ")
            || StemmerUtil.EndsWith(s, len, "γεγονοτων")))
    {
        return len - 4;
    }

    if (len > 8 && StemmerUtil.EndsWith(s, len, "καθεστωτα"))
    {
        return len - 3;
    }

    if (len > 7
        && (StemmerUtil.EndsWith(s, len, "τατογιου")
            || StemmerUtil.EndsWith(s, len, "τατογιων")))
    {
        return len - 4;
    }

    if (len > 7 && StemmerUtil.EndsWith(s, len, "γεγονοτα"))
    {
        return len - 3;
    }

    if (len > 7 && StemmerUtil.EndsWith(s, len, "καθεστωσ"))
    {
        return len - 2;
    }

    // the length guard applies to the whole group of seven-character endings
    if (len > 6
        && (StemmerUtil.EndsWith(s, len, "σκαγιου")
            || StemmerUtil.EndsWith(s, len, "σκαγιων")
            || StemmerUtil.EndsWith(s, len, "ολογιου")
            || StemmerUtil.EndsWith(s, len, "ολογιων")
            || StemmerUtil.EndsWith(s, len, "κρεατοσ")
            || StemmerUtil.EndsWith(s, len, "κρεατων")
            || StemmerUtil.EndsWith(s, len, "περατοσ")
            || StemmerUtil.EndsWith(s, len, "περατων")
            || StemmerUtil.EndsWith(s, len, "τερατοσ")
            || StemmerUtil.EndsWith(s, len, "τερατων")))
    {
        return len - 4;
    }

    if (len > 6 && StemmerUtil.EndsWith(s, len, "τατογια"))
    {
        return len - 3;
    }

    if (len > 6 && StemmerUtil.EndsWith(s, len, "γεγονοσ"))
    {
        return len - 2;
    }

    if (len > 5
        && (StemmerUtil.EndsWith(s, len, "φαγιου")
            || StemmerUtil.EndsWith(s, len, "φαγιων")
            || StemmerUtil.EndsWith(s, len, "σογιου")
            || StemmerUtil.EndsWith(s, len, "σογιων")))
    {
        return len - 4;
    }

    if (len > 5
        && (StemmerUtil.EndsWith(s, len, "σκαγια")
            || StemmerUtil.EndsWith(s, len, "ολογια")
            || StemmerUtil.EndsWith(s, len, "κρεατα")
            || StemmerUtil.EndsWith(s, len, "περατα")
            || StemmerUtil.EndsWith(s, len, "τερατα")))
    {
        return len - 3;
    }

    if (len > 4
        && (StemmerUtil.EndsWith(s, len, "φαγια")
            || StemmerUtil.EndsWith(s, len, "σογια")
            || StemmerUtil.EndsWith(s, len, "φωτοσ")
            || StemmerUtil.EndsWith(s, len, "φωτων")))
    {
        return len - 3;
    }

    if (len > 4
        && (StemmerUtil.EndsWith(s, len, "κρεασ")
            || StemmerUtil.EndsWith(s, len, "περασ")
            || StemmerUtil.EndsWith(s, len, "τερασ")))
    {
        return len - 2;
    }

    if (len > 3 && StemmerUtil.EndsWith(s, len, "φωτα"))
    {
        return len - 2;
    }

    if (len > 2 && StemmerUtil.EndsWith(s, len, "φωσ"))
    {
        return len - 1;
    }

    return len;
}
/// <summary>
/// Stems the word in a single pass: first drops a possessive -s, then removes the
/// first matching ending. Some endings apply only when <c>useBokmaal</c> is set and
/// others only when <c>useNynorsk</c> is set.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
public virtual int stem(char[] s, int len)
{
    // Remove possessive -s (bilens -> bilen) and continue checking
    if (len > 4 && s[len - 1] == 's')
    {
        len--;
    }

    // Remove common endings, single-pass
    if (len > 7)
    {
        bool fiveChars =
            (useBokmaal
                && (StemmerUtil.EndsWith(s, len, "heter")        // general ending (hemmelig-heter -> hemmelig)
                    || StemmerUtil.EndsWith(s, len, "heten")))   // general ending (hemmelig-heten -> hemmelig)
            || (useNynorsk
                && StemmerUtil.EndsWith(s, len, "heita"));       // general ending (hemmeleg-heita -> hemmeleg)
        if (fiveChars)
        {
            return len - 5;
        }
    }

    // Remove Nynorsk common endings, single-pass
    if (len > 8 && useNynorsk)
    {
        bool sixChars =
            StemmerUtil.EndsWith(s, len, "heiter")      // general ending (hemmeleg-heiter -> hemmeleg)
            || StemmerUtil.EndsWith(s, len, "leiken")   // general ending (trygg-leiken -> trygg)
            || StemmerUtil.EndsWith(s, len, "leikar");  // general ending (trygg-leikar -> trygg)
        if (sixChars)
        {
            return len - 6;
        }
    }

    if (len > 5)
    {
        bool domOrHet =
            StemmerUtil.EndsWith(s, len, "dom")                      // general ending (kristen-dom -> kristen)
            || (useBokmaal && StemmerUtil.EndsWith(s, len, "het"));  // general ending (hemmelig-het -> hemmelig)
        if (domOrHet)
        {
            return len - 3;
        }
    }

    if (len > 6 && useNynorsk)
    {
        bool nynorskFour =
            StemmerUtil.EndsWith(s, len, "heit")      // general ending (hemmeleg-heit -> hemmeleg)
            || StemmerUtil.EndsWith(s, len, "semd")   // general ending (verk-semd -> verk)
            || StemmerUtil.EndsWith(s, len, "leik");  // general ending (trygg-leik -> trygg)
        if (nynorskFour)
        {
            return len - 4;
        }
    }

    if (len > 7)
    {
        bool elseForms =
            StemmerUtil.EndsWith(s, len, "elser")      // general ending (føl-elser -> føl)
            || StemmerUtil.EndsWith(s, len, "elsen");  // general ending (føl-elsen -> føl)
        if (elseForms)
        {
            return len - 5;
        }
    }

    if (len > 6)
    {
        bool fourChars =
            StemmerUtil.EndsWith(s, len, "else")               // general ending (føl-else -> føl)
            || (useBokmaal
                && (StemmerUtil.EndsWith(s, len, "ende")       // (sov-ende -> sov)
                    || StemmerUtil.EndsWith(s, len, "este")    // adj (fin-este -> fin)
                    || StemmerUtil.EndsWith(s, len, "eren")))  // masc
            || (useNynorsk
                && (StemmerUtil.EndsWith(s, len, "ande")       // (sov-ande -> sov)
                    || StemmerUtil.EndsWith(s, len, "aste")    // adj (fin-aste -> fin)
                    || StemmerUtil.EndsWith(s, len, "aren"))); // masc
        if (fourChars)
        {
            return len - 4;
        }
    }

    if (len > 5)
    {
        bool threeChars =
            StemmerUtil.EndsWith(s, len, "ene")                // masc/fem/neutr pl definite (hus-ene)
            || (useBokmaal
                && (StemmerUtil.EndsWith(s, len, "ere")        // adj (fin-ere -> fin)
                    || StemmerUtil.EndsWith(s, len, "est")))   // adj (fin-est -> fin)
            || (useNynorsk
                && (StemmerUtil.EndsWith(s, len, "are")        // adj (fin-are -> fin)
                    || StemmerUtil.EndsWith(s, len, "ast")     // adj (fin-ast -> fin)
                    || StemmerUtil.EndsWith(s, len, "ane")));  // masc pl definite (gut-ane)
        if (threeChars)
        {
            return len - 3;
        }
    }

    if (len > 4)
    {
        bool twoChars =
            StemmerUtil.EndsWith(s, len, "er")                       // masc/fem indefinite
            || StemmerUtil.EndsWith(s, len, "en")                    // masc/fem definite
            || StemmerUtil.EndsWith(s, len, "et")                    // neutr definite
            || (useNynorsk && StemmerUtil.EndsWith(s, len, "ar"))    // masc pl indefinite
            || (useBokmaal && StemmerUtil.EndsWith(s, len, "st"))    // adj (billig-st -> billig)
            || StemmerUtil.EndsWith(s, len, "te");
        if (twoChars)
        {
            return len - 2;
        }
    }

    if (len > 3)
    {
        char last = s[len - 1];
        // 'a': fem definite
        // 'e': to get correct stem for nouns ending in -e (kake -> kak, kaker -> kak)
        if (last == 'a' || last == 'e' || last == 'n')
        {
            return len - 1;
        }
    }

    return len;
}
/// <summary>
/// Drops a genitive -s and then removes the first matching ending: -ene (or -ane
/// when <c>useNynorsk</c> is set), -er/-en/-et (or -ar when <c>useNynorsk</c> is
/// set), or a single trailing a/e.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
public virtual int Stem(char[] s, int len)
{
    // Remove genitiv s
    if (len > 4 && s[len - 1] == 's')
    {
        len--;
    }

    if (len > 5)
    {
        bool pluralDefinite =
            StemmerUtil.EndsWith(s, len, "ene")                      // masc/fem/neutr pl definite (hus-ene)
            || (useNynorsk && StemmerUtil.EndsWith(s, len, "ane"));  // masc pl definite (gut-ane)
        if (pluralDefinite)
        {
            return len - 3;
        }
    }

    if (len > 4)
    {
        bool twoChars =
            StemmerUtil.EndsWith(s, len, "er")                       // masc/fem indefinite
            || StemmerUtil.EndsWith(s, len, "en")                    // masc/fem definite
            || StemmerUtil.EndsWith(s, len, "et")                    // neutr definite
            || (useNynorsk && StemmerUtil.EndsWith(s, len, "ar"));   // masc pl indefinite
        if (twoChars)
        {
            return len - 2;
        }
    }

    if (len > 3)
    {
        char last = s[len - 1];
        // 'a': fem definite
        // 'e': to get correct stem for nouns ending in -e (kake -> kak, kaker -> kak)
        if (last == 'a' || last == 'e')
        {
            return len - 1;
        }
    }

    return len;
}
/// <summary>
/// Stem an input buffer of Bulgarian text. Words shorter than four characters are
/// left alone; otherwise the article and plural removers run, followed by a few
/// trailing-character clean-ups that may rewrite the buffer in place.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> length of input buffer </param>
/// <returns> length of input buffer after normalization </returns>
public virtual int Stem(char[] s, int len)
{
    // do not stem
    if (len < 4)
    {
        return len;
    }

    if (len > 5 && StemmerUtil.EndsWith(s, len, "ища"))
    {
        return len - 3;
    }

    // article first, then plural, each working on the other's output
    len = RemovePlural(s, RemoveArticle(s, len));

    if (len > 3)
    {
        if (StemmerUtil.EndsWith(s, len, "я"))
        {
            len--;
        }

        bool trailingVowel = StemmerUtil.EndsWith(s, len, "а")
            || StemmerUtil.EndsWith(s, len, "о")
            || StemmerUtil.EndsWith(s, len, "е");
        if (trailingVowel)
        {
            len--;
        }
    }

    // the rule to rewrite ен -> н is duplicated in the paper.
    // in the perl implementation referenced by the paper, this is fixed.
    // (it is fixed here as well)
    if (len > 4 && StemmerUtil.EndsWith(s, len, "ен"))
    {
        s[len - 2] = 'н'; // replace with н
        len--;
    }

    if (len > 5 && s[len - 2] == 'ъ')
    {
        s[len - 2] = s[len - 1]; // replace ъN with N
        len--;
    }

    return len;
}
/// <summary>
/// Removes one plural ending. Some endings are simply cut off; others also rewrite
/// a character of the buffer in place (for example ци becomes к).
/// </summary>
/// <param name="s"> input buffer; may be modified in place </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int RemovePlural(char[] s, int len)
{
    if (len > 6)
    {
        // овци: replace with о; ове: removed entirely — both shorten by three
        if (StemmerUtil.EndsWith(s, len, "овци") || StemmerUtil.EndsWith(s, len, "ове"))
        {
            return len - 3;
        }

        if (StemmerUtil.EndsWith(s, len, "еве"))
        {
            s[len - 3] = 'й'; // replace with й
            return len - 2;
        }
    }

    if (len > 5)
    {
        if (StemmerUtil.EndsWith(s, len, "ища"))
        {
            return len - 3;
        }

        if (StemmerUtil.EndsWith(s, len, "та"))
        {
            return len - 2;
        }

        if (StemmerUtil.EndsWith(s, len, "ци"))
        {
            s[len - 2] = 'к'; // replace with к
            return len - 1;
        }

        if (StemmerUtil.EndsWith(s, len, "зи"))
        {
            s[len - 2] = 'г'; // replace with г
            return len - 1;
        }

        if (s[len - 3] == 'е' && s[len - 1] == 'и')
        {
            s[len - 3] = 'я'; // replace е with я, remove и
            return len - 1;
        }
    }

    if (len > 4)
    {
        if (StemmerUtil.EndsWith(s, len, "си"))
        {
            s[len - 2] = 'х'; // replace with х
            return len - 1;
        }

        if (StemmerUtil.EndsWith(s, len, "и"))
        {
            return len - 1;
        }
    }

    return len;
}
/// <summary>
/// Removes the longest matching suffix from a fixed list, trying five-character
/// suffixes first and single-character suffixes last. A suffix of n characters is
/// only removed from words longer than n + 1 characters.
/// </summary>
/// <param name="buffer"> input buffer </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
public virtual int Stem(char[] buffer, int len)
{
    // 5
    if (len > 6)
    {
        bool five =
            StemmerUtil.EndsWith(buffer, len, "ाएंगी")
            || StemmerUtil.EndsWith(buffer, len, "ाएंगे")
            || StemmerUtil.EndsWith(buffer, len, "ाऊंगी")
            || StemmerUtil.EndsWith(buffer, len, "ाऊंगा")
            || StemmerUtil.EndsWith(buffer, len, "ाइयाँ")
            || StemmerUtil.EndsWith(buffer, len, "ाइयों")
            || StemmerUtil.EndsWith(buffer, len, "ाइयां");
        if (five)
        {
            return len - 5;
        }
    }

    // 4
    if (len > 5)
    {
        bool four =
            StemmerUtil.EndsWith(buffer, len, "ाएगी")
            || StemmerUtil.EndsWith(buffer, len, "ाएगा")
            || StemmerUtil.EndsWith(buffer, len, "ाओगी")
            || StemmerUtil.EndsWith(buffer, len, "ाओगे")
            || StemmerUtil.EndsWith(buffer, len, "एंगी")
            || StemmerUtil.EndsWith(buffer, len, "ेंगी")
            || StemmerUtil.EndsWith(buffer, len, "एंगे")
            || StemmerUtil.EndsWith(buffer, len, "ेंगे")
            || StemmerUtil.EndsWith(buffer, len, "ूंगी")
            || StemmerUtil.EndsWith(buffer, len, "ूंगा")
            || StemmerUtil.EndsWith(buffer, len, "ातीं")
            || StemmerUtil.EndsWith(buffer, len, "नाओं")
            || StemmerUtil.EndsWith(buffer, len, "नाएं")
            || StemmerUtil.EndsWith(buffer, len, "ताओं")
            || StemmerUtil.EndsWith(buffer, len, "ताएं")
            || StemmerUtil.EndsWith(buffer, len, "ियाँ")
            || StemmerUtil.EndsWith(buffer, len, "ियों")
            || StemmerUtil.EndsWith(buffer, len, "ियां");
        if (four)
        {
            return len - 4;
        }
    }

    // 3
    if (len > 4)
    {
        bool three =
            StemmerUtil.EndsWith(buffer, len, "ाकर")
            || StemmerUtil.EndsWith(buffer, len, "ाइए")
            || StemmerUtil.EndsWith(buffer, len, "ाईं")
            || StemmerUtil.EndsWith(buffer, len, "ाया")
            || StemmerUtil.EndsWith(buffer, len, "ेगी")
            || StemmerUtil.EndsWith(buffer, len, "ेगा")
            || StemmerUtil.EndsWith(buffer, len, "ोगी")
            || StemmerUtil.EndsWith(buffer, len, "ोगे")
            || StemmerUtil.EndsWith(buffer, len, "ाने")
            || StemmerUtil.EndsWith(buffer, len, "ाना")
            || StemmerUtil.EndsWith(buffer, len, "ाते")
            || StemmerUtil.EndsWith(buffer, len, "ाती")
            || StemmerUtil.EndsWith(buffer, len, "ाता")
            || StemmerUtil.EndsWith(buffer, len, "तीं")
            || StemmerUtil.EndsWith(buffer, len, "ाओं")
            || StemmerUtil.EndsWith(buffer, len, "ाएं")
            || StemmerUtil.EndsWith(buffer, len, "ुओं")
            || StemmerUtil.EndsWith(buffer, len, "ुएं")
            || StemmerUtil.EndsWith(buffer, len, "ुआं");
        if (three)
        {
            return len - 3;
        }
    }

    // 2
    if (len > 3)
    {
        bool two =
            StemmerUtil.EndsWith(buffer, len, "कर")
            || StemmerUtil.EndsWith(buffer, len, "ाओ")
            || StemmerUtil.EndsWith(buffer, len, "िए")
            || StemmerUtil.EndsWith(buffer, len, "ाई")
            || StemmerUtil.EndsWith(buffer, len, "ाए")
            || StemmerUtil.EndsWith(buffer, len, "ने")
            || StemmerUtil.EndsWith(buffer, len, "नी")
            || StemmerUtil.EndsWith(buffer, len, "ना")
            || StemmerUtil.EndsWith(buffer, len, "ते")
            || StemmerUtil.EndsWith(buffer, len, "ीं")
            || StemmerUtil.EndsWith(buffer, len, "ती")
            || StemmerUtil.EndsWith(buffer, len, "ता")
            || StemmerUtil.EndsWith(buffer, len, "ाँ")
            || StemmerUtil.EndsWith(buffer, len, "ां")
            || StemmerUtil.EndsWith(buffer, len, "ों")
            || StemmerUtil.EndsWith(buffer, len, "ें");
        if (two)
        {
            return len - 2;
        }
    }

    // 1
    if (len > 2)
    {
        bool one =
            StemmerUtil.EndsWith(buffer, len, "ो")
            || StemmerUtil.EndsWith(buffer, len, "े")
            || StemmerUtil.EndsWith(buffer, len, "ू")
            || StemmerUtil.EndsWith(buffer, len, "ु")
            || StemmerUtil.EndsWith(buffer, len, "ी")
            || StemmerUtil.EndsWith(buffer, len, "ि")
            || StemmerUtil.EndsWith(buffer, len, "ा");
        if (one)
        {
            return len - 1;
        }
    }

    return len;
}
/// <summary>
/// Stem an input buffer of Sorani text. A postposition and a possessive pronoun are
/// stripped first (both steps continue processing); after that the first matching
/// ending from the remaining list is removed and the result returned.
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> length of input buffer </param>
/// <returns> length of input buffer after normalization </returns>
public virtual int Stem(char[] s, int len)
{
    // postposition (at most one of the three is removed)
    if (len > 5 && StemmerUtil.EndsWith(s, len, "دا"))
    {
        len -= 2;
    }
    else if (len > 4 && StemmerUtil.EndsWith(s, len, "نا"))
    {
        len--;
    }
    else if (len > 6 && StemmerUtil.EndsWith(s, len, "ەوە"))
    {
        len -= 3;
    }

    // possessive pronoun
    if (len > 6)
    {
        bool pronoun = StemmerUtil.EndsWith(s, len, "مان")
            || StemmerUtil.EndsWith(s, len, "یان")
            || StemmerUtil.EndsWith(s, len, "تان");
        if (pronoun)
        {
            len -= 3;
        }
    }

    // From here on every match returns immediately, so the checks are written
    // as a flat sequence in priority order.

    // indefinite singular ezafe
    if (len > 6 && StemmerUtil.EndsWith(s, len, "ێکی"))
    {
        return len - 3;
    }
    if (len > 7 && StemmerUtil.EndsWith(s, len, "یەکی"))
    {
        return len - 4;
    }

    // indefinite singular
    if (len > 5 && StemmerUtil.EndsWith(s, len, "ێک"))
    {
        return len - 2;
    }
    if (len > 6 && StemmerUtil.EndsWith(s, len, "یەک"))
    {
        return len - 3;
    }

    // definite singular
    if (len > 6 && StemmerUtil.EndsWith(s, len, "ەکە"))
    {
        return len - 3;
    }
    if (len > 5 && StemmerUtil.EndsWith(s, len, "کە"))
    {
        return len - 2;
    }

    // definite plural
    if (len > 7 && StemmerUtil.EndsWith(s, len, "ەکان"))
    {
        return len - 4;
    }
    if (len > 6 && StemmerUtil.EndsWith(s, len, "کان"))
    {
        return len - 3;
    }

    // indefinite plural ezafe
    if (len > 7 && StemmerUtil.EndsWith(s, len, "یانی"))
    {
        return len - 4;
    }
    if (len > 6 && StemmerUtil.EndsWith(s, len, "انی"))
    {
        return len - 3;
    }

    // indefinite plural
    if (len > 6 && StemmerUtil.EndsWith(s, len, "یان"))
    {
        return len - 3;
    }
    if (len > 5 && StemmerUtil.EndsWith(s, len, "ان"))
    {
        return len - 2;
    }

    // demonstrative plural
    if (len > 7 && StemmerUtil.EndsWith(s, len, "یانە"))
    {
        return len - 4;
    }
    if (len > 6 && StemmerUtil.EndsWith(s, len, "انە"))
    {
        return len - 3;
    }

    // demonstrative singular
    if (len > 5
        && (StemmerUtil.EndsWith(s, len, "ایە") || StemmerUtil.EndsWith(s, len, "ەیە")))
    {
        return len - 2;
    }
    if (len > 4 && StemmerUtil.EndsWith(s, len, "ە"))
    {
        return len - 1;
    }

    // absolute singular ezafe
    if (len > 4 && StemmerUtil.EndsWith(s, len, "ی"))
    {
        return len - 1;
    }

    return len;
}
/// <summary>
/// Normalizes the buffer in place. For words longer than four characters, accented
/// letters are replaced by their unaccented form and runs of the same letter are
/// collapsed to one. Then a trailing -ie, a trailing r, up to two trailing e, and
/// one letter of a trailing doubled letter are dropped, while the word is long
/// enough.
/// </summary>
/// <param name="s"> input buffer; modified in place </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int Norm(char[] s, int len)
{
    if (len > 4)
    {
        // strip accents
        for (int pos = 0; pos < len; pos++)
        {
            switch (s[pos])
            {
                case 'à':
                case 'á':
                case 'â':
                    s[pos] = 'a';
                    break;
                case 'ô':
                    s[pos] = 'o';
                    break;
                case 'è':
                case 'é':
                case 'ê':
                    s[pos] = 'e';
                    break;
                case 'ù':
                case 'û':
                    s[pos] = 'u';
                    break;
                case 'î':
                    s[pos] = 'i';
                    break;
                case 'ç':
                    s[pos] = 'c';
                    break;
            }
        }

        // collapse repeated letters
        char previous = s[0];
        int idx = 1;
        while (idx < len)
        {
            if (s[idx] == previous && char.IsLetter(previous))
            {
                // delete the repeat and look at the same index again
                len = StemmerUtil.Delete(s, idx, len);
            }
            else
            {
                previous = s[idx];
                idx++;
            }
        }
    }

    if (len > 4 && StemmerUtil.EndsWith(s, len, "ie"))
    {
        len -= 2;
    }

    if (len > 4)
    {
        if (s[len - 1] == 'r')
        {
            len--;
        }

        // an e may be dropped twice in a row
        for (int pass = 0; pass < 2; pass++)
        {
            if (s[len - 1] == 'e')
            {
                len--;
            }
        }

        if (s[len - 1] == s[len - 2] && char.IsLetter(s[len - 1]))
        {
            len--;
        }
    }

    return len;
}
/// <summary>
/// Removes a trailing -kah, -lah or -pun. A removal also decrements
/// <c>numSyllables</c>.
/// </summary>
/// <param name="text"> input buffer </param>
/// <param name="length"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
private int RemoveParticle(char[] text, int length)
{
    bool hasParticle = StemmerUtil.EndsWith(text, length, "kah")
        || StemmerUtil.EndsWith(text, length, "lah")
        || StemmerUtil.EndsWith(text, length, "pun");

    if (!hasParticle)
    {
        return length;
    }

    numSyllables--;
    return length - 3;
}
/// <summary>
/// Stems the word by walking an ordered list of suffix rules. Most rules shorten the
/// word, patch one or more characters in place, and return at once through
/// <c>Norm</c>. A few rules (-trice, -ète, -ique and the leading x/s removal) only
/// adjust the word and let the later rules continue. The order of the rules is
/// significant.
/// </summary>
/// <param name="s"> input buffer; modified in place </param>
/// <param name="len"> current valid length of the buffer </param>
/// <returns> new valid length </returns>
public virtual int Stem(char[] s, int len)
{
    // trailing x: -aux becomes -al unless preceded by e, then the x is dropped
    if (len > 5 && s[len - 1] == 'x')
    {
        bool auxForm = s[len - 3] == 'a' && s[len - 2] == 'u' && s[len - 4] != 'e';
        if (auxForm)
        {
            s[len - 2] = 'l';
        }
        len--;
    }

    if (len > 3 && s[len - 1] == 'x')
    {
        len--;
    }

    if (len > 3 && s[len - 1] == 's')
    {
        len--;
    }

    if (len > 9 && StemmerUtil.EndsWith(s, len, "issement"))
    {
        len -= 6;
        s[len - 1] = 'r';
        return Norm(s, len);
    }

    if (len > 8 && StemmerUtil.EndsWith(s, len, "issant"))
    {
        len -= 4;
        s[len - 1] = 'r';
        return Norm(s, len);
    }

    if (len > 6 && StemmerUtil.EndsWith(s, len, "ement"))
    {
        len -= 4;
        if (len > 3 && StemmerUtil.EndsWith(s, len, "ive"))
        {
            len--;
            s[len - 1] = 'f';
        }
        return Norm(s, len);
    }

    if (len > 11 && StemmerUtil.EndsWith(s, len, "ficatrice"))
    {
        len -= 5;
        s[len - 2] = 'e';
        s[len - 1] = 'r';
        return Norm(s, len);
    }

    if (len > 10 && StemmerUtil.EndsWith(s, len, "ficateur"))
    {
        len -= 4;
        s[len - 2] = 'e';
        s[len - 1] = 'r';
        return Norm(s, len);
    }

    if (len > 9 && StemmerUtil.EndsWith(s, len, "catrice"))
    {
        len -= 3;
        s[len - 4] = 'q';
        s[len - 3] = 'u';
        s[len - 2] = 'e';
        // s[len - 1] is already 'r', so it is not rewritten
        return Norm(s, len);
    }

    if (len > 8 && StemmerUtil.EndsWith(s, len, "cateur"))
    {
        len -= 2;
        s[len - 4] = 'q';
        s[len - 3] = 'u';
        s[len - 2] = 'e';
        s[len - 1] = 'r';
        return Norm(s, len);
    }

    if (len > 8 && StemmerUtil.EndsWith(s, len, "atrice"))
    {
        len -= 4;
        s[len - 2] = 'e';
        s[len - 1] = 'r';
        return Norm(s, len);
    }

    if (len > 7 && StemmerUtil.EndsWith(s, len, "ateur"))
    {
        len -= 3;
        s[len - 2] = 'e';
        s[len - 1] = 'r';
        return Norm(s, len);
    }

    // -trice is rewritten to -teur and then falls through to the rules below
    if (len > 6 && StemmerUtil.EndsWith(s, len, "trice"))
    {
        len--;
        s[len - 3] = 'e';
        s[len - 2] = 'u';
        s[len - 1] = 'r';
    }

    if (len > 5 && StemmerUtil.EndsWith(s, len, "ième"))
    {
        return Norm(s, len - 4);
    }

    if (len > 7 && StemmerUtil.EndsWith(s, len, "teuse"))
    {
        len -= 2;
        s[len - 1] = 'r';
        return Norm(s, len);
    }

    if (len > 6 && StemmerUtil.EndsWith(s, len, "teur"))
    {
        len--;
        s[len - 1] = 'r';
        return Norm(s, len);
    }

    if (len > 5 && StemmerUtil.EndsWith(s, len, "euse"))
    {
        return Norm(s, len - 2);
    }

    if (len > 8 && StemmerUtil.EndsWith(s, len, "ère"))
    {
        len--;
        s[len - 2] = 'e';
        return Norm(s, len);
    }

    if (len > 7 && StemmerUtil.EndsWith(s, len, "ive"))
    {
        len--;
        s[len - 1] = 'f';
        return Norm(s, len);
    }

    if (len > 4
        && (StemmerUtil.EndsWith(s, len, "folle") || StemmerUtil.EndsWith(s, len, "molle")))
    {
        len -= 2;
        s[len - 1] = 'u';
        return Norm(s, len);
    }

    if (len > 9 && StemmerUtil.EndsWith(s, len, "nnelle"))
    {
        return Norm(s, len - 5);
    }

    if (len > 9 && StemmerUtil.EndsWith(s, len, "nnel"))
    {
        return Norm(s, len - 3);
    }

    // -ète and -ique adjust the word and fall through
    if (len > 4 && StemmerUtil.EndsWith(s, len, "ète"))
    {
        len--;
        s[len - 2] = 'e';
    }

    if (len > 8 && StemmerUtil.EndsWith(s, len, "ique"))
    {
        len -= 4;
    }

    if (len > 8 && StemmerUtil.EndsWith(s, len, "esse"))
    {
        return Norm(s, len - 3);
    }

    if (len > 7 && StemmerUtil.EndsWith(s, len, "inage"))
    {
        return Norm(s, len - 3);
    }

    if (len > 9 && StemmerUtil.EndsWith(s, len, "isation"))
    {
        len -= 7;
        if (len > 5 && StemmerUtil.EndsWith(s, len, "ual"))
        {
            s[len - 2] = 'e';
        }
        return Norm(s, len);
    }

    if (len > 9 && StemmerUtil.EndsWith(s, len, "isateur"))
    {
        return Norm(s, len - 7);
    }

    if (len > 8 && StemmerUtil.EndsWith(s, len, "ation"))
    {
        return Norm(s, len - 5);
    }

    if (len > 8 && StemmerUtil.EndsWith(s, len, "ition"))
    {
        return Norm(s, len - 5);
    }

    return Norm(s, len);
}
/// <summary>
/// Removes at most one trailing ending from the word held in <paramref name="s"/>.
/// Rules are grouped by minimum word length (more than 8, 6, 5 and 4 characters)
/// and tried in that order; the first rule that matches decides the result.
/// </summary>
/// <param name="s">Buffer holding the word. For the "nnen", "ntena", "den" and "ksen"
/// endings the first character of the ending is overwritten with 's' in place.</param>
/// <param name="len">Number of characters of <paramref name="s"/> that belong to the word.</param>
/// <returns>The length of the word after the matched ending has been cut off, or
/// <paramref name="len"/> unchanged when no rule matches.</returns>
private int Step3(char[] s, int len)
{
    // The most permissive rule group still requires more than four characters.
    if (len <= 4)
    {
        return len;
    }

    if (len > 8)
    {
        // "nnen" and "ntena" are both collapsed to a single 's'.
        bool endsNnen = StemmerUtil.EndsWith(s, len, "nnen");
        if (endsNnen || StemmerUtil.EndsWith(s, len, "ntena"))
        {
            int matched = endsNnen ? 4 : 5;
            s[len - matched] = 's';
            return len - matched + 1;
        }

        if (StemmerUtil.EndsWith(s, len, "tten"))
        {
            return len - 4;
        }

        if (StemmerUtil.EndsWith(s, len, "eiden"))
        {
            return len - 5;
        }
    }

    if (len > 6)
    {
        bool fourLetterEnding =
            StemmerUtil.EndsWith(s, len, "neen")
            || StemmerUtil.EndsWith(s, len, "niin")
            || StemmerUtil.EndsWith(s, len, "seen")
            || StemmerUtil.EndsWith(s, len, "teen")
            || StemmerUtil.EndsWith(s, len, "inen");
        if (fourLetterEnding)
        {
            return len - 4;
        }

        // 'h' + vowel + 'n'
        if (s[len - 3] == 'h' && IsVowel(s[len - 2]) && s[len - 1] == 'n')
        {
            return len - 3;
        }

        // "den" -> "s"
        if (StemmerUtil.EndsWith(s, len, "den"))
        {
            s[len - 3] = 's';
            return len - 2;
        }

        // "ksen" -> "s"
        if (StemmerUtil.EndsWith(s, len, "ksen"))
        {
            s[len - 4] = 's';
            return len - 3;
        }

        bool threeLetterEnding =
            StemmerUtil.EndsWith(s, len, "ssa")
            || StemmerUtil.EndsWith(s, len, "sta")
            || StemmerUtil.EndsWith(s, len, "lla")
            || StemmerUtil.EndsWith(s, len, "lta")
            || StemmerUtil.EndsWith(s, len, "tta")
            || StemmerUtil.EndsWith(s, len, "ksi")
            || StemmerUtil.EndsWith(s, len, "lle");
        if (threeLetterEnding)
        {
            return len - 3;
        }
    }

    if (len > 5)
    {
        if (StemmerUtil.EndsWith(s, len, "na") || StemmerUtil.EndsWith(s, len, "ne"))
        {
            return len - 2;
        }

        if (StemmerUtil.EndsWith(s, len, "nei"))
        {
            return len - 3;
        }
    }

    // From here on len > 4 is guaranteed by the guard at the top.
    if (StemmerUtil.EndsWith(s, len, "ja") || StemmerUtil.EndsWith(s, len, "ta"))
    {
        return len - 2;
    }

    switch (s[len - 1])
    {
        case 'a':
            return len - 1;

        case 'n':
            // A vowel directly before the final 'n' is removed together with it.
            return IsVowel(s[len - 2]) ? len - 2 : len - 1;

        default:
            return len;
    }
}
/// <summary>
/// Determines how many trailing characters form a removable ending and returns the
/// word length without them. Endings are tried from longest (five characters) to
/// shortest (a single final vowel); each length class has its own minimum word length.
/// The buffer itself is not modified.
/// </summary>
/// <param name="s">Buffer holding the word.</param>
/// <param name="len">Number of characters of <paramref name="s"/> that belong to the word.</param>
/// <returns><paramref name="len"/> reduced by the length of the first matching ending,
/// or <paramref name="len"/> unchanged when none matches.</returns>
private int RemoveCase(char[] s, int len)
{
    // Number of trailing characters to cut; stays 0 when no ending matches.
    int drop = 0;

    if (len > 7 && StemmerUtil.EndsWith(s, len, "atech"))
    {
        drop = 5;
    }
    else if (len > 6
        && (StemmerUtil.EndsWith(s, len, "ětem")
            || StemmerUtil.EndsWith(s, len, "etem")
            || StemmerUtil.EndsWith(s, len, "atům")))
    {
        drop = 4;
    }
    else if (len > 5
        && (StemmerUtil.EndsWith(s, len, "ech")
            || StemmerUtil.EndsWith(s, len, "ich")
            || StemmerUtil.EndsWith(s, len, "ích")
            || StemmerUtil.EndsWith(s, len, "ého")
            || StemmerUtil.EndsWith(s, len, "ěmi")
            || StemmerUtil.EndsWith(s, len, "emi")
            || StemmerUtil.EndsWith(s, len, "ému")
            || StemmerUtil.EndsWith(s, len, "ěte")
            || StemmerUtil.EndsWith(s, len, "ete")
            || StemmerUtil.EndsWith(s, len, "ěti")
            || StemmerUtil.EndsWith(s, len, "eti")
            || StemmerUtil.EndsWith(s, len, "ího")
            || StemmerUtil.EndsWith(s, len, "iho")
            || StemmerUtil.EndsWith(s, len, "ími")
            || StemmerUtil.EndsWith(s, len, "ímu")
            || StemmerUtil.EndsWith(s, len, "imu")
            || StemmerUtil.EndsWith(s, len, "ách")
            || StemmerUtil.EndsWith(s, len, "ata")
            || StemmerUtil.EndsWith(s, len, "aty")
            || StemmerUtil.EndsWith(s, len, "ých")
            || StemmerUtil.EndsWith(s, len, "ama")
            || StemmerUtil.EndsWith(s, len, "ami")
            || StemmerUtil.EndsWith(s, len, "ové")
            || StemmerUtil.EndsWith(s, len, "ovi")
            || StemmerUtil.EndsWith(s, len, "ými")))
    {
        drop = 3;
    }
    else if (len > 4
        && (StemmerUtil.EndsWith(s, len, "em")
            || StemmerUtil.EndsWith(s, len, "es")
            || StemmerUtil.EndsWith(s, len, "ém")
            || StemmerUtil.EndsWith(s, len, "ím")
            || StemmerUtil.EndsWith(s, len, "ům")
            || StemmerUtil.EndsWith(s, len, "at")
            || StemmerUtil.EndsWith(s, len, "ám")
            || StemmerUtil.EndsWith(s, len, "os")
            || StemmerUtil.EndsWith(s, len, "us")
            || StemmerUtil.EndsWith(s, len, "ým")
            || StemmerUtil.EndsWith(s, len, "mi")
            || StemmerUtil.EndsWith(s, len, "ou")))
    {
        drop = 2;
    }
    else if (len > 3 && "aeiouůyáéíýě".IndexOf(s[len - 1]) >= 0)
    {
        // A single trailing vowel (plain or accented) is cut on its own.
        drop = 1;
    }

    return len - drop;
}