/// <summary>
/// Step 1b: search for the longest among the following suffixes, and perform the action indicated.
///   eed eedly+
///     replace by ee if in R1
///   ed edly+ ing ingly+
///     delete if the preceding word part contains a vowel, and then
///       if the word ends at, bl or iz add e (so luxuriat -> luxuriate), or
///       if the word ends with a double remove the last letter (so hopp -> hop), or
///       if the word is short, add e (so hop -> hope)
/// </summary>
/// <remarks>
/// All suffix tests on <c>Stem</c> use ordinal comparison. The default
/// <c>string.EndsWith(string)</c> / <c>string.LastIndexOf(string)</c> overloads are
/// culture-sensitive, while the code below removes a fixed number of characters, so the
/// match has to cover exactly that many characters.
/// </remarks>
internal void StripSuffixStep1b()
{
    // eed eedly+ - replace by ee if in R1.
    // A stem with one of these endings never reaches the ed/edly/ing/ingly rule,
    // whether or not it was rewritten here.
    if (Stem.EndsWith("eed", System.StringComparison.Ordinal)
        || Stem.EndsWith("eedly", System.StringComparison.Ordinal))
    {
        if ((EndsWithAndInR1("eed") || EndsWithAndInR1("eedly")) && _r1.Contains(Stem.Length))
        {
            // The last "eed" is the start of the suffix for both "eed" and "eedly".
            int suffixStart = Stem.LastIndexOf("eed", System.StringComparison.Ordinal);
            Stem = Stem.Substring(0, suffixStart) + "ee";
        }

        return;
    }

    // ed edly+ ing ingly+ - find which one (if any) terminates the stem.
    // No candidate is a suffix of another, so at most one of them can match.
    string matched = null;
    foreach (string candidate in new string[] { "ingly", "edly", "ing", "ed" })
    {
        if (Stem.EndsWith(candidate, System.StringComparison.Ordinal))
        {
            matched = candidate;
            break;
        }
    }

    if (matched == null)
    {
        return;
    }

    // Delete only if the part of the word before the suffix contains a vowel.
    int precedingLength = Stem.Length - matched.Length;
    if (Stem.IndexOfAny(Vowels, 0, precedingLength) == -1)
    {
        return;
    }

    Stem = Stem.Remove(precedingLength);

    // if the word ends at, bl or iz add e (so luxuriat -> luxuriate), or
    if (Stem.EndsWith("at", System.StringComparison.Ordinal)
        || Stem.EndsWith("bl", System.StringComparison.Ordinal)
        || Stem.EndsWith("iz", System.StringComparison.Ordinal))
    {
        Stem += "e";
        return;
    }

    // Fewer than two characters left: nothing further is attempted
    // (the short-word rule below is deliberately not reached in this case).
    if (Stem.Length < 2)
    {
        return;
    }

    // if the word ends with a double remove the last letter (so hopp -> hop), or
    string lastTwoChars = Stem.Substring(Stem.Length - 2, 2);
    var doubleEndings = new List<string>(DoubleChars);
    if (doubleEndings.Contains(lastTwoChars))
    {
        Stem = Stem.Remove(Stem.Length - 1);
        return;
    }

    // if the word is short, add e (so hop -> hope)
    if (IsShortWord())
    {
        Stem += "e";
    }
}
/// <summary>
/// Converts the quote variants ’ ` " in the stem to the standard apostrophe ',
/// then removes a leading apostrophe if one is present.
/// An empty stem is left unchanged.
/// </summary>
internal void StandardiseApostrophesAndStripLeading()
{
    // Make apostrophes consistent.
    Stem = Stem.Replace('’', '\'').Replace('`', '\'').Replace('"', '\'');

    // Remove an initial ', if present. The length check keeps an empty stem
    // from throwing on the index access.
    if (Stem.Length > 0 && Stem[0] == '\'')
    {
        Stem = Stem.Remove(0, 1);
    }
}
/// <summary>
/// Removes from the stem the first entry of <paramref name="endings"/> that the stem ends with.
/// </summary>
/// <param name="endings">
/// Candidate suffixes, tested in array order. The first match wins, so when one candidate
/// is itself a suffix of another, the longer one must be listed first.
/// </param>
/// <returns><c>true</c> if an ending was found and removed; otherwise <c>false</c>.</returns>
private bool StripEnding(string[] endings)
{
    foreach (string ending in endings)
    {
        // Ordinal comparison: the removal below drops exactly ending.Length characters,
        // so the match must cover exactly those characters. The default EndsWith(string)
        // overload is culture-sensitive and does not guarantee that.
        if (Stem.EndsWith(ending, System.StringComparison.Ordinal))
        {
            Stem = Stem.Remove(Stem.Length - ending.Length);
            return true;
        }
    }

    return false;
}
/// <summary>
/// Step 4: Search for the longest among the following suffixes, and, if found and in R2, perform the action indicated.
///   al ance ence er ic able ible ant ement ment ent ism ate iti ous ive ize
///     delete
///   ion
///     delete if preceded by s or t
/// </summary>
/// <remarks>
/// Suffixes are tried from longest to shortest so that a shorter suffix is never removed
/// when a longer listed suffix is what actually terminates the stem.
/// Direct suffix tests on <c>Stem</c> use ordinal comparison.
/// </remarks>
internal void StripSuffixStep4()
{
    // Five-letter suffix.
    if (EndsWithAndInR2("ement"))
    {
        Stem = Stem.Remove(Stem.Length - 5);
        return;
    }

    // Four-letter suffixes.
    if (EndsWithAndInR2("ance")
        || EndsWithAndInR2("ence")
        || EndsWithAndInR2("able")
        || EndsWithAndInR2("ible")
        || EndsWithAndInR2("ment"))
    {
        Stem = Stem.Remove(Stem.Length - 4);
        return;
    }

    // ion - delete if preceded by s or t (only "ion" itself has to be in R2).
    if (EndsWithAndInR2("ion")
        && (Stem.EndsWith("tion", System.StringComparison.Ordinal)
            || Stem.EndsWith("sion", System.StringComparison.Ordinal)))
    {
        Stem = Stem.Remove(Stem.Length - 3);
        return;
    }

    // A stem still ending in "ment" here was not removed by the four-letter branch above.
    // Stop now so that the shorter "ent" is not stripped from it below.
    // (Breaking change relative to earlier behavior, but makes voc.txt parse correctly.)
    if (Stem.EndsWith("ment", System.StringComparison.Ordinal))
    {
        return;
    }

    // Three-letter suffixes.
    if (EndsWithAndInR2("ant")
        || EndsWithAndInR2("ent")
        || EndsWithAndInR2("ism")
        || EndsWithAndInR2("ate")
        || EndsWithAndInR2("iti")
        || EndsWithAndInR2("ous")
        || EndsWithAndInR2("ize")
        || EndsWithAndInR2("ive"))
    {
        Stem = Stem.Remove(Stem.Length - 3);
        return;
    }

    // Two-letter suffixes.
    if (EndsWithAndInR2("al")
        || EndsWithAndInR2("er")
        || EndsWithAndInR2("ic"))
    {
        Stem = Stem.Remove(Stem.Length - 2);
    }
}
/// <summary>
/// Step 5: Search for the following suffixes, and, if found, perform the action indicated.
///   e
///     delete if in R2, or in R1 and not preceded by a short syllable
///   l
///     delete if in R2 and preceded by l
/// </summary>
internal void StripSuffixStep5()
{
    // e - delete if in R2, or in R1 and not preceded by a short syllable.
    // NOTE(review): Stem.Length - 3 is the index handed to IsShortSyllable; presumably it
    // identifies the syllable just before the final "e" - confirm against IsShortSyllable,
    // including how it treats a negative index for stems shorter than three characters.
    if (EndsWithAndInR2("e")
        || (EndsWithAndInR1("e") && IsShortSyllable(Stem.Length - 3) == false))
    {
        Stem = Stem.Remove(Stem.Length - 1);
        return;
    }

    // l - delete if in R2 and preceded by l.
    // Ordinal comparison so that "ll" is matched as exactly two characters.
    if (EndsWithAndInR2("l") && Stem.EndsWith("ll", System.StringComparison.Ordinal))
    {
        Stem = Stem.Remove(Stem.Length - 1);
    }
}