/// <summary>
/// Step 1b: search for the longest among the following suffixes, and perform the action indicated.
/// eed eedly+
///     replace by ee if in R1
/// ed edly+ ing ingly+
///     delete if the preceding word part contains a vowel, and then
///     if the word ends at, bl or iz add e (so luxuriat -> luxuriate), or
///     if the word ends with a double remove the last letter (so hopp -> hop), or
///     if the word is short, add e (so hop -> hope)
/// </summary>
/// <remarks>
/// Suffix tests made directly on the stem are ordinal, so a match always covers exactly
/// as many characters as the suffix has and the fixed-length cuts below remove the right text.
/// </remarks>
internal void StripSuffixStep1b()
{
    // eed eedly+ - replace by ee if in R1
    bool endsWithEed = Stem.EndsWith("eed", StringComparison.Ordinal);
    if (endsWithEed || Stem.EndsWith("eedly", StringComparison.Ordinal))
    {
        if (EndsWithAndInR1("eed") || EndsWithAndInR1("eedly"))
        {
            // NOTE(review): extra guard kept from the original; presumably it confirms that
            // the end of the stem lies inside R1 - confirm against the type of _r1.
            if (_r1.Contains(Stem.Length))
            {
                int matchedLength = endsWithEed ? 3 : 5;
                Stem = Stem.Substring(0, Stem.Length - matchedLength) + "ee";
            }
        }

        // An eed/eedly word also ends in ed/edly, so it must never reach the deletion rule below.
        return;
    }

    // ed edly+ ing ingly+ - delete if the preceding word part contains a vowel, and then
    string[] deletableEndings = new string[4] { "ed", "edly", "ing", "ingly" };
    int endingLength = 0;
    foreach (string ending in deletableEndings)
    {
        if (Stem.EndsWith(ending, StringComparison.Ordinal))
        {
            endingLength = ending.Length;
            break;
        }
    }

    if (endingLength == 0)
    {
        return;
    }

    // Only the part of the word in front of the suffix is searched for a vowel.
    if (Stem.IndexOfAny(Vowels, 0, Stem.Length - endingLength) == -1)
    {
        return;
    }

    StripEnding(deletableEndings);

    // if the word ends at, bl or iz add e (so luxuriat -> luxuriate), or
    if (Stem.EndsWith("at", StringComparison.Ordinal)
        || Stem.EndsWith("bl", StringComparison.Ordinal)
        || Stem.EndsWith("iz", StringComparison.Ordinal))
    {
        Stem += "e";
        return;
    }

    // Fewer than two letters left: no further rule is applied.
    if (Stem.Length < 2)
    {
        return;
    }

    // if the word ends with a double remove the last letter (so hopp -> hop), or
    string lastTwoChars = Stem.Substring(Stem.Length - 2, 2);
    foreach (string doubleEnding in DoubleChars)
    {
        if (doubleEnding == lastTwoChars)
        {
            Stem = Stem.Remove(Stem.Length - 1);
            return;
        }
    }

    // if the word is short, add e (so hop -> hope)
    if (IsShortWord())
    {
        Stem += "e";
    }
}
/// <summary>
/// Step 1c: replace a trailing y or Y by i when the letter in front of it is a non-vowel
/// that is not the first letter of the word (so cry -> cri, by -> by, say -> say).
/// </summary>
internal void ReplaceSuffixStep1c()
{
    bool endsInY = Stem.EndsWith("y", StringComparison.OrdinalIgnoreCase);

    // Words of at most two letters are left alone: the letter before the y would be the first letter.
    if (!endsInY || Stem.Length <= 2)
    {
        return;
    }

    // Scanning from the second-to-last position returns that very position
    // exactly when the character in front of the y is a vowel.
    int precedingIndex = Stem.Length - 2;
    if (Stem.IndexOfAny(Vowels, precedingIndex) == precedingIndex)
    {
        return;
    }

    Stem = Stem.Remove(Stem.Length - 1) + "i";
}
/// <summary>
/// Removes from the stem the first entry of <paramref name="endings"/> that the stem ends with.
/// </summary>
/// <param name="endings">
/// Candidate suffixes, tested in array order. A null array, or null/empty entries, never match.
/// </param>
/// <returns>true if a suffix was found and removed; otherwise false.</returns>
private bool StripEnding(string[] endings)
{
    if (endings == null)
    {
        return false;
    }

    foreach (string ending in endings)
    {
        // An empty ending would "match" every stem without removing anything.
        if (string.IsNullOrEmpty(ending))
        {
            continue;
        }

        // Ordinal test: the match covers exactly ending.Length characters,
        // so the cut below removes the suffix and nothing else.
        if (Stem.EndsWith(ending, StringComparison.Ordinal))
        {
            Stem = Stem.Substring(0, Stem.Length - ending.Length);
            return true;
        }
    }

    return false;
}
/// <summary>
/// Step 4: Search for the longest among the following suffixes, and, if found and in R2, perform the action indicated.
/// al ance ence er ic able ible ant ement ment ent ism ate iti ous ive ize
///     delete
/// ion
///     delete if preceded by s or t
/// </summary>
/// <remarks>
/// Suffixes are tried from longest to shortest; the first rule that applies ends the step.
/// Tests made directly on the stem are ordinal.
/// </remarks>
internal void StripSuffixStep4()
{
    // Five-letter suffix.
    if (EndsWithAndInR2("ement"))
    {
        Stem = Stem.Remove(Stem.Length - 5);
        return;
    }

    // Four-letter suffixes.
    if (EndsWithAndInR2("ance")
        || EndsWithAndInR2("ence")
        || EndsWithAndInR2("able")
        || EndsWithAndInR2("ible")
        || EndsWithAndInR2("ment"))
    {
        Stem = Stem.Remove(Stem.Length - 4);
        return;
    }

    // ion - delete only if preceded by s or t (the s/t itself is kept).
    if (EndsWithAndInR2("ion")
        && (Stem.EndsWith("tion", StringComparison.Ordinal) || Stem.EndsWith("sion", StringComparison.Ordinal)))
    {
        Stem = Stem.Remove(Stem.Length - 3);
        return;
    }

    // The longest matching suffix here is "ment", and it was not accepted above.
    // Stop so that the shorter "ent" rule below cannot fire on the same word.
    // (breaking change, but makes the voc.txt parse correctly)
    if (Stem.EndsWith("ment", StringComparison.Ordinal))
    {
        return;
    }

    // Three-letter suffixes.
    if (EndsWithAndInR2("ant")
        || EndsWithAndInR2("ent")
        || EndsWithAndInR2("ism")
        || EndsWithAndInR2("ate")
        || EndsWithAndInR2("iti")
        || EndsWithAndInR2("ous")
        || EndsWithAndInR2("ize")
        || EndsWithAndInR2("ive"))
    {
        Stem = Stem.Remove(Stem.Length - 3);
        return;
    }

    // Two-letter suffixes.
    if (EndsWithAndInR2("al")
        || EndsWithAndInR2("er")
        || EndsWithAndInR2("ic"))
    {
        Stem = Stem.Remove(Stem.Length - 2);
    }
}
/// <summary>
/// Step 5: Search for the following suffixes, and, if found, perform the action indicated.
/// e
///     delete if in R2, or in R1 and not preceded by a short syllable
/// l
///     delete if in R2 and preceded by l
/// </summary>
internal void StripSuffixStep5()
{
    // e - delete if in R2, or in R1 and not preceded by a short syllable.
    // NOTE(review): IsShortSyllable is handed Stem.Length - 3; presumably that is the position
    // it expects for the syllable in front of the final e - confirm against its definition.
    if (EndsWithAndInR2("e")
        || (EndsWithAndInR1("e") && !IsShortSyllable(Stem.Length - 3)))
    {
        Stem = Stem.Remove(Stem.Length - 1);
        return;
    }

    // l - delete if in R2 and preceded by l (ordinal test on the literal "ll").
    if (EndsWithAndInR2("l") && Stem.EndsWith("ll", StringComparison.Ordinal))
    {
        Stem = Stem.Remove(Stem.Length - 1);
    }
}
/// <summary>
/// Handle the three forms of closing apostrophe: 's' then 's then ' (longest first).
/// At most one form is removed per call.
/// </summary>
internal void StripTrailingApostrophe()
{
    // Ordinal tests: the match covers exactly the characters of the literal,
    // so the number of characters cut below is always right.
    int charsToCut = 0;
    if (Stem.EndsWith("'s'", StringComparison.Ordinal))
    {
        charsToCut = 3;
    }
    else if (Stem.EndsWith("'s", StringComparison.Ordinal))
    {
        charsToCut = 2;
    }
    else if (Stem.EndsWith("'", StringComparison.Ordinal))
    {
        charsToCut = 1;
    }

    if (charsToCut > 0)
    {
        Stem = Stem.Substring(0, Stem.Length - charsToCut);
    }
}
/// <summary>
/// Step 1a: search for the longest among the following suffixes, and perform the action indicated.
/// sses
///     replace by ss
/// ied+ ies*
///     replace by i if preceded by more than one letter, otherwise by ie (so ties -> tie, cries -> cri)
/// s
///     delete if the preceding word part contains a vowel not immediately before the s
///     (so gas and this retain the s, gaps and kiwis lose it)
/// us+ ss
///     do nothing
/// </summary>
/// <remarks>Suffix tests are ordinal, so every fixed-length cut matches the suffix that was found.</remarks>
internal void StripSuffixStep1a()
{
    // sses - replace by ss: dropping the last two characters leaves the leading "ss".
    if (Stem.EndsWith("sses", StringComparison.Ordinal))
    {
        Stem = Stem.Substring(0, Stem.Length - 2);
        return;
    }

    // ied+ ies* - replace by i if preceded by more than one letter, otherwise by ie
    // (so ties -> tie, cries -> cri)
    if (Stem.EndsWith("ies", StringComparison.Ordinal) || Stem.EndsWith("ied", StringComparison.Ordinal))
    {
        // Length > 4 means at least two letters stand in front of the three-letter suffix.
        int charsToCut = Stem.Length > 4 ? 2 : 1;
        Stem = Stem.Substring(0, Stem.Length - charsToCut);
        return;
    }

    // us+ ss - do nothing
    if (Stem.EndsWith("us", StringComparison.Ordinal) || Stem.EndsWith("ss", StringComparison.Ordinal))
    {
        return;
    }

    // s - delete if the preceding word part contains a vowel not immediately
    // before the s (so gas and this retain the s, gaps and kiwis lose it).
    // The scanned range stops two characters short of the end, which skips both
    // the s and the letter directly in front of it.
    if (Stem.EndsWith("s", StringComparison.Ordinal)
        && Stem.Length > 2
        && Stem.IndexOfAny(Vowels, 0, Stem.Length - 2) > -1)
    {
        Stem = Stem.Substring(0, Stem.Length - 1);
    }
}
/// <summary>
/// Tests whether the stem ends with <paramref name="suffix"/> and the value returned by
/// GetR2() contains that suffix.
/// </summary>
/// <param name="suffix">The suffix to look for.</param>
/// <returns>true when both conditions hold; otherwise false.</returns>
private bool EndsWithAndInR2(string suffix)
{
    // Ordinal suffix test, so the result does not depend on the current culture.
    // NOTE(review): presumably GetR2() yields the R2 region of the current stem - confirm.
    return Stem.EndsWith(suffix, StringComparison.Ordinal) && GetR2().Contains(suffix);
}