/// <summary>
/// Step 4: deletes the first entry of <c>Step4Replacements</c> that the word ends with,
/// provided that entry also lies in R2.
/// </summary>
/// <param name="data">Holder for the word and its R1/R2 regions; <c>Word</c> is updated in place.</param>
private void Step4(EnglishDataHolder data)
{
    foreach (string suffix in Step4Replacements)
    {
        if (!data.Word.EndsWith(suffix, StringComparison.Ordinal))
        {
            continue;
        }

        // Only the first suffix matching the word is considered; later entries are never tried.
        if (data.R2.EndsWith(suffix, StringComparison.Ordinal))
        {
            bool removable = true;
            if (suffix == "ion")
            {
                // "ion" is only stripped when the letter just before it is 's' or 't'.
                int wordLength = data.Word.Length;
                char preceding = wordLength > 4 ? data.Word[wordLength - 4] : '\0';
                removable = preceding == 's' || preceding == 't';
            }

            if (removable)
            {
                data.Word = data.Word.Remove(data.Word.Length - suffix.Length);
            }
        }

        return;
    }
}
/// <summary>
/// Step 5: drops a final "e" or the second "l" of a final "ll" when the region conditions hold.
/// </summary>
/// <param name="data">Holder for the word and its R1/R2 regions; <c>Word</c> is updated in place.</param>
private void Step5(EnglishDataHolder data)
{
    // A final "e" goes when it is in R2, or when it is in R1 and IsShortSyllable reports false
    // for the position three characters from the end of the word.
    bool dropTrailingE =
        data.R2.EndsWith("e", StringComparison.Ordinal)
        || (data.R1.EndsWith("e", StringComparison.Ordinal)
            && !IsShortSyllable(data.Word.ToCharArray(), data.Word.Length - 3));

    // Otherwise a final "l" goes when it is in R2 and the word ends in "ll".
    bool dropDoubledL =
        !dropTrailingE
        && data.R2.EndsWith("l", StringComparison.Ordinal)
        && data.Word.EndsWith("ll", StringComparison.Ordinal);

    if (dropTrailingE || dropDoubledL)
    {
        data.Word = data.Word.Remove(data.Word.Length - 1);
    }
}
/// <summary>
/// Step 3: rewrites the first suffix from <c>Step3Replacements</c> that R1 ends with.
/// </summary>
/// <param name="data">Holder for the word and its R1/R2 regions; <c>Word</c> is updated in place.</param>
private void Step3(EnglishDataHolder data)
{
    foreach (var entry in Step3Replacements)
    {
        string suffix = entry.Key;
        if (!data.R1.EndsWith(suffix, StringComparison.Ordinal))
        {
            continue;
        }

        if (suffix != "ative")
        {
            // Regular case: swap the suffix for its mapped replacement.
            data.Word = data.Word.Remove(data.Word.Length - suffix.Length) + entry.Value;
        }
        else if (data.R2.EndsWith("ative", StringComparison.Ordinal))
        {
            // "ative" is deleted outright, and only when it also lies in R2.
            data.Word = data.Word.Remove(data.Word.Length - suffix.Length);
        }

        // The first suffix found in R1 ends the step, whether or not the word changed.
        return;
    }
}
/// <summary>
/// Step 2: rewrites the first suffix from <c>Step2Replacements</c> that the word ends with,
/// provided that suffix also lies in R1.
/// </summary>
/// <param name="data">Holder for the word and its R1/R2 regions; <c>Word</c> is updated in place.</param>
private void Step2(EnglishDataHolder data)
{
    foreach (var entry in Step2Replacements)
    {
        string suffix = entry.Key;
        if (!data.Word.EndsWith(suffix, StringComparison.Ordinal))
        {
            continue;
        }

        if (data.R1.EndsWith(suffix, StringComparison.Ordinal))
        {
            switch (suffix)
            {
                case "ogi":
                    // Only "logi" is shortened: the final "i" is dropped.
                    if (data.Word.EndsWith("logi", StringComparison.Ordinal))
                    {
                        data.Word = data.Word.Remove(data.Word.Length - 1);
                    }
                    break;

                case "li":
                    // "li" is deleted only when the letter before it is listed in ValidLiEndings.
                    if (data.Word.Length >= 3)
                    {
                        string precedingLetter = data.Word.Substring(data.Word.Length - 3, 1);
                        if (ValidLiEndings.Contains(precedingLetter))
                        {
                            data.Word = data.Word.Remove(data.Word.Length - 2);
                        }
                    }
                    break;

                default:
                    if (data.Word.Length >= suffix.Length)
                    {
                        data.Word = data.Word.Remove(data.Word.Length - suffix.Length) + entry.Value;
                    }
                    break;
            }
        }

        // The first suffix matching the word ends the step, whether or not the word changed.
        return;
    }
}
/// <summary>
/// Step 1b: handles the first entry of <c>Step1Replacements</c> that the word ends with.
/// "eed" and "eedly" are replaced by "ee" when they lie in R1. Any other entry is deleted when
/// the part of the word before it contains a vowel, after which the stem is tidied up
/// (an "e" is appended, or a doubled final letter is reduced to one).
/// </summary>
/// <param name="dataHolder">
/// Holder for the word and its R1/R2 regions. <c>Word</c> is updated in place, and R1/R2 are
/// refreshed when a suffix is deleted.
/// </param>
private void Step1B(EnglishDataHolder dataHolder)
{
    foreach (string suffix in Step1Replacements)
    {
        if (!dataHolder.Word.EndsWith(suffix, StringComparison.Ordinal))
        {
            continue;
        }

        if (suffix == "eed" || suffix == "eedly")
        {
            // Replace by "ee" if in R1. When the suffix is not in R1 the word is left alone;
            // it must not fall through to the deletion rule below.
            if (dataHolder.R1.EndsWith(suffix, StringComparison.Ordinal))
            {
                dataHolder.Word = dataHolder.Word.Remove(dataHolder.Word.Length - suffix.Length) + "ee";
            }

            return;
        }

        // The suffix is deleted only if the part of the word preceding it contains a vowel.
        int stemLength = dataHolder.Word.Length - suffix.Length;
        bool stemHasVowel = false;
        for (int x = 0; x < stemLength; x++)
        {
            if (IsVowel(dataHolder.Word[x]))
            {
                stemHasVowel = true;
                break;
            }
        }

        if (!stemHasVowel)
        {
            return;
        }

        dataHolder.Word = dataHolder.Word.Remove(stemLength);

        // The word got shorter, so R1/R2 must be recomputed before the short-word test below.
        SetR1AndR2(dataHolder);

        if (dataHolder.Word.EndsWith("at", StringComparison.Ordinal)
            || dataHolder.Word.EndsWith("bl", StringComparison.Ordinal)
            || dataHolder.Word.EndsWith("iz", StringComparison.Ordinal))
        {
            dataHolder.Word += "e";
            return;
        }

        // A final doubled letter listed in Doubles loses its last character.
        foreach (string doubled in Doubles)
        {
            if (dataHolder.Word.EndsWith(doubled, StringComparison.Ordinal))
            {
                dataHolder.Word = dataHolder.Word.Remove(dataHolder.Word.Length - 1, 1);
                return;
            }
        }

        if (IsShortWord(dataHolder.Word, dataHolder.R1))
        {
            dataHolder.Word += "e";
        }

        return;
    }
}
/// <summary>
/// Recomputes <c>R1</c> and <c>R2</c> as the tails of <c>Word</c> starting at
/// <c>R1Index</c> and <c>R2Index</c>.
/// </summary>
/// <param name="Data">Holder whose <c>R1</c> and <c>R2</c> are overwritten.</param>
private static void SetR1AndR2(EnglishDataHolder Data)
{
    string word = Data.Word;

    // A region whose start index is at or beyond the end of the word is empty.
    string TailFrom(int start) => start < word.Length ? word.Substring(start) : "";

    Data.R1 = TailFrom(Data.R1Index);
    Data.R2 = TailFrom(Data.R2Index);
}
/// <summary>
/// Stems a single word by normalising it, checking the exception lists and then running
/// steps 0 through 5 in order.
/// </summary>
/// <param name="word">The word to stem.</param>
/// <returns>
/// The stemmed word. Input that is null, empty or shorter than three characters is returned unchanged.
/// </returns>
protected override string StemWord(string word)
{
    if (string.IsNullOrEmpty(word) || word.Length < 3)
    {
        return word;
    }

    // Normalise: lower-case and drop one leading apostrophe.
    word = word.ToLowerInvariant();
    if (word.StartsWith("'", StringComparison.Ordinal))
    {
        word = word.Substring(1);
    }

    // Words with a fixed stem bypass the algorithm entirely.
    if (Exceptions.TryGetValue(word, out var fixedStem))
    {
        return fixedStem;
    }

    // Mark an initial 'y', or a 'y' that follows a vowel, as 'Y'. The scan runs left to right
    // in place, so each test sees the already-marked previous letter.
    char[] letters = word.ToCharArray();
    for (int i = 0; i < letters.Length; i++)
    {
        if (letters[i] == 'y' && (i == 0 || IsVowel(letters[i - 1])))
        {
            letters[i] = 'Y';
        }
    }
    word = new string(letters);

    var holder = new EnglishDataHolder { Word = word };
    (holder.R1Index, holder.R2Index) = CalculateR1AndR2(holder.Word);

    holder.Word = Step0(holder.Word);
    holder.Word = Step1A(holder.Word);

    // Second exception list: these words are returned as they stand after step 1a.
    if (Exceptions2.Contains(holder.Word))
    {
        return holder.Word;
    }

    // R1/R2 are refreshed before every remaining step because each step may shorten the word.
    SetR1AndR2(holder);
    Step1B(holder);

    SetR1AndR2(holder);
    holder.Word = Step1C(holder.Word);

    SetR1AndR2(holder);
    Step2(holder);

    SetR1AndR2(holder);
    Step3(holder);

    SetR1AndR2(holder);
    Step4(holder);

    SetR1AndR2(holder);
    Step5(holder);

    // Lower-casing turns any remaining 'Y' markers back into 'y'.
    return holder.Word.ToLowerInvariant();
}