예제 #1
0
        /// <summary>
        /// Strips the first entry of <c>Step4Replacements</c> that the word ends with, provided
        /// that entry is also a suffix of R2. The "ion" entry is stripped only when the character
        /// immediately before it is 's' or 't'.
        /// </summary>
        /// <param name="data">The data holder whose Word is updated in place.</param>
        private void Step4(EnglishDataHolder data)
        {
            foreach (string suffix in Step4Replacements)
            {
                if (!data.Word.EndsWith(suffix, StringComparison.Ordinal))
                {
                    continue;
                }

                // Only the first matching suffix is ever considered; after handling it we stop.
                if (data.R2.EndsWith(suffix, StringComparison.Ordinal))
                {
                    bool strip = true;

                    if (suffix == "ion")
                    {
                        // Character directly in front of "ion" ('\0' when the word is too short).
                        int precedingIndex = data.Word.Length - 4;
                        char preceding = precedingIndex > 0 ? data.Word[precedingIndex] : '\0';
                        strip = preceding == 's' || preceding == 't';
                    }

                    if (strip)
                    {
                        data.Word = data.Word.Substring(0, data.Word.Length - suffix.Length);
                    }
                }

                return;
            }
        }
예제 #2
0
 /// <summary>
 /// Drops the final character of the word in two situations: a trailing "e" that is in R2
 /// (or in R1 when <c>IsShortSyllable</c> reports false for the position three from the end),
 /// or a trailing "l" that is in R2 when the word ends with "ll".
 /// </summary>
 /// <param name="data">The data holder whose Word is updated in place.</param>
 private void Step5(EnglishDataHolder data)
 {
     // Evaluation order matters: IsShortSyllable is only consulted when R2 does not end in "e"
     // but R1 does.
     bool removableE = data.R2.EndsWith("e", StringComparison.Ordinal)
         || (data.R1.EndsWith("e", StringComparison.Ordinal)
             && !IsShortSyllable(data.Word.ToCharArray(), data.Word.Length - 3));

     // The double-l rule is only examined when the "e" rule did not apply.
     bool removableL = !removableE
         && data.R2.EndsWith("l", StringComparison.Ordinal)
         && data.Word.EndsWith("ll", StringComparison.Ordinal);

     if (removableE || removableL)
     {
         data.Word = data.Word.Substring(0, data.Word.Length - 1);
     }
 }
예제 #3
0
 /// <summary>
 /// Handles the first key of <c>Step3Replacements</c> that R1 ends with. The key is swapped
 /// for its mapped value, except for "ative", which is simply deleted and only when it is
 /// also a suffix of R2.
 /// </summary>
 /// <param name="data">The data holder whose Word is updated in place.</param>
 private void Step3(EnglishDataHolder data)
 {
     foreach (var rule in Step3Replacements)
     {
         var suffix = rule.Key;
         if (!data.R1.EndsWith(suffix, StringComparison.Ordinal))
         {
             continue;
         }

         int stemLength = data.Word.Length - suffix.Length;

         if (suffix != "ative")
         {
             // Ordinary rule: replace the suffix with its mapped value.
             data.Word = data.Word.Substring(0, stemLength) + rule.Value;
         }
         else if (data.R2.EndsWith("ative", StringComparison.Ordinal))
         {
             // "ative" has no replacement text; it is removed only when it lies in R2.
             data.Word = data.Word.Substring(0, stemLength);
         }

         // Only the first matching rule is applied.
         return;
     }
 }
예제 #4
0
 /// <summary>
 /// Handles the first key of <c>Step2Replacements</c> that the word ends with, provided the
 /// key is also a suffix of R1. "ogi" loses its final character when the word ends with
 /// "logi"; "li" is deleted when the character before it is found in <c>ValidLiEndings</c>;
 /// every other key is swapped for its mapped value.
 /// </summary>
 /// <param name="data">The data holder whose Word is updated in place.</param>
 private void Step2(EnglishDataHolder data)
 {
     foreach (var rule in Step2Replacements)
     {
         var suffix = rule.Key;
         if (!data.Word.EndsWith(suffix, StringComparison.Ordinal))
         {
             continue;
         }

         // The first suffix the word ends with is the only candidate; if it is not in R1
         // the word is left untouched.
         if (!data.R1.EndsWith(suffix, StringComparison.Ordinal))
         {
             return;
         }

         int length = data.Word.Length;

         switch (suffix)
         {
             case "ogi":
                 if (data.Word.EndsWith("logi", StringComparison.Ordinal))
                 {
                     data.Word = data.Word.Substring(0, length - 1);
                 }
                 break;

             case "li":
                 if (length >= 3)
                 {
                     // Single character immediately in front of "li".
                     string beforeLi = data.Word.Substring(length - 3, 1);
                     if (ValidLiEndings.Contains(beforeLi))
                     {
                         data.Word = data.Word.Substring(0, length - 2);
                     }
                 }
                 break;

             default:
                 // The word ends with the suffix, so it is at least as long as the suffix.
                 data.Word = data.Word.Substring(0, length - suffix.Length) + rule.Value;
                 break;
         }

         return;
     }
 }
예제 #5
0
        /// <summary>
        /// Handles the first entry of <c>Step1Replacements</c> that the word ends with.
        /// <para>
        /// "eed" and "eedly" are replaced by "ee" when they lie in R1 and are otherwise left
        /// alone, as step 1b of the Porter2 (Snowball English) algorithm prescribes.
        /// </para>
        /// <para>
        /// Any other entry is deleted when the part of the word in front of it contains a vowel.
        /// After a deletion, R1 and R2 are recomputed and the stem is adjusted: an "e" is appended
        /// after "at", "bl" or "iz"; otherwise a trailing entry from <c>Doubles</c> loses its last
        /// character; otherwise an "e" is appended when <c>IsShortWord</c> reports true.
        /// </para>
        /// </summary>
        /// <param name="dataHolder">The data holder; its Word, R1 and R2 may be updated in place.</param>
        private void Step1B(EnglishDataHolder dataHolder)
        {
            for (int i = 0, Step1ReplacementsLength = Step1Replacements.Length; i < Step1ReplacementsLength; i++)
            {
                var Step1Replacement = Step1Replacements[i];
                if (!dataHolder.Word.EndsWith(Step1Replacement, StringComparison.Ordinal))
                {
                    continue;
                }

                if (Step1Replacement == "eedly" || Step1Replacement == "eed")
                {
                    // Replace the whole suffix with "ee", i.e. drop everything after the first
                    // two characters of the suffix ("dly" for "eedly", "d" for "eed").
                    // When the suffix is not in R1 the word must stay unchanged; it must not
                    // fall through to the deletion rule below.
                    if (dataHolder.R1.EndsWith(Step1Replacement, StringComparison.Ordinal))
                    {
                        int charsToDrop = Step1Replacement.Length - 2;
                        dataHolder.Word = dataHolder.Word.Remove(dataHolder.Word.Length - charsToDrop);
                    }
                    return;
                }

                // Deletion rule: only applies when the part before the suffix contains a vowel.
                int stemLength = dataHolder.Word.Length - Step1Replacement.Length;
                bool vowelIsFound = false;
                for (int x = 0; x < stemLength; x++)
                {
                    if (IsVowel(dataHolder.Word[x]))
                    {
                        vowelIsFound = true;
                        break;
                    }
                }

                if (!vowelIsFound)
                {
                    return;
                }

                dataHolder.Word = dataHolder.Word.Remove(stemLength);

                // The word just got shorter, so refresh the regions before the follow-up checks.
                dataHolder.R1 = dataHolder.R1Index < dataHolder.Word.Length ? dataHolder.Word.Substring(dataHolder.R1Index) : "";
                dataHolder.R2 = dataHolder.R2Index < dataHolder.Word.Length ? dataHolder.Word.Substring(dataHolder.R2Index) : "";

                if (dataHolder.Word.EndsWith("at", StringComparison.Ordinal) ||
                    dataHolder.Word.EndsWith("bl", StringComparison.Ordinal) ||
                    dataHolder.Word.EndsWith("iz", StringComparison.Ordinal))
                {
                    dataHolder.Word += "e";
                    return;
                }

                for (int x = 0; x < Doubles.Length; x++)
                {
                    if (dataHolder.Word.EndsWith(Doubles[x], StringComparison.Ordinal))
                    {
                        // Undouble the ending; the short-word rule is skipped in this case.
                        dataHolder.Word = dataHolder.Word.Remove(dataHolder.Word.Length - 1, 1);
                        return;
                    }
                }

                if (IsShortWord(dataHolder.Word, dataHolder.R1))
                {
                    dataHolder.Word += "e";
                }

                return;
            }
        }
예제 #6
0
 /// <summary>
 /// Recomputes R1 and R2 from the current word: each region is the part of the word starting
 /// at its stored index, or an empty string when that index is not inside the word.
 /// </summary>
 /// <param name="Data">The data holder whose R1 and R2 are refreshed.</param>
 private static void SetR1AndR2(EnglishDataHolder Data)
 {
     var text = Data.Word;

     if (Data.R1Index < text.Length)
     {
         Data.R1 = text.Substring(Data.R1Index);
     }
     else
     {
         Data.R1 = "";
     }

     if (Data.R2Index < text.Length)
     {
         Data.R2 = text.Substring(Data.R2Index);
     }
     else
     {
         Data.R2 = "";
     }
 }
예제 #7
0
        /// <summary>
        /// Stems a single word. Null, empty and shorter-than-three-character input is returned
        /// unchanged. Otherwise the word is lower-cased, one leading apostrophe is dropped, the
        /// exception tables are consulted, and steps 0 through 5 are run in order with R1/R2
        /// refreshed between them.
        /// </summary>
        /// <param name="word">The word to stem.</param>
        /// <returns>The stemmed word.</returns>
        protected override string StemWord(string word)
        {
            if (string.IsNullOrEmpty(word) || word.Length < 3)
            {
                return word;
            }

            // Normalise the input: lower-case, then drop a single leading apostrophe.
            word = word.ToLowerInvariant();
            if (word.StartsWith("'", StringComparison.Ordinal))
            {
                word = word.Substring(1);
            }

            // Words with a fixed, pre-defined stem short-circuit the algorithm.
            if (Exceptions.ContainsKey(word))
            {
                return Exceptions[word];
            }

            // Mark an initial 'y', or a 'y' that follows a vowel, as 'Y'. The scan runs left to
            // right over the same buffer, so each test sees the marks already made.
            var letters = word.ToCharArray();
            for (int x = 0; x < letters.Length; x++)
            {
                if (letters[x] != 'y')
                {
                    continue;
                }

                if (x == 0 || IsVowel(letters[x - 1]))
                {
                    letters[x] = 'Y';
                }
            }

            var holder = new EnglishDataHolder
            {
                Word = new string(letters)
            };

            // The region start indexes are computed once, from the marked word.
            (holder.R1Index, holder.R2Index) = CalculateR1AndR2(holder.Word);

            holder.Word = Step0(holder.Word);
            holder.Word = Step1A(holder.Word);

            // Second exception list: these words are returned as they stand after step 1a.
            if (Exceptions2.Contains(holder.Word))
            {
                return holder.Word;
            }

            // Each remaining step may change the word, so R1/R2 are refreshed before every one.
            SetR1AndR2(holder);
            Step1B(holder);

            SetR1AndR2(holder);
            holder.Word = Step1C(holder.Word);

            SetR1AndR2(holder);
            Step2(holder);

            SetR1AndR2(holder);
            Step3(holder);

            SetR1AndR2(holder);
            Step4(holder);

            SetR1AndR2(holder);
            Step5(holder);

            // Lower-casing also turns any remaining 'Y' markers back into 'y'.
            return holder.Word.ToLowerInvariant();
        }