Ejemplo n.º 1
0
        /// <summary>
        /// Splits a sentence into tokens and, depending on the <c>RemoveStopWords</c> and
        /// <c>StemWords</c> settings, filters out stop words and/or stems each remaining token.
        /// </summary>
        /// <param name="sentence">The text to tokenize and clean.</param>
        /// <returns>
        /// The raw tokens when neither setting is enabled; otherwise an array holding the
        /// processed tokens in their original order.
        /// </returns>
        public string[] Process(string sentence)
        {
            var tokens = sentence.Tokenize();
            bool dropStopWords = RemoveStopWords;
            bool applyStemming = StemWords;

            // Nothing to filter or transform: hand the tokens back untouched.
            if (!(dropStopWords || applyStemming))
            {
                return tokens;
            }

            var processed = new List<string>();

            foreach (string token in tokens)
            {
                // A token is a stop word when it matches any StopWords entry,
                // compared case-insensitively under the invariant culture.
                if (dropStopWords &&
                    StopWords.Any(stopWord => string.Equals(token, stopWord, StringComparison.InvariantCultureIgnoreCase)))
                {
                    continue;
                }

                processed.Add(applyStemming ? _stemmer.Stem(token) : token);
            }

            return processed.ToArray();
        }