Exemple #1
0
        /// <summary>
        /// Gets a Hashtable of words and integers representing the number of each word.
        /// </summary>
        /// <remarks>
        /// When <paramref name="stopWordProvider"/> is null every unique token is counted.
        /// Otherwise a token is counted only if <c>IsWord</c> accepts it and the provider
        /// does not report it as a stop word.
        /// </remarks>
        /// <param name="input">The string to get the word frequency of.</param>
        /// <param name="caseSensitive">
        /// True if words should be treated as separate if they have different casing;
        /// false to lower-case the input (using the invariant culture) before tokenizing.
        /// </param>
        /// <param name="tokenizer">The ITokenizer used to split the input; must not be null.</param>
        /// <param name="stopWordProvider">An IStopWordProvider used to filter tokens, or null to disable filtering.</param>
        /// <returns>A Hashtable keyed by word (string) whose values are boxed int counts from <c>CountWords</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tokenizer"/> is null.</exception>
        public static Hashtable GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
        {
            if (tokenizer == null)
            {
                throw new ArgumentNullException("tokenizer");
            }

            // Lower-case with the invariant culture so the resulting keys do not depend on the
            // current thread culture (under tr-TR, ToLower() maps "I" to a dotless "ı").
            string convertedInput = caseSensitive ? input : input.ToLowerInvariant();

            string[] words = tokenizer.Tokenize(convertedInput);
            Array.Sort(words);

            string[] uniqueWords = GetUniqueWords(words);

            Hashtable result = new Hashtable();

            foreach (string word in uniqueWords)
            {
                // Skip only when a provider is present and the token is either not a word
                // or is a stop word; without a provider nothing is filtered.
                if (stopWordProvider != null && (!IsWord(word) || stopWordProvider.IsStopWord(word)))
                {
                    continue;
                }

                // The Hashtable indexer yields null for a missing key, so a single lookup
                // replaces the ContainsKey + indexer pair.
                object existing = result[word];
                int occurrences = CountWords(word, words);

                // Accumulate rather than overwrite in case the same key shows up again.
                result[word] = existing == null ? occurrences : (int)existing + occurrences;
            }

            return result;
        }
		/// <summary>
		/// Gets a Hashtable of words and integers representing the number of each word.
		/// </summary>
		/// <remarks>
		/// When <paramref name="stopWordProvider"/> is null every unique token is counted.
		/// Otherwise a token is counted only if <c>IsWord</c> accepts it and the provider
		/// does not report it as a stop word.
		/// </remarks>
		/// <param name="input">The string to get the word frequency of.</param>
		/// <param name="caseSensitive">
		/// True if words should be treated as separate if they have different casing;
		/// false to lower-case the input (using the invariant culture) before tokenizing.
		/// </param>
		/// <param name="tokenizer">The ITokenizer used to split the input; must not be null.</param>
		/// <param name="stopWordProvider">An IStopWordProvider used to filter tokens, or null to disable filtering.</param>
		/// <returns>A Hashtable keyed by word (string) whose values are boxed int counts from <c>CountWords</c>.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="tokenizer"/> is null.</exception>
		public static Hashtable GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
		{
			if (tokenizer == null)
			{
				throw new ArgumentNullException("tokenizer");
			}

			// Invariant-culture lower-casing keeps the keys stable regardless of the
			// current thread culture (ToLower() under tr-TR turns "I" into dotless "ı").
			string convertedInput = input;
			if (!caseSensitive)
			{
				convertedInput = input.ToLowerInvariant();
			}

			string[] words = tokenizer.Tokenize(convertedInput);
			Array.Sort(words);

			string[] uniqueWords = GetUniqueWords(words);

			Hashtable result = new Hashtable();
			foreach (string word in uniqueWords)
			{
				// With no provider every token is kept; with one, the token must pass
				// IsWord and must not be a stop word.
				bool keep = stopWordProvider == null || (IsWord(word) && !stopWordProvider.IsStopWord(word));
				if (!keep)
				{
					continue;
				}

				// One lookup: the Hashtable indexer returns null when the key is absent.
				object previous = result[word];
				int count = CountWords(word, words);

				// Add to any earlier count instead of replacing it.
				result[word] = previous == null ? count : (int)previous + count;
			}

			return result;
		}
Exemple #3
0
		/// <summary>
		/// Gets a dictionary of words and integers representing the number of each word.
		/// </summary>
		/// <remarks>
		/// When <paramref name="stopWordProvider"/> is null every unique token is counted.
		/// Otherwise a token is counted only if <c>IsWord</c> accepts it and the provider
		/// does not report it as a stop word.
		/// </remarks>
		/// <param name="input">The string to get the word frequency of.</param>
		/// <param name="caseSensitive">
		/// True if words should be treated as separate if they have different casing;
		/// false to lower-case the input (using the invariant culture) before tokenizing.
		/// </param>
		/// <param name="tokenizer">The ITokenizer used to split the input; must not be null.</param>
		/// <param name="stopWordProvider">An IStopWordProvider used to filter tokens, or null to disable filtering.</param>
		/// <returns>A dictionary mapping each counted word to the count returned by <c>CountWords</c>.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="tokenizer"/> is null.</exception>
		public static IDictionary<string, int> GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
		{
			if (tokenizer == null)
			{
				throw new ArgumentNullException("tokenizer");
			}

			// Lower-case with the invariant culture so the dictionary keys do not depend on
			// the current thread culture (under tr-TR, ToLower() maps "I" to dotless "ı").
			var convertedInput = caseSensitive ? input : input.ToLowerInvariant();

			var words = tokenizer.Tokenize(convertedInput);
			Array.Sort(words);

			var uniqueWords = GetUniqueWords(words);

			var result = new Dictionary<string, int>();
			foreach (var word in uniqueWords)
			{
				// Skip only when a provider is present and the token is either not a word
				// or is a stop word; without a provider nothing is filtered.
				if (stopWordProvider != null && (!IsWord(word) || stopWordProvider.IsStopWord(word)))
				{
					continue;
				}

				// TryGetValue leaves 'existing' at 0 when the key is absent, so the same
				// statement covers both the first sighting and later accumulation.
				int existing;
				result.TryGetValue(word, out existing);
				result[word] = existing + CountWords(word, words);
			}

			return result;
		}
Exemple #4
0
 /// <summary>
 /// Tells whether a word should be considered: it must be non-null, non-empty
 /// and not reported as a stop word by <c>_stopWordProvider</c>.
 /// </summary>
 /// <param name="word">The candidate word; may be null or empty.</param>
 /// <returns>False for null, empty or stop words; true otherwise.</returns>
 private bool IsClassifiableWord(string word)
 {
     // Reject null/empty up front so the provider is never asked about them.
     if (string.IsNullOrEmpty(word))
     {
         return false;
     }

     return !_stopWordProvider.IsStopWord(word);
 }
Exemple #5
0
        /// <summary>
        /// Gets a dictionary of words and integers representing the number of each word.
        /// </summary>
        /// <remarks>
        /// When <paramref name="stopWordProvider"/> is null every unique token is counted.
        /// Otherwise a token is counted only if <c>IsWord</c> accepts it and the provider
        /// does not report it as a stop word.
        /// </remarks>
        /// <param name="input">The string to get the word frequency of.</param>
        /// <param name="caseSensitive">
        /// True if words should be treated as separate if they have different casing;
        /// false to lower-case the input (using the invariant culture) before tokenizing.
        /// </param>
        /// <param name="tokenizer">The ITokenizer used to split the input; must not be null.</param>
        /// <param name="stopWordProvider">An IStopWordProvider used to filter tokens, or null to disable filtering.</param>
        /// <returns>A dictionary mapping each counted word to the count returned by <c>CountWords</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tokenizer"/> is null.</exception>
        public static IDictionary<string, int> GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
        {
            if (tokenizer == null)
            {
                throw new ArgumentNullException("tokenizer");
            }

            // Invariant-culture lower-casing keeps the keys stable regardless of the
            // current thread culture (ToLower() under tr-TR turns "I" into dotless "ı").
            var convertedInput = input;
            if (!caseSensitive)
            {
                convertedInput = input.ToLowerInvariant();
            }

            var words = tokenizer.Tokenize(convertedInput);
            Array.Sort(words);

            var uniqueWords = GetUniqueWords(words);

            var result = new Dictionary<string, int>();
            foreach (var word in uniqueWords)
            {
                // With no provider every token is kept; with one, the token must pass
                // IsWord and must not be a stop word.
                var keep = stopWordProvider == null || (IsWord(word) && !stopWordProvider.IsStopWord(word));
                if (!keep)
                {
                    continue;
                }

                // 'previous' stays 0 when the key is missing, so first insert and
                // accumulation share one code path.
                int previous;
                result.TryGetValue(word, out previous);
                result[word] = previous + CountWords(word, words);
            }

            return result;
        }
Exemple #6
0
 /// <summary>
 /// Tells whether a word should be considered: it must be non-null, non-empty
 /// and not reported as a stop word by <c>_stopWordProvider</c>.
 /// </summary>
 /// <param name="word">The candidate word; may be null or empty.</param>
 /// <returns>True when the word has content and is not a stop word; otherwise false.</returns>
 private bool IsClassifiableWord(string word)
 {
     bool hasContent = !string.IsNullOrEmpty(word);

     // Short-circuit: the provider is consulted only for words that have content.
     return hasContent && !_stopWordProvider.IsStopWord(word);
 }