/// <summary>
		/// Tokenizes <paramref name="input"/> and builds a table that maps each retained word
		/// to the count reported by <c>CountWords</c> for it.
		/// </summary>
		/// <param name="input">The text to analyse. It is lower-cased with <c>ToLower()</c> first when <paramref name="caseSensitive"/> is false.</param>
		/// <param name="caseSensitive">True to leave the casing untouched, so differently cased words stay separate.</param>
		/// <param name="tokenizer">The tokenizer that splits the (possibly lower-cased) text into words.</param>
		/// <param name="stopWordProvider">Stop word source used for filtering; may be null, in which case every unique token is kept.</param>
		/// <returns>A Hashtable keyed by word whose values are the counts returned by <c>CountWords</c>.</returns>
		public static Hashtable GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
		{
			string text = caseSensitive ? input : input.ToLower();

			// The token array is sorted in place before the helpers see it.
			string[] tokens = tokenizer.Tokenize(text);
			Array.Sort(tokens);

			Hashtable frequencies = new Hashtable();
			foreach (string candidate in GetUniqueWords(tokens))
			{
				// NOTE(review): with a null provider the IsWord check is skipped as well - confirm this is intended.
				bool keep = stopWordProvider == null
					|| (IsWord(candidate) && !stopWordProvider.IsStopWord(candidate));
				if (!keep)
				{
					continue;
				}

				if (frequencies.ContainsKey(candidate))
				{
					frequencies[candidate] = (int)frequencies[candidate] + CountWords(candidate, tokens);
				}
				else
				{
					frequencies.Add(candidate, CountWords(candidate, tokens));
				}
			}

			return frequencies;
		}
 /// <summary>
 /// Creates an Integration and stores the three supplied collaborators in its fields.
 /// </summary>
 /// <param name="userInterfacePortal">Stored in <c>_userInterfacePortal</c>.</param>
 /// <param name="manuscriptProvider">Stored in <c>_manuscriptProvider</c>.</param>
 /// <param name="stopWordProvider">Stored in <c>_stopWordProvider</c>.</param>
 public Integration(IUserInterfacePortal userInterfacePortal, IManuscriptProvider manuscriptProvider, IStopWordProvider stopWordProvider)
 {
     this._userInterfacePortal = userInterfacePortal;
     this._manuscriptProvider = manuscriptProvider;
     this._stopWordProvider = stopWordProvider;
 }
示例#3
0
        /// <summary>
        /// Sets up the fixture: a substitute for IUserInterfacePortal, a ManuscriptProvider instance and a
        /// StopwordProvider constructed with TestStopwordListPath, then hands all three to the Integration under test.
        /// </summary>
        public IntegrationTests()
        {
            // Collaborators are created in the same order as they are passed to the target.
            _userInterfacePortal = Substitute.For<IUserInterfacePortal>();
            _manuscriptProvider = new ManuscriptProvider();
            _stopWordProvider = new StopwordProvider(TestStopwordListPath);

            _target = new Integration(_userInterfacePortal, _manuscriptProvider, _stopWordProvider);
        }
        /// <summary>
        /// Creates a classifier from its three required collaborators, rejecting any null argument.
        /// </summary>
        /// <param name="wd">The words data source; stored in <c>_wordsData</c>.</param>
        /// <param name="tokenizer">The tokenizer; stored in <c>_tokenizer</c>.</param>
        /// <param name="swp">The stop word provider; stored in <c>_stopWordProvider</c>.</param>
        /// <exception cref="ArgumentNullException">Thrown when any of the three arguments is null.</exception>
        public BayesianClassifier(IWordsDataSource wd, ITokenizer tokenizer, IStopWordProvider swp)
        {
            // ArgumentNullException(string) interprets its argument as the parameter name,
            // so the name and the human-readable message are passed separately.
            if (wd == null)
            {
                throw new ArgumentNullException("wd", "IWordsDataSource cannot be null.");
            }
            _wordsData = wd;

            if (tokenizer == null)
            {
                throw new ArgumentNullException("tokenizer", "ITokenizer cannot be null.");
            }
            _tokenizer = tokenizer;

            if (swp == null)
            {
                throw new ArgumentNullException("swp", "IStopWordProvider cannot be null.");
            }
            _stopWordProvider = swp;
        }
示例#5
0
        /// <summary>
        /// Creates a classifier from its three required collaborators, rejecting any null argument.
        /// </summary>
        /// <param name="wd">The words data source; stored in <c>_wordsData</c>.</param>
        /// <param name="tokenizer">The tokenizer; stored in <c>_tokenizer</c>.</param>
        /// <param name="swp">The stop word provider; stored in <c>_stopWordProvider</c>.</param>
        /// <exception cref="ArgumentNullException">Thrown when any of the three arguments is null.</exception>
        public BayesianClassifier(IWordsDataSource wd, ITokenizer tokenizer, IStopWordProvider swp)
        {
            // ArgumentNullException(string) interprets its argument as the parameter name,
            // so the name and the human-readable message are passed separately.
            if (wd == null)
            {
                throw new ArgumentNullException("wd", "IWordsDataSource cannot be null.");
            }
            _wordsData = wd;

            if (tokenizer == null)
            {
                throw new ArgumentNullException("tokenizer", "ITokenizer cannot be null.");
            }
            _tokenizer = tokenizer;

            if (swp == null)
            {
                throw new ArgumentNullException("swp", "IStopWordProvider cannot be null.");
            }
            _stopWordProvider = swp;
        }
        /// <summary>
        /// Creates a classifier from a words data source, a tokenizer and a stop word provider.
        /// </summary>
        /// <param name="wordsDataSource">The words data source; stored in <c>_wordsData</c>.</param>
        /// <param name="tokenizer">The tokenizer; stored in <c>_tokenizer</c>.</param>
        /// <param name="stopWordProvider">The stop word provider; stored in <c>_stopWordProvider</c>.</param>
        /// <exception cref="ArgumentNullException">Thrown for the first argument (in declaration order) that is null.</exception>
        public BayesianClassifier(IWordsDataSource wordsDataSource, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
        {
            // Guard clauses first, checked in declaration order.
            if (wordsDataSource == null)
            {
                throw new ArgumentNullException("wordsDataSource");
            }
            if (tokenizer == null)
            {
                throw new ArgumentNullException("tokenizer");
            }
            if (stopWordProvider == null)
            {
                throw new ArgumentNullException("stopWordProvider");
            }

            // All arguments are valid; keep them.
            _wordsData = wordsDataSource;
            _tokenizer = tokenizer;
            _stopWordProvider = stopWordProvider;
        }
示例#7
0
        /// <summary>
        /// Builds a classifier around the given words data source, tokenizer and stop word provider.
        /// </summary>
        /// <param name="wordsDataSource">Kept in <c>_wordsData</c>; must not be null.</param>
        /// <param name="tokenizer">Kept in <c>_tokenizer</c>; must not be null.</param>
        /// <param name="stopWordProvider">Kept in <c>_stopWordProvider</c>; must not be null.</param>
        /// <exception cref="ArgumentNullException">Thrown, naming the offending parameter, when an argument is null.</exception>
        public BayesianClassifier(IWordsDataSource wordsDataSource, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
        {
            // Each argument is validated immediately before it is stored.
            if (wordsDataSource == null)
            {
                throw new ArgumentNullException("wordsDataSource");
            }
            this._wordsData = wordsDataSource;

            if (tokenizer == null)
            {
                throw new ArgumentNullException("tokenizer");
            }
            this._tokenizer = tokenizer;

            if (stopWordProvider == null)
            {
                throw new ArgumentNullException("stopWordProvider");
            }
            this._stopWordProvider = stopWordProvider;
        }
示例#8
0
        /// <summary>
        /// Tokenizes <paramref name="input"/> and builds a dictionary that maps each retained word
        /// to the count reported by <c>CountWords</c> for it.
        /// </summary>
        /// <param name="input">The text to analyse. It is lower-cased with <c>ToLower()</c> first when <paramref name="caseSensitive"/> is false.</param>
        /// <param name="caseSensitive">True to leave the casing untouched, so differently cased words stay separate.</param>
        /// <param name="tokenizer">The tokenizer that splits the (possibly lower-cased) text into words.</param>
        /// <param name="stopWordProvider">Stop word source used for filtering; may be null, in which case every unique token is kept.</param>
        /// <returns>A dictionary keyed by word whose values are the accumulated <c>CountWords</c> results.</returns>
        public static IDictionary<string, int> GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
        {
            var text = caseSensitive ? input : input.ToLower();

            // The token array is sorted in place before the helpers see it.
            var tokens = tokenizer.Tokenize(text);
            Array.Sort(tokens);

            var distinct = GetUniqueWords(tokens);
            var frequencies = new Dictionary<string, int>();

            for (var index = 0; index < distinct.Length; index++)
            {
                var candidate = distinct[index];

                // Filtering only happens when a provider is supplied.
                // NOTE(review): that means IsWord is bypassed too when the provider is null - confirm this is intended.
                if (stopWordProvider != null && (!IsWord(candidate) || stopWordProvider.IsStopWord(candidate)))
                {
                    continue;
                }

                // TryGetValue leaves 'seen' at 0 for a new word, so one assignment covers both add and update.
                int seen;
                frequencies.TryGetValue(candidate, out seen);
                frequencies[candidate] = seen + CountWords(candidate, tokens);
            }

            return frequencies;
        }
示例#9
0
 /// <summary>
 /// Creates a filter that keeps the supplied stop word provider in <c>_stopwordProvider</c>.
 /// </summary>
 /// <param name="stopwordProvider">The provider to store; it is not validated here.</param>
 public StopWordsFilter(IStopWordProvider stopwordProvider)
 {
     this._stopwordProvider = stopwordProvider;
 }
示例#10
0
        /// <summary>
        /// Splits <paramref name="input"/> into tokens and returns a table of the retained words,
        /// each mapped to the count reported by <c>CountWords</c>.
        /// </summary>
        /// <param name="input">The text to analyse. It is converted with <c>ToLower()</c> when <paramref name="caseSensitive"/> is false.</param>
        /// <param name="caseSensitive">True if words that differ only in casing should be counted separately.</param>
        /// <param name="tokenizer">The tokenizer that produces the word array.</param>
        /// <param name="stopWordProvider">Stop word source used for filtering; when null, no filtering is applied.</param>
        /// <returns>A Hashtable keyed by word whose values are the counts returned by <c>CountWords</c>.</returns>
        public static Hashtable GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
        {
            string text = caseSensitive ? input : input.ToLower();

            // Sorted in place; both helpers below receive the sorted array.
            string[] sortedWords = tokenizer.Tokenize(text);
            Array.Sort(sortedWords);

            string[] distinctWords = GetUniqueWords(sortedWords);
            Hashtable counts = new Hashtable();

            foreach (string word in distinctWords)
            {
                // Without a provider every unique token is accepted.
                // NOTE(review): IsWord is not consulted in that case - confirm this is intended.
                bool accepted = true;
                if (stopWordProvider != null)
                {
                    accepted = IsWord(word) && !stopWordProvider.IsStopWord(word);
                }

                if (accepted)
                {
                    if (counts.ContainsKey(word))
                    {
                        counts[word] = (int)counts[word] + CountWords(word, sortedWords);
                    }
                    else
                    {
                        // Setting a missing key through the indexer adds the entry.
                        counts[word] = CountWords(word, sortedWords);
                    }
                }
            }

            return counts;
        }
示例#11
0
 /// <summary>
 /// Creates the StopwordProvider under test from <c>TestStopWordListFilePath</c>.
 /// </summary>
 public StopWordProviderTests()
 {
     this._target = new StopwordProvider(TestStopWordListFilePath);
 }
 /// <summary>
 /// Creates a service that keeps the supplied provider in <c>_provider</c>.
 /// </summary>
 /// <param name="provider">The stop word provider to store; it is not validated here.</param>
 public StopWordService(IStopWordProvider provider)
 {
     this._provider = provider;
 }
示例#13
0
		/// <summary>
		/// Splits <paramref name="input"/> into tokens and returns a dictionary of the retained words,
		/// each mapped to the count reported by <c>CountWords</c>.
		/// </summary>
		/// <param name="input">The text to analyse. It is converted with <c>ToLower()</c> when <paramref name="caseSensitive"/> is false.</param>
		/// <param name="caseSensitive">True if words that differ only in casing should be counted separately.</param>
		/// <param name="tokenizer">The tokenizer that produces the word array.</param>
		/// <param name="stopWordProvider">Stop word source used for filtering; when null, no filtering is applied.</param>
		/// <returns>A dictionary keyed by word whose values are the accumulated <c>CountWords</c> results.</returns>
		public static IDictionary<string, int> GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
		{
			var text = caseSensitive ? input : input.ToLower();

			// Sorted in place; both helpers below receive the sorted array.
			var sortedWords = tokenizer.Tokenize(text);
			Array.Sort(sortedWords);

			var distinctWords = GetUniqueWords(sortedWords);
			var counts = new Dictionary<string, int>();

			for (var position = 0; position < distinctWords.Length; position++)
			{
				var current = distinctWords[position];

				// NOTE(review): a null provider short-circuits the IsWord check as well - confirm this is intended.
				var accepted = stopWordProvider == null
					|| (IsWord(current) && !stopWordProvider.IsStopWord(current));
				if (!accepted)
				{
					continue;
				}

				int previous;
				if (!counts.TryGetValue(current, out previous))
				{
					counts.Add(current, CountWords(current, sortedWords));
				}
				else
				{
					counts[current] = previous + CountWords(current, sortedWords);
				}
			}

			return counts;
		}
 /// <summary>
 /// Creates a DocumentDataProvider and stores the three supplied services in the same-named members.
 /// </summary>
 /// <param name="stopWordProvider">Stored in <c>this.stopWordProvider</c>.</param>
 /// <param name="xmlService">Stored in <c>this.xmlService</c>.</param>
 /// <param name="textAnalyzer">Stored in <c>this.textAnalyzer</c>.</param>
 public DocumentDataProvider(
     IStopWordProvider stopWordProvider,
     IXmlService xmlService,
     ITextAnalyzer textAnalyzer)
 {
     // 'this.' is required because the parameters share the member names.
     this.textAnalyzer = textAnalyzer;
     this.xmlService = xmlService;
     this.stopWordProvider = stopWordProvider;
 }