/// <summary>
/// Tokenizes <paramref name="input"/> and returns, for every distinct token that is kept,
/// how many times it occurs.
/// </summary>
/// <param name="input">The text whose word frequencies are wanted.</param>
/// <param name="caseSensitive">
/// When false, the input is lower-cased before tokenizing so that differently cased
/// spellings end up under the same key.
/// </param>
/// <param name="tokenizer">The tokenizer that splits the (possibly lower-cased) input into words.</param>
/// <param name="stopWordProvider">
/// Supplies the stop words to leave out. When null, no filtering is applied at all and
/// every unique token is counted.
/// </param>
/// <returns>A Hashtable keyed by word; each value is the count reported by CountWords for that word.</returns>
public static Hashtable GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
{
    string text = caseSensitive ? input : input.ToLower();

    // The token array is sorted in place before it is handed to GetUniqueWords and CountWords.
    string[] words = tokenizer.Tokenize(text);
    Array.Sort(words);

    string[] distinctWords = GetUniqueWords(words);
    Hashtable frequencies = new Hashtable();

    foreach (string word in distinctWords)
    {
        // With a provider present, a token must pass IsWord and must not be a stop word.
        if (stopWordProvider != null && (!IsWord(word) || stopWordProvider.IsStopWord(word)))
        {
            continue;
        }

        if (!frequencies.ContainsKey(word))
        {
            frequencies.Add(word, CountWords(word, words));
            continue;
        }

        frequencies[word] = (int)frequencies[word] + CountWords(word, words);
    }

    return frequencies;
}
/// <summary>
/// Creates an Integration and stores the three collaborators it is given.
/// </summary>
/// <param name="userInterfacePortal">Kept in <c>_userInterfacePortal</c>.</param>
/// <param name="manuscriptProvider">Kept in <c>_manuscriptProvider</c>.</param>
/// <param name="stopWordProvider">Kept in <c>_stopWordProvider</c>.</param>
public Integration(IUserInterfacePortal userInterfacePortal, IManuscriptProvider manuscriptProvider, IStopWordProvider stopWordProvider)
{
    // No validation is performed here; the arguments are stored exactly as received.
    this._stopWordProvider = stopWordProvider;
    this._manuscriptProvider = manuscriptProvider;
    this._userInterfacePortal = userInterfacePortal;
}
/// <summary>
/// Test fixture setup: prepares the collaborators and builds the Integration under test.
/// </summary>
public IntegrationTests()
{
    // The UI portal is a substitute produced by Substitute.For; the manuscript and
    // stop word providers are concrete instances.
    _userInterfacePortal = Substitute.For<IUserInterfacePortal>();
    _manuscriptProvider = new ManuscriptProvider();

    // The stop word list is loaded from the path held in TestStopwordListPath.
    _stopWordProvider = new StopwordProvider(TestStopwordListPath);

    _target = new Integration(_userInterfacePortal, _manuscriptProvider, _stopWordProvider);
}
/// <summary>
/// Creates a classifier from its three collaborators, all of which are required.
/// </summary>
/// <param name="wd">The words data source; stored in <c>_wordsData</c>.</param>
/// <param name="tokenizer">The tokenizer; stored in <c>_tokenizer</c>.</param>
/// <param name="swp">The stop word provider; stored in <c>_stopWordProvider</c>.</param>
/// <exception cref="ArgumentNullException">Thrown when any of the three arguments is null.</exception>
public BayesianClassifier(IWordsDataSource wd, ITokenizer tokenizer, IStopWordProvider swp)
{
    // ArgumentNullException(string) interprets its argument as the parameter NAME, so the
    // (paramName, message) overload is used to report the name and the message correctly.
    if (wd == null)
    {
        throw new ArgumentNullException("wd", "IWordsDataSource cannot be null.");
    }

    if (tokenizer == null)
    {
        throw new ArgumentNullException("tokenizer", "ITokenizer cannot be null.");
    }

    if (swp == null)
    {
        throw new ArgumentNullException("swp", "IStopWordProvider cannot be null.");
    }

    _wordsData = wd;
    _tokenizer = tokenizer;
    _stopWordProvider = swp;
}
/// <summary>
/// Creates a classifier from a words data source, a tokenizer and a stop word provider.
/// None of them may be null.
/// </summary>
/// <param name="wd">The words data source; stored in <c>_wordsData</c>.</param>
/// <param name="tokenizer">The tokenizer; stored in <c>_tokenizer</c>.</param>
/// <param name="swp">The stop word provider; stored in <c>_stopWordProvider</c>.</param>
/// <exception cref="ArgumentNullException">Thrown when any of the three arguments is null.</exception>
public BayesianClassifier(IWordsDataSource wd, ITokenizer tokenizer, IStopWordProvider swp)
{
    // The one-argument ArgumentNullException constructor takes the parameter name, not a
    // message. Passing both keeps ParamName accurate and the explanatory text intact.
    if (wd == null)
    {
        throw new ArgumentNullException("wd", "IWordsDataSource cannot be null.");
    }

    _wordsData = wd;

    if (tokenizer == null)
    {
        throw new ArgumentNullException("tokenizer", "ITokenizer cannot be null.");
    }

    _tokenizer = tokenizer;

    if (swp == null)
    {
        throw new ArgumentNullException("swp", "IStopWordProvider cannot be null.");
    }

    _stopWordProvider = swp;
}
/// <summary>
/// Creates a classifier from its three required collaborators.
/// </summary>
/// <param name="wordsDataSource">The words data source; stored in <c>_wordsData</c>.</param>
/// <param name="tokenizer">The tokenizer; stored in <c>_tokenizer</c>.</param>
/// <param name="stopWordProvider">The stop word provider; stored in <c>_stopWordProvider</c>.</param>
/// <exception cref="ArgumentNullException">
/// Thrown for the first null argument, checked in parameter order.
/// </exception>
public BayesianClassifier(IWordsDataSource wordsDataSource, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
{
    // Guards first, in parameter order, so the exception raised for a given set of
    // arguments is the same one as before.
    if (wordsDataSource == null)
    {
        throw new ArgumentNullException("wordsDataSource");
    }

    if (tokenizer == null)
    {
        throw new ArgumentNullException("tokenizer");
    }

    if (stopWordProvider == null)
    {
        throw new ArgumentNullException("stopWordProvider");
    }

    _wordsData = wordsDataSource;
    _tokenizer = tokenizer;
    _stopWordProvider = stopWordProvider;
}
/// <summary>
/// Tokenizes <paramref name="input"/> and returns, for every distinct token that is kept,
/// how many times it occurs.
/// </summary>
/// <param name="input">The text whose word frequencies are wanted.</param>
/// <param name="caseSensitive">
/// When false, the input is lower-cased before tokenizing so that differently cased
/// spellings end up under the same key.
/// </param>
/// <param name="tokenizer">The tokenizer that splits the (possibly lower-cased) input into words.</param>
/// <param name="stopWordProvider">
/// Supplies the stop words to leave out. When null, no filtering is applied at all and
/// every unique token is counted.
/// </param>
/// <returns>A dictionary mapping each retained word to the count reported by CountWords.</returns>
public static IDictionary<string, int> GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
{
    var text = caseSensitive ? input : input.ToLower();

    // The token array is sorted in place before it is handed to GetUniqueWords and CountWords.
    var words = tokenizer.Tokenize(text);
    Array.Sort(words);

    var frequencies = new Dictionary<string, int>();

    foreach (var word in GetUniqueWords(words))
    {
        // With a provider present, a token must pass IsWord and must not be a stop word.
        if (stopWordProvider != null && (!IsWord(word) || stopWordProvider.IsStopWord(word)))
        {
            continue;
        }

        var occurrences = CountWords(word, words);

        // TryGetValue leaves 'previous' at 0 when the word is not present yet, and the
        // indexer setter adds or overwrites, so one assignment covers both cases.
        int previous;
        frequencies.TryGetValue(word, out previous);
        frequencies[word] = previous + occurrences;
    }

    return frequencies;
}
/// <summary>
/// Creates a filter that keeps the given stop word provider in <c>_stopwordProvider</c>.
/// </summary>
/// <param name="stopwordProvider">The provider to store; it is not validated here.</param>
public StopWordsFilter(IStopWordProvider stopwordProvider)
{
    this._stopwordProvider = stopwordProvider;
}
/// <summary>
/// Builds a word-frequency table for <paramref name="input"/>.
/// </summary>
/// <param name="input">The text to analyse.</param>
/// <param name="caseSensitive">
/// False to lower-case the input before tokenizing, which merges spellings that differ
/// only by case; true to tokenize the input as given.
/// </param>
/// <param name="tokenizer">The tokenizer used to split the text into words.</param>
/// <param name="stopWordProvider">
/// Used to exclude stop words. If null, every unique token is counted without filtering.
/// </param>
/// <returns>A Hashtable keyed by word; each value is the count reported by CountWords for that word.</returns>
public static Hashtable GetWordFrequency(string input, bool caseSensitive, ITokenizer tokenizer, IStopWordProvider stopWordProvider)
{
    string text = input;
    if (caseSensitive == false)
    {
        text = input.ToLower();
    }

    // Tokens are sorted in place; the sorted array feeds both GetUniqueWords and CountWords.
    string[] words = tokenizer.Tokenize(text);
    Array.Sort(words);

    string[] distinctWords = GetUniqueWords(words);
    Hashtable table = new Hashtable();

    foreach (string candidate in distinctWords)
    {
        // A token is excluded only when a provider exists and the token either fails
        // IsWord or is reported as a stop word.
        bool excluded = stopWordProvider != null
            && (!IsWord(candidate) || stopWordProvider.IsStopWord(candidate));

        if (excluded)
        {
            continue;
        }

        if (table.ContainsKey(candidate))
        {
            table[candidate] = (int)table[candidate] + CountWords(candidate, words);
            continue;
        }

        table.Add(candidate, CountWords(candidate, words));
    }

    return table;
}
/// <summary>
/// Test fixture setup: creates the StopwordProvider under test from the list at
/// <c>TestStopWordListFilePath</c>.
/// </summary>
public StopWordProviderTests()
{
    this._target = new StopwordProvider(TestStopWordListFilePath);
}
/// <summary>
/// Creates a service that keeps the given stop word provider in <c>_provider</c>.
/// </summary>
/// <param name="provider">The provider to store; it is not validated here.</param>
public StopWordService(IStopWordProvider provider)
{
    this._provider = provider;
}
/// <summary>
/// Creates a DocumentDataProvider and stores the three collaborators it is given.
/// </summary>
/// <param name="stopWordProvider">Kept in the <c>stopWordProvider</c> field.</param>
/// <param name="xmlService">Kept in the <c>xmlService</c> field.</param>
/// <param name="textAnalyzer">Kept in the <c>textAnalyzer</c> field.</param>
public DocumentDataProvider(IStopWordProvider stopWordProvider, IXmlService xmlService, ITextAnalyzer textAnalyzer)
{
    // Fields share their names with the parameters, hence the explicit 'this.' qualifier.
    // The arguments are stored as received, without validation.
    this.textAnalyzer = textAnalyzer;
    this.xmlService = xmlService;
    this.stopWordProvider = stopWordProvider;
}