// Verifies that WordCountUtils.WordCountDictionary records, in the dictionary
// passed to it, one entry per distinct word with the number of times it occurs.
public void Add_Words_To_Dictionary()
{
    // Arrange
    var myWordDictionary = new Dictionary<string, int>();
    var myWordList = new List<string>
    {
        "the", "railway", "children", "the", "children", "father", "the"
    };
    var myExpectedWordDictionary = new Dictionary<string, int>
    {
        { "the", 3 },
        { "children", 2 },
        { "railway", 1 },
        { "father", 1 }
    };
    var myWordCountUtils = new WordCountUtils();

    // Act
    myWordCountUtils.WordCountDictionary(myWordList, myWordDictionary);

    // Assert
    // Compare contents only. Dictionary<TKey, TValue> does not define an
    // enumeration order, so an ordered comparison (CollectionAssert.AreEqual)
    // would make the result depend on insertion order rather than on the counts.
    CollectionAssert.AreEquivalent(myExpectedWordDictionary, myWordDictionary);
}
/// <summary>
/// Gets word counts from the input according to the selected option.
/// </summary>
/// <param name="input">
/// Either an absolute http/https URL, whose content is loaded through
/// <c>WordCountUtils.LoadContentFromURL</c>, or the text to analyse. Anything that is
/// not a well-formed absolute http/https URL is treated as text.
/// </param>
/// <param name="isFilterStopWord">
/// When <c>true</c>, the pre-defined stop-word list is handed to the counters so those
/// words can be filtered out; when <c>false</c>, an empty stop-word list is used.
/// </param>
/// <param name="options">
/// <list type="bullet">
/// <item>0: calculates the number of occurrences of each word on the page.</item>
/// <item>1: calculates the number of occurrences of each word in the page meta tags.</item>
/// <item>2: calculates the number of external links in the text.</item>
/// </list>
/// Any other value is handled like the plain word count.
/// </param>
/// <param name="sortByColumn">
/// <para>1: sort by word</para>
/// <para>2: sort by count (presumably - 2 is the default; confirm against <c>WordCountUtils.SortResult</c>)</para>
/// </param>
/// <param name="sortOrder"><para>0: ASC</para><para>1: DESC</para></param>
/// <returns>
/// The response object populated with the source, the stop-word flag, the selected
/// option and the sorted counts.
/// </returns>
/// <exception cref="HttpResponseException">
/// Thrown with status 500 and the original exception message as content when loading,
/// counting or sorting fails.
/// </exception>
public WordCountResponse Get(string input, bool isFilterStopWord, OptionsEnum options, int sortByColumn = 2, SortEnum sortOrder = SortEnum.DESC)
{
    // Choose the stop-word list for this call only. Overwriting the stopWord member
    // itself would discard the pre-defined list for any later use of it.
    var activeStopWords = isFilterStopWord ? stopWord : new List<string>();

    Dictionary<string, int> resultSorted;

    try
    {
        // UriKind.RelativeOrAbsolute accepts any text without spaces (e.g. "children")
        // as a relative URI, so plain text would be sent to the URL loader. Require an
        // absolute URL, and only http/https so that other schemes (file:, ftp:, or
        // text such as "Title:Subtitle") are never fetched by the server.
        Uri parsedUri;
        bool isUri = Uri.IsWellFormedUriString(input, UriKind.Absolute)
            && Uri.TryCreate(input, UriKind.Absolute, out parsedUri)
            && (parsedUri.Scheme == Uri.UriSchemeHttp || parsedUri.Scheme == Uri.UriSchemeHttps);

        string content = isUri ? WordCountUtils.LoadContentFromURL(input) : input;

        Dictionary<string, int> result;
        switch (options)
        {
            case OptionsEnum.COUNTLINKS:
                result = WordCountUtils.GetLinkCount(content);
                break;

            case OptionsEnum.COUNTWORDINMETA:
                result = WordCountUtils.GetMetaTextCount(content, activeStopWords);
                break;

            case OptionsEnum.COUNTWORD:
            default:
                result = WordCountUtils.GetWordCount(content, activeStopWords);
                break;
        }

        resultSorted = WordCountUtils.SortResult(result, sortByColumn, sortOrder);
    }
    catch (Exception ex)
    {
        // TODO: log the exception before translating it into an HTTP 500 response.
        throw new HttpResponseException(new HttpResponseMessage(HttpStatusCode.InternalServerError)
        {
            Content = new StringContent(ex.Message),
            ReasonPhrase = "Something went wrong"
        });
    }

    res.Source = input;
    res.IsFilterStopWord = isFilterStopWord;
    res.SelectedOption = options;
    res.result = resultSorted;
    return res;
}
// Expects WordCountUtils.SplitWords to return the four words of the sentence,
// in order, with no empty entry produced by the trailing space.
public void SplitString_By_Space_And_RemoveEmptyEntries()
{
    // Arrange
    const string sentence = "the railway children they ";
    var expectedWords = new[] { "the", "railway", "children", "they" };
    var utils = new WordCountUtils();

    // Act
    var actualWords = utils.SplitWords(sentence);

    // Assert
    CollectionAssert.AreEqual(expectedWords, actualWords);
}
// Expects WordCountUtils.RemovePunctuation to drop the punctuation marks from
// the text while leaving letters, line breaks and the '^' symbol in place.
public void Remove_Punctuation()
{
    // Arrange
    const string rawText = "CHILDREN!\r\n\r\nBy E. () [] ^ _ - ;\r\n\r\n\r\n I. The beginning of things.\r\n";
    const string expectedText = "CHILDREN\r\n\r\nBy E ^ \r\n\r\n\r\n I The beginning of things\r\n";
    var utils = new WordCountUtils();

    // Act
    string cleanedText = utils.RemovePunctuation(rawText);

    // Assert
    Assert.AreEqual(expectedText, cleanedText);
}
// Expects WordCountUtils.RegexCleaner, driven by TextBook.Pattern, to remove
// the chapter numerals "I." and "II." while leaving the pronoun "I" untouched.
public void Remove_RomanNumerals_FollowedBy_FullStop()
{
    // Arrange
    const string chapterText = "I. The beginning of things.II. Peter's coal-mine. I don't suppose they had...";
    const string expectedText = " The beginning of things. Peter's coal-mine. I don't suppose they had...";
    var utils = new WordCountUtils();
    var book = new TextBook();

    // Act
    string cleanedText = utils.RegexCleaner(chapterText, book.Pattern);

    // Assert
    Assert.AreEqual(expectedText, cleanedText);
}
// Expects WordCountUtils.CleanDistinctCharacters to strip the listed characters
// (caret, digits, newline, tab, carriage return) from the text.
public void Remove_Specific_Characters()
{
    // Arrange
    const string rawText = "CHILDREN\r\n\r\nBy E ^ \r\n\r\n\r\n I The beginning of things\r\n";
    const string expectedText = "CHILDREN By E I The beginning of things ";

    // Characters that must not survive the clean-up.
    string[] charactersToStrip =
    {
        "^",
        "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
        "\n", "\t", "\r"
    };
    var utils = new WordCountUtils();

    // Act
    string cleanedText = utils.CleanDistinctCharacters(rawText, charactersToStrip);

    // Assert
    Assert.AreEqual(expectedText, cleanedText);
}
// Clears txtOutput and replaces myTextBook and myWordCountUtils with
// freshly constructed instances.
private void resetValues()
{
    // Remove whatever the output control currently shows.
    txtOutput.Clear();

    // Start over with new helper objects.
    myTextBook = new TextBook();
    myWordCountUtils = new WordCountUtils();
}