Beispiel #1
0
        public void Add_Words_To_Dictionary()
        {
            // Arrange: seven input words, four of them distinct.
            var actualCounts = new Dictionary<string, int>();
            var words = new List<string>
            {
                "the", "railway", "children", "the", "children", "father", "the"
            };
            var expectedCounts = new Dictionary<string, int>
            {
                { "the", 3 },
                { "children", 2 },
                { "railway", 1 },
                { "father", 1 }
            };
            var utils = new WordCountUtils();

            // Act: tally every word of the list into the (initially empty) dictionary.
            utils.WordCountDictionary(words, actualCounts);

            // Assert: same key/count pairs regardless of order ...
            CollectionAssert.AreEquivalent(expectedCounts, actualCounts);

            // ... and element-wise equality.
            // NOTE(review): depending on the test framework this second check may compare
            // the dictionaries in enumeration order, which Dictionary does not guarantee — confirm it is intended.
            CollectionAssert.AreEqual(expectedCounts, actualCounts);
        }
        /// <summary>
        /// Gets word (or link) counts for the supplied input, based on the selected option,
        /// and returns them sorted.
        /// </summary>
        /// <param name="input">Either an absolute URL, whose content is loaded, or the raw text to analyse.</param>
        /// <param name="isFilterStopWord">When <c>true</c>, the pre-defined stop words are passed to the word counters; when <c>false</c>, an empty stop-word list is used.</param>
        /// <param name="options">
        /// <list type="bullet">
        /// <item>0: calculates the number of occurrences of each word on the page.</item>
        /// <item>1: calculates the number of occurrences of each word in the page meta tags.</item>
        /// <item>2: calculates the number of external links in the text.</item>
        /// </list>
        /// </param>
        /// <param name="sortByColumn"><para>1: sort by word</para><para>2: sort by count (default)</para></param>
        /// <param name="sortOrder"><para>0: ASC</para><para>1: DESC (default)</para></param>
        /// <returns>The response object populated with the source, the chosen options and the sorted counts.</returns>
        /// <exception cref="HttpResponseException">
        /// Thrown with status 500 when loading, counting or sorting fails; the original exception message is used as the response content.
        /// </exception>
        public WordCountResponse Get(string input, bool isFilterStopWord, OptionsEnum options, int sortByColumn = 2, SortEnum sortOrder = SortEnum.DESC)
        {
            Dictionary <string, int> resultSorted;

            // Pick the stop-word list for this call only. Overwriting the stopWord field
            // would discard the pre-defined list for any later call that shares the field.
            var activeStopWords = isFilterStopWord ? stopWord : new List <string>();

            try
            {
                // Only absolute URIs are treated as URLs. With RelativeOrAbsolute any text
                // without spaces (e.g. a single word) is a well-formed *relative* URI and
                // would wrongly be downloaded instead of being counted as text.
                bool   isUri   = Uri.IsWellFormedUriString(input, UriKind.Absolute);
                string content = isUri ? WordCountUtils.LoadContentFromURL(input) : input;

                Dictionary <string, int> result;
                switch (options)
                {
                    case OptionsEnum.COUNTLINKS:
                        result = WordCountUtils.GetLinkCount(content);
                        break;

                    case OptionsEnum.COUNTWORDINMETA:
                        result = WordCountUtils.GetMetaTextCount(content, activeStopWords);
                        break;

                    case OptionsEnum.COUNTWORD:
                    default:
                        // Plain word counting is also the fallback for unknown option values.
                        result = WordCountUtils.GetWordCount(content, activeStopWords);
                        break;
                }

                resultSorted = WordCountUtils.SortResult(result, sortByColumn, sortOrder);
            }
            catch (Exception ex)
            {
                // TODO: log the exception (not implemented yet).

                throw new HttpResponseException(new HttpResponseMessage(HttpStatusCode.InternalServerError)
                {
                    Content      = new StringContent(ex.Message),
                    ReasonPhrase = "Something went wrong"
                });
            }

            // Echo the request parameters back together with the sorted counts.
            res.Source           = input;
            res.IsFilterStopWord = isFilterStopWord;
            res.SelectedOption   = options;
            res.result           = resultSorted;

            return res;
        }
Beispiel #3
0
        public void SplitString_By_Space_And_RemoveEmptyEntries()
        {
            // Arrange: the sentence contains doubled and trailing spaces, which must not
            // show up as empty entries in the result.
            string[] expectedWords = { "the", "railway", "children", "they" };
            string   sentence      = "the railway children  they  ";
            var      utils         = new WordCountUtils();

            // Act
            var actualWords = utils.SplitWords(sentence);

            // Assert: same words, same order.
            CollectionAssert.AreEqual(expectedWords, actualWords);
        }
Beispiel #4
0
        public void Remove_Punctuation()
        {
            // Arrange: text with punctuation marks; the expected text keeps the '^',
            // the whitespace and the line breaks.
            string rawText     = "CHILDREN!\r\n\r\nBy E. () [] ^ _ - ;\r\n\r\n\r\n  I.    The beginning of things.\r\n";
            string cleanedText = "CHILDREN\r\n\r\nBy E   ^   \r\n\r\n\r\n  I    The beginning of things\r\n";
            var    utils       = new WordCountUtils();

            // Act
            string actualText = utils.RemovePunctuation(rawText);

            // Assert
            Assert.AreEqual(cleanedText, actualText);
        }
Beispiel #5
0
        public void Remove_RomanNumerals_FollowedBy_FullStop()
        {
            // Arrange: chapter headings "I." and "II." should be blanked out, while the
            // pronoun "I" in "I don't" (no full stop after it) must be left alone.
            string chapterText  = "I. The beginning of things.II. Peter's coal-mine. I don't suppose they had...";
            string expectedText = "  The beginning of things.  Peter's coal-mine. I don't suppose they had...";
            var    utils        = new WordCountUtils();
            var    book         = new TextBook();

            // Act: clean the text with the pattern supplied by TextBook.
            string actualText = utils.RegexCleaner(chapterText, book.Pattern);

            // Assert
            Assert.AreEqual(expectedText, actualText);
        }
Beispiel #6
0
        public void Remove_Specific_Characters()
        {
            // Arrange: characters to strip from the text - the caret, all digits and
            // the newline / tab / carriage-return control characters.
            string[] charsToStrip = { "^", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "\n", "\t", "\r" };
            string   rawText      = "CHILDREN\r\n\r\nBy E   ^   \r\n\r\n\r\n  I    The beginning of things\r\n";
            string   expectedText = "CHILDREN    By E               I    The beginning of things  ";
            var      utils        = new WordCountUtils();

            // Act
            string actualText = utils.CleanDistinctCharacters(rawText, charsToStrip);

            // Assert
            Assert.AreEqual(expectedText, actualText);
        }
Beispiel #7
0
 /// <summary>
 /// Clears txtOutput and replaces the TextBook and WordCountUtils instances with new ones.
 /// </summary>
 private void resetValues()
 {
     // Drop whatever is currently shown in the output.
     txtOutput.Clear();

     // Start over with freshly constructed helper objects.
     myTextBook       = new TextBook();
     myWordCountUtils = new WordCountUtils();
 }