/// <summary>
/// Reads the next word from <c>_reader</c>: skips characters until the first letter, then
/// collects the following run of letters.
/// </summary>
/// <returns>
/// The word converted to lower case with the invariant culture. If the reader signals end of
/// stream before any letter is found, an empty string is returned; if it signals end of stream
/// in the middle of a word, the letters collected so far are returned.
/// </returns>
public string ConsumeWord()
        {
            var sb = new StringBuilder();

            try
            {
                // Drop every non-letter character that precedes the word.
                char nextCharacter = _reader.GetNextChar();
                while (!char.IsLetter(nextCharacter))
                {
                    nextCharacter = _reader.GetNextChar();
                }

                // nextCharacter is now the first letter; keep appending until the run of letters ends.
                do
                {
                    sb.Append(nextCharacter);
                    nextCharacter = _reader.GetNextChar();
                }
                while (char.IsLetter(nextCharacter));
            }
            catch (EndOfStreamException)
            {
                // End of stream: fall through so the partial word is normalised exactly like
                // any other word (previously this path skipped the lower-casing).
            }

            // Any other exception propagates unchanged, keeping its original stack trace.
            return sb.ToString().ToLowerInvariant();
        }
Пример #2
0
        /// <summary>
        /// Reads characters from <paramref name="reader"/> until it throws
        /// <see cref="EndOfStreamException"/>, splits them into words and passes every word to
        /// <c>_dictionary.Add</c>.
        /// </summary>
        /// <remarks>
        /// Letters and accepted symbols are appended to the current word. When an accepted symbol
        /// directly follows another accepted symbol, the trailing symbol is removed from the word,
        /// the word is added, and the second symbol is discarded. Any other character ends the
        /// current word. The buffer is handed to <c>_dictionary.Add</c> even when it is empty.
        /// The body contains no <c>await</c>, so all work is done before the returned task is
        /// handed back to the caller.
        /// </remarks>
        /// <param name="reader">Source of characters; read through <c>GetNextChar()</c>.</param>
        public async Task ProcessReaderAsync(ICharacterReader reader)
        {
            var buffer = new StringBuilder();

            try
            {
                while (true)
                {
                    var res = reader.GetNextChar();

                    if (res.IsLetter() || res.IsAcceptedSymbol())
                    {
                        // Look at the last buffered character through the StringBuilder indexer
                        // instead of materialising the whole buffer as a string twice per character.
                        bool repeatedSymbol = buffer.Length > 0
                                              && buffer[buffer.Length - 1].IsAcceptedSymbol()
                                              && res.IsAcceptedSymbol();

                        if (repeatedSymbol)
                        {
                            // Two symbols in a row: strip the trailing one, emit the word and
                            // drop the current symbol as well.
                            buffer.Remove(buffer.Length - 1, 1);

                            _dictionary.Add(buffer.ToString());
                            buffer.Clear();
                            continue;
                        }

                        buffer.Append(res);
                    }
                    else
                    {
                        // Separator character: emit whatever has been collected so far.
                        _dictionary.Add(buffer.ToString());
                        buffer.Clear();
                    }
                }
            }
            catch (EndOfStreamException)
            {
                // The reader is exhausted; emit the word that was still being built.
                _dictionary.Add(buffer.ToString());
            }
        }
        /// <summary>
        /// Reads <paramref name="reader"/> character by character until it throws
        /// <see cref="EndOfStreamException"/>, building words and handing each one to
        /// <c>SetWord</c> together with <paramref name="wordCount"/>.
        /// </summary>
        /// <remarks>
        /// A character extends the current word when it is a letter, or when it is an accepted
        /// symbol and the current word does not yet contain <c>_filterSymbols</c>. Every other
        /// character ends the current word. <c>SetWord</c> is called with the current word even
        /// when that word is empty.
        /// </remarks>
        /// <param name="reader">Source of characters.</param>
        /// <param name="wordCount">Dictionary passed through to <c>SetWord</c>.</param>
        private void CalculateWords(ICharacterReader reader, Dictionary <string, int> wordCount)
        {
            string current = string.Empty;

            try
            {
                for (;;)
                {
                    char next = reader.GetNextChar();

                    bool extendsWord = (next.IsAcceptedSymbol() && !current.Contains(_filterSymbols))
                                       || next.IsLetter();

                    if (!extendsWord)
                    {
                        // Word boundary: hand over the collected word and start a new one.
                        SetWord(wordCount, current);
                        current = string.Empty;
                        continue;
                    }

                    current += next;
                }
            }
            catch (EndOfStreamException)
            {
                // The reader has no more characters; leave the loop.
            }

            // Hand over the word that was in progress when the stream ended.
            SetWord(wordCount, current);
        }
Пример #4
0
        /// <summary>
        /// This is the answer to question 1.
        ///
        /// ASSUMPTIONS:
        /// ============
        ///
        /// 1. The text read by the reader only contains words of the English language, so in this
        /// case there is no provision for words of other languages written in different alphabets,
        /// or for numerals, etc. Please also see the assumptions listed in class CharExtensions.cs
        /// for more details.
        ///
        /// 2. The second assumption, and in accordance to assumption 1 above, is that when it comes
        /// to deciding if a character is part of a word or not, we assume that we are always dealing
        /// with characters of the English alphabet, both lower case and capitals, and with any special
        /// symbols that can be accepted as part of an English word, i.e. hyphen. In this case, there
        /// is no provision for characters of different alphabets, numerals, etc.
        ///
        /// 3. Another assumption is that it is preferable to deal with the input character stream
        /// dynamically, on the fly, instead of reading the whole stream and storing it in a local
        /// variable prior to processing it. This, for example, could help in situations of extremely
        /// long streams (such as when reading a character stream from a large file) that would require
        /// the use of extensive amounts of in-memory storage prior to processing.
        ///
        /// LOGIC:
        /// ======
        ///
        /// An instance of this class (DeveloperTestImplementationAsync) is created elsewhere in the code
        /// (for example in a unit test such as StandardTestAsync.TestQuestionOneAsync()). This method
        /// can also be called asynchronously as part of the solution of question 2. When it is called
        /// two objects are passed into it as dependencies using the method dependency injection pattern,
        /// the first one is a reader object, for example an instance of the SimpleCharacterReader or the
        /// SlowCharacterReader class, they both implement the ICharacterReader interface, and the second
        /// one is an output object, which is an instance of the Question1TestOutput class that implements
        /// the IOutputResult interface.
        ///
        /// The purpose of this method is to use these two objects in order to read a character stream,
        /// dynamically process the character stream in order to separate it into English words, then order
        /// those words by frequency and then alphabetically, and finally create an appropriate output in
        /// the required output format that will be tested by the relevant unit test mentioned above for its
        /// correctness (i.e. the words that it contains, the frequency of appearance of each word in the character
        /// stream, and whether these words have been ordered by frequency and alphabetically as required).
        ///
        /// Reading the character stream: In order to successfully read the character stream this method uses
        /// a simple do-while loop that in each iteration reads a new character, then processes the character
        /// in order to make a decision if it should be accepted as part of an English word or not, and
        /// consequently adds the character in the next word or rejects it.
        ///
        /// EndOfStreamException: The algorithm then deals with the EndOfStreamException, thrown by the reader
        /// when the end of the character stream has been reached, by making sure in the finally sub-block of the
        /// try-catch-finally block that the very last word read is captured correctly and stored in the dictionary in the
        /// same way as with all the previous words.
        ///
        /// Finally, this method makes sure that the dictionary of words and word frequencies is sorted
        /// according to the requirements, i.e. first by word frequency and then alphabetically, and then creates
        /// the desired output.
        ///
        /// Parts of this algorithm, such as processing a character, forming a word, sorting the dictionary, and
        /// creating the required output, have been implemented as separate methods in order to modularise and
        /// declutter the main algorithm. The purpose here is to improve its readability and at the same time to
        /// demonstrate how to create methods for reoccurring tasks and functions, for improved reusability,
        /// and for making it more clear where and how the state of certain objects is changed (instead of changing the
        /// state of these objects all over the place). I have chosen to pass parameters back and forth to these
        /// methods (even if in some cases they are dealing with the member variables that are already available
        /// to them) in order to show that we could easily move them out of this class altogether, maybe into some
        /// sort of helper class where they could be re-used by other classes of a hypothetical bigger program.
        /// For simplicity I have left these methods in this class.
        /// </summary>
        /// <param name="reader">An object of a class that implements the ICharacterReader interface, which provides
        /// a method for reading the next character of a character stream.</param>
        /// <param name="output">An object of a class that implements the IOutputResult interface, which allows to
        /// format an output according to specified requirements.</param>
        public Task RunQuestionOne(ICharacterReader reader, IOutputResult output)
        {
            // The whole job is pushed onto a worker via Task.Run, so a caller can start several
            // readers without waiting for each one to finish before starting the next.
            return Task.Run(async () =>
            {
                // Accumulates the word currently being assembled from the stream.
                string nextWord = string.Empty;

                using (reader)
                {
                    try
                    {
                        // Main loop: pull one character at a time and let ProcessNextChar decide
                        // what to do with it. The loop only ends when GetNextChar() throws.
                        while (true)
                        {
                            var nextChar = reader.GetNextChar();
                            ProcessNextChar(nextChar, ref nextWord, wordDictionary);
                        }
                    }
                    catch (EndOfStreamException endOfStream)
                    {
                        // Stand-in for real logging: the exception type name is written to the console.
                        Console.WriteLine($"Error reading stream: {endOfStream.GetType().Name}.");
                    }
                    finally
                    {
                        // The word that was still being assembled when the loop ended must not be lost.
                        if (!string.Equals(nextWord, string.Empty))
                        {
                            AddStringToDictionary(wordDictionary, nextWord);
                        }
                    }

                    // Unless the call was initiated by question two, sort the dictionary and
                    // create the output using the default delay period.
                    // NOTE(review): the result of CreateOutputAsync is not awaited here; the delay
                    // below appears to be what gives it time to finish - confirm.
                    if (!wasCallInitiatedInQuestionTwo)
                    {
                        CreateOutputAsync(SortDictionary(wordDictionary), output, defaultDelayPeriod);
                    }

                    // Wait for the question-one timeout before the task completes.
                    await DelayTimerAsync(questionOneTimeout);
                }
            });
        }
Пример #5
0
        /// <summary>
        /// This is the answer to question 1.
        ///
        /// ASSUMPTIONS:
        /// ============
        ///
        /// 1. The text read by the reader only contains words of the English language, so in this
        /// case there is no provision for words of other languages written in different alphabets,
        /// or for numerals, etc. Please also see the assumptions listed in class CharExtensions.cs
        /// for more details.
        ///
        /// 2. The second assumption, and in accordance to assumption 1 above, is that when it comes
        /// to deciding if a character is part of a word or not, we assume that we are always dealing
        /// with characters of the English alphabet, both lower case and capitals. So, there is no
        /// provision for characters of different alphabets, numerals, etc.
        ///
        /// 3. Another assumption is that it is preferable to deal with the input character stream
        /// dynamically, on the fly, instead of reading the whole stream and storing it in a local
        /// variable first before processing it. This, for example, could help in situations of extremely
        /// long streams (for example reading a character stream from a file) that would require the use
        /// of extensive amounts of in-memory storage prior to processing.
        ///
        /// LOGIC:
        /// ======
        ///
        /// An instance of this class (DeveloperTestImplementation) is created elsewhere in the code
        /// (specifically in the unit test: StandardTest.TestQuestionOne()). At that moment two objects
        /// are passed to this method as dependencies using the method dependency injection pattern,
        /// the first one is a reader object, which is an instance of the SimpleCharacterReader class that
        /// implements the ICharacterReader interface, and the second one is an output object, which is an
        /// instance of the Question1TestOutput class that implements the IOutputResult interface.
        ///
        /// The purpose of this method is to use these two objects in order to read a character stream,
        /// dynamically process the character stream in order to separate it into English words, then order
        /// those words by frequency and then alphabetically, and finally create an appropriate output in
        /// the required output format that will be tested by the relevant unit test mentioned above for its
        /// correctness (i.e. the words that it contains, the frequency with which each word appears in the character
        /// stream, and whether these words have been ordered by frequency and alphabetically as required).
        ///
        /// Reading the character stream: In order to successfully read the character stream this method uses
        /// a simple do-while loop that in each iteration reads a new character, then decides if the character
        /// is a letter of the English alphabet or not (using the extension method IsLetter()) and accordingly
        /// either adds the character to the next word if it is indeed a letter or adds the word to a dictionary
        /// of words if the last read character is a white space, new line character, a comma, etc, i.e. anything
        /// other than a letter. When a word is added to the dictionary the algorithm makes sure that the
        /// string that holds the next word is initialised to an empty string in order to be able to hold the
        /// next word successfully. The algorithm then deals with the EndOfStreamException raised by the reader
        /// by making sure in the finally part of the try-catch-finally block that the very last word read is not
        /// lost, but stored in the dictionary in the same way with all the previous words.
        ///
        /// The last line of this method makes sure that the dictionary of words and word frequencies is sorted
        /// according to the requirements, i.e. first by word frequency and then alphabetically, and then creates
        /// the desired output. These two tasks, sorting the dictionary and creating the required output, have
        /// been implemented in separate private methods in order to declutter the main algorithm, improve its
        /// readability, and demonstrate how to create methods for reoccurring tasks and functions, improving reusability,
        /// and making it more clear where and how we change the state of certain objects (instead of changing the
        /// state of objects all over the place). For example we can see that the method AddStringToDictionary(...)
        /// is called twice, once from inside the main do-while loop and then again from the finally sub-block of the
        /// try-catch-finally block.
        /// </summary>
        /// <param name="reader">An object of a class that implements the ICharacterReader interface, which provides
        /// a method for reading the next character of a character stream.</param>
        /// <param name="output">An object of a class that implements the IOutputResult interface, which allows to
        /// format an output according to specific requirements.</param>
        public void RunQuestionOne(ICharacterReader reader, IOutputResult output)
        {
            // Maps each word to the number of times it has appeared in the input stream.
            IDictionary <string, int> wordDictionary = new Dictionary <string, int>();

            // The word currently being assembled from consecutive letters.
            string nextWord = string.Empty;

            using (reader)
            {
                try
                {
                    // Main loop: read the stream one character at a time and split it into words.
                    // The loop only ends when GetNextChar() throws EndOfStreamException.
                    while (true)
                    {
                        char nextChar = reader.GetNextChar();

                        if (nextChar.IsLetter())
                        {
                            // Lower-case with the invariant culture so that word keys do not depend
                            // on the machine's current culture (e.g. Turkish 'I' -> dotless 'i'),
                            // and without allocating a temporary one-character string.
                            nextWord += char.ToLowerInvariant(nextChar);
                        }
                        else if (nextWord.Length > 0)
                        {
                            // A non-letter ends the current word: record it and start afresh.
                            AddStringToDictionary(wordDictionary, nextWord);
                            nextWord = string.Empty;
                        }
                    }
                }
                catch (EndOfStreamException e)
                {
                    // Stand-in for real logging: the exception type name is written to the console.
                    Console.WriteLine($"Error reading stream: {e.GetType().Name}.");
                }
                finally
                {
                    // Do not lose the word that was still being assembled when the stream ended.
                    if (nextWord.Length > 0)
                    {
                        AddStringToDictionary(wordDictionary, nextWord);
                    }
                }

                // Sort the dictionary and create the required output from the sorted result.
                CreateOutput(SortDictionary(wordDictionary), output);
            }
        }