/// <summary>
/// Reads the next word from <c>_reader</c>: skips characters until a letter is found, then collects
/// the following run of letters (as classified by <see cref="char.IsLetter(char)"/>).
/// </summary>
/// <returns>
/// The word converted to lower case with the invariant culture. If the reader reports the end of the
/// stream before any letter was read, the result is an empty string; if the stream ends in the middle
/// of a word, the letters collected so far are returned.
/// </returns>
/// <remarks>
/// Only <see cref="EndOfStreamException"/> is handled here. Any other exception raised by the reader
/// propagates to the caller with its original stack trace.
/// </remarks>
public string ConsumeWord()
{
    var sb = new StringBuilder();

    try
    {
        char nextCharacter;

        while (!char.IsLetter(nextCharacter = _reader.GetNextChar()))
        {
            // Drop non-letter characters that precede the word.
        }

        // The loop above exits holding the first letter of the word.
        sb.Append(nextCharacter);

        while (char.IsLetter(nextCharacter = _reader.GetNextChar()))
        {
            sb.Append(nextCharacter);
        }
    }
    catch (EndOfStreamException)
    {
        // End of stream: whatever was collected so far (possibly nothing) is the final word.
        // It falls through to the common return so it is lower-cased like every other word.
    }

    return sb.ToString().ToLowerInvariant();
}
/// <summary>
/// Reads characters from <paramref name="reader"/> until it throws <see cref="EndOfStreamException"/>,
/// grouping them into words and passing each completed word to <c>_dictionary.Add</c>.
/// </summary>
/// <remarks>
/// <para>
/// A character extends the current word when it is a letter or an accepted symbol. Any other
/// character ends the current word. Two accepted symbols in a row also end the word: the first symbol
/// is removed from the buffer, the word is added, and the second symbol is discarded.
/// </para>
/// <para>
/// The buffer is passed to <c>_dictionary.Add</c> even when it is empty (for example after consecutive
/// separators). NOTE(review): this presumes <c>_dictionary.Add</c> tolerates empty strings - confirm.
/// </para>
/// <para>
/// The method body contains no <c>await</c>, so all work completes before the returned task is handed
/// back to the caller.
/// </para>
/// </remarks>
/// <param name="reader">Source of characters; its <c>GetNextChar</c> is called once per loop iteration.</param>
/// <returns>A task that is already complete when the whole stream has been processed.</returns>
public async Task ProcessReaderAsync(ICharacterReader reader)
{
    var buffer = new StringBuilder();

    try
    {
        do
        {
            var res = reader.GetNextChar();

            if (res.IsLetter() || res.IsAcceptedSymbol())
            {
                // Read the last buffered character through the StringBuilder indexer instead of
                // materialising the whole buffer as a string for every incoming character.
                if (buffer.Length > 0
                    && buffer[buffer.Length - 1].IsAcceptedSymbol()
                    && res.IsAcceptedSymbol())
                {
                    // Doubled symbol: strip the trailing symbol, emit the word and drop the current one.
                    buffer.Remove(buffer.Length - 1, 1);
                    _dictionary.Add(buffer.ToString());
                    buffer.Clear();
                    continue;
                }

                buffer.Append(res);
            }
            else
            {
                // Separator: the buffered word (possibly empty) is complete.
                _dictionary.Add(buffer.ToString());
                buffer.Clear();
            }
        } while (true);
    }
    catch (EndOfStreamException)
    {
        // The reader signals exhaustion by throwing; flush whatever is still buffered.
        _dictionary.Add(buffer.ToString());
    }
}
/// <summary>
/// Splits the characters supplied by <paramref name="reader"/> into words and hands every completed
/// word, together with <paramref name="wordCount"/>, to <c>SetWord</c>.
/// </summary>
/// <remarks>
/// A character is appended to the current word when it is a letter, or when it is an accepted symbol
/// and the current word does not already contain <c>_filterSymbols</c>. Every other character ends the
/// word. <c>SetWord</c> is also called with an empty word when separators follow each other;
/// NOTE(review): presumably <c>SetWord</c> ignores empty input - confirm.
/// </remarks>
/// <param name="reader">Character source; reading stops when it throws <see cref="EndOfStreamException"/>.</param>
/// <param name="wordCount">Dictionary forwarded unchanged to <c>SetWord</c> for each word.</param>
private void CalculateWords(ICharacterReader reader, Dictionary<string, int> wordCount)
{
    string current = string.Empty;

    try
    {
        for (;;)
        {
            char next = reader.GetNextChar();

            // A symbol is only allowed while the word does not yet contain the filter symbols.
            bool symbolAllowed = next.IsAcceptedSymbol() && !current.Contains(_filterSymbols);

            if (symbolAllowed || next.IsLetter())
            {
                current += next;
                continue;
            }

            SetWord(wordCount, current);
            current = string.Empty;
        }
    }
    catch (EndOfStreamException)
    {
        // The reader is exhausted; the trailing word is flushed below.
    }

    SetWord(wordCount, current);
}
/// <summary>
/// Answer to question 1: reads a character stream, splits it into English words, counts how often
/// each word appears and produces the output ordered by frequency and then alphabetically.
/// </summary>
/// <remarks>
/// <para>
/// ASSUMPTIONS
/// </para>
/// <para>
/// 1. The text contains only English words. There is no provision for other alphabets, numerals and
/// so on. See the assumptions listed in CharExtensions.cs for further details.
/// </para>
/// <para>
/// 2. Consequently, when deciding whether a character belongs to a word, only letters of the English
/// alphabet (lower case and capitals) and special symbols that may appear inside an English word,
/// such as the hyphen, are considered.
/// </para>
/// <para>
/// 3. The stream is processed on the fly, one character at a time, rather than being read in full and
/// stored before processing. This avoids holding an extremely long stream (for example a large file)
/// in memory.
/// </para>
/// <para>
/// LOGIC
/// </para>
/// <para>
/// An instance of this class (DeveloperTestImplementationAsync) is created elsewhere, for example in
/// the unit test StandardTestAsync.TestQuestionOneAsync(). This method can also be invoked
/// asynchronously as part of the solution to question 2. Its two dependencies are supplied through
/// method injection: a reader (for example SimpleCharacterReader or SlowCharacterReader, both of which
/// implement ICharacterReader) and an output object (an instance of Question1TestOutput, which
/// implements IOutputResult).
/// </para>
/// <para>
/// Reading the stream: an endless loop reads one character per iteration and passes it to
/// ProcessNextChar, which decides whether the character is accepted as part of the word currently
/// being formed or rejected.
/// </para>
/// <para>
/// EndOfStreamException: the reader throws this exception once the stream is exhausted. The finally
/// block then stores the word still being formed, so the very last word is captured in the dictionary
/// exactly like all the earlier ones.
/// </para>
/// <para>
/// Finally the dictionary of words and word frequencies is sorted as required, first by frequency and
/// then alphabetically, and the output is created.
/// </para>
/// <para>
/// Processing a character, forming a word, sorting the dictionary and creating the output are separate
/// methods so that the main algorithm stays readable, recurring tasks are reusable, and it is clear
/// where the state of each object changes. Parameters are passed to those methods explicitly, even
/// where member variables are available to them, to show that they could be moved into a helper class
/// and reused by other classes of a larger program. For simplicity they remain in this class.
/// </para>
/// </remarks>
/// <param name="reader">An object of a class that implements the ICharacterReader interface, which
/// provides a method for reading the next character of a character stream.</param>
/// <param name="output">An object of a class that implements the IOutputResult interface, which allows
/// an output to be formatted according to specified requirements.</param>
/// <returns>The task, started through Task.Run, that performs the whole run.</returns>
public Task RunQuestionOne(ICharacterReader reader, IOutputResult output)
{
    // Everything runs inside a separate task, so several readers can be started in parallel without
    // waiting for each one to finish synchronously before the next one begins.
    return Task.Run(async () =>
    {
        // Accumulates the characters of the word currently being formed.
        string pendingWord = string.Empty;

        using (reader)
        {
            try
            {
                // Main loop: read the stream one character at a time, let ProcessNextChar build words
                // according to the assumptions above and record them, with their frequency, in the
                // dictionary. The loop only ends when the reader throws.
                while (true)
                {
                    ProcessNextChar(reader.GetNextChar(), ref pendingWord, wordDictionary);
                }
            }
            catch (EndOfStreamException endOfStream)
            {
                // A real application would hand this message to a dedicated logger. Logging is out of
                // scope for this exercise, so the message is simply written to the console.
                string exceptionName = endOfStream.GetType().Name;
                Console.WriteLine($"Error reading stream: {exceptionName}.");
            }
            finally
            {
                // The exception interrupts the loop while the last word may still be pending; store
                // it so that it is not lost.
                if (pendingWord != string.Empty)
                {
                    AddStringToDictionary(wordDictionary, pendingWord);
                }
            }

            // Sort by frequency and then alphabetically, and create the output. The delay period is
            // part of the mechanism that creates output at specified intervals and is only relevant
            // to question two; the default value is zero, meaning the output is created immediately.
            // NOTE(review): the result of CreateOutputAsync is not awaited here - confirm that this
            // is intentional.
            bool isStandaloneRun = !wasCallInitiatedInQuestionTwo;
            if (isStandaloneRun)
            {
                CreateOutputAsync(SortDictionary(wordDictionary), output, defaultDelayPeriod);
            }

            // Allow some time for the completion of this task before exiting.
            await DelayTimerAsync(questionOneTimeout);
        }
    });
}
/// <summary>
/// Answer to question 1: reads a character stream, splits it into lower-cased English words, counts
/// how often each word appears and produces the output ordered by frequency and then alphabetically.
/// </summary>
/// <remarks>
/// <para>
/// ASSUMPTIONS
/// </para>
/// <para>
/// 1. The text contains only English words. There is no provision for other alphabets, numerals and
/// so on. See the assumptions listed in CharExtensions.cs for further details.
/// </para>
/// <para>
/// 2. Consequently, when deciding whether a character belongs to a word, only letters of the English
/// alphabet (lower case and capitals) are considered.
/// </para>
/// <para>
/// 3. The stream is processed on the fly, one character at a time, rather than being read in full and
/// stored before processing. This avoids holding an extremely long stream (for example one read from
/// a file) in memory.
/// </para>
/// <para>
/// LOGIC
/// </para>
/// <para>
/// An instance of this class (DeveloperTestImplementation) is created elsewhere, specifically in the
/// unit test StandardTest.TestQuestionOne(). The two dependencies are supplied through method
/// injection: a reader (an instance of SimpleCharacterReader, which implements ICharacterReader) and
/// an output object (an instance of Question1TestOutput, which implements IOutputResult).
/// </para>
/// <para>
/// Reading the stream: an endless loop reads one character per iteration. A letter (as decided by the
/// extension method IsLetter()) is lower-cased and appended to the word being formed. Anything else
/// (white space, a new line, a comma, and so on) ends the word: a non-empty word is added to the
/// dictionary and the word variable is reset to an empty string for the next word.
/// </para>
/// <para>
/// Letters are lower-cased with the invariant culture, so the result does not depend on the culture
/// of the current thread.
/// </para>
/// <para>
/// EndOfStreamException: the reader throws this exception once the stream is exhausted. The finally
/// block then stores the word still being formed, so the very last word is kept in the dictionary
/// exactly like all the earlier ones.
/// </para>
/// <para>
/// The last statement sorts the dictionary of words and word frequencies as required, first by
/// frequency and then alphabetically, and creates the output. Sorting and output creation are
/// separate private methods to keep the main algorithm readable, to make recurring tasks reusable,
/// and to make it clear where the state of each object changes. AddStringToDictionary(...), for
/// example, is called from two places: the main loop and the finally block.
/// </para>
/// </remarks>
/// <param name="reader">An object of a class that implements the ICharacterReader interface, which
/// provides a method for reading the next character of a character stream.</param>
/// <param name="output">An object of a class that implements the IOutputResult interface, which allows
/// an output to be formatted according to specific requirements.</param>
public void RunQuestionOne(ICharacterReader reader, IOutputResult output)
{
    // Maps each word to the number of times it appeared in the stream.
    IDictionary<string, int> wordDictionary = new Dictionary<string, int>();

    // Accumulates the characters of the word currently being formed.
    string nextWord = string.Empty;

    using (reader)
    {
        try
        {
            // Main loop: read the stream one character at a time, split it into English words
            // according to the assumptions above and record each word and its frequency.
            // The loop only ends when the reader throws.
            do
            {
                char nextChar = reader.GetNextChar();

                if (nextChar.IsLetter())
                {
                    // Invariant lower-casing keeps 'I' -> 'i' regardless of the thread's culture.
                    nextWord += char.ToLowerInvariant(nextChar);
                }
                else if (nextWord != string.Empty)
                {
                    // A non-letter marks the end of a word: store it and start the next one.
                    AddStringToDictionary(wordDictionary, nextWord);
                    nextWord = string.Empty;
                }
            } while (true);
        }
        catch (EndOfStreamException e)
        {
            // A real application would hand this message to a dedicated logger. Logging is out of
            // scope for this exercise, so the message is simply written to the console.
            Console.WriteLine($"Error reading stream: {e.GetType().Name}.");
        }
        finally
        {
            // The exception interrupts the loop while the last word may still be pending; store
            // it so that it is not lost.
            if (nextWord != string.Empty)
            {
                AddStringToDictionary(wordDictionary, nextWord);
            }
        }

        // Sort the dictionary by word frequency and then alphabetically, and create the output.
        CreateOutput(SortDictionary(wordDictionary), output);
    }
}