/// <summary>
/// Creates a character reader on top of <paramref name="inputStream"/>.
/// </summary>
/// <param name="inputStream">The text source; it is wrapped in a <c>BufferedTextReader</c>.</param>
public TSQLCharacterReader(TextReader inputStream)
 {
     // Read is called very frequently, so the supplied reader is always wrapped in a
     // buffering layer instead of trusting the caller to have passed a buffered one.
     var bufferedReader = new BufferedTextReader(inputStream);
     _inputStream = bufferedReader;

     // NOTE(review): -1 presumably means "nothing read yet" - confirm against Position's users.
     Position = -1;
 }
Example #2
0
        /// <summary>
        /// Answer to question 1: reads a character stream, splits it into words, counts how often
        /// each word occurs and produces an output ordered by frequency and then alphabetically.
        ///
        /// ASSUMPTIONS (as stated by the original author):
        ///
        /// 1. The input only contains English words. There is no provision for other alphabets,
        ///    numerals, etc. (see also the assumptions listed in CharExtensions.cs).
        ///
        /// 2. Consequently, a character counts as part of a word only if it is an upper or lower case
        ///    letter of the English alphabet, or a symbol accepted inside an English word (e.g. a hyphen).
        ///
        /// 3. The stream is processed on the fly, character by character, rather than being read
        ///    completely into memory first, so that very long streams do not need extensive storage.
        ///
        /// LOGIC:
        ///
        /// The whole job is handed to <c>Task.Run</c>, so the caller gets a task back immediately and
        /// several readers can be processed in parallel (this is how question 2 is said to use it).
        ///
        /// Inside the task, characters are requested from the reader in an endless loop and each one is
        /// passed to <c>ProcessNextChar</c> together with the word under construction and the word
        /// dictionary. The loop ends when <c>GetNextChar()</c> throws <c>EndOfStreamException</c>; the
        /// exception type is written to the console, and the <c>finally</c> block stores the word
        /// that was still being built so that the last word of the stream is not lost.
        ///
        /// Afterwards, unless the call was initiated by question 2, the dictionary is sorted and handed
        /// to <c>CreateOutputAsync</c>. Finally the task waits for <c>questionOneTimeout</c> before it
        /// completes. The reader is disposed when the task leaves the <c>using</c> block.
        ///
        /// Character processing, word storage, sorting and output creation live in separate helper
        /// methods that receive their data as parameters, so they could be moved into a helper class.
        /// </summary>
        /// <param name="reader">An object of a class that implements the ICharacterReader interface, which
        /// supplies the next character of a character stream. It is disposed by this method.</param>
        /// <param name="output">An object of a class that implements the IOutputResult interface, which
        /// receives the formatted result.</param>
        /// <returns>The task created by <c>Task.Run</c> for the work described above.</returns>
        public Task RunQuestionOne(ICharacterReader reader, IOutputResult output)
        {
            // Everything runs inside Task.Run so that several readers can be processed in parallel
            // without waiting for the synchronous completion of each one.
            return Task.Run(async () =>
            {
                // The word currently being assembled from the incoming characters.
                string pendingWord = string.Empty;

                using (reader)
                {
                    try
                    {
                        // Main loop: fetch one character at a time and let ProcessNextChar decide
                        // what to do with it. There is no exit condition here; the loop only ends
                        // when GetNextChar() throws.
                        while (true)
                        {
                            var nextChar = reader.GetNextChar();
                            ProcessNextChar(nextChar, ref pendingWord, wordDictionary);
                        }
                    }
                    catch (EndOfStreamException endOfStream)
                    {
                        // Stand-in for real logging: the exception type is simply written to the
                        // console, since a dedicated logger is out of scope for this exercise.
                        Console.WriteLine($"Error reading stream: {endOfStream.GetType().Name}.");
                    }
                    finally
                    {
                        // The exception interrupts the loop before the final word has been stored,
                        // so store it here if one is still pending.
                        if (pendingWord != string.Empty)
                        {
                            AddStringToDictionary(wordDictionary, pendingWord);
                        }
                    }

                    // Output is only produced here when the call did not originate from question 2.
                    // defaultDelayPeriod belongs to the interval-output mechanism of question 2; the
                    // original author notes that its default of zero means immediate output.
                    // NOTE(review): the result of CreateOutputAsync is not awaited - confirm that
                    // the delay below is really meant to cover its completion.
                    if (!wasCallInitiatedInQuestionTwo)
                    {
                        var sortedWords = SortDictionary(wordDictionary);
                        CreateOutputAsync(sortedWords, output, defaultDelayPeriod);
                    }

                    // Give the task some time to complete before leaving.
                    await DelayTimerAsync(questionOneTimeout);
                }
            });
        }
Example #3
0
        /// <summary>
        /// Checks whether the value at <paramref name="position"/> belongs to a line ending and, if so,
        /// reports where that line ending begins and ends.
        /// </summary>
        /// <param name="blockReader">Source of the raw values and of the code point size and stream length.</param>
        /// <param name="lineEndings">Decides whether a decoded character is a new-line character.</param>
        /// <param name="charReader">Decodes a full character around the given position.</param>
        /// <param name="position">Position of the value to examine.</param>
        /// <param name="begin">Receives the position at which the line ending starts, or -1 if none was found.</param>
        /// <param name="end">Receives the position just past the line ending, or -1 if none was found.</param>
        /// <returns><c>true</c> if a line ending was recognised; otherwise <c>false</c>.</returns>
        bool IsNewLine(BlockReader blockReader, LineEndings lineEndings, ICharacterReader charReader, long position, out long begin, out long end)
        {
            // Until a line ending is recognised both bounds report "not found".
            begin = -1;
            end   = -1;

            uint current = blockReader.ReadValue(position);

            if (current == LineEndings.CR)
            {
                // CR on its own is a line ending; a directly following LF is absorbed into it.
                begin     = position;
                position += blockReader.MinCodePointSize;
                end       = position;

                if (position < blockReader.StreamLength)
                {
                    uint following = blockReader.ReadValue(position);
                    if (following == LineEndings.LF)
                    {
                        end += blockReader.MinCodePointSize;
                    }
                }

                return true;
            }

            if (current == LineEndings.LF)
            {
                // LF on its own is a line ending; a directly preceding CR moves the start back.
                begin     = position;
                end       = position + blockReader.MinCodePointSize;
                position -= blockReader.MinCodePointSize;

                if (position >= 0)
                {
                    uint preceding = blockReader.ReadValue(position);
                    if (preceding == LineEndings.CR)
                    {
                        begin = position;
                    }
                }

                return true;
            }

            // Neither CR nor LF: decode the character at this position and ask the line-ending
            // table whether it counts as a new line.
            if (!charReader.TryReadCharacter(blockReader, position, current, out uint ch, out long first, out long last)
                || !lineEndings.IsLNewLine(ch))
            {
                return false;
            }

            begin = blockReader.PositionFirstByte(first);
            end   = blockReader.PositionFirstByte(last) + blockReader.MinCodePointSize;
            return true;
        }
Example #4
0
        /// <summary>
        /// Creates a line reader over <paramref name="stream"/> and selects the block reader, line-ending
        /// table and character reader that match <paramref name="encoding"/>.
        /// </summary>
        /// <param name="stream">The stream handed to the selected block reader.</param>
        /// <param name="encoding">The encoding whose <c>WebName</c> selects the reader components.
        /// Any name other than the UTF-8/16/32 ones listed below falls back to the simple components.</param>
        public LineReader(Stream stream, Encoding encoding)
        {
            this.encoding = encoding;

            string webName = encoding.WebName;

            if (webName == "utf-8")
            {
                blockReader = new BlockReader(stream);
                lineEndings = new UnicodeLineEndings();
                charReader  = new UTF8CharacterReader();
            }
            else if (webName == "utf-16" || webName == "utf-16BE")
            {
                // The second constructor argument is true only for the big-endian variant.
                bool bigEndian = webName == "utf-16BE";

                blockReader = new BlockReader16(stream, bigEndian);
                lineEndings = new UnicodeLineEndings();
                charReader  = new UTF16CharacterReader();
            }
            else if (webName == "utf-32" || webName == "utf-32BE")
            {
                // The second constructor argument is true only for the big-endian variant.
                bool bigEndian = webName == "utf-32BE";

                blockReader = new BlockReader32(stream, bigEndian);
                lineEndings = new UnicodeLineEndings();
                charReader  = new UTF32CharacterReader();
            }
            else
            {
                // Every other encoding uses the plain block reader with the basic line endings.
                blockReader = new BlockReader(stream);
                lineEndings = new LineEndings();
                charReader  = new SimpleCharacterReader();
            }

            InitNewLineMarker();

            // Trim set: every line-ending code point that fits into a single, non-surrogate char,
            // followed by the BOM character.
            var singleCharCodePoints = lineEndings.CodePoints
                                       .Where(codePoint => codePoint <= char.MaxValue && !char.IsSurrogate((char)codePoint));

            TrimCharacters = singleCharCodePoints
                             .Select(codePoint => (char)codePoint)
                             .Concat(new char[] { BOM })
                             .ToArray();
        }
        /// <summary>
        /// Reads characters from <paramref name="cr"/> index by index, starting at index 0, until the
        /// '¬' terminator has been read, and returns everything that was read as a single string.
        /// </summary>
        /// <param name="cr">The reader to pull characters from. It is disposed before this method
        /// returns, including when reading throws.</param>
        /// <returns>All characters read, including the terminating '¬'.</returns>
        private string ReadFile(ICharacterReader cr)
        {
            // A builder avoids re-copying the whole text for every appended character.
            var text = new System.Text.StringBuilder();

            try
            {
                char c;
                int  index = 0;

                do
                {
                    c = cr.SimpleCharacterReader(index);
                    text.Append(c);
                    index++;
                }while (c != '¬');
            }
            finally
            {
                // Release the reader even if a read failed part-way through.
                cr.Dispose();
            }

            return text.ToString();
        }
Example #6
0
        /// <summary>
        /// Program entry point: counts words across a slow and a simple character reader using a
        /// <c>ParallelWordCounter</c>, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        private static void Main(string[] args)
        {
            ICharacterReader[] readers =
            {
                new SlowCharacterReader(),
                new SimpleCharacterReader()
            };

            // NOTE(review): presumably the interval between PrintOrderedWordCount calls - confirm in EnableLogging.
            var logPeriod = TimeSpan.FromSeconds(10);

            // Words are compared case-insensitively using the invariant culture.
            using (var counter = new ParallelWordCounter(readers, StringComparer.InvariantCultureIgnoreCase))
            {
                counter.EnableLogging(PrintOrderedWordCount, logPeriod);

                // The returned result is not used any further here.
                var totals = counter.GetWordCount();
            }

            Console.WriteLine("Done...");
            Console.ReadKey();
        }
        } //accessor for myDictionary mainly used for the testing

        /// <summary>
        /// Constructor accepting an ICharacterReader object.
        /// Runs the reader over its text, takes the reader's word list and counts the words:
        /// a word that is not yet in the dictionary is added with a count of 1, and a word
        /// that is already present has its count increased by 1.
        /// </summary>
        /// <param name="cr">The character reader that supplies the text and the resulting list of words.
        /// It is disposed by this constructor.</param>
        public ReadManager(ICharacterReader cr)
        {
            // The returned text is not needed here; ReadFile is called for its effect on the reader.
            // NOTE(review): presumably reading is what fills cr.MyListOfWords - confirm in the reader.
            ReadFile(cr);

            this.wordList = cr.MyListOfWords;
            foreach (var word in wordList)
            {
                // One lookup per word instead of ContainsKey followed by the indexer.
                if (myDictionary.TryGetValue(word, out var count))
                {
                    myDictionary[word] = count + 1;
                }
                else
                {
                    myDictionary.Add(word, 1);
                }
            }

            // NOTE(review): ReadFile has already called cr.Dispose(), so this is a second call;
            // kept because it is harmless only if the reader's Dispose is idempotent - confirm.
            cr.Dispose();
        }
Example #8
0
		/// <summary>
		/// Creates a mark on <paramref name="reader"/> and records a <c>Position</c> built from it as the start.
		/// </summary>
		/// <param name="reader">The character reader this mark refers to.</param>
		public Mark(ICharacterReader reader)
		{
			this.reader = reader;

			// Capture the position at construction time.
			var startPosition = new Position(this.reader);
			this.start = startPosition;
		}
Example #9
0
 /// <summary>
 /// Creates a word counter that stores the given reader and string comparer for later use.
 /// </summary>
 /// <param name="reader">The character reader to keep.</param>
 /// <param name="comparer">The string equality comparer to keep.</param>
 public WordCounter(ICharacterReader reader, IEqualityComparer<string> comparer)
 {
     _comparer = comparer;
     _reader   = reader;
 }
Example #10
0
 /// <summary>
 /// Creates a word counter over <paramref name="reader"/> that uses the default string equality comparer.
 /// </summary>
 /// <param name="reader">Passed through to the two-argument constructor.</param>
 public WordCounter(ICharacterReader reader)
     : this(reader, EqualityComparer<string>.Default)
 {
 }
Example #11
0
        /// <summary>
        /// Creates a position from the row and column currently reported by <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">The character reader whose <c>Row</c> and <c>Column</c> are copied.</param>
        public Position(ICharacterReader reader)
            : this(reader.Row, reader.Column)
        {
        }
Example #12
0
 /// <summary>
 /// Not implemented; calling this method always throws.
 /// </summary>
 /// <param name="reader">Unused.</param>
 /// <param name="output">Unused.</param>
 /// <exception cref="System.NotImplementedException">Always thrown.</exception>
 public Task RunQuestionOne(ICharacterReader reader, IOutputResult output)
     => throw new System.NotImplementedException();
Example #13
0
 /// <summary>
 /// Awaitable wrapper around <c>RunQuestionOne</c>, so that the solution to question 1 can be
 /// awaited without blocking the rest of the program.
 /// </summary>
 /// <param name="reader">An object of a class that implements the ICharacterReader interface, which
 /// supplies the next character of a character stream.</param>
 /// <param name="output">An object of a class that implements the IOutputResult interface, which
 /// formats the output according to specific requirements.</param>
 /// <returns>A task that completes once the awaited <c>RunQuestionOne</c> call has completed.</returns>
 private async Task RunQuestionOneAsync(ICharacterReader reader, IOutputResult output)
     => await RunQuestionOne(reader, output);
 /// <summary>
 /// Creates an analyzer that keeps the given character reader for later use.
 /// </summary>
 /// <param name="reader">The character reader to store.</param>
 public WordFrequencyAnalizer(ICharacterReader reader) => _reader = reader;
 /// <summary>
 /// Not implemented; calling this method always throws.
 /// </summary>
 /// <param name="reader">Unused.</param>
 /// <param name="output">Unused.</param>
 /// <param name="cancellationToken">Unused.</param>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public Task RunQuestionOne(ICharacterReader reader, IOutputResult output, CancellationToken cancellationToken)
     => throw new NotImplementedException();
Example #16
0
 /// <summary>
 /// Creates an enumerator that keeps the given character reader for later use.
 /// </summary>
 /// <param name="reader">The character reader to store.</param>
 public WordEnumerator(ICharacterReader reader) => _reader = reader;
 /// <summary>
 /// Not implemented; calling this method always throws.
 /// </summary>
 /// <param name="reader">Unused.</param>
 /// <param name="output">Unused.</param>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public void RunQuestionOne(ICharacterReader reader, IOutputResult output)
     => throw new NotImplementedException();
Example #18
0
        /// <summary>
        /// Answer to question 1: reads a character stream, splits it into words, counts how often
        /// each word occurs and produces an output ordered by frequency and then alphabetically.
        ///
        /// ASSUMPTIONS (as stated by the original author):
        ///
        /// 1. The input only contains English words. There is no provision for other alphabets,
        ///    numerals, etc. (see also the assumptions listed in CharExtensions.cs).
        ///
        /// 2. Consequently, a character counts as part of a word only if it is an upper or lower case
        ///    letter of the English alphabet.
        ///
        /// 3. The stream is processed on the fly, character by character, rather than being read
        ///    completely into memory first, so that very long streams do not need extensive storage.
        ///
        /// LOGIC:
        ///
        /// Characters are requested from the reader in an endless loop. A character for which the
        /// IsLetter() extension method returns true is lower-cased and appended to the word under
        /// construction. Any other character (white space, punctuation, new line, ...) ends the
        /// current word: a non-empty word is added to the dictionary and the buffer is reset.
        ///
        /// The loop ends when GetNextChar() throws EndOfStreamException. The exception type is written
        /// to the console, and the finally block stores the word that was still being built so that
        /// the last word of the stream is not lost. AddStringToDictionary(...) is therefore called
        /// from two places: inside the loop and in the finally block.
        ///
        /// Finally the dictionary is sorted and passed to CreateOutput. Sorting and output creation
        /// are separate methods to keep this method readable and to make it clear where state changes.
        /// The reader is disposed when the using block is left.
        ///
        /// Lower-casing uses the invariant culture so that the result does not depend on the machine's
        /// current culture (under Turkish culture, for example, 'I' would not lower-case to 'i').
        /// </summary>
        /// <param name="reader">An object of a class that implements the ICharacterReader interface, which
        /// supplies the next character of a character stream. It is disposed by this method.</param>
        /// <param name="output">An object of a class that implements the IOutputResult interface, which
        /// receives the formatted result.</param>
        public void RunQuestionOne(ICharacterReader reader, IOutputResult output)
        {
            // Maps each word to the number of times it has appeared in the stream so far.
            IDictionary<string, int> wordDictionary = new Dictionary<string, int>();

            // Collects the letters of the word currently being read. A builder is used so that
            // appending a letter does not copy the whole word every time.
            var nextWord = new System.Text.StringBuilder();

            using (reader)
            {
                try
                {
                    // Main loop: there is no exit condition here; it only ends when
                    // GetNextChar() throws at the end of the stream.
                    while (true)
                    {
                        char nextChar = reader.GetNextChar();

                        if (nextChar.IsLetter())
                        {
                            // Culture-independent lower-casing keeps word keys stable on every machine.
                            nextWord.Append(char.ToLowerInvariant(nextChar));
                        }
                        else if (nextWord.Length > 0)
                        {
                            // A non-letter ends the current word: store it and start afresh.
                            AddStringToDictionary(wordDictionary, nextWord.ToString());
                            nextWord.Clear();
                        }
                    }
                }
                catch (EndOfStreamException e)
                {
                    // Stand-in for real logging: the exception type is simply written to the
                    // console, since a dedicated logger is out of scope for this exercise.
                    Console.WriteLine($"Error reading stream: {e.GetType().Name}.");
                }
                finally
                {
                    // The exception interrupts the loop before the final word has been stored,
                    // so store it here if one is still pending.
                    if (nextWord.Length > 0)
                    {
                        AddStringToDictionary(wordDictionary, nextWord.ToString());
                    }
                }

                // Sort by word frequency and then alphabetically, and create the required output.
                CreateOutput(SortDictionary(wordDictionary), output);
            }
        }