예제 #1
0
        /// <summary>
        /// Program entry point. Analyzes each individual book file, prints every word that makes up
        /// more than 2% of its book, merges the per-book results into one master list, prints the
        /// master list (words above 2%, then every entry for the word "christ"), and finally prints
        /// the words above 2% for the combined full-text file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args)
        {
            worddata[] masterList = new worddata[0];

            // Rotates 0 -> 1 -> 2 -> 0 so that every sort implementation is used at least once
            // while running through the files.
            int sortCounter = 0;

            // Book name -> file name. An array (rather than a Dictionary) is used because the
            // report must follow this exact order, and Dictionary enumeration order is undefined.
            KeyValuePair<string, string>[] books =
            {
                new KeyValuePair<string, string>("1 Nephi", "01-1 Nephi.txt"),
                new KeyValuePair<string, string>("2 Nephi", "02-2 Nephi.txt"),
                new KeyValuePair<string, string>("Jacob", "03-Jacob.txt"),
                new KeyValuePair<string, string>("Enos", "04-Enos.txt"),
                new KeyValuePair<string, string>("Jarom", "05-Jarom.txt"),
                new KeyValuePair<string, string>("Omni", "06-Omni.txt"),
                new KeyValuePair<string, string>("Words of Mormon", "07-Words of Mormon.txt"),
                new KeyValuePair<string, string>("Mosiah", "08-Mosiah.txt"),
                new KeyValuePair<string, string>("Alma", "09-Alma.txt"),
                new KeyValuePair<string, string>("Helaman", "10-Helaman.txt"),
                new KeyValuePair<string, string>("3 Nephi", "11-3 Nephi.txt"),
                new KeyValuePair<string, string>("4 Nephi", "12-4 Nephi.txt"),
                new KeyValuePair<string, string>("Mormon", "13-Mormon.txt"),
                new KeyValuePair<string, string>("Ether", "14-Ether.txt"),
                new KeyValuePair<string, string>("Moroni", "15-Moroni.txt"),
            };

            Console.WriteLine("INDIVIDUAL BOOKS > 2%");

            // Analyze the text of each individual book.
            foreach (KeyValuePair<string, string> book in books)
            {
                worddata[] bookWords = analyzeText(book.Value, book.Key, sortCounter);

                foreach (worddata entry in bookWords)
                {
                    if (entry.percent > 2)
                    {
                        printWordData(entry);
                    }
                }

                Console.WriteLine();

                // Merge this book's list into the running master list.
                masterList = new merge(masterList, bookWords).getMasterList();

                // Advance to the next sort implementation for the next file.
                sortCounter = (sortCounter + 1) % 3;
            }

            // Merged master list after all the books have been analyzed.
            Console.WriteLine("MASTER LIST > 2%");
            foreach (worddata entry in masterList)
            {
                if (entry.percent > 2)
                {
                    printWordData(entry);
                }
            }

            Console.WriteLine();

            // Per-book statistics for the word "christ", in master-list order.
            Console.WriteLine("MASTER LIST == christ");
            foreach (worddata entry in masterList)
            {
                if (entry.word == "christ")
                {
                    printWordData(entry);
                }
            }

            Console.WriteLine();

            // Word data for the entire Book of Mormon as a single text. analyzeText with
            // sortCounter 0 already returns the insertionsort result, so no second sort is needed.
            Console.WriteLine("FULL TEXT > 2%");
            worddata[] fullText = analyzeText("00-Book of Mormon.txt", "Book of Mormon", 0);

            foreach (worddata entry in fullText)
            {
                if (entry.percent > 2)
                {
                    printWordData(entry);
                }
            }
        }

        /// <summary>
        /// Writes one comma-separated row "book,word,count,percent" to the console.
        /// </summary>
        /// <param name="item">The word statistics to print.</param>
        private static void printWordData(worddata item)
        {
            // The row uses ',' as the field separator, so the percent is formatted with the
            // invariant culture to guarantee a '.' decimal separator on every machine.
            // NOTE(review): assumes worddata.percent is a double - confirm against worddata.
            Console.WriteLine(item.book + "," + item.word + "," + item.count + "," +
                              item.percent.ToString("0.0", System.Globalization.CultureInfo.InvariantCulture));
        }
예제 #2
0
        /// <summary>
        /// Performs a very naive word-frequency analysis of a text file and returns one
        /// worddata item per distinct word, ordered by one of the three sort classes.
        /// </summary>
        /// <remarks>
        /// Steps:
        ///   1. read the whole file and lowercase it (culture-invariant);
        ///   2. split the text on whitespace;
        ///   3. reduce each token to its first contiguous run of ASCII letters, e.g. "(between)"
        ///      becomes "between" and "people-lehi" becomes "people";
        ///   4. drop tokens that contain no letters at all;
        ///   5. count the occurrences of each remaining word;
        ///   6. build a worddata item per word with its count and its share of all counted words,
        ///      as a percentage rounded to one decimal place;
        ///   7. sort the items with the sort class selected by <paramref name="sortCounter"/>.
        /// The intended ordering, implemented by the sort classes (not visible here), is:
        /// highest percentage first, then highest count, then lowest alphabetical order.
        /// </remarks>
        /// <param name="Filename">Path of the text file to analyze.</param>
        /// <param name="bookname">Book label stored on every returned worddata item.</param>
        /// <param name="sortCounter">0 uses insertionsort, 1 uses bubblesort, any other value uses selectionsort.</param>
        /// <returns>The array produced by the selected sort's getWorddata().</returns>
        static private worddata[] analyzeText(string Filename, string bookname, int sortCounter)
        {
            // Read the file as one string. Lowercasing is culture-invariant so that 'I' always
            // becomes 'i' (under Turkish cultures ToLower() yields a dotless 'ı', which the
            // letter pattern below would not match).
            string text = System.IO.File.ReadAllText(Filename).ToLowerInvariant();

            // A null separator list makes Split break on every white-space character
            // (spaces, tabs, '\r', '\n', ...), not only ' ' and '\n'.
            string[] tokens = text.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

            // Keep only the first run of letters of each token; tokens without letters are dropped.
            List<string> words = new List<string>(tokens.Length);
            foreach (string token in tokens)
            {
                string letters = Regex.Match(token, @"([a-zA-Z]+)").Value;
                if (letters.Length > 0)
                {
                    words.Add(letters);
                }
            }

            // Count the occurrences of each word: word -> count.
            Dictionary<string, int> counts = new Dictionary<string, int>();
            foreach (string word in words)
            {
                int seen;
                counts.TryGetValue(word, out seen); // seen stays 0 for a word not yet counted
                counts[word] = seen + 1;
            }

            // One worddata item per distinct word.
            worddata[] wordDataArray = new worddata[counts.Count];
            int next = 0;

            foreach (KeyValuePair<string, int> entry in counts)
            {
                // Share of all counted words, as a percentage rounded to 1 decimal place.
                double percent = Math.Round(entry.Value / (double)words.Count * 100, 1);

                wordDataArray[next] = new worddata(bookname, entry.Key, entry.Value, percent);
                next++;
            }

            // The caller rotates sortCounter so that each sort implementation gets used.
            switch (sortCounter)
            {
                case 0:
                    return new insertionsort(wordDataArray).getWorddata();
                case 1:
                    return new bubblesort(wordDataArray).getWorddata();
                default:
                    return new selectionsort(wordDataArray).getWorddata();
            }
        }