/// <summary>
/// Sorts <c>DataArray</c> in place into ascending order of each entry's <c>word</c>
/// using insertion sort, then returns that same array.
/// </summary>
/// <returns>The <c>DataArray</c> member after it has been reordered.</returns>
public worddata[] InsertionSortStrings()
{
    for (int next = 1; next < DataArray.Length; next++)
    {
        // Entry being inserted into the already-sorted prefix [0, next).
        worddata pending = DataArray[next];

        // Walk left from the entry's current position, shifting every entry whose
        // word compares greater one slot to the right to open a gap.
        int slot;
        for (slot = next; slot > 0 && DataArray[slot - 1].word.CompareTo(pending.word) > 0; slot--)
        {
            DataArray[slot] = DataArray[slot - 1];
        }

        DataArray[slot] = pending;
    }

    return DataArray;
}
/// <summary>
/// Program entry point. Analyzes each book's text file, prints the words that make up
/// more than 2% of each book, merges every book's results into one master list, prints
/// the master list (both the &gt; 2% rows and every row for the word "christ"), and
/// finally analyzes and prints the combined "00-Book of Mormon.txt" file.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    worddata[] masterList = new worddata[0];

    // Rotates 0, 1, 2 so that each sort implementation is used at least once
    // as we run through the files.
    int sortCounter = 0;

    // Book name -> file name. Kept in an array (not a Dictionary) because the books
    // must be processed in exactly this order and Dictionary enumeration order is
    // documented as undefined.
    KeyValuePair<string, string>[] filenames =
    {
        new KeyValuePair<string, string>("1 Nephi", "01-1 Nephi.txt"),
        new KeyValuePair<string, string>("2 Nephi", "02-2 Nephi.txt"),
        new KeyValuePair<string, string>("Jacob", "03-Jacob.txt"),
        new KeyValuePair<string, string>("Enos", "04-Enos.txt"),
        new KeyValuePair<string, string>("Jarom", "05-Jarom.txt"),
        new KeyValuePair<string, string>("Omni", "06-Omni.txt"),
        new KeyValuePair<string, string>("Words of Mormon", "07-Words of Mormon.txt"),
        new KeyValuePair<string, string>("Mosiah", "08-Mosiah.txt"),
        new KeyValuePair<string, string>("Alma", "09-Alma.txt"),
        new KeyValuePair<string, string>("Helaman", "10-Helaman.txt"),
        new KeyValuePair<string, string>("3 Nephi", "11-3 Nephi.txt"),
        new KeyValuePair<string, string>("4 Nephi", "12-4 Nephi.txt"),
        new KeyValuePair<string, string>("Mormon", "13-Mormon.txt"),
        new KeyValuePair<string, string>("Ether", "14-Ether.txt"),
        new KeyValuePair<string, string>("Moroni", "15-Moroni.txt"),
    };

    Console.WriteLine("INDIVIDUAL BOOKS > 2%");

    // Run through each file and analyze the text of individual books.
    foreach (KeyValuePair<string, string> item in filenames)
    {
        worddata[] newSortedArray = analyzeText(item.Value, item.Key, sortCounter);
        printMatchingWordData(newSortedArray, entry => entry.percent > 2);
        Console.WriteLine();

        // Update the master list by merging the new book list into the master list.
        merge updatedMaster = new merge(masterList, newSortedArray);
        masterList = updatedMaster.getMasterList();

        // Advance to the next sort so each type of sort gets used.
        sortCounter++;
        sortCounter %= 3;
    }

    // Prints out the merged master list after all the books have been analyzed.
    Console.WriteLine("MASTER LIST > 2%");
    printMatchingWordData(masterList, entry => entry.percent > 2);
    Console.WriteLine();

    // Prints, in master-list order, the stats for the word "christ" in every book.
    Console.WriteLine("MASTER LIST == christ");
    printMatchingWordData(masterList, entry => entry.word == "christ");
    Console.WriteLine();

    // Analyzes and prints all the word data for the entire Book of Mormon as a whole.
    Console.WriteLine("FULL TEXT > 2%");
    worddata[] fullText = analyzeText("00-Book of Mormon.txt", "Book of Mormon", 0);

    // analyzeText already sorts its result (sortCounter 0 selects insertionsort);
    // this additional pass is kept so the output path is unchanged.
    worddata[] newSortedArray1 = new insertionsort(fullText).getWorddata();
    printMatchingWordData(newSortedArray1, entry => entry.percent > 2);
}

/// <summary>
/// Writes one "book,word,count,percent" line to the console for every entry that
/// satisfies <paramref name="shouldPrint"/>, in array order.
/// </summary>
/// <param name="entries">The word data to scan.</param>
/// <param name="shouldPrint">Filter deciding which entries are printed.</param>
private static void printMatchingWordData(worddata[] entries, Predicate<worddata> shouldPrint)
{
    foreach (worddata entry in entries)
    {
        if (shouldPrint(entry))
        {
            // The rows are comma-separated, so the percent must always use '.' as its
            // decimal separator; format with the invariant culture rather than the
            // machine's current culture (which may use ',').
            Console.WriteLine(entry.book + "," + entry.word + "," + entry.count + "," + entry.percent.ToString("0.0", System.Globalization.CultureInfo.InvariantCulture));
        }
    }
}
/// <summary>
/// Merges <c>bookDataList</c> into <c>masterList</c>, replacing <c>masterList</c> with
/// the combined array. The merge orders entries by percent (descending), then count
/// (descending), then word (ascending); it assumes both input arrays are already in
/// that order.
/// </summary>
private void updateMasterList()
{
    if (masterList.Length == 0)
    {
        // Nothing to merge with yet: the book's sorted array becomes the master list.
        masterList = bookDataList;
        return;
    }

    // Destination for the merged result; filled front to back.
    worddata[] merged = new worddata[masterList.Length + bookDataList.Length];
    int indexMaster = 0;
    int indexBook = 0;
    int indexMerged = 0;

    // Standard two-pointer merge: repeatedly take whichever head entry ranks first.
    while (indexMaster < masterList.Length && indexBook < bookDataList.Length)
    {
        if (masterEntryComesFirst(masterList[indexMaster], bookDataList[indexBook]))
        {
            merged[indexMerged++] = masterList[indexMaster++];
        }
        else
        {
            merged[indexMerged++] = bookDataList[indexBook++];
        }
    }

    // At most one of these loops runs: append whatever is left of the list that
    // was not exhausted by the merge loop.
    while (indexMaster < masterList.Length)
    {
        merged[indexMerged++] = masterList[indexMaster++];
    }

    while (indexBook < bookDataList.Length)
    {
        merged[indexMerged++] = bookDataList[indexBook++];
    }

    masterList = merged;
}

/// <summary>
/// Decides whether the master-list entry ranks ahead of the book entry:
/// higher percent first, then higher count, then the alphabetically lower word.
/// </summary>
/// <param name="master">Current head entry of the master list.</param>
/// <param name="book">Current head entry of the book list.</param>
/// <returns>
/// <c>true</c> to emit <paramref name="master"/> next; <c>false</c> to emit
/// <paramref name="book"/> next (also the result when the two entries tie on every key).
/// </returns>
private static bool masterEntryComesFirst(worddata master, worddata book)
{
    if (master.percent > book.percent)
    {
        return true;
    }

    if (master.percent < book.percent)
    {
        return false;
    }

    if (master.count > book.count)
    {
        return true;
    }

    if (master.count < book.count)
    {
        return false;
    }

    // Ascending alphabetical tie-break: the master entry goes first only when its
    // word sorts strictly before the book's word. CompareTo is used (rather than an
    // ordinal comparison) to stay consistent with the string comparison used by the
    // sort code in this file.
    return master.word.CompareTo(book.word) < 0;
}
/// <summary>
/// Performs a very naive analysis of the words in a text file and returns the SORTED
/// array of worddata items, one per distinct word.
/// </summary>
/// <remarks>
/// Steps:
///   * lowercase the entire text
///   * split the text on whitespace to get a list of tokens
///   * reduce each token to its first run of letters a-z (e.g. "(between)" becomes
///     "between", "people-lehi" becomes "people")
///   * drop any token that contains no letters
///   * count the occurrences of each word into a dictionary of: word -> count
///   * create a worddata item for each distinct word, with its percent of all words
///     rounded to one decimal place
///   * sort the worddata array with insertion, bubble, or selection sort, chosen by
///     <paramref name="sortCounter"/>
/// </remarks>
/// <param name="Filename">Path of the text file to read.</param>
/// <param name="bookname">Book name stored on every worddata item produced.</param>
/// <param name="sortCounter">
/// Selects the sort implementation: 0 = insertionsort, 1 = bubblesort,
/// any other value = selectionsort.
/// </param>
/// <returns>The array returned by the selected sort's <c>getWorddata()</c>.</returns>
private static worddata[] analyzeText(string Filename, string bookname, int sortCounter)
{
    // Read the file as one string.
    string text = System.IO.File.ReadAllText(Filename);

    // Lowercase with invariant rules so the result does not depend on the machine's
    // culture (under Turkish rules "I" lowercases to a dotless i, which the a-z
    // pattern below would not match).
    text = text.ToLowerInvariant();

    // A null separator array makes Split break on every whitespace character
    // (spaces, tabs, CR, LF, ...), so tab-separated words are not merged into one token.
    string[] sepWords = text.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

    // Keep only the first run of letters in each token; tokens without any letters
    // are discarded.
    Regex letterRun = new Regex(@"([a-zA-Z]+)");
    List<string> countWords = new List<string>(sepWords.Length);
    foreach (string token in sepWords)
    {
        Match match = letterRun.Match(token);
        if (match.Success)
        {
            countWords.Add(match.Value);
        }
    }

    // Count up the occurrences of each word into a dictionary of: word -> count.
    Dictionary<string, int> dic = new Dictionary<string, int>();
    foreach (string word in countWords)
    {
        // TryGetValue leaves 'seen' at 0 for a word not yet in the dictionary.
        int seen;
        dic.TryGetValue(word, out seen);
        dic[word] = seen + 1;
    }

    // Create one worddata item per distinct word.
    worddata[] wordDataArray = new worddata[dic.Count];
    int iCount = 0;
    foreach (KeyValuePair<string, int> entry in dic)
    {
        // Percent of all counted words, rounded to 1 decimal place.
        double percent = Math.Round(entry.Value / (double)countWords.Count * 100, 1);
        wordDataArray[iCount] = new worddata(bookname, entry.Key, entry.Value, percent);
        iCount++;
    }

    // Rotate the sort being used according to sortCounter.
    switch (sortCounter)
    {
        case 0:
            return new insertionsort(wordDataArray).getWorddata();
        case 1:
            return new bubblesort(wordDataArray).getWorddata();
        default:
            return new selectionsort(wordDataArray).getWorddata();
    }
}