public void ValidSetAt()
{
    // Purpose: SetAt must overwrite the value at index 3 for the selector it is
    // called with, and must not disturb the value stored under the other selector.
    // Assert.AreEqual takes (expected, actual); keeping that order makes failure
    // messages report the right value as "expected".

    // Write through the 'true' selector and read it back.
    flexibleWordCountModel.SetAt(true, 3, 999);
    int withSelectorTrue = flexibleWordCountModel.GetAt(true, 3);
    Assert.AreEqual(999, withSelectorTrue);

    // The 'false' selector must still hold its prior value.
    // NOTE(review): 28 presumably comes from the fixture set up outside this method - confirm.
    int withSelectorFalse = flexibleWordCountModel.GetAt(false, 3);
    Assert.AreEqual(28, withSelectorFalse);

    // Write through the 'false' selector and read it back.
    flexibleWordCountModel.SetAt(false, 3, 888);
    withSelectorFalse = flexibleWordCountModel.GetAt(false, 3);
    Assert.AreEqual(888, withSelectorFalse);
}
/// <summary>
/// Reads a text line by line, tallies words by length, and stores the resulting
/// per-length frequencies in the model: once over all words (selector <c>true</c>)
/// and once over only the words outside quotation marks (selector <c>false</c>).
/// </summary>
/// <remarks>
/// Words are split on whitespace; en and em dashes are treated as spaces. A word split
/// across two lines with a trailing hyphen is re-joined unless the continuation starts
/// with an uppercase ASCII letter. Words at least as long as the count array are tallied
/// in its last slot. Frequencies are scaled by <c>UniversalConstants.ConstantMultiplier</c>
/// and are 0 when the corresponding total word count is 0.
/// </remarks>
/// <param name="text">The reader supplying the text to process; it is read until <c>ReadLine</c> returns null.</param>
/// <param name="model">The flexible word count model to fill with counts.</param>
private void GenerateCounts(TextReader text, IFlexibleWordCountModel model)
{
    int arraySize = UniversalConstants.CountSize;
    int multiplier = UniversalConstants.ConstantMultiplier;

    int[] countsWithQuotes = new int[arraySize];
    int[] countsWithoutQuotes = new int[arraySize];
    int[] frequencyWithQuotes = new int[arraySize];
    int[] frequencyWithoutQuotes = new int[arraySize];
    int totalWordCountWithQuotes = 0;
    int totalWordCountWithoutQuotes = 0;

    string delimPattern = @"\s+";
    Regex delim = new Regex(delimPattern);

    bool inQuotes = false;
    bool continueWord = false;
    string firstHalfOfWord = "";

    // Slot in which the most recently processed word was tallied, or -1 when that word
    // produced no tally. Remembering the slot (rather than the raw length) lets a
    // hyphen-split fragment be un-counted from exactly where it was counted, even when
    // it landed in the overflow slot.
    int previousBucket = -1;

    string line;
    while ((line = text.ReadLine()) != null) // read the text line by line until the end
    {
        if (line.Length != 0) // skip empty lines
        {
            // Treat em dashes and en dashes as spaces since they don't link words together like hyphens.
            line = Regex.Replace(line, "[–—]", " ");
            string[] wordsArray = delim.Split(line.Trim());

            for (int i = 0; i < wordsArray.Length; i++)
            {
                string currentWord = wordsArray[i];

                // A whitespace-only line (or one made only of dashes) splits into a single
                // empty token; indexing into it below would throw, so skip it.
                if (currentWord.Length == 0)
                {
                    continue;
                }

                if (continueWord) // the previous non-empty line ended with a hyphen
                {
                    if (currentWord[0] >= 'A' && currentWord[0] <= 'Z')
                    {
                        // An uppercase first letter means the previous hyphen did not split a word:
                        // keep the current word as is and keep the previous fragment's tally.
                        firstHalfOfWord = "";
                        continueWord = false;
                    }
                    else
                    {
                        // The hyphen split a word: prepend the stored fragment and undo the
                        // fragment's own tally so the joined word is counted once.
                        currentWord = firstHalfOfWord + currentWord;
                        firstHalfOfWord = "";
                        continueWord = false;

                        if (previousBucket >= 0) // nothing to undo if the fragment was never counted
                        {
                            countsWithQuotes[previousBucket]--;
                            totalWordCountWithQuotes--;

                            if (!inQuotes) // outside quotations the fragment was counted in both tallies
                            {
                                countsWithoutQuotes[previousBucket]--;
                                totalWordCountWithoutQuotes--;
                            }
                        }
                    }
                }

                // If the last word of the line ends with a hyphen, store it (without the hyphen)
                // so it can be joined with the first word of the next line.
                if (i == wordsArray.Length - 1 && currentWord[currentWord.Length - 1] == '-')
                {
                    firstHalfOfWord = currentWord.Substring(0, currentWord.Length - 1);
                    continueWord = true;
                }

                // A starting quotation mark switches to "inside quotations".
                if (currentWord[0] == '"' || currentWord[0] == '“')
                {
                    inQuotes = true;
                }

                // Remove straight double quotes, then strip trailing characters other than
                // ASCII letters, digits and apostrophes.
                // NOTE(review): a leading curly quote (“) is detected above but not stripped here,
                // so it contributes to the word length - confirm whether that is intended.
                string modifiedCurrentWord = Regex.Replace(currentWord, "[\"]", "");
                modifiedCurrentWord = Regex.Replace(modifiedCurrentWord, "[^a-zA-Z0-9']+$", "");

                int wordLength = modifiedCurrentWord.Length;
                if (wordLength == 0)
                {
                    // Nothing countable; record that so a later hyphen join does not undo an older word.
                    previousBucket = -1;
                }
                else
                {
                    // Words at least as long as the array share the last slot.
                    int bucket = wordLength < arraySize ? wordLength - 1 : arraySize - 1;
                    previousBucket = bucket;

                    // Every word counts towards the "with quotes" tally...
                    totalWordCountWithQuotes++;
                    countsWithQuotes[bucket]++;

                    // ...but only words outside quotations count towards the "without quotes" tally.
                    if (!inQuotes)
                    {
                        totalWordCountWithoutQuotes++;
                        countsWithoutQuotes[bucket]++;
                    }
                }

                // An ending quotation mark switches back to "outside quotations".
                char lastChar = currentWord[currentWord.Length - 1];
                if (lastChar == '"' || lastChar == '”')
                {
                    inQuotes = false;
                }
            }
        }
    }

    // Scale each count to a frequency (count / total * multiplier). A zero total would
    // produce NaN, whose conversion to int is unspecified, so report 0 in that case.
    for (int i = 0; i < countsWithQuotes.Length; i++)
    {
        frequencyWithQuotes[i] = totalWordCountWithQuotes == 0
            ? 0
            : (int)(((double)countsWithQuotes[i] / totalWordCountWithQuotes) * multiplier);
        frequencyWithoutQuotes[i] = totalWordCountWithoutQuotes == 0
            ? 0
            : (int)(((double)countsWithoutQuotes[i] / totalWordCountWithoutQuotes) * multiplier);
    }

    Debug.Write("\nTotal with quotes: " + totalWordCountWithQuotes);
    Debug.Write("\nTotal without quotes: " + totalWordCountWithoutQuotes);

    // NOTE: if inQuotes is still true here the text had an unclosed quotation mark;
    // this is currently not reported.

    // Store the frequencies in the model: selector true = with quotes, false = without.
    for (int i = 0; i < frequencyWithQuotes.Length; i++)
    {
        model.SetAt(true, i, frequencyWithQuotes[i]);
    }

    for (int i = 0; i < frequencyWithoutQuotes.Length; i++)
    {
        model.SetAt(false, i, frequencyWithoutQuotes[i]);
    }
}