/// <summary>
/// Creates a text model with the given name, backed by a copy of the supplied counts.
/// Quotes are included by default.
/// </summary>
/// <param name="name">The name handed to <c>SetName</c>.</param>
/// <param name="counts">The word count model to copy; must not be null.</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="counts"/> is null.</exception>
public TextModel(string name, IFlexibleWordCountModel counts)
{
    SetName(name);
    SetIncludeQuotes(true);

    if (counts != null)
    {
        // Keep a private copy so this model does not share the caller's instance.
        _counts = counts.Copy();
        _length = _counts.GetLength();
    }
    else
    {
        throw new ArgumentException("counts must not be null.");
    }
}
/// <summary>
/// Builds a <see cref="TextModel"/> from a name and a counts model after validating the counts.
/// </summary>
/// <param name="name">The name passed through to the new text model.</param>
/// <param name="counts">The counts model; must be non-null and have a length of at least 1.</param>
/// <returns>A new text model wrapping the given counts.</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="counts"/> is null or has a length below 1.</exception>
public ITextModel GetTextModel(string name, IFlexibleWordCountModel counts)
{
    if (counts == null)
    {
        throw new ArgumentException("Counts model must not be null.");
    }

    var length = counts.GetLength();
    if (length < 1)
    {
        throw new ArgumentException("Counts model must not have length 0.");
    }

    ITextModel created = new TextModel(name, counts);
    return created;
}
/// <summary>
/// Fixture setup: creates the factory, a flexible word count model built from two
/// six-element single count models, a null counts reference, and a text model named "model".
/// </summary>
public void Initialize()
{
    _modelFactory = new ModelFactory();

    // Raw counts for the "with quotes" and "without quotes" halves of the flexible model.
    int[] quotedCounts = { 0, 3, 1, 4, 2, 5 };
    int[] unquotedCounts = { 6, 9, 7, 10, 8, 11 };

    ISingleWordCountModel quotedModel = _modelFactory.GetSingleCountModel(quotedCounts);
    ISingleWordCountModel unquotedModel = _modelFactory.GetSingleCountModel(unquotedCounts);

    wordCountModel = _modelFactory.GetFlexibleCountModel(quotedModel, unquotedModel);
    nullWordCountModel = null;

    model = new TextModel("model", wordCountModel);
}
/// <summary>
/// Looks up the first stored text matching the criteria and converts it to a text model.
/// </summary>
/// <param name="criteria">The predicate used to select the text.</param>
/// <returns>The matching text as a model, or null when no text matches.</returns>
public ITextModel GetOne(Expression<Func<Text, bool>> criteria)
{
    Text match = _db.Texts.FirstOrDefault(criteria);
    if (match != null)
    {
        IFlexibleWordCountModel matchCounts = GetCountsFromText(match);
        ITextModel result = _modelFactory.GetTextModel(match.Name, matchCounts);

        // Copy the remaining stored attributes onto the freshly built model.
        result.SetAuthor(match.Author);
        bool includeQuotes = Convert.ToBoolean(match.IncludeQuotes);
        result.SetIncludeQuotes(includeQuotes);

        return result;
    }

    return null;
}
/// <summary>
/// Fixture setup: creates the factory and the single count models used by the
/// flexible count model tests (valid, null, length-one, length-zero and negative-count
/// inputs), plus a valid flexible model built from the two five-element inputs.
/// </summary>
public void Initialize()
{
    _modelFactory = new ModelFactory();

    // Valid inputs produced through the factory.
    singleCountIncludeQuotes = _modelFactory.GetSingleCountModel(new int[] { 1, 2, 3, 4, 5 });
    singleCountExcludeQuotes = _modelFactory.GetSingleCountModel(new int[] { 1, 14, 3, 28, 5 });

    singleCountNull = null;

    singleCountLengthOne = _modelFactory.GetSingleCountModel(new int[] { 55 });

    // These inputs are built with the fake model directly instead of the factory.
    // Each zero-length model gets its own array instance.
    singleCountLengthZeroA = new FakeSingleWordCountModel(new int[0]);
    singleCountLengthZeroB = new FakeSingleWordCountModel(new int[0]);
    singleCountNegativeCount = new FakeSingleWordCountModel(new int[] { 1, -2, 3, 4, 5 });

    flexibleWordCountModel = _modelFactory.GetFlexibleCountModel(
        singleCountIncludeQuotes,
        singleCountExcludeQuotes);
}
/// <summary>
/// Processes a text line by line, tallies its words by length, and writes the
/// resulting per-length frequencies into the supplied model. Two tallies are kept:
/// one that includes words inside quotation marks and one that excludes them.
/// </summary>
/// <remarks>
/// Words of length <c>UniversalConstants.CountSize</c> or more share the last bucket.
/// A word split across two lines with a trailing hyphen is re-joined and counted once,
/// unless the continuation starts with an uppercase ASCII letter. En and em dashes are
/// treated as word separators. Each frequency is the bucket's share of the total word
/// count scaled by <c>UniversalConstants.ConstantMultiplier</c> and truncated to an
/// integer; when a total is zero, every frequency for that tally is 0.
/// </remarks>
/// <param name="text">The text of the text itself.</param>
/// <param name="model">The flexible word count model to fill with counts.</param>
private void GenerateCounts(TextReader text, IFlexibleWordCountModel model)
{
    int arraySize = UniversalConstants.CountSize;
    int multiplier = UniversalConstants.ConstantMultiplier;

    int[] countsWithQuotes = new int[arraySize];
    int[] countsWithoutQuotes = new int[arraySize];
    int totalWordCountWithQuotes = 0;
    int totalWordCountWithoutQuotes = 0;

    // Build the patterns once instead of on every word.
    Regex delim = new Regex(@"\s+");
    Regex dashes = new Regex("[–—]");
    Regex quoteMarks = new Regex("[\"“”]");
    Regex trailingJunk = new Regex("[^a-zA-Z0-9']+$");

    bool inQuotes = false;
    bool continueWord = false;
    string firstHalfOfWord = "";

    // Bucket the most recent token was counted in, or -1 when that token was not
    // counted at all. Used to undo the count of a hyphenated first half.
    int previousBucket = -1;

    string line;
    while ((line = text.ReadLine()) != null) // read the text line by line until the end
    {
        // Em and en dashes don't link words together like hyphens, so treat them as spaces.
        string trimmedLine = dashes.Replace(line, " ").Trim();

        // Skip empty lines, including lines that held only whitespace or dashes;
        // splitting such a line would yield a single empty token.
        if (trimmedLine.Length == 0)
        {
            continue;
        }

        string[] wordsArray = delim.Split(trimmedLine);
        for (int i = 0; i < wordsArray.Length; i++)
        {
            string currentWord = wordsArray[i];

            // The previous line ended with a hyphen.
            if (continueWord)
            {
                // An uppercase first letter means the hyphen was not a line-break hyphen:
                // leave the current word alone and keep the previous count.
                bool startsUppercase = currentWord[0] >= 'A' && currentWord[0] <= 'Z';
                if (!startsUppercase)
                {
                    currentWord = firstHalfOfWord + currentWord;

                    // Undo the count of the first half, using the bucket it was actually
                    // counted in, and only if it was counted in the first place.
                    if (previousBucket >= 0)
                    {
                        countsWithQuotes[previousBucket]--;
                        totalWordCountWithQuotes--;
                        if (!inQuotes)
                        {
                            countsWithoutQuotes[previousBucket]--;
                            totalWordCountWithoutQuotes--;
                        }
                    }
                }

                firstHalfOfWord = "";
                continueWord = false;
            }

            // If the last word of the line ends with a hyphen, remember it without the hyphen.
            bool isLastWordOfLine = i == wordsArray.Length - 1;
            if (isLastWordOfLine && currentWord[currentWord.Length - 1] == '-')
            {
                firstHalfOfWord = currentWord.Substring(0, currentWord.Length - 1);
                continueWord = true;
            }

            // A starting quotation mark puts us inside quotations.
            if (currentWord[0] == '"' || currentWord[0] == '“')
            {
                inQuotes = true;
            }

            // Remove quotation marks (straight and curly), then any trailing characters
            // that are not alphanumeric or apostrophes.
            string modifiedCurrentWord = quoteMarks.Replace(currentWord, "");
            modifiedCurrentWord = trailingJunk.Replace(modifiedCurrentWord, "");

            if (modifiedCurrentWord.Length != 0)
            {
                int wordLength = modifiedCurrentWord.Length;

                // Words too long for the array all land in the last bucket.
                int bucket = wordLength < countsWithQuotes.Length
                    ? wordLength - 1
                    : countsWithQuotes.Length - 1;
                previousBucket = bucket;

                // Every word counts toward the tally that includes quotations.
                totalWordCountWithQuotes++;
                countsWithQuotes[bucket]++;

                // Only words outside quotations count toward the other tally.
                if (!inQuotes)
                {
                    totalWordCountWithoutQuotes++;
                    countsWithoutQuotes[bucket]++;
                }
            }
            else
            {
                // Nothing was counted for this token, so there is nothing to undo later.
                previousBucket = -1;
            }

            // An ending quotation mark takes us back outside quotations.
            char lastChar = currentWord[currentWord.Length - 1];
            if (lastChar == '"' || lastChar == '”')
            {
                inQuotes = false;
            }
        }
    }

    Debug.Write("\nTotal with quotes: " + totalWordCountWithQuotes);
    Debug.Write("\nTotal without quotes: " + totalWordCountWithoutQuotes);

    // Store the scaled frequencies in the model. A zero total yields a frequency of 0
    // rather than dividing by zero.
    for (int i = 0; i < countsWithQuotes.Length; i++)
    {
        int frequency = totalWordCountWithQuotes == 0
            ? 0
            : (int)(((double)countsWithQuotes[i] / totalWordCountWithQuotes) * multiplier);
        model.SetAt(true, i, frequency);
    }

    for (int i = 0; i < countsWithoutQuotes.Length; i++)
    {
        int frequency = totalWordCountWithoutQuotes == 0
            ? 0
            : (int)(((double)countsWithoutQuotes[i] / totalWordCountWithoutQuotes) * multiplier);
        model.SetAt(false, i, frequency);
    }
}
/// <summary>
/// Public pass-through to <c>GenerateCounts</c>.
/// For test purposes. Comment out later.
/// </summary>
/// <param name="text">The reader forwarded to <c>GenerateCounts</c>.</param>
/// <param name="model">The model forwarded to <c>GenerateCounts</c>.</param>
public void GenerateCountsTestMethod(TextReader text, IFlexibleWordCountModel model) =>
    GenerateCounts(text, model);
/// <summary>
/// Requests a flexible count model whose first input contains a negative count.
/// Contains no assertions of its own; the expected outcome is presumably declared
/// outside this method body (e.g. by a test attribute) - confirm.
/// </summary>
public void ConstructorInputNegativeCount()
{
    var firstInput = singleCountNegativeCount;
    var secondInput = singleCountExcludeQuotes;

    flexibleWordCountModel = _modelFactory.GetFlexibleCountModel(firstInput, secondInput);
}
/// <summary>
/// Requests a flexible count model from a length-one input and a five-element input.
/// Contains no assertions of its own; the expected outcome is presumably declared
/// outside this method body (e.g. by a test attribute) - confirm.
/// </summary>
public void ConstructorInputUnequalLength()
{
    var firstInput = singleCountLengthOne;
    var secondInput = singleCountExcludeQuotes;

    flexibleWordCountModel = _modelFactory.GetFlexibleCountModel(firstInput, secondInput);
}
/// <summary>
/// Requests a flexible count model from the two zero-length inputs.
/// Contains no assertions of its own; the expected outcome is presumably declared
/// outside this method body (e.g. by a test attribute) - confirm.
/// </summary>
public void ConstructorLengthZeroInput()
{
    var firstInput = singleCountLengthZeroA;
    var secondInput = singleCountLengthZeroB;

    flexibleWordCountModel = _modelFactory.GetFlexibleCountModel(firstInput, secondInput);
}
/// <summary>
/// Requests a flexible count model using the same single count model for both inputs.
/// Contains no assertions of its own; the expected outcome is presumably declared
/// outside this method body (e.g. by a test attribute) - confirm.
/// </summary>
public void ConstructorDuplicateInput()
{
    var sharedInput = singleCountIncludeQuotes;

    flexibleWordCountModel = _modelFactory.GetFlexibleCountModel(sharedInput, sharedInput);
}
/// <summary>
/// Requests a flexible count model with a null first input.
/// Contains no assertions of its own; the expected outcome is presumably declared
/// outside this method body (e.g. by a test attribute) - confirm.
/// </summary>
public void ConstructorNullInput()
{
    var firstInput = singleCountNull;
    var secondInput = singleCountExcludeQuotes;

    flexibleWordCountModel = _modelFactory.GetFlexibleCountModel(firstInput, secondInput);
}
/// <summary>
/// Requests a flexible count model from the two five-element inputs created in setup
/// and stores the result; contains no assertions of its own.
/// </summary>
public void ValidConstruction()
{
    var firstInput = singleCountIncludeQuotes;
    var secondInput = singleCountExcludeQuotes;

    flexibleWordCountModel = _modelFactory.GetFlexibleCountModel(firstInput, secondInput);
}