} //ParagraphList()

/// <summary>
/// Parameterized constructor. Walks the token list of <paramref name="t"/>, creating one
/// <c>Paragraph</c> per paragraph found, then computes the average of their <c>NumWords</c>.
/// A paragraph ends at a "\n" or "\r" token that is immediately followed by an identical
/// token, or at the end of the token list.
/// </summary>
/// <param name="t">Text object to pull Paragraphs from</param>
public ParagraphList(Text t)
{
    List<string> tokens = new List<string>(t.GetTokens());
    int firstToken; //token the search starts from
    int lastToken;  //last token in the current paragraph
    int result;     //location of first \n or \r found
    double total = 0; //for avg word count

    //NOTE(review): Paragraphs is not assigned in this constructor; this assumes it is
    //initialized elsewhere (e.g. a field/property initializer) -- confirm.

    //add first paragraph -- it always starts at token 0
    Paragraphs.Add(new Paragraph(t, 0));
    NumParagraphs = 1;
    firstToken = 0;

    while (true)
    {
        //determine end of the current paragraph
        while (true)
        {
            //each IndexOf is a linear scan, so look each marker up only once per pass
            int newline = tokens.IndexOf("\n", firstToken);
            int carriageReturn = tokens.IndexOf("\r", firstToken);

            //take whichever marker comes first; -1 if neither exists
            result = newline;
            if (result == -1 || (carriageReturn != -1 && carriageReturn < result))
            {
                result = carriageReturn;
            }

            //no marker left (result == -1) or the marker is the final token: end of text.
            //The explicit -1 check prevents indexing tokens[-1] below.
            if (result == -1 || result + 1 >= tokens.Count)
            {
                lastToken = tokens.Count - 1;
                break;
            }//if
            else if (tokens[result].Equals(tokens[result + 1])) //two consecutive newlines or returns found
            {
                lastToken = result;
                break;
            }//else if
            else
            {
                //single line break inside a paragraph -- keep searching after it
                firstToken = result + 1;
            }
        }//while

        if (lastToken != tokens.Count - 1)
        {
            //add the NEXT paragraph to the list
            Paragraphs.Add(new Paragraph(t, lastToken + 1));
            NumParagraphs++;
            firstToken = lastToken + 1;
        }//if
        else //paragraph found is the last one -- already added on previous loop
        {
            break;
        }
    }//while

    //determine avg word count
    for (int i = 0; i < NumParagraphs; i++)
    {
        total += Paragraphs[i].NumWords;
    } //for
    AvgWordCount = total / NumParagraphs;
} //ParagraphList(Text)
}//Words()

/// <summary>
/// Parameterized constructor. Creates one <c>DistinctWord</c> for every token returned by
/// <c>t.GetTokens()</c>, in token order, and then calls <c>Alphabetize()</c>.
/// </summary>
/// <param name="t">Text file to be split into DistinctWords</param>
public Words(Text t)
{
    words = new List<DistinctWord>();

    //iterate the token list directly; no intermediate list needs to be allocated
    foreach (string token in t.GetTokens())
    {
        words.Add(new DistinctWord(token));
    }//foreach

    Alphabetize();
}//Words(Text)
} //Sentence()

/// <summary>
/// Parameterized constructor. Builds a sentence that starts at token <paramref name="location"/>
/// and ends at the first ".", "?" or "!" token at or after that position, or at the last
/// token of the text when no such token exists. Sets <c>FirstToken</c>, <c>LastToken</c>,
/// <c>FullSentence</c>, <c>WordCount</c> and <c>AvgWordLength</c>.
/// </summary>
/// <param name="t">Text to pull sentence from</param>
/// <param name="location">token index where sentence begins</param>
#region SentCon
public Sentence(Text t, int location)
{
    //a token containing any non-word character is not counted as a word
    Regex r = new Regex(@"(\W)");
    List<string> tokens = new List<string>(t.GetTokens());

    //find end location: the earliest of ".", "?" and "!" at or after location (-1 if none)
    int end = tokens.IndexOf(".", location);
    int question = tokens.IndexOf("?", location);
    if (end == -1 || (question != -1 && question < end))
    {
        end = question;
    }
    int exclamation = tokens.IndexOf("!", location);
    if (end == -1 || (exclamation != -1 && exclamation < end))
    {
        end = exclamation;
    }

    FirstToken = location;
    //sentence ends without punctuation -> it runs to the last token of the text
    LastToken = (end != -1) ? end : tokens.Count - 1;

    //generate sentence
    FullSentence = Utility.FormatText(tokens, FirstToken, LastToken, 0, 0);

    //Exclusive upper bound for the word scan. When the sentence ends with punctuation the
    //terminator itself is skipped; when it does not, the final token must still be
    //examined, otherwise the last word would be silently dropped.
    int stop = (end != -1) ? end : tokens.Count;

    //find word count and total word length in a single pass; only tokens that count as
    //words contribute to the length total so the average is not inflated by other tokens
    int wordCount = 0;
    double total = 0;
    for (int i = FirstToken; i < stop; i++)
    {
        if (!r.IsMatch(tokens[i]))
        {
            wordCount++;
            total += tokens[i].Length;
        }
    } //for
    WordCount = wordCount;

    //find avg word length; a sentence with no words reports 0 instead of NaN/Infinity
    AvgWordLength = (wordCount > 0) ? total / wordCount : 0.0;
} //Sentence(Text, int)
} //SentenceList()

/// <summary>
/// Parameterized constructor. Creates a <c>Sentence</c> starting at token 0 and another one
/// after every ".", "?" or "!" token that is not the final token, then averages the
/// <c>WordCount</c> of all sentences created.
/// </summary>
/// <param name="t">Text to be split into sentences</param>
public SentenceList(Text t)
{
    List<string> tokens = t.GetTokens();
    Sentences = new List<Sentence>();

    //the first sentence always begins at the first token
    Sentences.Add(new Sentence(t, 0));
    NumSentences = 1;

    int searchFrom = 0;      //token index the next punctuation search starts at
    bool moreSentences = true;
    while (moreSentences)
    {
        //earliest of ".", "?" and "!" at or after searchFrom; -1 when none is left
        int mark = tokens.IndexOf(".", searchFrom);

        int questionMark = tokens.IndexOf("?", searchFrom);
        if (mark == -1 || (questionMark != -1 && questionMark < mark))
        {
            mark = questionMark;
        }

        int exclamationMark = tokens.IndexOf("!", searchFrom);
        if (mark == -1 || (exclamationMark != -1 && exclamationMark < mark))
        {
            mark = exclamationMark;
        }

        searchFrom = mark + 1;

        //another sentence exists only if punctuation was found and tokens follow it
        moreSentences = mark != -1 && searchFrom < tokens.Count;
        if (moreSentences)
        {
            Sentences.Add(new Sentence(t, searchFrom));
            NumSentences++;
        }
    }//while

    //average word count over every sentence in the list
    double wordTotal = 0;
    for (int i = 0; i < Sentences.Count; i++)
    {
        wordTotal += Sentences[i].WordCount;
    }//for
    AvgWordCount = wordTotal / NumSentences;
} //SentenceList(Text)
} //Paragraph()

/// <summary>
/// Parameterized constructor. Builds the paragraph that starts at token
/// <paramref name="location"/>. The paragraph ends at a "\n" or "\r" token that is
/// immediately followed by an identical token, or at the last token of the text. Sets
/// <c>FirstToken</c>, <c>LastToken</c>, <c>NumWords</c>, <c>NumSentences</c>,
/// <c>AvgSentenceWordCount</c> and <c>OriginalParagraph</c>.
/// </summary>
/// <param name="t">Text to pull paragraph from</param>
/// <param name="location">Location to begin extracting paragraph</param>
public Paragraph(Text t, int location)
{
    int result;       //location of \n or \r found
    double total = 0; //used for average calculation
    List<string> tokens = new List<string>(t.GetTokens()); //token list

    FirstToken = location;

    //determine end of paragraph
    while (true)
    {
        //each IndexOf is a linear scan, so look each marker up only once per pass
        int newline = tokens.IndexOf("\n", location);
        int carriageReturn = tokens.IndexOf("\r", location);

        //take whichever marker comes first; -1 if neither exists
        result = newline;
        if (result == -1 || (carriageReturn != -1 && carriageReturn < result))
        {
            result = carriageReturn;
        }

        //no marker left (result == -1) or the marker is the final token: end of text.
        //The explicit -1 check prevents indexing tokens[-1] below.
        if (result == -1 || result + 1 >= tokens.Count)
        {
            LastToken = tokens.Count - 1;
            break;
        }//if
        else if (tokens[result].Equals(tokens[result + 1])) //two consecutive newlines or returns found
        {
            LastToken = result;
            break;
        }//else if
        else
        {
            //single line break inside the paragraph -- keep searching after it
            location = result + 1;
        }
    }//while

    //NOTE(review): this is the number of tokens between FirstToken and LastToken, not a
    //count of word tokens only -- confirm that is the intended meaning of NumWords.
    NumWords = LastToken - FirstToken;

    //find sentence locations
    List<Sentence> sentList = new List<Sentence>();

    //start scanning at the paragraph's own first token; scanning from token 0 would
    //pick up sentences that belong to earlier paragraphs
    int nextSent = FirstToken;

    //add first sentence - it always starts where the paragraph starts
    sentList.Add(new Sentence(t, FirstToken));
    NumSentences = 1;

    while (true)
    {
        //locate nearest punctuation mark: earliest of ".", "?" and "!" (-1 if none)
        location = tokens.IndexOf(".", nextSent);

        int question = tokens.IndexOf("?", nextSent);
        if (location == -1 || (question != -1 && question < location))
        {
            location = question;
        }

        int exclamation = tokens.IndexOf("!", nextSent);
        if (location == -1 || (exclamation != -1 && exclamation < location))
        {
            location = exclamation;
        }

        nextSent = location + 1;

        //another sentence exists only if it would start before the paragraph's last token
        if (location != -1 && nextSent < LastToken)
        {
            sentList.Add(new Sentence(t, nextSent));
            NumSentences++;
        }//if
        else
        {
            break;
        }
    }//while

    foreach (Sentence s in sentList)
    {
        total += s.WordCount;
    }//foreach
    AvgSentenceWordCount = total / NumSentences;

    //generate text paragraph
    OriginalParagraph = Utility.FormatText(tokens, FirstToken, LastToken, 0, 0);
}//Paragraph(Text)