Пример #1
0
        }  //ParagraphList()

        /// <summary>
        /// Parameterized constructor. Walks the token list of a Text and adds one
        /// Paragraph per paragraph found. A paragraph ends at the first of two
        /// identical consecutive line-break tokens ("\n" "\n" or "\r" "\r"), or at
        /// the last token when no such pair remains. Also computes the average
        /// of the paragraphs' NumWords values.
        /// </summary>
        /// <param name="t">Text object to pull Paragraphs from</param>
        public ParagraphList(Text t)
        {
            List<string> tokens = new List<string>(t.GetTokens());
            int firstToken = 0;     //token the search for the next line break starts from
            int lastToken;          //last token of the paragraph currently being delimited
            double total = 0;       //for avg word count

            //add first paragraph -- it always starts at token 0
            //NOTE(review): Paragraphs is not assigned in this constructor; assumed to be initialized elsewhere -- confirm
            Paragraphs.Add(new Paragraph(t, 0));
            NumParagraphs = 1;

            while (true)
            {
                //determine end of the current paragraph
                while (true)
                {
                    int newline = tokens.IndexOf("\n", firstToken);
                    int carriageReturn = tokens.IndexOf("\r", firstToken);

                    //take whichever line-break token comes first; stays -1 when neither exists
                    int result = newline;
                    if (result == -1 || (carriageReturn != -1 && carriageReturn < result))
                    {
                        result = carriageReturn;
                    }

                    //No further line break, or the break is the final token: the paragraph
                    //runs to the end of the text. The -1 test is required because
                    //tokens[result] below would otherwise be indexed with -1 and throw.
                    if (result == -1 || result + 1 >= tokens.Count)
                    {
                        lastToken = tokens.Count - 1;
                        break;
                    }

                    //two consecutive newlines or returns found
                    if (tokens[result].Equals(tokens[result + 1]))
                    {
                        lastToken = result;
                        break;
                    }

                    //single line break inside the paragraph -- keep searching after it
                    firstToken = result + 1;
                }//while

                if (lastToken == tokens.Count - 1)
                {
                    //paragraph found is the last one -- it was already added on the previous pass
                    break;
                }

                //add the NEXT paragraph to the list and continue the search from its first token
                Paragraphs.Add(new Paragraph(t, lastToken + 1));
                NumParagraphs++;
                firstToken = lastToken + 1;
            }//while

            //determine avg word count
            for (int i = 0; i < NumParagraphs; i++)
            {
                total += Paragraphs[i].NumWords;
            }//for
            AvgWordCount = total / NumParagraphs;
        }     //ParagraphList(Text)
Пример #2
0
        }//Words()

        /// <summary>
        /// Parameterized constructor. Wraps every token of the given Text in a
        /// DistinctWord, stores them in the word list, then calls Alphabetize().
        /// </summary>
        /// <param name="t">Text whose tokens are turned into DistinctWords</param>
        public Words(Text t)
        {
            words = new List<DistinctWord>();

            //one DistinctWord per token, in token order
            foreach (string rawToken in t.GetTokens())
            {
                DistinctWord entry = new DistinctWord(rawToken);
                words.Add(entry);
            }//foreach

            //Alphabetize() is defined elsewhere in the class; presumably it sorts the list -- confirm
            Alphabetize();
        }//Words(Text)
Пример #3
0
        } //Sentence()

        /// <summary>
        /// Parameterized constructor. Builds a sentence that starts at the given
        /// token index and ends at the nearest ".", "?" or "!" token at or after it
        /// (or at the last token when none is found). Sets FirstToken, LastToken,
        /// FullSentence, WordCount and AvgWordLength.
        /// </summary>
        /// <param name="t">Text to pull sentence from</param>
        /// <param name="location">token index where sentence begins</param>
        #region SentCon

        public Sentence(Text t, int location)
        {
            //a token containing any non-word character is not counted as a word
            Regex nonWord = new Regex(@"(\W)");
            List<string> tokens = new List<string>(t.GetTokens());

            //find end location: the earliest of the three terminators, -1 when none exists
            int period = tokens.IndexOf(".", location);
            int question = tokens.IndexOf("?", location);
            int exclamation = tokens.IndexOf("!", location);

            int end = period;
            if (end == -1 || (question != -1 && question < end))
            {
                end = question;
            }
            if (end == -1 || (exclamation != -1 && exclamation < end))
            {
                end = exclamation;
            }

            FirstToken = location;
            //sentence ends without punctuation -> it runs to the last token
            LastToken = (end != -1) ? end : tokens.Count - 1;

            //generate sentence
            //NOTE(review): meaning of the two trailing 0 arguments is defined by Utility.FormatText -- confirm
            FullSentence = Utility.FormatText(tokens, FirstToken, LastToken, 0, 0);

            //Find word count and total word length in one pass. The token at LastToken
            //is excluded, as before. Only tokens counted as words contribute to the
            //length total, so the average is not inflated by punctuation or other
            //non-word tokens that were removed from the word count.
            int wordCount = LastToken - FirstToken;
            double total = 0;

            for (int i = FirstToken; i < LastToken; i++)
            {
                if (nonWord.IsMatch(tokens[i]))
                {
                    wordCount--;
                }
                else
                {
                    total += tokens[i].Length;
                }
            }    //for

            WordCount = wordCount;

            //avoid NaN/Infinity when the sentence contains no words
            AvgWordLength = (wordCount > 0) ? total / wordCount : 0.0;
        }     //Sentence(Text, int)
Пример #4
0
        } //SentenceList()

        /// <summary>
        /// Parameterized constructor. Creates one Sentence starting at token 0 and
        /// one more after every ".", "?" or "!" token that is followed by at least
        /// one further token, then averages the sentences' WordCount values.
        /// </summary>
        /// <param name="t">Text to be split into sentences</param>
        public SentenceList(Text t)
        {
            List<string> tokens = t.GetTokens();

            Sentences = new List<Sentence>();

            //the first sentence always begins at token 0
            Sentences.Add(new Sentence(t, 0));
            NumSentences = 1;

            int searchFrom = 0;         //token index the next punctuation search starts at
            bool moreSentences = true;

            while (moreSentences)
            {
                int period = tokens.IndexOf(".", searchFrom);
                int question = tokens.IndexOf("?", searchFrom);
                int exclamation = tokens.IndexOf("!", searchFrom);

                //nearest punctuation mark, or -1 when none is left
                int mark = period;
                if (mark == -1 || (question != -1 && question < mark))
                {
                    mark = question;
                }
                if (mark == -1 || (exclamation != -1 && exclamation < mark))
                {
                    mark = exclamation;
                }

                searchFrom = mark + 1;

                if (mark == -1 || searchFrom >= tokens.Count)
                {
                    //no punctuation left, or the mark is the very last token
                    moreSentences = false;
                }
                else
                {
                    Sentences.Add(new Sentence(t, searchFrom));
                    NumSentences++;
                }
            }//while

            //find avg word count
            double wordTotal = 0;
            foreach (Sentence sentence in Sentences)
            {
                wordTotal += sentence.WordCount;
            }//foreach
            AvgWordCount = wordTotal / NumSentences;
        }     //SentenceList(Text)
Пример #5
0
        } //Paragraph()

        /// <summary>
        /// Parameterized constructor. Builds the paragraph that starts at the given
        /// token index and ends at the first of two identical consecutive line-break
        /// tokens ("\n" "\n" or "\r" "\r"), or at the last token when no such pair
        /// follows. Sets FirstToken, LastToken, NumWords, NumSentences,
        /// AvgSentenceWordCount and OriginalParagraph.
        /// </summary>
        /// <param name="t">Text to pull paragraph from</param>
        /// <param name="location">Token index at which the paragraph begins</param>
        public Paragraph(Text t, int location)
        {
            List<string> tokens = new List<string>(t.GetTokens());   //token list
            double total = 0;                                        //used for average calculation

            FirstToken = location;

            //determine end of paragraph
            int searchFrom = location;
            while (true)
            {
                int newline = tokens.IndexOf("\n", searchFrom);
                int carriageReturn = tokens.IndexOf("\r", searchFrom);

                //take whichever line-break token comes first; stays -1 when neither exists
                int result = newline;
                if (result == -1 || (carriageReturn != -1 && carriageReturn < result))
                {
                    result = carriageReturn;
                }

                //No further line break, or the break is the final token: the paragraph
                //runs to the end of the text. The -1 test is required because
                //tokens[result] below would otherwise be indexed with -1 and throw.
                if (result == -1 || result + 1 >= tokens.Count)
                {
                    LastToken = tokens.Count - 1;
                    break;
                }

                //two consecutive newlines or returns found
                if (tokens[result].Equals(tokens[result + 1]))
                {
                    LastToken = result;
                    break;
                }

                //single line break inside the paragraph -- keep searching after it
                searchFrom = result + 1;
            }//while

            //token-index span of the paragraph; every token in the span is counted
            NumWords = LastToken - FirstToken;

            //find sentence locations
            List<Sentence> sentList = new List<Sentence>();

            //Start scanning at the paragraph's own first token. Scanning from token 0
            //would also pick up the punctuation of every earlier paragraph and count
            //those sentences as belonging to this one.
            int nextSent = FirstToken;

            //add first sentence - it begins where the paragraph begins
            sentList.Add(new Sentence(t, FirstToken));
            NumSentences = 1;

            while (true)
            {
                int period = tokens.IndexOf(".", nextSent);
                int question = tokens.IndexOf("?", nextSent);
                int exclamation = tokens.IndexOf("!", nextSent);

                //nearest punctuation mark, or -1 when none is left
                int mark = period;
                if (mark == -1 || (question != -1 && question < mark))
                {
                    mark = question;
                }
                if (mark == -1 || (exclamation != -1 && exclamation < mark))
                {
                    mark = exclamation;
                }

                nextSent = mark + 1;

                //stop when no mark is left or the next sentence would start at/after the paragraph end
                if (mark != -1 && nextSent < LastToken)
                {
                    sentList.Add(new Sentence(t, nextSent));
                    NumSentences++;
                }//if
                else
                {
                    break;
                }
            }//while

            foreach (Sentence s in sentList)
            {
                total += s.WordCount;
            }//foreach
            AvgSentenceWordCount = total / NumSentences;

            //generate text paragraph
            //NOTE(review): meaning of the two trailing 0 arguments is defined by Utility.FormatText -- confirm
            OriginalParagraph = Utility.FormatText(tokens, FirstToken, LastToken, 0, 0);
        }//Paragraph(Text)