/// <summary>
        /// Builds a summary by pulling the sentences with the given ids out of <paramref name="text"/>.
        /// </summary>
        /// <param name="sentenceIds">Ids of the sentences to include; they are looked up and appended in list order.</param>
        /// <param name="text">Model each id is resolved against through <c>GetSentence</c>.</param>
        /// <returns>A newly created <see cref="TextModel"/> holding the looked-up sentences.</returns>
        public TextModel AssembleSummarySentences(List <int> sentenceIds, TextModel text)
        {
            var result = new TextModel();

            foreach (int id in sentenceIds)
            {
                // Resolve the id against the source text and append it straight to the summary.
                result.AddSentence(text.GetSentence(id));
            }

            return result;
        }
Пример #2
0
        /// <summary>
        /// Builds a <see cref="TextModel"/> from the input sentences without stemming them.
        /// </summary>
        /// <param name="inSentences">Sentences to tokenize, processed in order.</param>
        /// <returns>
        /// A new <see cref="TextModel"/> with one entry per input sentence, each holding the
        /// tokens returned by <c>TokenizeSentence</c>.
        /// </returns>
        private static TextModel GetRawText(IList <string> inSentences)
        {
            var model = new TextModel();

            foreach (string sentence in inSentences)
            {
                // Tokens are stored exactly as TokenizeSentence returns them; nothing is filtered here.
                model.AddSentence(TokenizeSentence(sentence));
            }

            return model;
        }
Пример #3
0
        /// <summary>
        /// Removes stop words from every sentence and collects the remaining words in a <see cref="TextModel"/>.
        /// </summary>
        /// <remarks>
        /// This method itself only filters stop words; punctuation is presumably stripped by
        /// <c>TokenizeSentence</c> (not visible here - confirm against its implementation).
        /// The stop-word file is read on every call.
        /// </remarks>
        /// <param name="inSentences">
        /// Sentences to clean. Each entry is overwritten in place with its remaining words joined by "|".
        /// </param>
        /// <param name="stopWordFilePath">
        /// Path of the stop-word file; its content is split on runs of non-word characters.
        /// </param>
        /// <returns>A new <see cref="TextModel"/> with one cleaned sentence per input sentence.</returns>
        private TextModel RemoveStopWordsAndPunctuation(ref IList <string> inSentences, string stopWordFilePath)
        {
            TextModel text = new TextModel();

            // Load the stop words into a set so each membership test below is a hash lookup
            // rather than a linear scan of the whole list for every word of every sentence.
            // Matching stays exact (default string equality), as with the original array lookup.
            // Regex.Split may yield empty entries when the file starts or ends with a separator;
            // they are kept so the filtering result is unchanged.
            var stopWordListRaw = File.ReadAllText(stopWordFilePath);
            var stopWords = new HashSet<string>(Regex.Split(stopWordListRaw, "\\W+"));

            for (int i = 0; i < inSentences.Count; i++)
            {
                var wordsFromSentence = TokenizeSentence(inSentences[i]);

                // Drop every token that appears in the stop-word set.
                wordsFromSentence.RemoveAll(word => stopWords.Contains(word));

                // Side effect visible to the caller: the input entry is replaced by the cleaned words.
                inSentences[i] = string.Join("|", wordsFromSentence);

                // Add the cleaned sentence to the TextModel.
                text.AddSentence(wordsFromSentence);
            }

            return(text);
        }