/// <summary>
/// Builds the summary by looking up each selected sentence Id in the source text
/// and appending the resulting sentence to a new TextModel.
/// </summary>
/// <param name="sentenceIds">Ids of the sentences to include; they are processed in list order.</param>
/// <param name="text">The text whose sentences are looked up via GetSentence.</param>
/// <returns>A new TextModel holding one sentence per entry in <paramref name="sentenceIds"/>.</returns>
public TextModel AssembleSummarySentences(List<int> sentenceIds, TextModel text)
{
    TextModel assembled = new TextModel();

    // Walk the ids front to back so the summary keeps the order of the id list.
    for (int index = 0; index < sentenceIds.Count; index++)
    {
        assembled.AddSentence(text.GetSentence(sentenceIds[index]));
    }

    return assembled;
}
/// <summary>
/// Builds the unstemmed text: every input sentence is tokenized and added to a new TextModel.
/// </summary>
/// <param name="inSentences">The sentences to tokenize, processed in order.</param>
/// <returns>A new TextModel with one sentence added per entry in <paramref name="inSentences"/>.</returns>
private static TextModel GetRawText(IList<string> inSentences)
{
    TextModel unstemmed = new TextModel();

    foreach (string sentence in inSentences)
    {
        // Tokens are stored exactly as TokenizeSentence returns them (no further processing here).
        unstemmed.AddSentence(TokenizeSentence(sentence));
    }

    return unstemmed;
}
/// <summary>
/// Removes stop words from every sentence and collects the cleaned sentences in a new TextModel.
/// </summary>
/// <remarks>
/// NOTE(review): this method only filters stop words; no punctuation is stripped here explicitly.
/// Presumably TokenizeSentence takes care of punctuation - confirm against its implementation.
/// </remarks>
/// <param name="inSentences">
/// The sentences to clean. Each entry is overwritten in place with its remaining tokens joined by "|".
/// </param>
/// <param name="stopWordFilePath">
/// Path of the stop word file. Its content is split on runs of non-word characters to obtain the stop words.
/// </param>
/// <returns>
/// A new TextModel with one sentence added per input sentence, holding the tokens left after stop word removal.
/// </returns>
private TextModel RemoveStopWordsAndPunctuation(ref IList<string> inSentences, string stopWordFilePath)
{
    TextModel text = new TextModel();

    // Load the stop words once. A hash set gives constant-time membership checks,
    // instead of scanning the whole stop word array for every token of every sentence.
    var stopWordListRaw = File.ReadAllText(stopWordFilePath);
    var stopWords = new HashSet<string>(Regex.Split(stopWordListRaw, "\\W+"));

    for (int i = 0; i < inSentences.Count; i++)
    {
        var wordsFromSentence = TokenizeSentence(inSentences[i]);
        wordsFromSentence.RemoveAll(word => stopWords.Contains(word));

        // Write the cleaned sentence back to the caller's list as a "|"-separated token string.
        inSentences[i] = string.Join("|", wordsFromSentence);

        // Add the cleaned tokens to the TextModel.
        text.AddSentence(wordsFromSentence);
    }

    return text;
}