/// <summary>
/// Controls the summary generation task: assembles the summary sentences
/// from the unstemmed text and prints the result to the console.
/// </summary>
/// <param name="sentenceIds">Ids of the selected sentences.</param>
public void GenerateSummary(List<int> sentenceIds)
{
    // Build the summary from the unstemmed text so the output is readable.
    // (A stemmed copy was previously assembled here too but was never used,
    // so that dead work has been removed.)
    summary = AssembleSummarySentences(sentenceIds, _unstemmedText);
    PrintSummary(summary);
}
/// <summary>
/// Calculates precision: the fraction of generated-summary sentences that
/// also appear in the human generated reference summary.
/// </summary>
/// <param name="humanSummaryText">The human generated reference summary.</param>
/// <returns>
/// Precision rounded to three decimal places; 0 when the generated summary
/// contains no sentences (avoids NaN/Infinity from dividing by zero).
/// </returns>
private double CalculatePrecision(TextModel humanSummaryText)
{
    int overlapCount = GetBaseSentenceCount(humanSummaryText);
    int generatedSentenceCount = _genratedSummary.GetSentenceCount();

    // Guard: an empty generated summary would otherwise produce NaN/Infinity.
    if (generatedSentenceCount == 0)
    {
        return 0.0;
    }

    return Math.Round((double)overlapCount / generatedSentenceCount, 3);
}
/// <summary>
/// Writes the summary to the console, one sentence per line, each prefixed
/// with " - ".
/// </summary>
/// <param name="summary">The summary to print.</param>
public void PrintSummary(TextModel summary)
{
    Console.WriteLine('\n');

    int sentenceCount = summary.GetSentenceCount();
    for (int index = 0; index < sentenceCount; index++)
    {
        Console.WriteLine(" - " + summary.GetSentenceAsAString(index));
    }
}
/// <summary>
/// Calculates recall: the fraction of human-summary sentences that also
/// appear in the generated summary.
/// </summary>
/// <param name="humanSummaryText">The human generated reference summary.</param>
/// <returns>
/// Recall rounded to three decimal places; 0 when the human summary contains
/// no sentences (avoids NaN/Infinity from dividing by zero).
/// </returns>
private double CalculateRecall(TextModel humanSummaryText)
{
    // The count of sentences which exist in both summaries.
    int overlapCount = GetBaseSentenceCount(humanSummaryText);
    int humanSentenceCount = humanSummaryText.GetSentenceCount();

    // Guard: an empty reference summary would otherwise produce NaN/Infinity.
    if (humanSentenceCount == 0)
    {
        return 0.0;
    }

    return Math.Round((double)overlapCount / humanSentenceCount, 3);
}
/// <summary>
/// Maps the selected sentence ids onto sentences from the given text to
/// build the summary.
/// </summary>
/// <param name="sentenceIds">Ids of the sentences to include.</param>
/// <param name="text">The text to draw sentences from.</param>
/// <returns>A new <c>TextModel</c> containing only the selected sentences.</returns>
public TextModel AssembleSummarySentences(List<int> sentenceIds, TextModel text)
{
    var assembled = new TextModel();

    for (int i = 0; i < sentenceIds.Count; i++)
    {
        assembled.AddSentence(text.GetSentence(sentenceIds[i]));
    }

    return assembled;
}
/// <summary>
/// Parses the human generated summary into a TextModel using the text file path.
/// </summary>
/// <param name="humanSummaryPath">Text file path for the human generated summary.</param>
/// <returns>The preprocessed (unstemmed) human summary.</returns>
public TextModel GetHumanGeneratedSummary(string humanSummaryPath)
{
    var rawSummary = File.ReadAllText(humanSummaryPath);

    var preprocessor = new Preprocessor(rawSummary);
    preprocessor.RunPreprocessor();

    return preprocessor.GetUnstemmedText();
}
static void Main(string[] args)
{
    //------ Adjustable parameters ------
    // Score weight for the cue-phrase feature.
    double cuePhraseScoreWeighting = 0.3;
    // Threshold value to control the size of the summary.
    double defaultSelectionThreshold = 0.5;
    // ----------------------------------

    double selectionThreshold;

    // Initializes the program and prompts the user for the document's file path.
    var path = InnitializeProgram(out selectionThreshold, defaultSelectionThreshold);

    // Read the document into a string.
    string initialText = File.ReadAllText(path);

    // Send it to preprocessing.
    Preprocessor preprocessor = new Preprocessor(initialText);
    var text = preprocessor.RunPreprocessor();
    TextModel unstemmedText = preprocessor.GetUnstemmedText();

    // Send it to feature extraction.
    FeatureExtractor featureExtractor = new FeatureExtractor(text, unstemmedText);
    Dictionary<int, double> sentenceScores = featureExtractor.RunFeatureExtractor(cuePhraseScoreWeighting);

    // Send it to sentence selection and assembly.
    SentenceSelector sentenceSelector = new SentenceSelector(text);
    List<int> rankedSentenceIds = sentenceSelector.RunSentenceSelector(sentenceScores, selectionThreshold);

    // Send to generate the summary.
    SummaryGenerator summaryGenerator = new SummaryGenerator(text, unstemmedText);
    summaryGenerator.GenerateSummary(rankedSentenceIds);

    // Evaluate performance against the human-written reference summary.
    // Path.Combine keeps the path portable across operating systems.
    Evaluator summaryEvaluator = new Evaluator(summaryGenerator.GetGeneratedSummary());
    string humanSummaryPath = Path.Combine("..", "..", "Resources", "Test", "humanSummaryNews.txt");
    summaryEvaluator.RunEvaluator(humanSummaryPath);

    Console.ReadKey();
}
/// <summary>
/// Gets the unstemmed text: tokenizes each input sentence and collects the
/// tokens into a TextModel.
/// </summary>
/// <param name="inSentences">Sentences to tokenize.</param>
/// <returns>A TextModel holding the tokenized (un-stemmed) sentences.</returns>
private static TextModel GetRawText(IList<string> inSentences)
{
    var rawText = new TextModel();

    foreach (var sentence in inSentences)
    {
        // Add the un-stemmed tokens for this sentence.
        rawText.AddSentence(TokenizeSentence(sentence));
    }

    return rawText;
}
/// <summary>
/// Stems every word in the text in place and returns the same TextModel.
/// </summary>
/// <param name="text">The text whose words are stemmed.</param>
/// <returns>The input TextModel with each word replaced by its stem.</returns>
public static TextModel WordStemmer(TextModel text)
{
    var stemmer = new Stemmer();

    int sentenceCount = text.GetSentenceCount();
    for (int sentenceIndex = 0; sentenceIndex < sentenceCount; sentenceIndex++)
    {
        int wordCount = text.GetWordCountInSentence(sentenceIndex);
        for (int wordIndex = 0; wordIndex < wordCount; wordIndex++)
        {
            var stemmedWord = stemmer.StemWord(text.GetWord(sentenceIndex, wordIndex));
            text.SetWord(sentenceIndex, wordIndex, stemmedWord);
        }
    }

    return text;
}
/// <summary>
/// Gets the count of sentences which are both in the generated text and the
/// human summarized text.
/// </summary>
/// <param name="humanSummaryText">The human generated reference summary.</param>
/// <returns>Count of sentences which exist in both texts.</returns>
private int GetBaseSentenceCount(TextModel humanSummaryText)
{
    int baseSentenceCount = 0;

    // HashSet gives O(1) membership tests instead of a linear scan of the
    // human-summary sentences for every generated sentence (was O(n*m)).
    var humanSummarySentences = new HashSet<string>(humanSummaryText.GetSentencesAsStrings());

    for (int i = 0; i < _genratedSummary.GetSentenceCount(); i++)
    {
        var generatedSentence = _genratedSummary.GetSentenceAsAString(i);
        if (humanSummarySentences.Contains(generatedSentence))
        {
            baseSentenceCount++;
        }
    }

    return baseSentenceCount;
}
/// <summary>
/// Manages the tasks of the preprocessor: lower-cases the input, segments it
/// into sentences, keeps an unstemmed copy, removes stop words and
/// punctuation, then stems the remaining words.
/// </summary>
/// <returns>The fully preprocessed (stop-word-free, stemmed) text.</returns>
public TextModel RunPreprocessor()
{
    // Convert the raw input to lower case.
    string inTextLowerCase = _initialText.ToLower();

    string stopWordFilePath = "../../Resources/Preprocessing/standard-stopwords.txt";

    IList<string> inSentences = SegmentSentences(inTextLowerCase);

    // Partially pre-processed text (not stemmed), kept for readable output.
    _unstemmedText = GetRawText(inSentences);

    TextModel inTextNoStopWords = RemoveStopWordsAndPunctuation(ref inSentences, stopWordFilePath);
    TextModel inTextWordStemmed = WordStemmer(inTextNoStopWords);

    return inTextWordStemmed;
}
/// <summary>
/// Removes stop words and punctuation from each sentence, rewriting the
/// sentences in place (joined with '|') and collecting the cleaned tokens
/// into a TextModel.
/// </summary>
/// <param name="inSentences">Sentences to clean; each entry is rewritten in place.</param>
/// <param name="stopWordFilePath">Path to the stop-word list file.</param>
/// <returns>A TextModel of the cleaned sentences.</returns>
private TextModel RemoveStopWordsAndPunctuation(ref IList<string> inSentences, string stopWordFilePath)
{
    TextModel text = new TextModel();

    // Initializing stop words. HashSet gives O(1) lookups instead of a
    // linear scan of the stop-word array for every word (was O(n*m)).
    var stopWordListRaw = File.ReadAllText(stopWordFilePath);
    var stopWords = new HashSet<string>(Regex.Split(stopWordListRaw, "\\W+"));

    // Remove stop words from every sentence.
    for (int i = 0; i < inSentences.Count; i++)
    {
        var wordsFromSentence = TokenizeSentence(inSentences[i]);
        wordsFromSentence.RemoveAll(word => stopWords.Contains(word));
        inSentences[i] = string.Join("|", wordsFromSentence);

        // Add the cleaned sentence to the TextModel.
        text.AddSentence(wordsFromSentence);
    }

    return text;
}
/// <summary>
/// Creates a feature extractor over the stemmed text and its unstemmed counterpart.
/// </summary>
/// <param name="text">The preprocessed (stemmed) text.</param>
/// <param name="unstemmedText">The unstemmed copy of the same text.</param>
public FeatureExtractor(TextModel text, TextModel unstemmedText)
{
    (_text, _unstemmedText) = (text, unstemmedText);
}
/// <summary>
/// Creates a summary generator over the stemmed text and its unstemmed counterpart.
/// </summary>
/// <param name="text">The preprocessed (stemmed) text.</param>
/// <param name="orignalUnstemmedText">The unstemmed copy of the same text.</param>
public SummaryGenerator(TextModel text, TextModel orignalUnstemmedText)
{
    (_text, _unstemmedText) = (text, orignalUnstemmedText);
}
/// <summary>
/// Creates a sentence selector over the given preprocessed text.
/// </summary>
/// <param name="text">The preprocessed text to select sentences from.</param>
public SentenceSelector(TextModel text) => _text = text;
/// <summary>
/// Creates an evaluator for the given generated summary.
/// </summary>
/// <param name="generatedSummary">The machine-generated summary to evaluate.</param>
public Evaluator(TextModel generatedSummary) => _genratedSummary = generatedSummary;