/// <summary>
        /// Controls the summary generation task: assembles the selected sentences
        /// from the unstemmed text, assigns the result to the <c>summary</c> member
        /// and prints it.
        /// </summary>
        /// <param name="sentenceIds">ids of selected sentences</param>
        public void GenerateSummary(List <int> sentenceIds)
        {
            // Only the unstemmed sentences are used for the summary; the stemmed
            // variant that used to be assembled here was never read, so it is no
            // longer built.
            summary = AssembleSummarySentences(sentenceIds, _unstemmedText);

            PrintSummary(summary);
        }
Beispiel #2
0
        /// <summary>
        /// Calculates precision: the number of generated sentences that also occur
        /// in the human summary, divided by the number of generated sentences.
        /// </summary>
        /// <param name="humanSummaryText">human generated summary used as the reference</param>
        /// <returns>
        /// precision rounded to 3 decimal places, or 0 when the generated summary
        /// contains no sentences
        /// </returns>
        private double CalculatePrecision(TextModel humanSummaryText)
        {
            int precisionCount = GetBaseSentenceCount(humanSummaryText);

            int genratedSentenceCount = _genratedSummary.GetSentenceCount();

            // Without this guard an empty generated summary would produce 0/0 = NaN.
            if (genratedSentenceCount == 0)
            {
                return(0.0);
            }

            double precision = Math.Round((double)precisionCount / genratedSentenceCount, 3);

            return(precision);
        }
        /// <summary>
        /// Writes the summary to the console, one sentence per line, each
        /// prefixed with " - ".
        /// </summary>
        /// <param name="summary">summary whose sentences are printed</param>
        public void PrintSummary(TextModel summary)
        {
            // Emits a newline character followed by WriteLine's own line terminator.
            Console.WriteLine('\n');

            int sentenceIndex = 0;

            while (sentenceIndex < summary.GetSentenceCount())
            {
                Console.WriteLine(" - " + summary.GetSentenceAsAString(sentenceIndex));

                sentenceIndex++;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Calculates recall: the number of generated sentences that also occur in
        /// the human summary, divided by the number of sentences in the human summary.
        /// </summary>
        /// <param name="humanSummaryText">human generated summary used as the reference</param>
        /// <returns>
        /// recall rounded to 3 decimal places, or 0 when the human summary
        /// contains no sentences
        /// </returns>
        private double CalculateRecall(TextModel humanSummaryText)
        {
            // the count of sentences which exist in the generated summary
            int recallCount = GetBaseSentenceCount(humanSummaryText);

            int humanGenSentenceCount = humanSummaryText.GetSentenceCount();

            // Without this guard an empty human summary would produce 0/0 = NaN.
            if (humanGenSentenceCount == 0)
            {
                return(0.0);
            }

            double recall = Math.Round((double)recallCount / humanGenSentenceCount, 3);

            return(recall);
        }
        /// <summary>
        /// Builds a new text model containing the sentences of <paramref name="text"/>
        /// identified by the given ids, in the order the ids are listed.
        /// </summary>
        /// <param name="sentenceIds">ids of the sentences to copy into the summary</param>
        /// <param name="text">text model the sentences are looked up in</param>
        /// <returns>a new text model holding the selected sentences</returns>
        public TextModel AssembleSummarySentences(List <int> sentenceIds, TextModel text)
        {
            TextModel assembled = new TextModel();

            // look up each selected sentence and append it to the result
            sentenceIds.ForEach(id => assembled.AddSentence(text.GetSentence(id)));

            return assembled;
        }
Beispiel #6
0
        /// <summary>
        /// Reads the human generated summary from a text file and runs it through
        /// the preprocessor to obtain it as a text model.
        /// </summary>
        /// <param name="humanSummaryPath">text file path for the human generated summary</param>
        /// <returns>the unstemmed text produced by the preprocessor</returns>
        public TextModel GetHumanGeneratedSummary(string humanSummaryPath)
        {
            var summaryPreprocessor = new Preprocessor(File.ReadAllText(humanSummaryPath));

            // The value returned by the run is not used here; only the unstemmed
            // text retrieved afterwards is.
            summaryPreprocessor.RunPreprocessor();

            return summaryPreprocessor.GetUnstemmedText();
        }
Beispiel #7
0
        /// <summary>
        /// Program entry point: reads a document, preprocesses it, scores and
        /// selects sentences, prints the generated summary and evaluates it
        /// against a human written summary.
        /// </summary>
        /// <param name="args">command line arguments (not used)</param>
        static void Main(string[] args)
        {
            // ------ Adjustable parameters ------

            // score weight for the cue-phrase feature
            double cuePhraseScoreWeighting = 0.3;

            // default threshold value controlling the size of the summary
            double defaultSelectionThreshold = 0.5;

            // -----------------------------------

            // Initialize the program; this yields the document path and the
            // selection threshold to use.
            double selectionThreshold;
            var documentPath = InnitializeProgram(out selectionThreshold, defaultSelectionThreshold);

            // Load the document and preprocess it.
            string documentText = File.ReadAllText(documentPath);
            var preprocessor = new Preprocessor(documentText);
            TextModel processedText = preprocessor.RunPreprocessor();
            TextModel unstemmedText = preprocessor.GetUnstemmedText();

            // Feature extraction: one score per sentence id.
            var featureExtractor = new FeatureExtractor(processedText, unstemmedText);
            Dictionary <int, double> sentenceScores = featureExtractor.RunFeatureExtractor(cuePhraseScoreWeighting);

            // Sentence selection.
            var sentenceSelector = new SentenceSelector(processedText);
            List <int> selectedSentenceIds = sentenceSelector.RunSentenceSelector(sentenceScores, selectionThreshold);

            // Summary generation.
            var summaryGenerator = new SummaryGenerator(processedText, unstemmedText);
            summaryGenerator.GenerateSummary(selectedSentenceIds);

            // Evaluate the generated summary against the human written one.
            var summaryEvaluator = new Evaluator(summaryGenerator.GetGeneratedSummary());
            string humanSummaryPath = "..\\..\\Resources\\Test\\humanSummaryNews.txt";
            summaryEvaluator.RunEvaluator(humanSummaryPath);

            // Wait for a key press before exiting.
            Console.ReadKey();
        }
        /// <summary>
        /// Builds the unstemmed text model by tokenizing every input sentence.
        /// </summary>
        /// <param name="inSentences">sentences to tokenize</param>
        /// <returns>text model with one tokenized sentence per input sentence</returns>
        private static TextModel GetRawText(IList <string> inSentences)
        {
            var unstemmedText = new TextModel();

            foreach (string sentence in inSentences)
            {
                // tokens are added as-is: no stop word removal, no stemming
                unstemmedText.AddSentence(TokenizeSentence(sentence));
            }

            return unstemmedText;
        }
        /// <summary>
        /// Replaces every word of the text with its stemmed form.
        /// </summary>
        /// <param name="text">text model to stem; it is modified in place</param>
        /// <returns>the same text model instance that was passed in</returns>
        public static TextModel WordStemmer(TextModel text)
        {
            var wordStemmer = new Stemmer();

            for (int sentenceIndex = 0; sentenceIndex < text.GetSentenceCount(); sentenceIndex++)
            {
                for (int wordIndex = 0; wordIndex < text.GetWordCountInSentence(sentenceIndex); wordIndex++)
                {
                    // overwrite the word with its stem
                    text.SetWord(sentenceIndex, wordIndex,
                                 wordStemmer.StemWord(text.GetWord(sentenceIndex, wordIndex)));
                }
            }

            return text;
        }
Beispiel #10
0
        /// <summary>
        /// Counts the sentences of the generated summary that also appear in the
        /// human summarized text.
        /// </summary>
        /// <param name="humanSummaryText">human generated summary to compare against</param>
        /// <returns>count of sentences which exist in both texts</returns>
        private int GetBaseSentenceCount(TextModel humanSummaryText)
        {
            var referenceSentences = humanSummaryText.GetSentencesAsStrings();

            int matches = 0;

            for (int index = 0; index < _genratedSummary.GetSentenceCount(); index++)
            {
                // a generated sentence counts when its string form is found in the reference
                bool foundInReference = referenceSentences.Contains(_genratedSummary.GetSentenceAsAString(index));

                matches += foundInReference ? 1 : 0;
            }

            return matches;
        }
        /// <summary>
        /// Manages the tasks of the preprocessor: lower-cases the initial text,
        /// segments it into sentences, stores the tokenized but unstemmed text in
        /// <c>_unstemmedText</c>, then removes stop words and stems the remaining words.
        /// </summary>
        /// <returns>the stop-word-free, stemmed text model</returns>
        public TextModel RunPreprocessor()
        {
            // Lower-case with the invariant culture so the result does not depend
            // on the machine's locale (e.g. Turkish dotless i).
            string inTextLowerCase = _initialText.ToLowerInvariant();

            string stopWordFilePath = "../../Resources/Preprocessing/standard-stopwords.txt";

            IList <string> inSenetences = SegmentSentences(inTextLowerCase);

            // partially pre-processed text (not stemmed); must be captured before
            // the stop word pass below rewrites the sentence list in place
            _unstemmedText = GetRawText(inSenetences);

            TextModel inTextNoStopWords = RemoveStopWordsAndPunctuation(ref inSenetences, stopWordFilePath);

            TextModel inTextWordStemmed = WordStemmer(inTextNoStopWords);

            return(inTextWordStemmed);
        }
        /// <summary>
        /// Removes stop words from every sentence and collects the remaining words
        /// in a new text model. Each entry of <paramref name="inSentences"/> is also
        /// overwritten with its remaining words joined by "|".
        /// </summary>
        /// <remarks>
        /// Punctuation is not stripped explicitly here; presumably TokenizeSentence
        /// takes care of it - TODO confirm.
        /// </remarks>
        /// <param name="inSentences">sentences to clean; entries are rewritten in place</param>
        /// <param name="stopWordFilePath">path of the text file containing the stop words</param>
        /// <returns>text model holding the sentences without stop words</returns>
        private TextModel RemoveStopWordsAndPunctuation(ref IList <string> inSentences, string stopWordFilePath)
        {
            TextModel text = new TextModel();

            //initializing stop words
            var stopWordListRaw = File.ReadAllText(stopWordFilePath);

            // A hash set gives constant-time membership tests instead of scanning
            // the whole stop word array once per word.
            var stopWords = new HashSet<string>(Regex.Split(stopWordListRaw, "\\W+"));

            //remove stop words
            for (int i = 0; i < inSentences.Count; i++)
            {
                var wordsFromSentence = TokenizeSentence(inSentences[i]);

                wordsFromSentence.RemoveAll(word => stopWords.Contains(word));

                inSentences[i] = string.Join("|", wordsFromSentence);

                // add the cleaned sentences to the TextModel
                text.AddSentence(wordsFromSentence);
            }

            return(text);
        }
 /// <summary>
 /// Creates a feature extractor that keeps references to both text models.
 /// </summary>
 /// <param name="text">text model stored in <c>_text</c></param>
 /// <param name="unstemmedText">text model stored in <c>_unstemmedText</c></param>
 public FeatureExtractor(TextModel text, TextModel unstemmedText)
 {
     this._text = text;
     this._unstemmedText = unstemmedText;
 }
 /// <summary>
 /// Creates a summary generator that keeps references to both text models.
 /// </summary>
 /// <param name="text">text model stored in <c>_text</c></param>
 /// <param name="orignalUnstemmedText">text model stored in <c>_unstemmedText</c></param>
 public SummaryGenerator(TextModel text, TextModel orignalUnstemmedText)
 {
     this._text = text;
     this._unstemmedText = orignalUnstemmedText;
 }
 /// <summary>
 /// Creates a sentence selector that keeps a reference to the given text model.
 /// </summary>
 /// <param name="text">text model stored in <c>_text</c></param>
 public SentenceSelector(TextModel text)
 {
     this._text = text;
 }
Beispiel #16
0
 /// <summary>
 /// Creates an evaluator for the given generated summary.
 /// </summary>
 /// <param name="generatedSummary">generated summary stored in <c>_genratedSummary</c></param>
 public Evaluator(TextModel generatedSummary)
 {
     this._genratedSummary = generatedSummary;
 }