/// <summary>
        /// Produces the final summary (concepts and sentences) for a document that has already been analyzed.
        /// </summary>
        /// <remarks>
        /// Out-of-range values on <paramref name="arguments"/> are corrected in place before the
        /// summarizer is invoked: a negative concept cap, sentence limit or size percentage is raised
        /// to 0, and a size percentage above 100 is lowered to 100. The caller's argument object is
        /// therefore modified by this call.
        /// </remarks>
        /// <param name="analyzedDocument">The analyzed document that is handed to the summarizer.</param>
        /// <param name="contentSummarizer">The strategy that extracts the concepts and the sentences.</param>
        /// <param name="arguments">The summarizing limits; clamped in place as described in the remarks.</param>
        /// <returns>
        /// A new <see cref="SummarizedDocument"/> carrying the concept and sentence lists returned by
        /// <paramref name="contentSummarizer"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException">Any of the three parameters is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="contentSummarizer"/> returned <c>null</c> from <c>GetConcepts</c> or <c>GetSentences</c>.
        /// </exception>
        public SummarizedDocument SummarizeAnalyzedContent(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments arguments)
        {
            // Argument validation, in declaration order so the first missing argument is the one reported.
            if (analyzedDocument == null)
            {
                throw new ArgumentNullException(nameof(analyzedDocument));
            }
            if (contentSummarizer == null)
            {
                throw new ArgumentNullException(nameof(contentSummarizer));
            }
            if (arguments == null)
            {
                throw new ArgumentNullException(nameof(arguments));
            }

            // Clamp the limits into their valid ranges; values already in range are left untouched.
            if (arguments.FilteringConceptsCap < 0)
            {
                arguments.FilteringConceptsCap = 0;
            }
            if (arguments.MaxSummarySentences < 0)
            {
                arguments.MaxSummarySentences = 0;
            }
            if (arguments.MaxSummarySizeInPercent < 0)
            {
                arguments.MaxSummarySizeInPercent = 0;
            }
            else if (arguments.MaxSummarySizeInPercent > 100)
            {
                arguments.MaxSummarySizeInPercent = 100;
            }

            // Concepts are extracted first; sentences are only requested once the concepts are valid.
            List<string> concepts = contentSummarizer.GetConcepts(analyzedDocument, arguments);
            if (concepts == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetConcepts must not return null");
            }

            List<string> sentences = contentSummarizer.GetSentences(analyzedDocument, arguments);
            if (sentences == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetSentences must not return null");
            }

            SummarizedDocument summary = new SummarizedDocument();
            summary.Concepts  = concepts;
            summary.Sentences = sentences;
            return summary;
        }
        /// <summary>
        /// Selects the formatted values of the scored text units that represent the document's concepts.
        /// </summary>
        /// <remarks>
        /// When the document has no more scored text units than <c>FilteringConceptsCap</c>, all of them
        /// are returned. Otherwise the score of the unit at index <c>FilteringConceptsCap</c> becomes the
        /// threshold and every unit scoring at least that much is kept, so units tied with the threshold
        /// are included.
        /// NOTE(review): this presumably relies on <c>ScoredTextUnits</c> being ordered by descending
        /// score, and on <c>FilteringConceptsCap</c> being non-negative - confirm against the callers.
        /// </remarks>
        /// <param name="analyzedDocument">The analyzed document whose scored text units are filtered.</param>
        /// <param name="summarizerArguments">Supplies <c>FilteringConceptsCap</c>.</param>
        /// <returns>The formatted values of the selected text units, in their original list order.</returns>
        public List<string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var scoredUnits = analyzedDocument.ScoredTextUnits;
            var conceptsCap = summarizerArguments.FilteringConceptsCap;

            // Few enough units: nothing to filter, keep them all.
            if (scoredUnits.Count <= conceptsCap)
            {
                var everything = from unit in scoredUnits
                                 select unit.ScoredTextUnit.FormattedValue;
                return everything.ToList();
            }

            // The unit sitting at the cap index defines the minimum score that is still kept.
            double threshold = scoredUnits[conceptsCap].Score;

            var retained = from unit in scoredUnits
                           where unit.Score >= threshold
                           select unit.ScoredTextUnit.FormattedValue;
            return retained.ToList();
        }
        // Example no. 3
        // 0
        /// <summary>
        /// Runs the complete pipeline - parse, analyze, summarize - on the supplied content.
        /// </summary>
        /// <param name="contentProvider">The source of the content to summarize.</param>
        /// <param name="args">
        /// Supplies the content parser, content analyzer and content summarizer, and is also passed
        /// to the summarizing step as its arguments.
        /// </param>
        /// <returns>
        /// The summary produced by a new <see cref="SummarizingEngine"/>, or an empty
        /// <see cref="SummarizedDocument"/> when <paramref name="contentProvider"/> or
        /// <paramref name="args"/> is <c>null</c>.
        /// </returns>
        public static SummarizedDocument Summarize(IContentProvider contentProvider, ISummarizerArguments args)
        {
            // Missing input is not treated as an error here: the caller simply gets an empty summary.
            bool inputMissing = contentProvider == null || args == null;
            if (inputMissing)
            {
                return new SummarizedDocument();
            }

            var engine = new SummarizingEngine();

            // Each stage feeds the next one; every collaborator is requested right before its stage runs.
            var parser = args.ContentParser();
            ParsedDocument parsed = engine.ParseContent(contentProvider, parser);

            var analyzer = args.ContentAnalyzer();
            AnalyzedDocument analyzed = engine.AnalyzeParsedContent(parsed, analyzer);

            var summarizer = args.ContentSummarizer();
            return engine.SummarizeAnalyzedContent(analyzed, summarizer, args);
        }
        /// <summary>
        /// Picks the sentences that make up the summary and returns them in document order.
        /// </summary>
        /// <remarks>
        /// Sentences are taken from the front of <c>ScoredSentences</c> until one of the limits is hit:
        /// the list is exhausted, <c>MaxSummarySentences</c> sentences were taken, or the accumulated
        /// text unit count reached <c>MaxSummarySizeInPercent</c> percent of the document's total.
        /// The size limit is checked before a sentence is added, so the last added sentence may push
        /// the summary past the target size.
        /// NOTE(review): this presumably relies on <c>ScoredSentences</c> being ordered from most to
        /// least relevant - confirm against the analyzer.
        /// </remarks>
        /// <param name="analyzedDocument">The analyzed document whose scored sentences are selected from.</param>
        /// <param name="summarizerArguments">Supplies <c>MaxSummarySentences</c> and <c>MaxSummarySizeInPercent</c>.</param>
        /// <returns>The original text of the selected sentences, ordered by their original sentence index.</returns>
        public List<string> GetSentences(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            int totalContentWordCount = analyzedDocument.ScoredSentences.Sum(s => s.ScoredSentence.TextUnits.Count);

            // Multiply in 64-bit so that percent * total cannot overflow before the division.
            long targetWordCount = (long)summarizerArguments.MaxSummarySizeInPercent * totalContentWordCount / 100;

            int currentWordCount = 0;
            List<Sentence> selectedSentences = new List<Sentence>();

            // Every scored sentence is a candidate, including the last one: the previous
            // "Count - 1" bound made the final sentence unreachable, so a one-sentence
            // document always produced an empty summary.
            foreach (var candidate in analyzedDocument.ScoredSentences)
            {
                if (selectedSentences.Count >= summarizerArguments.MaxSummarySentences ||
                    currentWordCount >= targetWordCount)
                {
                    break;
                }

                Sentence selectedSentence = candidate.ScoredSentence;
                selectedSentences.Add(selectedSentence);
                currentWordCount += selectedSentence.TextUnits.Count;
            }

            // Restore reading order before handing the sentences back.
            return selectedSentences.OrderBy(s => s.OriginalSentenceIndex).Select(s => s.OriginalSentence).ToList();
        }