/// <summary>
        /// Selects the concepts reported for an analyzed document, i.e. the formatted values of its scored text units.
        /// </summary>
        /// <param name="analyzedDocument">Document whose <c>ScoredTextUnits</c> are read.</param>
        /// <param name="summarizerArguments">Supplies <c>FilteringConceptsCap</c>.</param>
        /// <returns>
        /// Every formatted value when the document holds no more text units than the cap; otherwise the
        /// formatted values of the units scoring at least as high as the unit found at index <c>FilteringConceptsCap</c>.
        /// </returns>
        public List<string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var scoredUnits = analyzedDocument.ScoredTextUnits;
            var conceptCap = summarizerArguments.FilteringConceptsCap;

            if (scoredUnits.Count > conceptCap)
            {
                // The score stored at index `conceptCap` is the cut-off; units tying with it are kept too.
                // NOTE(review): presumably the list is ordered by descending score - confirm with the analyzer.
                var cutOffScore = scoredUnits[conceptCap].Score;

                return (from unit in scoredUnits
                        where unit.Score >= cutOffScore
                        select unit.ScoredTextUnit.FormattedValue).ToList();
            }

            // At or below the cap there is nothing to filter out.
            return (from unit in scoredUnits
                    select unit.ScoredTextUnit.FormattedValue).ToList();
        }
Esempio n. 2
0
        /// <summary>
        /// Returns the formatted values of the document's scored text units, filtered by score once the
        /// number of units exceeds <c>FilteringConceptsCap</c>.
        /// </summary>
        /// <param name="analyzedDocument">Document providing <c>ScoredTextUnits</c>.</param>
        /// <param name="summarizerArguments">Arguments providing <c>FilteringConceptsCap</c>.</param>
        /// <returns>The selected formatted values, in the order of <c>ScoredTextUnits</c>.</returns>
        public List <string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var textUnitScores = analyzedDocument.ScoredTextUnits;
            var cap            = summarizerArguments.FilteringConceptsCap;
            var overCap        = textUnitScores.Count > cap;

            if (!overCap)
            {
                // Small document: report every text unit.
                var everything = textUnitScores.Select(entry => entry.ScoredTextUnit.FormattedValue);
                return everything.ToList();
            }

            // The unit sitting at index `cap` supplies the minimum score a unit needs to be kept.
            var minimumScore = textUnitScores[cap].Score;
            var kept         = textUnitScores.Where(entry => entry.Score >= minimumScore);

            return kept.Select(entry => entry.ScoredTextUnit.FormattedValue).ToList();
        }
        /// <summary>
        /// Returns the formatted values of the highest-scoring text units, limited to a percentage of the
        /// document's text units.
        /// </summary>
        /// <param name="analyzedDocument">Document providing <c>ScoredTextUnits</c>.</param>
        /// <param name="summarizerArguments">Arguments providing <c>MaxConceptsInPercent</c>.</param>
        /// <returns>
        /// All formatted values when the unit count does not exceed <c>MaxConceptsInPercent</c>; otherwise the
        /// formatted values of the top units by descending score, their number being the rounded
        /// (midpoint away from zero) percentage of the unit count.
        /// </returns>
        public List <string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var scoredUnits = analyzedDocument.ScoredTextUnits;

            // NOTE(review): this compares a unit count against a percentage value - presumably a
            // deliberate "small document" shortcut; confirm the intent.
            if (scoredUnits.Count > summarizerArguments.MaxConceptsInPercent)
            {
                double share     = (summarizerArguments.MaxConceptsInPercent / 100D) * scoredUnits.Count;
                int    keepCount = (int)Math.Round(share, 0, MidpointRounding.AwayFromZero);

                var ranked = scoredUnits.OrderByDescending(unit => unit.Score);
                var top    = ranked.Take(keepCount);

                return top.Select(unit => unit.ScoredTextUnit.FormattedValue).ToList();
            }

            return scoredUnits.Select(unit => unit.ScoredTextUnit.FormattedValue).ToList();
        }
Esempio n. 4
0
        /// <summary>
        /// Runs the parse, analyze and summarize steps of a <c>SummarizingEngine</c> over the supplied content.
        /// </summary>
        /// <param name="contentProvider">Source of the content to summarize.</param>
        /// <param name="args">Supplies the content parser, analyzer and summarizer, and is passed on to the last step.</param>
        /// <returns>
        /// The document produced by the summarizing step, or a new empty <c>SummarizedDocument</c> when
        /// either argument is null.
        /// </returns>
        public static SummarizedDocument Summarize(IContentProvider contentProvider, ISummarizerArguments args)
        {
            // Missing input is answered with an empty result rather than an exception.
            if (contentProvider == null)
            {
                return new SummarizedDocument();
            }

            if (args == null)
            {
                return new SummarizedDocument();
            }

            var pipeline = new SummarizingEngine();

            // Each stage consumes the output of the previous one.
            var parsed   = pipeline.ParseContent(contentProvider, args.ContentParser());
            var analyzed = pipeline.AnalyzeParsedContent(parsed, args.ContentAnalyzer());

            return pipeline.SummarizeAnalyzedContent(analyzed, args.ContentSummarizer(), args);
        }
        /// <summary>
        /// Parses, analyzes and summarizes the supplied content with a fresh <c>SummarizingEngine</c>.
        /// </summary>
        /// <param name="contentProvider">Source of the content to summarize.</param>
        /// <param name="args">Supplies the parser, analyzer and summarizer used by the three steps.</param>
        /// <returns>
        /// The result of the summarizing step, or a new empty <c>SummarizedDocument</c> when either
        /// argument is null.
        /// </returns>
        public static SummarizedDocument Summarize(IContentProvider contentProvider, ISummarizerArguments args)
        {
            var inputMissing = contentProvider == null || args == null;
            if (inputMissing)
            {
                // Nothing to work with: hand back an empty document instead of throwing.
                return new SummarizedDocument();
            }

            var summarizingEngine = new SummarizingEngine();

            // Arguments are evaluated left to right, so the steps still run as parse -> analyze -> summarize.
            return summarizingEngine.SummarizeAnalysedContent(
                summarizingEngine.AnalyzeParsedContent(
                    summarizingEngine.ParseContent(contentProvider, args.ContentParser()),
                    args.ContentAnalyzer()),
                args.ContentSummarizer(),
                args);
        }
        /// <summary>
        /// Picks the sentences that make up the summary, walking <c>ScoredSentences</c> from the first
        /// entry onwards until one of the limits is reached.
        /// </summary>
        /// <param name="analyzedDocument">Document providing <c>ScoredSentences</c>.</param>
        /// <param name="summarizerArguments">
        /// Supplies <c>MaxSummarySizeInPercent</c> (share of the total text-unit count used as word budget)
        /// and <c>MaxSummarySentences</c> (upper bound on the number of sentences).
        /// </param>
        /// <returns>The original text of the selected sentences, ordered by their original sentence index.</returns>
        public List<string> GetSentences(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var totalContentWordCount = analyzedDocument.ScoredSentences.Sum(s => s.ScoredSentence.TextUnits.Count);
            var targetWordCount = summarizerArguments.MaxSummarySizeInPercent * totalContentWordCount / 100;
            var currentWordCount = 0;
            var currentSentenceIndex = 0;
            var selectedSentences = new List<Sentence>();

            // Every entry is a candidate, the last one included: bounding the index by "Count - 1"
            // would make the final sentence unreachable and leave a single-sentence document
            // with an empty summary.
            while (currentSentenceIndex < analyzedDocument.ScoredSentences.Count &&
                    selectedSentences.Count < summarizerArguments.MaxSummarySentences &&
                    currentWordCount < targetWordCount)
            {
                var selectedSentence = analyzedDocument.ScoredSentences[currentSentenceIndex].ScoredSentence;
                selectedSentences.Add(selectedSentence);

                // Count property (as used for the total above) rather than the LINQ Count() extension.
                currentWordCount += selectedSentence.TextUnits.Count;
                currentSentenceIndex += 1;
            }

            // Restore document order before handing the sentences back.
            return selectedSentences.OrderBy(s => s.OriginalSentenceIndex).Select(s => s.OriginalSentence).ToList();
        }
Esempio n. 7
0
        /// <summary>
        /// Selects the summary sentences by walking <c>ScoredSentences</c> in list order until the sentence
        /// limit or the word budget is reached.
        /// </summary>
        /// <param name="analyzedDocument">Document providing <c>ScoredSentences</c>.</param>
        /// <param name="summarizerArguments">
        /// Supplies <c>MaxSummarySizeInPercent</c> (word budget as a share of all text units) and
        /// <c>MaxSummarySentences</c> (maximum number of sentences).
        /// </param>
        /// <returns>The original text of the selected sentences, ordered by their original sentence index.</returns>
        public List <string> GetSentences(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var totalContentWordCount = analyzedDocument.ScoredSentences.Sum(s => s.ScoredSentence.TextUnits.Count);
            var targetWordCount       = summarizerArguments.MaxSummarySizeInPercent * totalContentWordCount / 100;
            var currentWordCount      = 0;
            var currentSentenceIndex  = 0;
            var selectedSentences     = new List <Sentence>();

            // The index may reach the last entry: stopping at "Count - 1" would never consider the
            // final sentence and would return nothing for a document with exactly one sentence.
            while (currentSentenceIndex < analyzedDocument.ScoredSentences.Count &&
                   selectedSentences.Count < summarizerArguments.MaxSummarySentences &&
                   currentWordCount < targetWordCount)
            {
                var selectedSentence = analyzedDocument.ScoredSentences[currentSentenceIndex].ScoredSentence;
                selectedSentences.Add(selectedSentence);

                // Same Count property as used for the total, instead of the LINQ Count() extension.
                currentWordCount     += selectedSentence.TextUnits.Count;
                currentSentenceIndex += 1;
            }

            // Present the selection in document order.
            return(selectedSentences.OrderBy(s => s.OriginalSentenceIndex).Select(s => s.OriginalSentence).ToList());
        }
        /// <summary>
        /// Runs the content summarizing part of the summarizing algorithm: normalizes the numeric
        /// arguments, then asks the summarizer for the concepts and the sentences.
        /// </summary>
        /// <param name="analyzedDocument">Analyzed document handed to the summarizer.</param>
        /// <param name="contentSummarizer">Summarizer whose <c>GetConcepts</c> and <c>GetSentences</c> are called.</param>
        /// <param name="arguments">
        /// Summarizer arguments. Out-of-range values are corrected in place on this instance:
        /// negative values become 0 and <c>MaxSummarySizeInPercent</c> is capped at 100.
        /// </param>
        /// <returns>A <c>SummarizedDocument</c> carrying the concepts and sentences returned by the summarizer.</returns>
        /// <exception cref="ArgumentNullException">Any of the three arguments is null.</exception>
        /// <exception cref="InvalidOperationException">The summarizer returned null concepts or null sentences.</exception>
        public SummarizedDocument SummarizeAnalyzedContent(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments arguments)
        {
            // Argument validation, in declaration order.
            if (analyzedDocument == null)
            {
                throw new ArgumentNullException(nameof(analyzedDocument));
            }

            if (contentSummarizer == null)
            {
                throw new ArgumentNullException(nameof(contentSummarizer));
            }

            if (arguments == null)
            {
                throw new ArgumentNullException(nameof(arguments));
            }

            // Pull out-of-range settings back to the nearest allowed value.
            if (arguments.FilteringConceptsCap < 0)
            {
                arguments.FilteringConceptsCap = 0;
            }

            if (arguments.MaxSummarySentences < 0)
            {
                arguments.MaxSummarySentences = 0;
            }

            if (arguments.MaxSummarySizeInPercent < 0)
            {
                arguments.MaxSummarySizeInPercent = 0;
            }
            else if (arguments.MaxSummarySizeInPercent > 100)
            {
                arguments.MaxSummarySizeInPercent = 100;
            }

            // Concepts first; sentences are only requested once the concepts are known to be valid.
            var concepts = contentSummarizer.GetConcepts(analyzedDocument, arguments);

            if (concepts == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetConcepts must not return null");
            }

            var sentences = contentSummarizer.GetSentences(analyzedDocument, arguments);

            if (sentences == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetSentences must not return null");
            }

            var summary = new SummarizedDocument();
            summary.Concepts  = concepts;
            summary.Sentences = sentences;

            return summary;
        }
 public void throws_if_null_arguments_are_passed(
     AnalyzedDocument analyzedDocument,
     IContentSummarizer contentSummarizer,
     ISummarizerArguments summarizerArguments)
 {
     // The argument combination comes from the caller (presumably a data-driven test
     // attribute that is not visible here - confirm); the call must be rejected with
     // an ArgumentNullException.
     Assert.Throws<ArgumentNullException>(() =>
     {
         Target.SummarizeAnalysedContent(analyzedDocument, contentSummarizer, summarizerArguments);
     });
 }
Esempio n. 10
0
 public void throws_if_null_arguments_are_passed(
     AnalyzedDocument analyzedDocument,
     IContentSummarizer contentSummarizer,
     ISummarizerArguments summarizerArguments)
 {
     // Whatever argument combination is supplied (presumably by a test-case source outside
     // this view - confirm), the call is expected to fail with exactly ArgumentNullException.
     Assert.That(
         () => Target.SummarizeAnalysedContent(analyzedDocument, contentSummarizer, summarizerArguments),
         Throws.TypeOf <ArgumentNullException>());
 }
        /// <summary>
        /// Runs the content summarizing part of the summarizing algorithm: validates the inputs,
        /// normalizes the numeric arguments and collects concepts and sentences from the summarizer.
        /// </summary>
        /// <param name="analyzedDocument">Analyzed document handed to the summarizer.</param>
        /// <param name="contentSummarizer">Summarizer whose <c>GetConcepts</c> and <c>GetSentences</c> are called.</param>
        /// <param name="arguments">
        /// Summarizer arguments. Out-of-range values are corrected in place on this instance:
        /// negative values become 0 and <c>MaxSummarySizeInPercent</c> is capped at 100.
        /// </param>
        /// <returns>A <c>SummarizedDocument</c> carrying the concepts and sentences returned by the summarizer.</returns>
        /// <exception cref="ArgumentNullException">Any of the three arguments is null.</exception>
        /// <exception cref="InvalidOperationException">The summarizer returned null concepts or null sentences.</exception>
        public SummarizedDocument SummarizeAnalysedContent(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments arguments)
        {
            // nameof keeps the reported parameter names in sync with the signature on rename.
            if (analyzedDocument == null)
            {
                throw new ArgumentNullException(nameof(analyzedDocument));
            }

            if (contentSummarizer == null)
            {
                throw new ArgumentNullException(nameof(contentSummarizer));
            }

            if (arguments == null)
            {
                throw new ArgumentNullException(nameof(arguments));
            }

            // Range adjustment
            if (arguments.FilteringConceptsCap < 0)
            {
                arguments.FilteringConceptsCap = 0;
            }

            if (arguments.MaxSummarySentences < 0)
            {
                arguments.MaxSummarySentences = 0;
            }

            if (arguments.MaxSummarySizeInPercent < 0)
            {
                arguments.MaxSummarySizeInPercent = 0;
            }

            if (arguments.MaxSummarySizeInPercent > 100)
            {
                arguments.MaxSummarySizeInPercent = 100;
            }

            var summarizedConcepts = contentSummarizer.GetConcepts(analyzedDocument, arguments);

            // A summarizer returning null violates its contract; name the offending type in the message.
            if (summarizedConcepts == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetConcepts must not return null");
            }

            var summarizedSentences = contentSummarizer.GetSentences(analyzedDocument, arguments);

            if (summarizedSentences == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetSentences must not return null");
            }

            return(new SummarizedDocument()
            {
                Concepts = summarizedConcepts, Sentences = summarizedSentences
            });
        }
        /// <summary>
        /// Runs the content summarizing part of the summarizing algorithm.
        /// </summary>
        /// <param name="analyzedDocument">Analyzed document passed to both summarizer calls.</param>
        /// <param name="contentSummarizer">Provides <c>GetConcepts</c> and <c>GetSentences</c>.</param>
        /// <param name="arguments">
        /// Summarizer settings; negative values are reset to 0 and <c>MaxSummarySizeInPercent</c> is
        /// limited to 100 directly on the passed instance before the summarizer is called.
        /// </param>
        /// <returns>A new <c>SummarizedDocument</c> holding the concepts and sentences obtained from the summarizer.</returns>
        /// <exception cref="ArgumentNullException">One of the arguments is null.</exception>
        /// <exception cref="InvalidOperationException"><c>GetConcepts</c> or <c>GetSentences</c> returned null.</exception>
        public SummarizedDocument SummarizeAnalysedContent(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments arguments)
        {
            // --- Guard clauses -------------------------------------------------
            if (analyzedDocument == null)
            {
                throw new ArgumentNullException(nameof(analyzedDocument));
            }

            if (contentSummarizer == null)
            {
                throw new ArgumentNullException(nameof(contentSummarizer));
            }

            if (arguments == null)
            {
                throw new ArgumentNullException(nameof(arguments));
            }

            // --- Clamp settings to their valid ranges --------------------------
            if (arguments.FilteringConceptsCap < 0)
            {
                arguments.FilteringConceptsCap = 0;
            }

            if (arguments.MaxSummarySentences < 0)
            {
                arguments.MaxSummarySentences = 0;
            }

            if (arguments.MaxSummarySizeInPercent < 0)
            {
                arguments.MaxSummarySizeInPercent = 0;
            }
            else if (arguments.MaxSummarySizeInPercent > 100)
            {
                arguments.MaxSummarySizeInPercent = 100;
            }

            // --- Ask the summarizer; a null answer is a contract violation -----
            var concepts = contentSummarizer.GetConcepts(analyzedDocument, arguments);
            if (concepts == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetConcepts must not return null");
            }

            var sentences = contentSummarizer.GetSentences(analyzedDocument, arguments);
            if (sentences == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetSentences must not return null");
            }

            return new SummarizedDocument
            {
                Concepts = concepts,
                Sentences = sentences
            };
        }
 public void throws_if_null_arguments_are_passed(
     AnalyzedDocument analyzedDocument,
     IContentSummarizer contentSummarizer,
     ISummarizerArguments summarizerArguments)
 {
     // No assertion in the body: the call itself is the test.
     // NOTE(review): presumably an expected-exception attribute outside this view
     // declares the failure this call must produce - confirm.
     Target.SummarizeAnalysedContent(
         analyzedDocument,
         contentSummarizer,
         summarizerArguments);
 }
Esempio n. 14
0
 public void throws_if_null_arguments_are_passed(
     AnalyzedDocument analyzedDocument,
     IContentSummarizer contentSummarizer,
     ISummarizerArguments summarizerArguments)
 {
     // The body only forwards the supplied arguments to the engine under test.
     // NOTE(review): the expected outcome is presumably declared by an attribute on this
     // method that is not visible here - confirm.
     Target.SummarizeAnalysedContent(
         analyzedDocument,
         contentSummarizer,
         summarizerArguments);
 }