Пример #1
0
        /// <summary>
        /// Produces the final summary of an already analyzed document by asking the
        /// supplied summarizer for its concepts and for its sentences.
        /// </summary>
        /// <remarks>
        /// Out-of-range settings on <paramref name="arguments"/> are corrected in place
        /// before the summarizer is called: negative values are raised to 0 and
        /// MaxSummarySizeInPercent is capped at 100.
        /// </remarks>
        /// <param name="analyzedDocument">The analyzed document that is handed to the summarizer.</param>
        /// <param name="contentSummarizer">The summarizer that supplies the concepts and the sentences.</param>
        /// <param name="arguments">The summarizer settings; may be modified by the range adjustment.</param>
        /// <returns>A <see cref="SummarizedDocument"/> holding the concepts and sentences the summarizer returned.</returns>
        /// <exception cref="ArgumentNullException">One of the three parameters is null.</exception>
        /// <exception cref="InvalidOperationException">The summarizer returned null for its concepts or for its sentences.</exception>
        public SummarizedDocument SummarizeAnalysedContent(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments arguments)
        {
            // Guard clauses: all three collaborators are mandatory.
            if (analyzedDocument == null)
            {
                throw new ArgumentNullException("analyzedDocument");
            }

            if (contentSummarizer == null)
            {
                throw new ArgumentNullException("contentSummarizer");
            }

            if (arguments == null)
            {
                throw new ArgumentNullException("arguments");
            }

            // Pull every setting back into its valid range before it is used.
            if (arguments.FilteringConceptsCap < 0)
            {
                arguments.FilteringConceptsCap = 0;
            }

            if (arguments.MaxSummarySentences < 0)
            {
                arguments.MaxSummarySentences = 0;
            }

            if (arguments.MaxSummarySizeInPercent < 0)
            {
                arguments.MaxSummarySizeInPercent = 0;
            }
            else if (arguments.MaxSummarySizeInPercent > 100)
            {
                arguments.MaxSummarySizeInPercent = 100;
            }

            var concepts = contentSummarizer.GetConcepts(analyzedDocument, arguments);
            if (concepts == null)
            {
                throw new InvalidOperationException(string.Format("{0}.GetConcepts must not return null", contentSummarizer.GetType().FullName));
            }

            var sentences = contentSummarizer.GetSentences(analyzedDocument, arguments);
            if (sentences == null)
            {
                throw new InvalidOperationException(string.Format("{0}.GetSentences must not return null", contentSummarizer.GetType().FullName));
            }

            var summary = new SummarizedDocument();
            summary.Concepts = concepts;
            summary.Sentences = sentences;
            return summary;
        }
        /// <summary>
        /// Lists the formatted values of the scored text units that pass the concept filter.
        /// </summary>
        /// <remarks>
        /// When the document has no more scored text units than FilteringConceptsCap, all of
        /// them are returned. Otherwise the score of the unit at index FilteringConceptsCap is
        /// used as a threshold and every unit scoring at least that much is returned.
        /// </remarks>
        /// <param name="analyzedDocument">The document whose ScoredTextUnits are filtered.</param>
        /// <param name="summarizerArguments">The settings that provide FilteringConceptsCap.</param>
        /// <returns>The FormattedValue of every retained text unit, in the order of ScoredTextUnits.</returns>
        public List<string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var scoredUnits = analyzedDocument.ScoredTextUnits;
            var cap = summarizerArguments.FilteringConceptsCap;

            if (scoredUnits.Count <= cap)
            {
                // Too few units to filter: keep them all.
                var everything =
                    from scored in scoredUnits
                    select scored.ScoredTextUnit.FormattedValue;
                return everything.ToList();
            }

            // The unit sitting at the cap position sets the minimum score to keep.
            var thresholdScore = scoredUnits[cap].Score;
            var retained =
                from scored in scoredUnits
                where scored.Score >= thresholdScore
                select scored.ScoredTextUnit.FormattedValue;
            return retained.ToList();
        }
Пример #3
0
        /// <summary>
        /// Returns the formatted values of the document's scored text units, filtered by
        /// FilteringConceptsCap.
        /// </summary>
        /// <param name="analyzedDocument">The document providing ScoredTextUnits.</param>
        /// <param name="summarizerArguments">The settings providing FilteringConceptsCap.</param>
        /// <returns>
        /// Every unit's FormattedValue when the unit count does not exceed the cap; otherwise only
        /// those of units whose Score is at least the Score of the unit at index FilteringConceptsCap.
        /// </returns>
        public List<string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var textUnits = analyzedDocument.ScoredTextUnits;
            var conceptsCap = summarizerArguments.FilteringConceptsCap;
            var kept = textUnits.AsEnumerable();

            if (textUnits.Count > conceptsCap)
            {
                // More units than the cap: narrow down to those scoring at least as high
                // as the unit found at the cap position.
                var minimumScore = textUnits[conceptsCap].Score;
                kept = kept.Where(unit => unit.Score >= minimumScore);
            }

            return kept.Select(unit => unit.ScoredTextUnit.FormattedValue).ToList();
        }
        /// <summary>
        /// Returns the formatted values of the highest scoring text units, limited to
        /// MaxConceptsInPercent percent of all scored text units.
        /// </summary>
        /// <remarks>
        /// The previous short-circuit compared the number of text units against the percentage
        /// value itself, so a document with no more units than the percentage number ignored the
        /// limit entirely (10 units at 10 % returned all 10, while 11 units returned 1). The
        /// unfiltered path is now taken only when the limit is 100 % or more.
        /// </remarks>
        /// <param name="analyzedDocument">The document whose ScoredTextUnits are ranked.</param>
        /// <param name="summarizerArguments">The settings that provide MaxConceptsInPercent.</param>
        /// <returns>
        /// At 100 % or more, the FormattedValue of every unit in the order of ScoredTextUnits;
        /// otherwise those of the top scoring units, ordered by descending Score.
        /// </returns>
        public List<string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var scoredTextUnits = analyzedDocument.ScoredTextUnits;

            // Nothing to cut at 100 % (or above): keep every concept in its existing order.
            if (summarizerArguments.MaxConceptsInPercent >= 100)
            {
                return scoredTextUnits.Select(tus => tus.ScoredTextUnit.FormattedValue).ToList();
            }

            // Translate the percentage into a number of concepts; midpoints round away from zero.
            double conceptCount = (summarizerArguments.MaxConceptsInPercent / 100D) * scoredTextUnits.Count;

            return scoredTextUnits.OrderByDescending(textUnitScore => textUnitScore.Score)
                   .Take((int)Math.Round(conceptCount, 0, MidpointRounding.AwayFromZero))
                   .Select(textUnitScore => textUnitScore.ScoredTextUnit.FormattedValue)
                   .ToList();
        }
        /// <summary>
        /// Picks sentences from the document's ScoredSentences, in list order, until one of the
        /// summary limits is reached, and returns their original text in original document order.
        /// </summary>
        /// <remarks>
        /// Selection stops as soon as MaxSummarySentences sentences were taken or the selected
        /// sentences contain at least MaxSummarySizeInPercent percent of the document's text units.
        /// The previous loop bound of <c>Count - 1</c> made the last scored sentence unreachable,
        /// so a one-sentence document always produced an empty summary; every sentence is now eligible.
        /// NOTE(review): presumably ScoredSentences is ordered best score first - confirm with the analyzer.
        /// </remarks>
        /// <param name="analyzedDocument">The document providing ScoredSentences.</param>
        /// <param name="summarizerArguments">The settings providing MaxSummarySentences and MaxSummarySizeInPercent.</param>
        /// <returns>The OriginalSentence of each selected sentence, sorted by OriginalSentenceIndex.</returns>
        public List<string> GetSentences(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var scoredSentences = analyzedDocument.ScoredSentences;
            var totalContentWordCount = scoredSentences.Sum(s => s.ScoredSentence.TextUnits.Count);
            var targetWordCount = summarizerArguments.MaxSummarySizeInPercent * totalContentWordCount / 100;
            var currentWordCount = 0;
            var selectedSentences = new List<Sentence>();

            // Walk all scored sentences (including the last one) until a limit is hit.
            for (var index = 0;
                 index < scoredSentences.Count &&
                 selectedSentences.Count < summarizerArguments.MaxSummarySentences &&
                 currentWordCount < targetWordCount;
                 index++)
            {
                var selectedSentence = scoredSentences[index].ScoredSentence;
                selectedSentences.Add(selectedSentence);
                currentWordCount += selectedSentence.TextUnits.Count;
            }

            // Present the chosen sentences in the order they appeared in the source text.
            return selectedSentences.OrderBy(s => s.OriginalSentenceIndex).Select(s => s.OriginalSentence).ToList();
        }
Пример #6
0
        /// <summary>
        /// Builds the sentence part of the summary: takes sentences from ScoredSentences in list
        /// order while the limits allow, then returns their original text in document order.
        /// </summary>
        /// <remarks>
        /// A sentence is taken only while fewer than MaxSummarySentences sentences are selected and
        /// the selected text units stay below MaxSummarySizeInPercent percent of the document total.
        /// The loop used to stop at <c>Count - 1</c>, which excluded the last scored sentence
        /// unconditionally (an empty summary for one-sentence documents); the bound is now <c>Count</c>.
        /// </remarks>
        /// <param name="analyzedDocument">The document providing ScoredSentences.</param>
        /// <param name="summarizerArguments">The settings providing MaxSummarySentences and MaxSummarySizeInPercent.</param>
        /// <returns>The OriginalSentence of each selected sentence, ordered by OriginalSentenceIndex.</returns>
        public List<string> GetSentences(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
        {
            var totalContentWordCount = analyzedDocument.ScoredSentences.Sum(s => s.ScoredSentence.TextUnits.Count);
            var targetWordCount       = summarizerArguments.MaxSummarySizeInPercent * totalContentWordCount / 100;
            var currentWordCount      = 0;
            var currentSentenceIndex  = 0;
            var selectedSentences     = new List<Sentence>();

            // Every scored sentence is a candidate, the last one included.
            while (currentSentenceIndex < analyzedDocument.ScoredSentences.Count &&
                   selectedSentences.Count < summarizerArguments.MaxSummarySentences &&
                   currentWordCount < targetWordCount)
            {
                var selectedSentence = analyzedDocument.ScoredSentences[currentSentenceIndex].ScoredSentence;
                selectedSentences.Add(selectedSentence);
                currentWordCount     += selectedSentence.TextUnits.Count;
                currentSentenceIndex += 1;
            }

            // Restore the reading order of the source document.
            return selectedSentences.OrderBy(s => s.OriginalSentenceIndex).Select(s => s.OriginalSentence).ToList();
        }
        /// <summary>
        /// Content summarizing stage: sanitizes the settings, then collects the concepts and the
        /// sentences from the given summarizer into a single result.
        /// </summary>
        /// <remarks>
        /// <paramref name="arguments"/> is adjusted in place: negative FilteringConceptsCap,
        /// MaxSummarySentences and MaxSummarySizeInPercent are set to 0, and a
        /// MaxSummarySizeInPercent above 100 is set to 100.
        /// </remarks>
        /// <param name="analyzedDocument">The analyzed document passed on to the summarizer.</param>
        /// <param name="contentSummarizer">The summarizer queried for concepts and sentences.</param>
        /// <param name="arguments">The summarizer settings; out-of-range values are overwritten.</param>
        /// <returns>A <see cref="SummarizedDocument"/> carrying the summarizer's concepts and sentences.</returns>
        /// <exception cref="ArgumentNullException">A parameter is null.</exception>
        /// <exception cref="InvalidOperationException">GetConcepts or GetSentences of the summarizer returned null.</exception>
        public SummarizedDocument SummarizeAnalysedContent(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments arguments)
        {
            if (analyzedDocument == null)
            {
                throw new ArgumentNullException(nameof(analyzedDocument));
            }

            if (contentSummarizer == null)
            {
                throw new ArgumentNullException(nameof(contentSummarizer));
            }

            if (arguments == null)
            {
                throw new ArgumentNullException(nameof(arguments));
            }

            // Counts may not be negative.
            if (arguments.FilteringConceptsCap < 0)
            {
                arguments.FilteringConceptsCap = 0;
            }

            if (arguments.MaxSummarySentences < 0)
            {
                arguments.MaxSummarySentences = 0;
            }

            // The summary size is a percentage and has to stay within 0..100.
            if (arguments.MaxSummarySizeInPercent < 0)
            {
                arguments.MaxSummarySizeInPercent = 0;
            }
            else if (arguments.MaxSummarySizeInPercent > 100)
            {
                arguments.MaxSummarySizeInPercent = 100;
            }

            var concepts = contentSummarizer.GetConcepts(analyzedDocument, arguments);
            if (concepts == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetConcepts must not return null");
            }

            var sentences = contentSummarizer.GetSentences(analyzedDocument, arguments);
            if (sentences == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetSentences must not return null");
            }

            return new SummarizedDocument
            {
                Concepts = concepts,
                Sentences = sentences
            };
        }
Пример #8
0
        /// <summary>
        /// Turns an analyzed document into a summarized one by delegating concept and sentence
        /// selection to the supplied summarizer.
        /// </summary>
        /// <remarks>
        /// Before delegating, the settings are normalized in place: MaxConceptsInPercent and
        /// MaxSummarySizeInPercent are forced into the 0..100 range and a negative
        /// MaxSummarySentences is replaced by 0.
        /// </remarks>
        /// <param name="analyzedDocument">The analyzed document to summarize.</param>
        /// <param name="contentSummarizer">The summarizer that returns the concepts and the sentences.</param>
        /// <param name="arguments">The summarizer settings; normalized in place before use.</param>
        /// <returns>A <see cref="SummarizedDocument"/> with the concepts and sentences obtained from the summarizer.</returns>
        /// <exception cref="ArgumentNullException">Any parameter is null.</exception>
        /// <exception cref="InvalidOperationException">The summarizer returned null concepts or null sentences.</exception>
        public SummarizedDocument SummarizeAnalysedContent(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments arguments)
        {
            if (analyzedDocument == null)
            {
                throw new ArgumentNullException(nameof(analyzedDocument));
            }

            if (contentSummarizer == null)
            {
                throw new ArgumentNullException(nameof(contentSummarizer));
            }

            if (arguments == null)
            {
                throw new ArgumentNullException(nameof(arguments));
            }

            // Percentages: keep within 0..100.
            if (arguments.MaxConceptsInPercent > 100)
            {
                arguments.MaxConceptsInPercent = 100;
            }
            else if (arguments.MaxConceptsInPercent < 0)
            {
                arguments.MaxConceptsInPercent = 0;
            }

            if (arguments.MaxSummarySizeInPercent > 100)
            {
                arguments.MaxSummarySizeInPercent = 100;
            }
            else if (arguments.MaxSummarySizeInPercent < 0)
            {
                arguments.MaxSummarySizeInPercent = 0;
            }

            // Sentence count: no negative values.
            if (arguments.MaxSummarySentences < 0)
            {
                arguments.MaxSummarySentences = 0;
            }

            var concepts = contentSummarizer.GetConcepts(analyzedDocument, arguments);
            if (concepts == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetConcepts must not return null");
            }

            var sentences = contentSummarizer.GetSentences(analyzedDocument, arguments);
            if (sentences == null)
            {
                throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetSentences must not return null");
            }

            return new SummarizedDocument { Concepts = concepts, Sentences = sentences };
        }