/// <summary>
/// Returns the formatted values of the document's scored text units, limited by
/// <c>FilteringConceptsCap</c>.
/// </summary>
/// <param name="analyzedDocument">Document whose <c>ScoredTextUnits</c> are read.</param>
/// <param name="summarizerArguments">Supplies <c>FilteringConceptsCap</c>.</param>
/// <returns>
/// Every formatted value when the number of text units does not exceed the cap; otherwise the
/// formatted values of all units whose score is at least the score of the unit at index
/// <c>FilteringConceptsCap</c>.
/// </returns>
public List<string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
{
    var scoredUnits = analyzedDocument.ScoredTextUnits;
    var cap = summarizerArguments.FilteringConceptsCap;

    if (scoredUnits.Count > cap)
    {
        // The score found at the cap position acts as the inclusion threshold, so ties with that
        // unit are kept as well.
        // NOTE(review): presumably ScoredTextUnits is ordered by descending score - confirm.
        var threshold = scoredUnits[cap].Score;

        return scoredUnits
            .Where(unit => unit.Score >= threshold)
            .Select(unit => unit.ScoredTextUnit.FormattedValue)
            .ToList();
    }

    return scoredUnits
        .Select(unit => unit.ScoredTextUnit.FormattedValue)
        .ToList();
}
/// <summary>
/// Collects concept strings (formatted values) from the scored text units of an analyzed document.
/// </summary>
/// <param name="analyzedDocument">Source of the <c>ScoredTextUnits</c> list.</param>
/// <param name="summarizerArguments">Source of the <c>FilteringConceptsCap</c> setting.</param>
/// <returns>
/// All formatted values if the unit count is less than or equal to <c>FilteringConceptsCap</c>;
/// otherwise only those of units scoring at least as high as the unit stored at index
/// <c>FilteringConceptsCap</c>.
/// </returns>
public List<string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
{
    var candidates = analyzedDocument.ScoredTextUnits;
    var conceptCap = summarizerArguments.FilteringConceptsCap;

    if (candidates.Count <= conceptCap)
    {
        // Nothing to filter: every unit becomes a concept.
        return (from candidate in candidates
                select candidate.ScoredTextUnit.FormattedValue).ToList();
    }

    // Score of the unit sitting at the cap index; anything scoring at least this much is kept.
    var cutoffScore = candidates[conceptCap].Score;

    return (from candidate in candidates
            where candidate.Score >= cutoffScore
            select candidate.ScoredTextUnit.FormattedValue).ToList();
}
/// <summary>
/// Returns concept strings for the document, keeping roughly <c>MaxConceptsInPercent</c> percent
/// of the scored text units.
/// </summary>
/// <param name="analyzedDocument">Document whose <c>ScoredTextUnits</c> are read.</param>
/// <param name="summarizerArguments">Supplies <c>MaxConceptsInPercent</c>.</param>
/// <returns>
/// All formatted values (in list order) when the unit count does not exceed
/// <c>MaxConceptsInPercent</c>; otherwise the formatted values of the top-scoring units, where the
/// number taken is the percentage of the unit count rounded half away from zero.
/// </returns>
public List<string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
{
    var scoredUnits = analyzedDocument.ScoredTextUnits;

    // NOTE(review): this guard compares an item count with a percentage value; it is kept as-is
    // because the intended threshold cannot be determined from this method alone - confirm.
    if (scoredUnits.Count <= summarizerArguments.MaxConceptsInPercent)
    {
        return scoredUnits
            .Select(unit => unit.ScoredTextUnit.FormattedValue)
            .ToList();
    }

    double share = (summarizerArguments.MaxConceptsInPercent / 100D) * scoredUnits.Count;
    int conceptCount = (int)Math.Round(share, 0, MidpointRounding.AwayFromZero);

    return scoredUnits
        .OrderByDescending(unit => unit.Score)
        .Take(conceptCount)
        .Select(unit => unit.ScoredTextUnit.FormattedValue)
        .ToList();
}
/// <summary>
/// Runs the parse, analyze and summarize steps on the supplied content using a new
/// <c>SummarizingEngine</c>.
/// </summary>
/// <param name="contentProvider">Content to summarize.</param>
/// <param name="args">Supplies the parser, analyzer and summarizer, and is passed to the final step.</param>
/// <returns>
/// An empty <c>SummarizedDocument</c> when either argument is null; otherwise the result of the
/// engine's summarizing step.
/// </returns>
public static SummarizedDocument Summarize(IContentProvider contentProvider, ISummarizerArguments args)
{
    if (contentProvider == null || args == null)
    {
        // Missing input is answered with an empty document rather than an exception.
        return new SummarizedDocument();
    }

    var engine = new SummarizingEngine();
    var parsed = engine.ParseContent(contentProvider, args.ContentParser());
    var analyzed = engine.AnalyzeParsedContent(parsed, args.ContentAnalyzer());

    return engine.SummarizeAnalyzedContent(analyzed, args.ContentSummarizer(), args);
}
/// <summary>
/// Summarizes the supplied content by chaining the engine's parse, analyze and summarize steps.
/// </summary>
/// <param name="contentProvider">Content to summarize.</param>
/// <param name="args">Provides the parser, analyzer and summarizer; also forwarded to the last step.</param>
/// <returns>
/// The summarized document, or a new empty <c>SummarizedDocument</c> if either argument is null.
/// </returns>
public static SummarizedDocument Summarize(IContentProvider contentProvider, ISummarizerArguments args)
{
    if (contentProvider != null && args != null)
    {
        var engine = new SummarizingEngine();
        var parsed = engine.ParseContent(contentProvider, args.ContentParser());
        var analyzed = engine.AnalyzeParsedContent(parsed, args.ContentAnalyzer());

        return engine.SummarizeAnalysedContent(analyzed, args.ContentSummarizer(), args);
    }

    // Either input was missing: hand back an empty result instead of throwing.
    return new SummarizedDocument();
}
/// <summary>
/// Picks sentences from <c>analyzedDocument.ScoredSentences</c>, in list order, until either the
/// sentence limit or the word budget is reached, and returns their original text.
/// </summary>
/// <param name="analyzedDocument">Document whose <c>ScoredSentences</c> are read.</param>
/// <param name="summarizerArguments">
/// Supplies <c>MaxSummarySizeInPercent</c> (word budget as a percentage of the total word count)
/// and <c>MaxSummarySentences</c> (maximum number of sentences).
/// </param>
/// <returns>The original text of the selected sentences, ordered by <c>OriginalSentenceIndex</c>.</returns>
public List<string> GetSentences(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
{
    var totalContentWordCount = analyzedDocument.ScoredSentences.Sum(s => s.ScoredSentence.TextUnits.Count);
    var targetWordCount = summarizerArguments.MaxSummarySizeInPercent * totalContentWordCount / 100;
    var currentWordCount = 0;
    var selectedSentences = new List<Sentence>();

    // Every scored sentence is a candidate. The previous bound of "Count - 1" made the last
    // sentence unreachable, so a single-sentence document always produced an empty summary.
    foreach (var scored in analyzedDocument.ScoredSentences)
    {
        if (selectedSentences.Count >= summarizerArguments.MaxSummarySentences
            || currentWordCount >= targetWordCount)
        {
            break;
        }

        var selectedSentence = scored.ScoredSentence;
        selectedSentences.Add(selectedSentence);
        currentWordCount += selectedSentence.TextUnits.Count;
    }

    // Restore reading order before projecting to the original text.
    return selectedSentences
        .OrderBy(s => s.OriginalSentenceIndex)
        .Select(s => s.OriginalSentence)
        .ToList();
}
/// <summary>
/// Selects sentences from <c>analyzedDocument.ScoredSentences</c>, walking the list from the start,
/// until the sentence limit or the word budget is hit, and returns their original text.
/// </summary>
/// <param name="analyzedDocument">Document whose <c>ScoredSentences</c> are read.</param>
/// <param name="summarizerArguments">
/// Supplies <c>MaxSummarySizeInPercent</c> (word budget as a percentage of the total word count)
/// and <c>MaxSummarySentences</c> (maximum number of sentences).
/// </param>
/// <returns>The original text of the selected sentences, ordered by <c>OriginalSentenceIndex</c>.</returns>
public List<string> GetSentences(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
{
    var totalContentWordCount = analyzedDocument.ScoredSentences.Sum(s => s.ScoredSentence.TextUnits.Count);
    var targetWordCount = summarizerArguments.MaxSummarySizeInPercent * totalContentWordCount / 100;
    var currentWordCount = 0;
    var currentSentenceIndex = 0;
    var selectedSentences = new List<Sentence>();

    // The index may reach the final element. The earlier "Count - 1" bound skipped the last
    // sentence, which left single-sentence documents with an empty summary.
    while (currentSentenceIndex < analyzedDocument.ScoredSentences.Count
           && selectedSentences.Count < summarizerArguments.MaxSummarySentences
           && currentWordCount < targetWordCount)
    {
        var selectedSentence = analyzedDocument.ScoredSentences[currentSentenceIndex].ScoredSentence;
        selectedSentences.Add(selectedSentence);
        currentWordCount += selectedSentence.TextUnits.Count;
        currentSentenceIndex += 1;
    }

    // Restore reading order before projecting to the original text.
    return selectedSentences
        .OrderBy(s => s.OriginalSentenceIndex)
        .Select(s => s.OriginalSentence)
        .ToList();
}
/// <summary>
/// Runs the content summarizing part of the summarizing algorithm: validates the inputs, clamps
/// out-of-range settings on <paramref name="arguments"/> in place, then asks the summarizer for
/// concepts and sentences.
/// </summary>
/// <param name="analyzedDocument">Analyzed document handed to the summarizer.</param>
/// <param name="contentSummarizer">Summarizer that produces the concepts and sentences.</param>
/// <param name="arguments">Settings; negative values are raised to 0 and the size percentage is capped at 100.</param>
/// <returns>A <c>SummarizedDocument</c> holding the concepts and sentences returned by the summarizer.</returns>
/// <exception cref="ArgumentNullException">Any of the three parameters is null.</exception>
/// <exception cref="InvalidOperationException">The summarizer returned null concepts or null sentences.</exception>
public SummarizedDocument SummarizeAnalyzedContent(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments arguments)
{
    if (analyzedDocument == null)
    {
        throw new ArgumentNullException(nameof(analyzedDocument));
    }

    if (contentSummarizer == null)
    {
        throw new ArgumentNullException(nameof(contentSummarizer));
    }

    if (arguments == null)
    {
        throw new ArgumentNullException(nameof(arguments));
    }

    // Range adjustment: the caller's arguments object is corrected in place.
    if (arguments.FilteringConceptsCap < 0)
    {
        arguments.FilteringConceptsCap = 0;
    }

    if (arguments.MaxSummarySentences < 0)
    {
        arguments.MaxSummarySentences = 0;
    }

    if (arguments.MaxSummarySizeInPercent < 0)
    {
        arguments.MaxSummarySizeInPercent = 0;
    }
    else if (arguments.MaxSummarySizeInPercent > 100)
    {
        arguments.MaxSummarySizeInPercent = 100;
    }

    var concepts = contentSummarizer.GetConcepts(analyzedDocument, arguments);
    if (concepts == null)
    {
        throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetConcepts must not return null");
    }

    var sentences = contentSummarizer.GetSentences(analyzedDocument, arguments);
    if (sentences == null)
    {
        throw new InvalidOperationException($"{contentSummarizer.GetType().FullName}.GetSentences must not return null");
    }

    return new SummarizedDocument
    {
        Concepts = concepts,
        Sentences = sentences
    };
}
/// <summary>
/// Asserts that <c>Target.SummarizeAnalysedContent</c> throws <see cref="ArgumentNullException"/>
/// for the supplied argument combination.
/// </summary>
/// <remarks>
/// NOTE(review): the parameters are presumably fed by a test-case source declared outside this
/// view - confirm.
/// </remarks>
public void throws_if_null_arguments_are_passed(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments summarizerArguments)
{
    Assert.Throws<ArgumentNullException>(
        () => Target.SummarizeAnalysedContent(analyzedDocument, contentSummarizer, summarizerArguments));
}
/// <summary>
/// Asserts that <c>Target.SummarizeAnalysedContent</c> throws exactly
/// <see cref="ArgumentNullException"/> for the supplied argument combination.
/// </summary>
/// <remarks>
/// NOTE(review): the parameters are presumably fed by a test-case source declared outside this
/// view - confirm.
/// </remarks>
public void throws_if_null_arguments_are_passed(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments summarizerArguments)
{
    var expectedFailure = Throws.TypeOf<ArgumentNullException>();

    Assert.That(
        () => Target.SummarizeAnalysedContent(analyzedDocument, contentSummarizer, summarizerArguments),
        expectedFailure);
}
/// <summary>
/// Runs the content summarizing part of the summarizing algorithm: checks the inputs for null,
/// corrects out-of-range settings on <paramref name="arguments"/> in place, then collects concepts
/// and sentences from the summarizer.
/// </summary>
/// <param name="analyzedDocument">Analyzed document handed to the summarizer.</param>
/// <param name="contentSummarizer">Summarizer that produces the concepts and sentences.</param>
/// <param name="arguments">Settings; negative values are raised to 0 and the size percentage is capped at 100.</param>
/// <returns>A <c>SummarizedDocument</c> holding the concepts and sentences returned by the summarizer.</returns>
/// <exception cref="ArgumentNullException">Any of the three parameters is null.</exception>
/// <exception cref="InvalidOperationException">The summarizer returned null concepts or null sentences.</exception>
public SummarizedDocument SummarizeAnalysedContent(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments arguments)
{
    if (analyzedDocument == null)
    {
        throw new ArgumentNullException("analyzedDocument");
    }

    if (contentSummarizer == null)
    {
        throw new ArgumentNullException("contentSummarizer");
    }

    if (arguments == null)
    {
        throw new ArgumentNullException("arguments");
    }

    // Range adjustment: the caller's arguments object is corrected in place.
    if (arguments.FilteringConceptsCap < 0)
    {
        arguments.FilteringConceptsCap = 0;
    }

    if (arguments.MaxSummarySentences < 0)
    {
        arguments.MaxSummarySentences = 0;
    }

    if (arguments.MaxSummarySizeInPercent < 0)
    {
        arguments.MaxSummarySizeInPercent = 0;
    }

    if (arguments.MaxSummarySizeInPercent > 100)
    {
        arguments.MaxSummarySizeInPercent = 100;
    }

    var concepts = contentSummarizer.GetConcepts(analyzedDocument, arguments);
    if (concepts == null)
    {
        // A null result is treated as a summarizer defect and reported with its type name.
        var message = string.Format("{0}.GetConcepts must not return null", contentSummarizer.GetType().FullName);
        throw new InvalidOperationException(message);
    }

    var sentences = contentSummarizer.GetSentences(analyzedDocument, arguments);
    if (sentences == null)
    {
        var message = string.Format("{0}.GetSentences must not return null", contentSummarizer.GetType().FullName);
        throw new InvalidOperationException(message);
    }

    return new SummarizedDocument
    {
        Concepts = concepts,
        Sentences = sentences
    };
}
/// <summary>
/// Runs the content summarizing part of the summarizing algorithm: rejects null inputs, clamps
/// out-of-range settings on <paramref name="arguments"/> in place, then gathers concepts and
/// sentences from the summarizer.
/// </summary>
/// <param name="analyzedDocument">Analyzed document handed to the summarizer.</param>
/// <param name="contentSummarizer">Summarizer that produces the concepts and sentences.</param>
/// <param name="arguments">Settings; negative values are raised to 0 and the size percentage is capped at 100.</param>
/// <returns>A <c>SummarizedDocument</c> holding the concepts and sentences returned by the summarizer.</returns>
/// <exception cref="ArgumentNullException">Any of the three parameters is null.</exception>
/// <exception cref="InvalidOperationException">The summarizer returned null concepts or null sentences.</exception>
public SummarizedDocument SummarizeAnalysedContent(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments arguments)
{
    if (analyzedDocument == null)
    {
        throw new ArgumentNullException(nameof(analyzedDocument));
    }

    if (contentSummarizer == null)
    {
        throw new ArgumentNullException(nameof(contentSummarizer));
    }

    if (arguments == null)
    {
        throw new ArgumentNullException(nameof(arguments));
    }

    // Range adjustment: the caller's arguments object is corrected in place.
    if (arguments.FilteringConceptsCap < 0)
    {
        arguments.FilteringConceptsCap = 0;
    }

    if (arguments.MaxSummarySentences < 0)
    {
        arguments.MaxSummarySentences = 0;
    }

    if (arguments.MaxSummarySizeInPercent < 0)
    {
        arguments.MaxSummarySizeInPercent = 0;
    }
    else if (arguments.MaxSummarySizeInPercent > 100)
    {
        arguments.MaxSummarySizeInPercent = 100;
    }

    var concepts = contentSummarizer.GetConcepts(analyzedDocument, arguments);
    if (concepts == null)
    {
        throw new InvalidOperationException(
            $"{contentSummarizer.GetType().FullName}.GetConcepts must not return null");
    }

    var sentences = contentSummarizer.GetSentences(analyzedDocument, arguments);
    if (sentences == null)
    {
        throw new InvalidOperationException(
            $"{contentSummarizer.GetType().FullName}.GetSentences must not return null");
    }

    var summary = new SummarizedDocument()
    {
        Concepts = concepts,
        Sentences = sentences
    };

    return summary;
}
/// <summary>
/// Calls <c>Target.SummarizeAnalysedContent</c> with the supplied arguments.
/// </summary>
/// <remarks>
/// NOTE(review): no assertion is visible in the body; presumably the expected
/// ArgumentNullException is declared by an attribute outside this view, and the parameters come
/// from a test-case source that is also not shown - confirm both.
/// </remarks>
public void throws_if_null_arguments_are_passed(AnalyzedDocument analyzedDocument, IContentSummarizer contentSummarizer, ISummarizerArguments summarizerArguments) { Target.SummarizeAnalysedContent(analyzedDocument, contentSummarizer, summarizerArguments); }