forked from vrittis/OpenTextSummarizer
/
ClassicContentSummarizer.cs
41 lines (36 loc) · 1.97 KB
/
ClassicContentSummarizer.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
using OpenTextSummarizer.Interfaces;
using System.Collections.Generic;
using System.Linq;
namespace OpenTextSummarizer
{
/// <summary>
/// Content summarizer that builds its results by reading the scored text units
/// and scored sentences of an <see cref="AnalyzedDocument"/> from the start of
/// each list.
/// </summary>
/// <remarks>
/// NOTE(review): both methods read the scored lists front to back and so
/// presumably expect them to be ordered by descending score — confirm against
/// the code that populates <c>AnalyzedDocument</c>.
/// </remarks>
internal class ClassicContentSummarizer : IContentSummarizer
{
    /// <summary>
    /// Returns the formatted values of the text units retained as concepts.
    /// </summary>
    /// <param name="analyzedDocument">Document whose <c>ScoredTextUnits</c> are read.</param>
    /// <param name="summarizerArguments">Supplies <c>FilteringConceptsCap</c>.</param>
    /// <returns>
    /// The formatted value of every scored text unit when there are no more than
    /// <c>FilteringConceptsCap</c> of them; otherwise the formatted values of the
    /// units whose score is at least the score of the unit found at index
    /// <c>FilteringConceptsCap</c>.
    /// </returns>
    public List<string> GetConcepts(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
    {
        var scoredTextUnits = analyzedDocument.ScoredTextUnits;
        var conceptsCap = summarizerArguments.FilteringConceptsCap;

        // Not enough units to need filtering: keep them all.
        if (scoredTextUnits.Count <= conceptsCap)
        {
            return scoredTextUnits
                .Select(tus => tus.ScoredTextUnit.FormattedValue)
                .ToList();
        }

        // The score of the unit sitting at the cap index is the cut-off;
        // every unit scoring at least that much is kept (ties included).
        var baseFrequency = scoredTextUnits[conceptsCap].Score;
        return scoredTextUnits
            .Where(tus => tus.Score >= baseFrequency)
            .Select(tus => tus.ScoredTextUnit.FormattedValue)
            .ToList();
    }

    /// <summary>
    /// Selects sentences from the front of <c>ScoredSentences</c> until a sentence
    /// or word budget is reached, and returns them in original document order.
    /// </summary>
    /// <param name="analyzedDocument">Document whose <c>ScoredSentences</c> are read.</param>
    /// <param name="summarizerArguments">
    /// Supplies <c>MaxSummarySizeInPercent</c> (share of the total text-unit count
    /// used as the word budget) and <c>MaxSummarySentences</c> (sentence budget).
    /// </param>
    /// <returns>
    /// The original text of the selected sentences, ordered by
    /// <c>OriginalSentenceIndex</c>. Empty when there are no scored sentences or
    /// when either budget is already exhausted before the first sentence.
    /// </returns>
    public List<string> GetSentences(AnalyzedDocument analyzedDocument, ISummarizerArguments summarizerArguments)
    {
        var scoredSentences = analyzedDocument.ScoredSentences;

        // Word budget: the requested percentage of all text units in the document.
        var totalContentWordCount = scoredSentences.Sum(s => s.ScoredSentence.TextUnits.Count);
        var targetWordCount = summarizerArguments.MaxSummarySizeInPercent * totalContentWordCount / 100;

        var currentWordCount = 0;
        var currentSentenceIndex = 0;
        var selectedSentences = new List<Sentence>();

        // The bound is Count (not Count - 1) so that the final scored sentence is
        // also eligible; otherwise a single-sentence document could never produce
        // a summary and a 100% request could never return every sentence.
        while (currentSentenceIndex < scoredSentences.Count &&
               selectedSentences.Count < summarizerArguments.MaxSummarySentences &&
               currentWordCount < targetWordCount)
        {
            var selectedSentence = scoredSentences[currentSentenceIndex].ScoredSentence;
            selectedSentences.Add(selectedSentence);
            currentWordCount += selectedSentence.TextUnits.Count;
            currentSentenceIndex += 1;
        }

        // Restore reading order before handing the text back.
        return selectedSentences
            .OrderBy(s => s.OriginalSentenceIndex)
            .Select(s => s.OriginalSentence)
            .ToList();
    }
}
}