Ejemplo n.º 1
0
        /// <summary>
        /// Produces an extractive summary of <paramref name="text"/> made of at most
        /// three of its sentences, selected by a PageRank pass over a sentence graph.
        /// </summary>
        /// <param name="text">The text to summarize.</param>
        /// <param name="lang">
        /// Language code passed to the NLP service provider when requesting the
        /// stop-word filter and the stemmer. Defaults to <c>"en"</c>.
        /// </param>
        /// <returns>
        /// <paramref name="text"/> unchanged when it splits into three sentences or fewer;
        /// otherwise the selected sentences in their original order, each followed by
        /// <c>". "</c> (so the result ends with a trailing separator).
        /// </returns>
        /// <remarks>
        /// Side effect: when the text is long enough to be summarized, the
        /// <c>_stopWordFilter</c> field is replaced with the filter for <paramref name="lang"/>.
        /// </remarks>
        public override string Summarize(string text, string lang = "en")
        {
            const int maxSentences = 3;

            var sentences = SplitTextOnSentences(text);

            // Nothing to condense: the text is already within the summary size.
            if (sentences.Count <= maxSentences)
            {
                return text;
            }

            _stopWordFilter = _nlpServiceProvider.GetStopWordFilter(lang);
            var stemmer = _nlpServiceProvider.GetStemmer(lang);

            // Tokenize every sentence, mapping each token through the stemmer.
            var stemmedSentences = new List<IList<string>>();
            foreach (var current in sentences)
            {
                var mapper = new TextFilterMapper
                {
                    Map = token => stemmer.Stem(token)
                };
                var tokens = new TextTokenizer(current, filterMapper: mapper);
                stemmedSentences.Add(tokens.ToList());
            }

            var similarity = BuildSimilarityMatrix(stemmedSentences);
            var sentenceGraph = BuildDirectedGraph(similarity);
            var scores = new PageRank().Rank(sentenceGraph);

            // Ascending by rank value: per the original author's note, a lower value
            // is a better result, so the first entries are the ones to keep.
            var ranked = scores.OrderBy(entry => entry.Value);

            // Re-sort the selected entries by key so the sentences appear in text order.
            var selected = ranked
                .Take(maxSentences)
                .OrderBy(entry => entry.Key);

            return string.Concat(selected.Select(entry => sentences[entry.Key] + ". "));
        }