/// <summary>
/// Creates the instance, wiring a <c>BayesianClassifier</c> over an ODBC-backed
/// word data source and storing the supplied stop-word filter and data extractor.
/// </summary>
/// <param name="stopWordFilter">Value assigned to <c>StopWordFilter</c>.</param>
/// <param name="dataExtractor">Value assigned to <c>DataExtractor</c>.</param>
public AdminApprovalHistory(IStopWordFilter stopWordFilter, IDataExtractor dataExtractor)
{
    // The connection string is read from the "ConnectionString" application setting.
    string connectionString = ConfigurationManager.AppSettings["ConnectionString"];

    // Locals stay typed as the interfaces, exactly as the classifier chain consumes them.
    IDbConnectionManager connectionManager = new OdbcConnectionManager(connectionString);
    IWordsDataSource wordsSource = new OdbcWordsDataSource(connectionManager);

    this.Classifier = new BayesianClassifier(wordsSource);
    this.StopWordFilter = stopWordFilter;
    this.DataExtractor = dataExtractor;
}
/// <summary>
/// Creates an extractor whose stop-word filter and stemmer are obtained from
/// <paramref name="nlpServices"/> for the given language.
/// </summary>
/// <param name="nlpServices">Provider queried for the stop-word filter and the stemmer.</param>
/// <param name="lang">Language identifier passed to both provider lookups.</param>
public KeywordExtractor(INlpServiceProvider nlpServices, string lang)
{
    this._stopWordFilter = nlpServices.GetStopWordFilter(lang);
    this._wordStemmer = nlpServices.GetStemmer(lang);

    // Filter: accepts a word only when it is not a stop word.
    this._filter = word => !this._stopWordFilter.IsStopWord(word);

    // Mapper: replaces a word with its stemmed form.
    this._mapper = word => this._wordStemmer.Stem(word);
}
/// <summary>
/// Produces a summary made of three sentences picked from <paramref name="text"/>.
/// Sentences are stemmed, compared through a similarity matrix, turned into a
/// directed graph and ranked with <c>PageRank</c>; the three entries with the
/// lowest rank value are emitted in their original text order.
/// </summary>
/// <param name="text">Text to summarize.</param>
/// <param name="lang">Language identifier used to look up the stop-word filter and stemmer.</param>
/// <returns>
/// <paramref name="text"/> unchanged when it splits into three sentences or fewer;
/// otherwise the selected sentences, each followed by ". ".
/// </returns>
public override string Summarize(string text, string lang = "en")
{
    const int summarySize = 3;

    var sentences = SplitTextOnSentences(text);
    if (sentences.Count <= summarySize)
    {
        // Nothing to condense: the text is already within the summary size.
        return text;
    }

    // The field is assigned here but not read in this method;
    // presumably helper methods consume it - confirm.
    _stopWordFilter = _nlpServiceProvider.GetStopWordFilter(lang);
    var stemmer = _nlpServiceProvider.GetStemmer(lang);

    // Tokenize every sentence, mapping each token to its stem.
    var stemmedSentences = new List<IList<string>>();
    foreach (var current in sentences)
    {
        var stemMapper = new TextFilterMapper { Map = token => stemmer.Stem(token) };
        var tokens = new TextTokenizer(current, filterMapper: stemMapper).ToList();
        stemmedSentences.Add(tokens);
    }

    var similarity = BuildSimilarityMatrix(stemmedSentences);
    var sentenceGraph = BuildDirectedGraph(similarity);

    var selected = new PageRank()
        .Rank(sentenceGraph)
        .OrderBy(entry => entry.Value)   // Less value, better result
        .Take(summarySize)
        .OrderBy(entry => entry.Key);    // Sentences order in text

    // Each key indexes into the sentence list; every sentence gets a ". " suffix.
    return string.Concat(selected.Select(entry => sentences[entry.Key] + ". "));
}
/// <summary>
/// Creates an extractor that uses the supplied stop-word filter.
/// </summary>
/// <param name="filter">Stop-word filter stored in <c>_stopWords</c>.</param>
public KeywordExtractor(IStopWordFilter filter)
{
    this._stopWords = filter;
}
/// <summary>
/// Creates an extractor with a <c>BasicStopWordFilter</c> and an empty
/// <c>LeagueTable</c> keyed by string.
/// </summary>
public KeywordExtractor()
{
    this._stopWords = new BasicStopWordFilter();
    this.LeagueTable = new SortedList<string, WordScore>();
}
/// <summary>
/// Creates an extractor that uses an <c>EnglishSmartStopWordFilter</c>
/// as its stop-word filter.
/// </summary>
public KeywordExtractor()
{
    this._stopWords = new EnglishSmartStopWordFilter();
}
/// <summary>
/// Creates an extractor that uses a <c>BasicStopWordFilter</c>
/// as its stop-word filter.
/// </summary>
public KeywordExtractor()
{
    this._stopWords = new BasicStopWordFilter();
}