/// <summary>
/// Returns the top-ranked words of <paramref name="text"/> according to their TF-IDF scores.
/// </summary>
/// <param name="text">Raw text that is wrapped in a <c>TajikDocument</c> before scoring.</param>
/// <param name="countOfKeywords">Upper bound on the number of words returned.</param>
/// <param name="category">Category forwarded to the TF-IDF calculation.</param>
/// <returns>
/// Words ordered by <c>TF_IDF</c>, then <c>TF</c>, then <c>IDF</c> (all descending),
/// truncated to at most <paramref name="countOfKeywords"/> entries.
/// </returns>
public List<string> GetKeywords(string text, int countOfKeywords, IDFCategory category)
{
    var tajikDocument = new Document.TajikDocument(text);

    // Words are normalized sentence by sentence before any scoring takes place.
    tajikDocument.Sentences.ForEach(sentence => sentence.NormalizeWords());

    // Rank by TF-IDF first; TF and then IDF act as tie-breakers.
    var rankedWords =
        from entry in KEAGlobal.TFIDFManager.CalculateTFIDFWithIDF(tajikDocument, category)
        orderby entry.TF_IDF descending, entry.TF descending, entry.IDF descending
        select entry.Word;

    return rankedWords.Take(countOfKeywords).ToList();
}
/// <summary>
/// Collects the sentences contained in a document.
/// </summary>
/// <param name="document">Document whose <c>Content</c> is split into sentences.</param>
/// <returns>
/// One <c>TajikSentence</c> per non-empty fragment produced by splitting the content
/// with <c>Statics.SplitSentensePattern</c>, in the order the fragments occur.
/// </returns>
public List<TajikSentence> SplitSentencesFromDoc(TajikDocument document)
{
    return Regex.Split(document.Content, Statics.SplitSentensePattern)
        // Regex.Split can yield empty strings; those are dropped.
        .Where(fragment => fragment != "")
        .Select(fragment => new TajikSentence(fragment))
        .ToList();
}