Exemple #1
0
        /// <summary>
        /// Returns the top-ranked words of <paramref name="text"/> according to their TF-IDF scores.
        /// </summary>
        /// <param name="text">Raw text that is wrapped in a <c>TajikDocument</c> before scoring.</param>
        /// <param name="countOfKeywords">Maximum number of words to return.</param>
        /// <param name="category">IDF category forwarded to the TF-IDF manager.</param>
        /// <returns>
        /// At most <paramref name="countOfKeywords"/> words, ordered by TF-IDF descending,
        /// with ties broken by TF and then by IDF (both descending).
        /// </returns>
        public List<string> GetKeywords(string text, int countOfKeywords, IDFCategory category)
        {
            var doc = new Document.TajikDocument(text);

            // Words are normalized in place on every sentence before any scoring happens.
            doc.Sentences.ForEach(sentence => sentence.NormalizeWords());

            var scored = KEAGlobal.TFIDFManager.CalculateTFIDFWithIDF(doc, category);

            // Primary key is the combined score; TF and IDF only act as tie-breakers.
            var ranked = scored
                .OrderByDescending(entry => entry.TF_IDF)
                .ThenByDescending(entry => entry.TF)
                .ThenByDescending(entry => entry.IDF);

            var keywords = ranked
                .Select(entry => entry.Word)
                .Take(countOfKeywords)
                .ToList();

            return keywords;
        }
Exemple #2
0
        /// <summary>
        /// Collects the sentences contained in a document.
        /// </summary>
        /// <param name="document">Document whose <c>Content</c> is split into sentences.</param>
        /// <returns>
        /// One <c>TajikSentence</c> per non-empty fragment produced by splitting the content
        /// with <c>Statics.SplitSentensePattern</c>, in the order the fragments occur.
        /// </returns>
        public List<TajikSentence> SplitSentencesFromDoc(TajikDocument document)
        {
            string[] fragments = Regex.Split(document.Content, Statics.SplitSentensePattern);

            // Regex.Split can yield empty strings (e.g. around adjacent delimiters); those are dropped.
            return fragments
                .Where(fragment => fragment != "")
                .Select(fragment => new TajikSentence(fragment))
                .ToList();
        }