/// <summary>
/// Builds one <see cref="TFIDFView"/> per distinct word value of
/// <paramref name="documentToCalculate"/>, taking the IDF from the entries in
/// <c>KEAGlobal.Context.Words</c> whenever an entry with the same content exists.
/// </summary>
/// <param name="documentToCalculate">Document whose words are scored.</param>
/// <param name="category">
/// Optional category. When supplied, the IDF of the matching category link is used
/// if that link exists and its IDF is greater than zero; otherwise the entry's
/// <c>CommonIDF</c> is used.
/// </param>
/// <returns>The TF-IDF views, one for each distinct word value.</returns>
public List<TFIDFView> CalculateTFIDFWithIDF(Document.TajikDocument documentToCalculate, IDFCategory category = null)
{
    // One representative word per distinct Value (the first element of each group).
    var distinctWords = documentToCalculate.Sentences
        .SelectMany(sentence => sentence.Words)
        .ToList()
        .GroupBy(word => word.Value)
        .Select(sameValue => sameValue.FirstOrDefault());

    var views = new List<TFIDFView>();

    foreach (var word in distinctWords)
    {
        double termFrequency = CalCulateTF(word, documentToCalculate);

        var storedWord = KEAGlobal.Context.Words.FirstOrDefault(entry => entry.Content == word.Value);

        double inverseFrequency;
        if (storedWord == null)
        {
            // No stored entry for this word: compute the IDF against a data set
            // that contains only the document being scored.
            var singleDocumentSet = new List<Document.TajikDocument> { documentToCalculate };
            inverseFrequency = CalCulateIDF(singleDocumentSet, word);
        }
        else if (category == null)
        {
            inverseFrequency = storedWord.CommonIDF;
        }
        else
        {
            var link = storedWord.IDFCategoryLinks?.FirstOrDefault(candidate => candidate.Category?.Guid == category.Guid);

            // Negated "> 0" (rather than "<= 0") keeps the outcome identical for
            // every value the comparison can see.
            if (link == null || !(link.IDF > 0))
            {
                inverseFrequency = storedWord.CommonIDF;
            }
            else
            {
                inverseFrequency = link.IDF;
            }
        }

        views.Add(CalculateTFIDF(word.Value, inverseFrequency, termFrequency));
    }

    return views;
}
/// <summary>
/// Creates a <c>TF</c> instance for the given term and document.
/// </summary>
/// <param name="termin">Word stored in <c>Termin</c>.</param>
/// <param name="document">Document stored in <c>Document</c>.</param>
public TF(TajikWord termin, Document.TajikDocument document)
{
    this.Termin = termin;
    this.Document = document;
}
/// <summary>
/// Builds one <see cref="TFIDFView"/> per distinct word value of
/// <paramref name="documentToCalculate"/>, computing each word's IDF against
/// <paramref name="documentsDataSet"/>.
/// </summary>
/// <param name="documentsDataSet">Documents passed to <c>CalCulateIDF</c> for every word.</param>
/// <param name="documentToCalculate">Document whose words are scored.</param>
/// <returns>The TF-IDF views, one for each distinct word value.</returns>
public List<TFIDFView> CalculateTFIDF(List<Document.TajikDocument> documentsDataSet, Document.TajikDocument documentToCalculate)
{
    var allWords = documentToCalculate.Sentences
        .SelectMany(sentence => sentence.Words)
        .ToList();

    // One representative word per distinct Value (the first element of each group).
    var distinctWords = allWords
        .GroupBy(word => word.Value)
        .Select(sameValue => sameValue.FirstOrDefault());

    var views = new List<TFIDFView>();

    foreach (var word in distinctWords)
    {
        double termFrequency = CalCulateTF(word, documentToCalculate);
        double inverseFrequency = CalCulateIDF(documentsDataSet, word);

        // Resolves to the three-argument overload that builds a single view.
        views.Add(CalculateTFIDF(word.Value, inverseFrequency, termFrequency));
    }

    return views;
}
/// <summary>
/// Returns the term frequency of <paramref name="wordToCalculate"/> within
/// <paramref name="documentToCalculate"/> by delegating to a new <c>TF</c> instance.
/// </summary>
/// <param name="wordToCalculate">Word passed to the <c>TF</c> constructor as the term.</param>
/// <param name="documentToCalculate">Document passed to the <c>TF</c> constructor.</param>
/// <returns>The value produced by <c>TF.CalculateTF()</c>.</returns>
public double CalCulateTF(TajikWord wordToCalculate, Document.TajikDocument documentToCalculate)
    => new TF(wordToCalculate, documentToCalculate).CalculateTF();