/// <summary>
/// Builds one <see cref="TFIDFView"/> per distinct word value found in the given document,
/// taking the IDF from the stored word data when it is available.
/// </summary>
/// <param name="documentToCalculate">Document whose sentences supply the words to score.</param>
/// <param name="category">
/// Optional category. When supplied and the stored word has a link to this category
/// (matched by Guid) with an IDF greater than zero, that IDF is used.
/// </param>
/// <returns>The TF-IDF views, in the order the distinct word values first appear.</returns>
public List <TFIDFView> CalculateTFIDFWithIDF(Document.TajikDocument documentToCalculate, IDFCategory category = null)
        {
            var views    = new List <TFIDFView>();
            var allWords = documentToCalculate.Sentences.SelectMany(sentence => sentence.Words).ToList();

            // Only the first occurrence of each distinct word value is scored.
            var distinctWords = allWords.GroupBy(word => word.Value).Select(group => group.FirstOrDefault());

            foreach (var currentWord in distinctWords)
            {
                var termFrequency = CalCulateTF(currentWord, documentToCalculate);
                var storedWord    = KEAGlobal.Context.Words.FirstOrDefault(w => w.Content == currentWord.Value);

                double inverseFrequency;

                if (storedWord == null)
                {
                    // No stored entry for this word: compute the IDF against a data set
                    // that contains only the document being processed.
                    var singleDocumentSet = new List <Document.TajikDocument> {
                        documentToCalculate
                    };
                    inverseFrequency = CalCulateIDF(singleDocumentSet, currentWord);
                }
                else
                {
                    // The category link is only searched for when a category was requested.
                    var matchingLink = category != null
                        ? storedWord.IDFCategoryLinks?.FirstOrDefault(link => link.Category?.Guid == category.Guid)
                        : null;

                    if (matchingLink != null && matchingLink.IDF > 0)
                    {
                        inverseFrequency = matchingLink.IDF;
                    }
                    else
                    {
                        // No category, no matching link, or a non-positive category IDF.
                        inverseFrequency = storedWord.CommonIDF;
                    }
                }

                views.Add(CalculateTFIDF(currentWord.Value, inverseFrequency, termFrequency));
            }

            return views;
        }
예제 #2
0
 /// <summary>
 /// Creates a TF instance for the given word and document.
 /// </summary>
 /// <param name="termin">Word stored in <c>Termin</c>.</param>
 /// <param name="document">Document stored in <c>Document</c>.</param>
 public TF(TajikWord termin, Document.TajikDocument document)
 {
     this.Termin   = termin;
     this.Document = document;
 }
        /// <summary>
        /// Builds one <see cref="TFIDFView"/> per distinct word value found in the given document,
        /// computing the IDF of each word against the supplied data set.
        /// </summary>
        /// <param name="documentsDataSet">Documents passed to <c>CalCulateIDF</c> for every word.</param>
        /// <param name="documentToCalculate">Document whose sentences supply the words to score.</param>
        /// <returns>The TF-IDF views, in the order the distinct word values first appear.</returns>
        public List <TFIDFView> CalculateTFIDF(List <Document.TajikDocument> documentsDataSet, Document.TajikDocument documentToCalculate)
        {
            return documentToCalculate.Sentences
                   .SelectMany(sentence => sentence.Words)
                   .ToList()
                   .GroupBy(word => word.Value)
                   // Only the first occurrence of each distinct word value is scored.
                   .Select(group => group.FirstOrDefault())
                   .Select(word =>
                   {
                       // TF is evaluated before IDF for each word.
                       var       termFrequency    = CalCulateTF(word, documentToCalculate);
                       double    inverseFrequency = CalCulateIDF(documentsDataSet, word);
                       TFIDFView view             = CalculateTFIDF(word.Value, inverseFrequency, termFrequency);
                       return view;
                   })
                   .ToList();
        }
        /// <summary>
        /// Creates a <c>TF</c> for the given word and document and returns the result of its
        /// <c>CalculateTF</c> call.
        /// </summary>
        /// <param name="wordToCalculate">Word passed to the <c>TF</c> constructor.</param>
        /// <param name="documentToCalculate">Document passed to the <c>TF</c> constructor.</param>
        /// <returns>The value returned by <c>TF.CalculateTF()</c>.</returns>
        public double CalCulateTF(TajikWord wordToCalculate, Document.TajikDocument documentToCalculate)
            => new TF(wordToCalculate, documentToCalculate).CalculateTF();