/// <summary>
/// Replaces every value in <c>WordCountsDictionary</c> with that value divided by
/// the sum of all values, i.e. converts the stored per-word values into
/// probabilities relative to the whole dictionary.
/// </summary>
/// <remarks>
/// The result is built in a temporary dictionary and then assigned back to
/// <c>WordCountsDictionary</c>, so the source is never modified while it is
/// being enumerated. An empty dictionary is left empty.
/// NOTE(review): if the values sum to zero the division is not guarded; with
/// double values this yields NaN/Infinity rather than throwing - confirm that
/// callers never reach this state.
/// </remarks>
private void calculateWordProbability()
{
    // The source dictionary is untouched inside the loop, so the total is
    // loop-invariant: compute it once instead of re-summing for every entry.
    var total = WordCountsDictionary.Sum(x => x.Value);

    var tempCollection = new Dictionary<string, double>();
    foreach (var item in WordCountsDictionary)
    {
        tempCollection.Add(item.Key, item.Value / total);
    }

    WordCountsDictionary = tempCollection;
}
/// <summary>
/// Performs the Hybrid TF operation: splits every review in <c>ReviewList</c>
/// into space-separated words, de-duplicates the words of each review via
/// <c>RemoveDuplicates</c>, registers each remaining word through
/// <c>AddWordToDictionary</c>, then scores the reviews using the sum of all
/// values in <c>WordCountsDictionary</c> and reorders them by score.
/// </summary>
/// <remarks>
/// Processing is best-effort per review: a review that fails (for example a
/// null entry) is skipped and the remaining reviews are still processed.
/// </remarks>
public void PerformHybridTF()
{
    foreach (var review in ReviewList)
    {
        try
        {
            // RemoveEmptyEntries drops every empty token produced by repeated,
            // leading or trailing spaces. List.Remove("") only removed the first
            // one, so the rest were counted as words.
            var words = review.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList();
            words = RemoveDuplicates(words);

            foreach (var word in words)
            {
                AddWordToDictionary(word);
            }
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: one bad review must not abort the run,
            // but surface the failure in debug builds instead of hiding it.
            System.Diagnostics.Debug.WriteLine("PerformHybridTF: skipped a review: " + ex.Message);
        }
    }

    CalculateScoreForEachReview(WordCountsDictionary.Sum(x => x.Value));
    ReorderReviewsBasedOnScore();
}
/// <summary>
/// Performs the Hybrid TF-IDF operation: splits every review in
/// <c>ReviewList</c> into space-separated words and registers each word
/// (duplicates included) through <c>AddWordToDictionary</c>, then calls
/// <c>CalculateNumberOfOccuranceOfWordsInDocument</c>, scores the reviews using
/// the sum of all values in <c>WordCountsDictionary</c>, reorders them by score
/// and finally calls
/// <c>ApplyCosineSimilarityToRemoveSimilarConsecutiveReviews</c>.
/// </summary>
/// <param name="thresholdValue">
/// Value forwarded unchanged to
/// <c>ApplyCosineSimilarityToRemoveSimilarConsecutiveReviews</c>; defaults to 0.7.
/// NOTE(review): presumably the similarity threshold used by that method -
/// confirm against its implementation.
/// </param>
/// <remarks>
/// Processing is best-effort per review: a review that fails (for example a
/// null entry) is skipped and the remaining reviews are still processed.
/// </remarks>
public void PerformHybridTFIDF(double thresholdValue = 0.7)
{
    foreach (var review in ReviewList)
    {
        try
        {
            // RemoveEmptyEntries drops every empty token produced by repeated,
            // leading or trailing spaces. List.Remove("") only removed the first
            // one, so the rest were counted as words.
            // Unlike PerformHybridTF, words are not de-duplicated here, so each
            // repeated occurrence is passed to AddWordToDictionary.
            var words = review.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var word in words)
            {
                AddWordToDictionary(word);
            }
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: one bad review must not abort the run,
            // but surface the failure in debug builds instead of hiding it.
            System.Diagnostics.Debug.WriteLine("PerformHybridTFIDF: skipped a review: " + ex.Message);
        }
    }

    CalculateNumberOfOccuranceOfWordsInDocument();
    CalculateScoreForEachReview(WordCountsDictionary.Sum(x => x.Value));
    ReorderReviewsBasedOnScore();
    ApplyCosineSimilarityToRemoveSimilarConsecutiveReviews(thresholdValue);
}