Esempio n. 1
0
 /// <summary>
 /// Calculates a Term Frequency - Inverse Document Frequency style weight for a single word.
 /// Make sure that the string contains only one word and that it has been processed in the
 /// same way the attributes in <paramref name="GroupToCompare"/> were.
 /// </summary>
 /// <param name="str">The single word you wish to get a weight for.</param>
 /// <param name="GroupToCompare">The group used to find its weight.</param>
 /// <returns>
 /// 0 when the group reports a frequency of 0 for the word; otherwise
 /// log10(1 + ItemCount / AttributeFrequency(str)). The lower the frequency the group
 /// reports for the word, the larger the returned weight.
 /// </returns>
 public static double GetTermWeight(this String str, ComparisonGroup GroupToCompare)
 {
     // Look the frequency up once; it feeds both the guard and the ratio below.
     var frequency = GroupToCompare.AttributeFrequency(str);

     // A term the group has never seen carries no weight (and would divide by zero).
     if (frequency == 0)
     {
         return 0;
     }

     // log10(1 + N / nt). The cast forces floating-point division so the ratio is not
     // truncated should ItemCount and the frequency both be integral types.
     return Math.Log10(1 + (double)GroupToCompare.ItemCount / frequency);
 }
Esempio n. 2
0
        /// <summary>
        /// Demo entry point: downloads two texts, turns each into a <c>ComparisonItem</c>
        /// via <c>GetWordCount()</c>, adds both to a <c>ComparisonGroup</c>, and scores a
        /// sample sentence against that group.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            ComparisonItem wizardOfOz;
            ComparisonItem robinHood;

            // WebClient implements IDisposable; release it as soon as both downloads are done.
            using (var wc = new WebClient())
            {
                wizardOfOz = new ComparisonItem(wc.DownloadString("https://www.gutenberg.org/cache/epub/420/pg420.txt").GetWordCount());
                robinHood  = new ComparisonItem(wc.DownloadString("https://www.gutenberg.org/cache/epub/964/pg964.txt").GetWordCount());
            }

            var fantasyGroup = new ComparisonGroup();

            fantasyGroup.AddItem(wizardOfOz);
            fantasyGroup.AddItem(robinHood);

            // One (item, score) tuple per item in the group; kept in a local so it can be inspected.
            var scores = "You who so plod amid serious things that you feel it shame to give yourself up even for a few short moments to mirth and joyousness".SearchScore(fantasyGroup);
        }
Esempio n. 3
0
 /// <summary>
 /// Scores every item of <paramref name="GroupToCompare"/> against a free-text search string.
 /// The text is run through <c>Processing.WordCount.GetWordCount</c>, wrapped in a
 /// <c>ComparisonItem</c>, and handed to the <c>ComparisonItem</c> overload of SearchScore.
 /// </summary>
 /// <param name="search">The text to search with.</param>
 /// <param name="GroupToCompare">The group whose items are scored.</param>
 /// <returns>The list of (item, score) tuples produced by the <c>ComparisonItem</c> overload.</returns>
 public static List <Tuple <ComparisonItem, Double> > SearchScore(this String search, ComparisonGroup GroupToCompare)
 {
     var wordCounts = Processing.WordCount.GetWordCount(search);
     var query      = new ComparisonItem(wordCounts);

     return SearchScore(query, GroupToCompare);
 }
Esempio n. 4
0
        /// <summary>
        /// Scores every item in <paramref name="GroupToCompare"/> against the attributes of
        /// <paramref name="mainItem"/>. An item's score is the sum, over each attribute of
        /// <paramref name="mainItem"/>, of
        /// (item's value for the term / group's maximum occurrence of the term) * term weight.
        /// Terms for which either the item's value or the group's maximum is 0 contribute nothing.
        /// </summary>
        /// <param name="mainItem">The item whose attributes are used as the search terms.</param>
        /// <param name="GroupToCompare">The group whose items are scored.</param>
        /// <returns>One (item, score) tuple per item in the group, in the group's enumeration order.</returns>
        public static List <Tuple <ComparisonItem, Double> > SearchScore(this ComparisonItem mainItem, ComparisonGroup GroupToCompare)
        {
            var scores = new List <Tuple <ComparisonItem, Double> >();

            // Compute one score per item in the group.
            foreach (var item in GroupToCompare.Items)
            {
                // Running total of this item's score.
                var sum = 0.0;

                foreach (var term in mainItem.Attributes)
                {
                    var itemValue = item.GetValue(term.Key);
                    var maxValue  = GroupToCompare.MaxAttributeOccurence(term.Key);

                    // The term is absent from this item or from the whole group: nothing to add.
                    if (maxValue == 0 || itemValue == 0)
                    {
                        continue;
                    }

                    // Only look up the weight for terms that actually contribute.
                    var termWeight = GetTermWeight(term.Key, GroupToCompare);

                    // The cast forces floating-point division so the normalised frequency is
                    // not truncated to 0 should both values be integral types.
                    sum += ((double)itemValue / maxValue) * termWeight;
                }

                scores.Add(new Tuple <ComparisonItem, double>(item, sum));
            }

            return scores;
        }