示例#1
0
        /// <summary>
        /// Rebuilds <paramref name="result"/> from the documents in <paramref name="reuters"/>
        /// that have exactly one place, then replaces the value of every kept feature with
        /// log10(number of documents in result / number of those documents whose Text contains the feature key).
        /// </summary>
        /// <param name="reuters">Source documents. Entries whose <c>Places.Count</c> is not 1 are skipped.</param>
        /// <param name="result">
        /// Output list. It is cleared first and then filled with new <c>Reuter</c> instances that
        /// share the source's <c>Places</c> reference and carry a tokenized copy of its text.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="reuters"/> or <paramref name="result"/> is null.
        /// </exception>
        public static void InverseDocumentFrequency(List <Reuter> reuters, List <Reuter> result)
        {
            if (reuters == null)
            {
                throw new ArgumentNullException("reuters");
            }

            if (result == null)
            {
                throw new ArgumentNullException("result");
            }

            result.Clear();
            foreach (Reuter source in reuters)
            {
                if (source.Places.Count != 1)
                {
                    continue;
                }

                var document = new Reuter { Places = source.Places, TextTemp = source.TextTemp };
                result.Add(document);

                // Only runs of exactly four spaces are collapsed; the split below can still
                // yield empty tokens for other whitespace runs (behavior kept from the original).
                document.TextTemp = document.TextTemp.Replace("    ", " ");
                document.Text     = document.TextTemp.Split(' ', '\n', '\t').ToList();
                FeatureExtractions.HowManyWordsExtractor(document);
            }

            foreach (Reuter kept in result)
            {
                // NOTE(review): ascending order keeps the 10 LOWEST-valued features. If the
                // extractor stores word counts and the most frequent words were intended,
                // this should be OrderByDescending - confirm before changing.
                kept.VectorFeatures = kept.VectorFeatures.OrderBy(x => x.Value)
                                      .Take(10)
                                      .ToDictionary(pair => pair.Key, pair => pair.Value);
            }

            // One hash set per document turns each "does this document contain the keyword"
            // check into a constant-time lookup instead of a linear scan of the token list.
            List<HashSet<string>> wordSets = result.Select(r => new HashSet<string>(r.Text)).ToList();

            // The document frequency of a keyword does not depend on which document asks,
            // so it is computed once per distinct keyword and reused.
            var documentFrequencies = new Dictionary<string, int>();

            foreach (Reuter current in result)
            {
                // Iterate over a snapshot of the keys: the loop body overwrites dictionary values.
                foreach (string keyword in current.VectorFeatures.Keys.ToList())
                {
                    int containingDocuments;
                    if (!documentFrequencies.TryGetValue(keyword, out containingDocuments))
                    {
                        containingDocuments = wordSets.Count(words => words.Contains(keyword));
                        documentFrequencies[keyword] = containingDocuments;
                    }

                    // If no document contains the keyword the floating-point division yields
                    // +Infinity, and so does Log10 - same outcome as the original code.
                    double ratio = (double)result.Count / containingDocuments;
                    current.VectorFeatures[keyword] = Math.Log10(ratio);
                }
            }
        }
示例#2
0
 /// <summary>
 /// Passes the last element of <paramref name="result"/> to
 /// <c>FeatureExtractions.HowManyWordsExtractor</c>.
 /// </summary>
 /// <param name="result">List whose final entry is handed to the extractor; no other entry is accessed here.</param>
 /// <remarks>
 /// Assuming <c>Last()</c> resolves to <c>System.Linq.Enumerable.Last</c>, an empty list raises
 /// <c>InvalidOperationException</c> and a null list raises <c>ArgumentNullException</c>.
 /// NOTE(review): what the extractor does with the document is not visible from this method.
 /// </remarks>
 public static void HowManyWords(List <Reuter> result)
 {
     var lastDocument = result.Last();
     FeatureExtractions.HowManyWordsExtractor(lastDocument);
 }