/// <summary>
/// Inverse document frequency. Rebuilds <paramref name="result"/> from the
/// single-place documents in <paramref name="reuters"/> and overwrites every
/// retained feature value with log10(N / df), where N is the number of documents
/// in <paramref name="result"/> and df is the number of those documents whose
/// <c>Text</c> contains the feature's key.
/// </summary>
/// <param name="reuters">
/// Source documents. Only entries whose <c>Places</c> collection has exactly one
/// element are used; the entries themselves are not modified.
/// </param>
/// <param name="result">
/// Output list. It is cleared first and then filled with new <c>Reuter</c>
/// instances that share <c>Places</c> with the corresponding source entry.
/// </param>
public static void InverseDocumentFrequency(List<Reuter> reuters, List<Reuter> result)
{
    result.Clear();

    foreach (Reuter source in reuters)
    {
        if (source.Places.Count != 1)
        {
            continue;
        }

        Reuter copy = new Reuter { Places = source.Places, TextTemp = source.TextTemp };
        result.Add(copy);

        // NOTE(review): both arguments appear identical here, which makes this call a no-op.
        // Possibly the first one was meant to be a double space or a non-breaking space - confirm.
        copy.TextTemp = copy.TextTemp.Replace(" ", " ");
        copy.Text = copy.TextTemp.Split(' ', '\n', '\t').ToList();

        // VectorFeatures is read below without being assigned in this method, so it is
        // presumably filled in by this call (or by Reuter itself) - verify.
        FeatureExtractions.HowManyWordsExtractor(copy);
    }

    foreach (Reuter entry in result)
    {
        // Ascending order: this keeps the (up to) 10 features with the SMALLEST values.
        // NOTE(review): confirm that the lowest-valued features are really the ones wanted.
        entry.VectorFeatures = entry.VectorFeatures
            .OrderBy(feature => feature.Value)
            .Take(10)
            .ToDictionary(feature => feature.Key, feature => feature.Value);
    }

    // One hash set per document turns each "does this document contain the keyword?"
    // probe into a constant-time lookup instead of a linear scan of the token list.
    List<HashSet<string>> vocabularies = result
        .Select(item => new HashSet<string>(item.Text))
        .ToList();

    // The document frequency of a keyword does not depend on which document's feature
    // vector it came from, so it is computed once and reused.
    Dictionary<string, int> documentFrequency = new Dictionary<string, int>();

    foreach (Reuter entry in result)
    {
        // Snapshot the keys: the values are overwritten while walking them.
        foreach (string keyword in entry.VectorFeatures.Keys.ToList())
        {
            int containing;
            if (!documentFrequency.TryGetValue(keyword, out containing))
            {
                containing = vocabularies.Count(words => words.Contains(keyword));
                documentFrequency[keyword] = containing;
            }

            // Floating-point division: a keyword found in no document yields +Infinity
            // rather than throwing, exactly as before.
            entry.VectorFeatures[keyword] = Math.Log10((double)result.Count / containing);
        }
    }
}
/// <summary>
/// Runs <c>FeatureExtractions.HowManyWordsExtractor</c> on the last element of
/// <paramref name="result"/>.
/// </summary>
/// <param name="result">
/// List whose final element is passed to the extractor. <c>Enumerable.Last</c>
/// throws <see cref="InvalidOperationException"/> when the list is empty.
/// </param>
public static void HowManyWords(List<Reuter> result)
{
    Reuter latest = result.Last();
    FeatureExtractions.HowManyWordsExtractor(latest);
}