/// <summary>
/// Builds the form: initializes the designer-generated components, obtains the
/// shared <c>Indexer</c> and <c>RocchioAugmenter</c> instances, creates a
/// <c>WebSearcher</c>, and fills <c>comboBoxNum</c> with the choices 10 and 20.
/// </summary>
public RocchioForm()
{
    InitializeComponent();

    indexer = Indexer.Instance;
    rocchio = RocchioAugmenter.Instance;
    webSearcher = new WebSearcher();

    // Offer the selectable values in steps of ten: 10, then 20.
    for (int choice = 10; choice < 30; choice += 10)
    {
        comboBoxNum.Items.Add(choice);
    }
}
/// <summary>
/// Tokenizes the text of every document and records the accepted terms in
/// <c>termFrequencies</c>, <c>invertedIndex</c> and each document's <c>TfWeights</c>.
/// </summary>
/// <remarks>
/// For each document the body text is requested from its <c>Url</c>; when that yields
/// <c>null</c> the document's <c>Summary</c> is used instead, and when both are
/// <c>null</c> the document is indexed as empty text. The chosen text is stored in
/// <c>BodyContent</c>. Positions written to <c>invertedIndex</c> are indices into the
/// split token array, so tokens rejected by the filter still advance the position.
/// </remarks>
/// <param name="documents">Documents to index; each one has its <c>TfWeights</c> and <c>BodyContent</c> overwritten.</param>
/// <exception cref="ArgumentNullException"><paramref name="documents"/> is <c>null</c>.</exception>
public void IndexDocuments(List<Document> documents)
{
    if (documents == null)
    {
        throw new ArgumentNullException("documents");
    }

    // One searcher is shared by all documents instead of allocating one per document.
    var searcher = new WebSearcher();

    foreach (var doc in documents)
    {
        var tfWeights = new Dictionary<string, int>();
        doc.TfWeights = tfWeights;

        // Fall back to the summary, then to empty text, so Split never sees null.
        string text = searcher.GetBodyContentFromUrl(doc.Url) ?? doc.Summary ?? string.Empty;
        doc.BodyContent = text;

        string[] tokens = text.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);
        for (int position = 0; position < tokens.Length; position++)
        {
            // Index keys must not depend on the current culture's casing rules.
            string term = tokens[position].ToLowerInvariant();
            if (!IsIndexableTerm(term))
            {
                continue;
            }

            // Collection-wide occurrence count of the term.
            if (termFrequencies.ContainsKey(term))
            {
                termFrequencies[term]++;
            }
            else
            {
                termFrequencies.Add(term, 1);
            }

            // term -> document id -> positions of the term within that document's tokens.
            if (!invertedIndex.ContainsKey(term))
            {
                invertedIndex[term] = new Dictionary<int, List<int>>();
            }

            var postings = invertedIndex[term];
            if (!postings.ContainsKey(doc.Id))
            {
                postings[doc.Id] = new List<int>();
            }

            postings[doc.Id].Add(position);

            // Per-document occurrence count; a missing key leaves the count at 0.
            int countInDocument;
            tfWeights.TryGetValue(term, out countInDocument);
            tfWeights[term] = countInDocument + 1;
        }
    }
}

/// <summary>
/// Returns true when a lower-cased token should be indexed: 2 to 12 characters long,
/// made only of letters and digits, and not made only of digits.
/// </summary>
private static bool IsIndexableTerm(string term)
{
    if (String.IsNullOrWhiteSpace(term) || term.Length < 2 || term.Length > 12)
    {
        return false;
    }

    return !term.All(char.IsDigit) && term.All(char.IsLetterOrDigit);
}