/// <summary>
/// Recursively evaluates this boolean query node against the registry's index.
/// </summary>
/// <param name="registry">Registry queried through <c>GetIndexEntries</c> for the entries of a word.</param>
/// <param name="removals">
/// Accumulator that <c>Not</c> nodes add their word's entries to. It is mutated in place.
/// A <c>Not</c> node that is the right operand of an <c>And</c> writes to a temporary set instead,
/// and that set is subtracted from the left operand's result.
/// </param>
/// <returns>
/// The set of entries matched by this node. The returned set is always a fresh instance,
/// so callers may modify it freely. A <c>Not</c> node (or an unrecognised query type) returns an empty set.
/// </returns>
protected HashSet<CrawlerRegistry.IndexEntry> Execute(CrawlerRegistry registry, HashSet<CrawlerRegistry.IndexEntry> removals)
{
    var entries = new HashSet<CrawlerRegistry.IndexEntry>();

    switch (this.QueryType)
    {
        case QueryType.Word:
        {
            // Return a private copy: parent And/Or nodes combine results with the in-place
            // IntersectWith/ExceptWith/UnionWith, which would otherwise modify whatever set
            // the registry handed out (possibly its own index storage).
            HashSet<CrawlerRegistry.IndexEntry> indexed = registry.GetIndexEntries(this.Word);
            entries = new HashSet<CrawlerRegistry.IndexEntry>(indexed, indexed.Comparer);
            break;
        }

        case QueryType.And:
            entries = this.LeftPart.Execute(registry, removals);
            if (this.RightPart.QueryType == QueryType.Not)
            {
                // "a AND NOT b": collect b's entries separately and subtract them right here,
                // rather than leaking them into the caller's removals accumulator.
                var tempRemovals = new HashSet<CrawlerRegistry.IndexEntry>();
                this.RightPart.Execute(registry, tempRemovals);
                entries.ExceptWith(tempRemovals);
            }
            else
            {
                entries.IntersectWith(this.RightPart.Execute(registry, removals));
            }
            break;

        case QueryType.Or:
            entries = this.LeftPart.Execute(registry, removals);
            entries.UnionWith(this.RightPart.Execute(registry, removals));
            break;

        case QueryType.Not:
            // A Not node contributes nothing itself; it only records what should be removed.
            removals.UnionWith(registry.GetIndexEntries(this.Word));
            break;
    }

    Console.WriteLine("[SubLevel] {0} entries, {1} removals.", entries.Count, removals.Count);
    return entries;
}
/// <summary>
/// Scores every document that contains at least one query token and returns the best matches.
/// </summary>
/// <remarks>
/// Each (document, token) pair gets the weight <c>1 + log10(frequency)</c>. A document's weights are
/// divided by the Euclidean length of its weight vector (over the query tokens it contains) and then
/// multiplied with the corresponding query weights from <c>CalculateQueryWeights</c>; the sum is the score.
/// </remarks>
/// <param name="tokens">Query tokens. Repeated tokens are looked up only once.</param>
/// <param name="registry">Registry providing index entries per token and, when requested, page ranks.</param>
/// <param name="maxResults">Maximum number of results to return.</param>
/// <param name="usePageRank">
/// When true, the top <c>maxResults * 4</c> documents by score are re-ordered by
/// <c>registry.PageRanks</c> (descending) and the first <paramref name="maxResults"/> of those are returned.
/// </param>
/// <returns>
/// Result entries built from (document id, score), in final ranking order. The stored value is the
/// score even when the ordering comes from page rank.
/// </returns>
protected LinkedList<IndexEntry> Execute(List<string> tokens, CrawlerRegistry registry, int maxResults, bool usePageRank)
{
    // Size of the candidate pool, as a multiple of maxResults, that is re-ranked by page rank.
    const int PageRankCandidateFactor = 4;

    var queryWeights = this.CalculateQueryWeights(tokens, registry);
    var documentWeights = new Dictionary<int, Dictionary<string, double>>();

    // Distinct(): a token that occurs several times in the query must contribute to a document's
    // weight vector only once, otherwise the normalisation below would be inflated.
    foreach (var token in tokens.Distinct())
    {
        foreach (var entry in registry.GetIndexEntries(token))
        {
            Dictionary<string, double> weights;
            if (!documentWeights.TryGetValue(entry.LinkId, out weights))
            {
                weights = new Dictionary<string, double>();
                documentWeights[entry.LinkId] = weights;
            }

            weights[token] = 1 + Math.Log10(entry.Frequency);
        }
    }

    var scores = new Dictionary<int, double>();
    foreach (var doc in documentWeights)
    {
        // Derive the vector length from the weights actually stored, so it always matches
        // the values being normalised.
        var norm = Math.Sqrt(doc.Value.Values.Sum(weight => weight * weight));
        scores[doc.Key] = doc.Value.Sum(token => (token.Value / norm) * queryWeights[token.Key]);
    }

    var sortedScores = scores
        .OrderByDescending(score => score.Value)
        .Take(usePageRank ? maxResults * PageRankCandidateFactor : maxResults);

    if (usePageRank)
    {
        sortedScores = sortedScores
            .OrderByDescending(score => registry.PageRanks[score.Key])
            .Take(maxResults);
    }

    var results = new LinkedList<IndexEntry>();
    foreach (var score in sortedScores)
    {
        results.AddLast(new IndexEntry(score.Key, score.Value));
    }

    return results;
}