Esempio n. 1
0
            /// <summary>
            /// Recursively evaluates this query node against the registry index.
            /// </summary>
            /// <remarks>
            /// Behaviour per <c>QueryType</c>:
            /// <list type="bullet">
            /// <item><description>Word: the index entries registered for <c>Word</c>.</description></item>
            /// <item><description>And: the left result, minus the right part's entries when the right part is a Not node, otherwise intersected with the right result.</description></item>
            /// <item><description>Or: the union of the left and right results.</description></item>
            /// <item><description>Not: adds the entries for <c>Word</c> to <paramref name="removals"/> and yields an empty result.</description></item>
            /// </list>
            /// </remarks>
            /// <param name="registry">Registry whose index is queried.</param>
            /// <param name="removals">Set that Not nodes add their entries to; it is modified in place.</param>
            /// <returns>
            /// A set of matching entries that is never the instance handed out by the registry,
            /// so callers may modify it freely.
            /// </returns>
            protected HashSet<CrawlerRegistry.IndexEntry> Execute(CrawlerRegistry registry, HashSet<CrawlerRegistry.IndexEntry> removals)
            {
                var entries = new HashSet<CrawlerRegistry.IndexEntry>();

                switch (this.QueryType)
                {
                case QueryType.Word:
                    HashSet<CrawlerRegistry.IndexEntry> indexed = registry.GetIndexEntries(this.Word);

                    // Copy instead of returning the registry's set: the And/Or branches below
                    // modify the left operand's result in place, which would otherwise alter
                    // whatever set the registry handed out. A null lookup result (should the
                    // registry ever return one) is treated as "no matches".
                    if (indexed != null)
                    {
                        entries = new HashSet<CrawlerRegistry.IndexEntry>(indexed, indexed.Comparer);
                    }
                    break;

                case QueryType.And:
                    entries = this.LeftPart.Execute(registry, removals);

                    if (this.RightPart.QueryType == QueryType.Not)
                    {
                        // "A AND NOT B": gather B's entries in a scratch set so the subtraction
                        // only applies to this conjunction and not to the caller's removals.
                        var tempRemovals = new HashSet<CrawlerRegistry.IndexEntry>();

                        this.RightPart.Execute(registry, tempRemovals);
                        entries.ExceptWith(tempRemovals);
                    }
                    else
                    {
                        entries.IntersectWith(this.RightPart.Execute(registry, removals));
                    }
                    break;

                case QueryType.Or:
                    entries = this.LeftPart.Execute(registry, removals);
                    entries.UnionWith(this.RightPart.Execute(registry, removals));
                    break;

                case QueryType.Not:
                    HashSet<CrawlerRegistry.IndexEntry> excluded = registry.GetIndexEntries(this.Word);

                    // A Not node contributes no entries itself; it only records what to exclude.
                    if (excluded != null)
                    {
                        removals.UnionWith(excluded);
                    }
                    break;
                }

                Console.WriteLine("[SubLevel] {0} entries, {1} removals.", entries.Count, removals.Count);

                return entries;
            }
Esempio n. 2
0
        /// <summary>
        /// Scores documents against the query tokens and returns the best matches.
        /// </summary>
        /// <remarks>
        /// Each (document, token) pair gets the weight <c>1 + log10(frequency)</c>. A document's
        /// weights are divided by the Euclidean norm of its collected weights, multiplied by the
        /// corresponding query weight, and summed into the document's score.
        /// </remarks>
        /// <param name="tokens">Query tokens. A token that appears more than once is looked up only once.</param>
        /// <param name="registry">Registry providing the index entries and the <c>PageRanks</c> lookup.</param>
        /// <param name="maxResults">Maximum number of results to return.</param>
        /// <param name="usePageRank">
        /// When true, the <c>maxResults * 4</c> highest-scoring documents are re-ordered by their
        /// <c>registry.PageRanks</c> value before being cut down to <paramref name="maxResults"/>.
        /// </param>
        /// <returns>
        /// Entries built from (document id, score), in ranking order; the score stored is the
        /// computed similarity score even when the ordering comes from PageRank.
        /// </returns>
        protected LinkedList<IndexEntry> Execute(List<string> tokens, CrawlerRegistry registry, int maxResults, bool usePageRank)
        {
            var queryWeights    = this.CalculateQueryWeights(tokens, registry);
            var documentWeights = new Dictionary<int, Dictionary<string, double>>();

            // Distinct() avoids repeating the index lookup for tokens the query mentions twice.
            foreach (var token in tokens.Distinct())
            {
                foreach (var entry in registry.GetIndexEntries(token))
                {
                    Dictionary<string, double> termWeights;

                    if (!documentWeights.TryGetValue(entry.LinkId, out termWeights))
                    {
                        termWeights = new Dictionary<string, double>();
                        documentWeights[entry.LinkId] = termWeights;
                    }

                    termWeights[token] = 1 + Math.Log10(entry.Frequency);
                }
            }

            var scores = new Dictionary<int, double>();

            foreach (var doc in documentWeights)
            {
                // The norm is derived from the weights actually stored for the document, so a
                // weight that was written more than once can never be counted more than once.
                var norm = Math.Sqrt(doc.Value.Values.Sum(weight => weight * weight));

                scores[doc.Key] = doc.Value.Sum(term => (term.Value / norm) * queryWeights[term.Key]);
            }

            // With PageRank enabled a larger candidate pool is kept so the re-ordering below has
            // something to choose from.
            var sortedScores =
                scores.OrderByDescending(score => score.Value)
                .Take(usePageRank ? maxResults * 4 : maxResults);

            if (usePageRank)
            {
                sortedScores = sortedScores.OrderByDescending(score => registry.PageRanks[score.Key]).Take(maxResults);
            }

            var results = new LinkedList<IndexEntry>();

            foreach (var score in sortedScores)
            {
                results.AddLast(new IndexEntry(score.Key, score.Value));
            }

            return results;
        }