/// <summary>
/// Creates a spider that works against the given frontier and filter and
/// reports through <paramref name="callback"/>.
/// </summary>
/// <param name="frontier">Frontier stored on this spider.</param>
/// <param name="index">
/// Template index. It is not stored directly: the spider keeps a fresh index
/// produced by <c>Index.CreateEmptyCopy</c> from it.
/// </param>
/// <param name="filter">Filter stored on this spider.</param>
/// <param name="callback">
/// Delegate taking an <c>Index</c>; stored for later use (its invocation is
/// not part of this constructor).
/// </param>
public Spider(Frontier frontier, Index index, Filtering.Filter filter, Action<Index> callback)
 {
     // Each spider gets its own empty index derived from the template one.
     Index privateIndex = Index.CreateEmptyCopy(index);

     this.frontier = frontier;
     this.index = privateIndex;
     this.filter = filter;
     this.callback = callback;
 }
        /// <summary>
        /// Merges the documents and stems of <paramref name="index"/> into this index.
        /// A document from <paramref name="index"/> is only added when its similarity to
        /// every document already in this index is below 0.9.
        /// </summary>
        /// <param name="index">The index whose content is merged into this one.</param>
        public void MergeIn(Index index)
        {
            // Incoming documents that are not near-duplicates of anything we already hold.
            List<Document> unique = new List<Document>();

            foreach (Document candidate in index.sites)
            {
                bool isDuplicate = false;

                foreach (var existing in sites)
                {
                    double score = similarity.CalculateSimilarity(existing, candidate);
                    if (score >= 0.9)
                    {
                        // One sufficiently similar document is enough to reject the candidate.
                        isDuplicate = true;
                        break;
                    }
                }

                if (!isDuplicate)
                    unique.Add(candidate);
            }

            // Only now extend our own document list, so incoming documents are never
            // compared against each other.
            foreach (var accepted in unique)
                sites.Add(accepted);

            foreach (var term in index.stems.Keys)
            {
                var incoming = index.stems[term];

                if (stems.ContainsKey(term))
                {
                    // Merge ordered by document id. The predicate is true for postings whose
                    // document was rejected above.
                    // NOTE(review): presumably MergeInto skips postings matching the predicate - confirm.
                    stems[term].MergeInto(incoming, (x, y) => x.Document.Id.CompareTo(y.Document.Id), p => !unique.Contains(p.Document));
                }
                else
                {
                    // NOTE(review): the incoming postings are adopted as-is (same object, no
                    // duplicate filtering) - confirm this is intended.
                    stems.Add(term, incoming);
                }
            }
        }
        /// <summary>
        /// Builds a ranker for <paramref name="index"/>: computes a tf-idf entry for every
        /// stem of the index and then replaces every value in <c>lengths</c> by its square root.
        /// </summary>
        /// <param name="index">Index whose stems and site count are used; also stored on the ranker.</param>
        /// <param name="stemmer">Stemmer stored on the ranker.</param>
        public Ranker(Index index, TermStemmer stemmer)
        {
            int totalDocuments = index.SiteCount;

            foreach (var stemEntry in index.GetStems())
            {
                // NOTE(review): getTF_IDF presumably also accumulates into `lengths` - confirm.
                var weights = getTF_IDF(stemEntry.Value, totalDocuments);
                tf_idf.Add(stemEntry.Key, weights);
            }

            // Snapshot the keys first: the loop below writes to `lengths` while walking them.
            Document[] documents = lengths.Keys.ToArray();
            for (int i = 0; i < documents.Length; i++)
            {
                Document doc = documents[i];
                lengths[doc] = Math.Sqrt(lengths[doc]);
            }

            this.index = index;
            this.stemmer = stemmer;
        }
        /// <summary>
        /// Starts one spider per <c>SPIDER_PAGE_COUNT</c> pages (rounded up), each on its own
        /// thread, and blocks until every one of those threads has finished.
        /// </summary>
        /// <param name="frontier">Frontier passed to every spider.</param>
        /// <param name="index">
        /// Shared index that each spider's result is merged into. It is also the lock object
        /// that serialises the merge and its console message.
        /// </param>
        /// <param name="filter">Filter passed to every spider.</param>
        /// <param name="pagecount">Total number of pages; determines how many spiders are started.</param>
        public static void StartAndWait(Frontier frontier, Index index, Filtering.Filter filter, int pagecount)
        {
            int count = (int)Math.Ceiling(pagecount / (double)SPIDER_PAGE_COUNT);

            Thread[] threads = new Thread[count];

            for (int i = 0; i < count; i++)
            {
                // Declared inside the loop so each thread's lambda captures its own spider.
                Spider sp = new Spider(frontier, index, filter, ind =>
                {
                    // Several spiders may report at the same time (hence the lock). The console
                    // colour is process-wide, so the coloured message is written under the same
                    // lock as the merge; otherwise colour changes and lines from different
                    // threads can interleave.
                    lock (index)
                    {
                        Console.ForegroundColor = ConsoleColor.Cyan;
                        Console.WriteLine("Merging Index of {0}", ind.SiteCount);
                        Console.ForegroundColor = ConsoleColor.Gray;

                        index.MergeIn(ind);
                    }
                });

                threads[i] = new Thread(() => sp.Run());
                threads[i].Start();
            }

            // Wait for all spiders before returning to the caller.
            foreach (Thread thread in threads)
                thread.Join();
        }
 /// <summary>
 /// Creates a new index that is constructed from the stemmer and similarity of
 /// <paramref name="copyFrom"/>; nothing else of the source index is passed along.
 /// </summary>
 /// <param name="copyFrom">Index whose stemmer and similarity are reused.</param>
 /// <returns>The newly constructed index.</returns>
 public static Index CreateEmptyCopy(Index copyFrom)
 {
     Index emptyCopy = new Index(copyFrom.stemmer, copyFrom.similarity);
     return emptyCopy;
 }
 /// <summary>
 /// Builds a fresh index from the stemmer and similarity of <paramref name="copyFrom"/>.
 /// Only those two members of the source index are handed to the new instance.
 /// </summary>
 /// <param name="copyFrom">Index supplying the stemmer and similarity.</param>
 /// <returns>A new index constructed with the same stemmer and similarity.</returns>
 public static Index CreateEmptyCopy(Index copyFrom)
 {
     var sourceStemmer = copyFrom.stemmer;
     var sourceSimilarity = copyFrom.similarity;

     return new Index(sourceStemmer, sourceSimilarity);
 }