Exemple #1
0
        /// <summary>
        /// Queues every page returned by <c>database.GetAllPages()</c> for indexing and
        /// runs a <c>MainIndexer</c> on a dedicated thread, blocking until the shared
        /// countdown reaches zero.
        /// </summary>
        /// <remarks>
        /// No crawling is performed; only pages already available from the database are
        /// enqueued. Each queue entry is stamped with the local time at which it was enqueued.
        /// </remarks>
        private static void IndexOnPagesInDB_IAMLAZY()
        {
            foreach (var page in database.GetAllPages())
            {
                ToBeIndexedQueue.Enqueue(new Tuple<PrettyURL, string, DateTime>(new PrettyURL(page.url), page.html, DateTime.Now));
            }

            // CountdownEvent is IDisposable; the using statement releases it once the wait is over.
            // The initial count of 1 belongs to this method and keeps the event unset during setup.
            using (CountdownEvent CTE = new CountdownEvent(1))
            {
                var indexer = new MainIndexer(stopWords, charsToRemove, ToBeIndexedQueue, CTE);

                // NOTE(review): the meaning of the 'true' argument is defined by MainIndexer — confirm there.
                Thread indexerThread = new Thread(() => indexer.CreateInverseIndexWriteToDB(true));

                // Register the indexer before it starts so the count cannot hit zero prematurely.
                CTE.AddCount();
                indexerThread.Start();

                // Release this method's own count, then block until the remaining count is signalled
                // (presumably by the indexer when it finishes — verify in MainIndexer).
                CTE.Signal();
                CTE.Wait();
            }
        }
Exemple #2
0
        /// <summary>
        /// Starts a crawler thread and an indexer thread that share <c>ToBeIndexedQueue</c>,
        /// then blocks until the shared countdown reaches zero.
        /// </summary>
        /// <param name="approxSites">Value assigned to <c>Crawler.SitesToCrawl</c> before the crawler is created.</param>
        /// <remarks>
        /// The crawl is seeded with the URLs of the pages returned by <c>database.GetAllPages()</c>;
        /// when there are none, a fixed set of three start URLs is used instead.
        /// </remarks>
        private static void DoSomeCrawlingAndIndexing(int approxSites)
        {
            // Materialize once: the original deferred query was enumerated by Count() and then
            // again by whoever consumed the seed, potentially reading the pages twice.
            var dbPages = database.GetAllPages()
                .Select(p => new PrettyURL(p.url))
                .ToArray();

            // NOTE(review): "politikken.dk" may be a misspelling of the intended host — confirm before changing.
            var seed = dbPages.Length > 0
                ? dbPages
                : new PrettyURL[] { new PrettyURL("newz.dk"), new PrettyURL("aau.dk"), new PrettyURL("politikken.dk") };

            Crawler.SitesToCrawl = approxSites;

            // CountdownEvent is IDisposable; the using statement releases it once the wait is over.
            // The initial count of 1 belongs to this method and keeps the event unset during setup.
            using (CountdownEvent CTE = new CountdownEvent(1))
            {
                var crawler = new Crawler(numFrontQueues, numBackQueues, timeBetweenHits, maxRobotAge, seed, ToBeIndexedQueue, CTE);
                var indexer = new MainIndexer(stopWords, charsToRemove, ToBeIndexedQueue, CTE);

                // One extra count per worker, registered before either thread starts.
                Thread crawlerThread = new Thread(crawler.Crawl);
                CTE.AddCount();

                // NOTE(review): the meaning of the 'false' argument is defined by MainIndexer — confirm there.
                Thread indexerThread = new Thread(() => indexer.CreateInverseIndexWriteToDB(false));
                CTE.AddCount();

                crawlerThread.Start();
                indexerThread.Start();

                // Release this method's own count, then block until the remaining counts are signalled
                // (presumably by the crawler and the indexer when they finish — verify in those classes).
                CTE.Signal();
                CTE.Wait();
            }
        }