/// <summary>
/// Enqueues every page already stored in the database for re-indexing and
/// runs the indexer to completion on a background thread.
/// </summary>
private static void IndexOnPagesInDB_IAMLAZY()
{
    foreach (var page in database.GetAllPages())
    {
        // Timestamp marks when the page was queued for indexing, not when it was crawled.
        ToBeIndexedQueue.Enqueue(new Tuple<PrettyURL, string, DateTime>(new PrettyURL(page.url), page.html, DateTime.Now));
    }

    // CountdownEvent is IDisposable — dispose it when the wait completes.
    // Coordination pattern: the initial count of 1 is held by this method;
    // the worker thread adds one count and signals it when finished.
    using (CountdownEvent CTE = new CountdownEvent(1))
    {
        var indexer = new MainIndexer(stopWords, charsToRemove, ToBeIndexedQueue, CTE);

        Thread indexerThread = new Thread(() => indexer.CreateInverseIndexWriteToDB(true));
        CTE.AddCount();
        indexerThread.Start();

        // Release our own count, then block until the indexer signals completion.
        CTE.Signal();
        CTE.Wait();
    }
}
/// <summary>
/// Reads a search string from the console, normalizes it through the same
/// pipeline the indexer applies to documents, and prints up to 10 matching URLs.
/// </summary>
private static void DoSomeSearching()
{
    Console.WriteLine("\nSearch string:");
    string input = Console.ReadLine();

    // Run the query through the indexer's string pipeline so the query terms
    // match the normalized terms stored in the index.
    var terms = new MainIndexer(null, null, null, null).DoStuffOnInputString(input);
    input = string.Join(" ", terms);

    // Materialize once: the original deferred query was enumerated twice
    // (Count() and then foreach), which executed the search twice.
    var result = Searcher.SearchAndGetURLs(input).Take(10).ToList();

    if (result.Count > 0)
    {
        foreach (var res in result)
        {
            Console.WriteLine(res);
        }
    }
    else
    {
        Console.WriteLine("No results");
    }
}
/// <summary>
/// Runs the crawler and the indexer concurrently until roughly
/// <paramref name="approxSites"/> sites have been crawled and indexed.
/// Seeds the crawl with the URLs already in the database, falling back to a
/// hard-coded seed list when the database is empty.
/// </summary>
/// <param name="approxSites">Approximate number of sites the crawler should visit.</param>
private static void DoSomeCrawlingAndIndexing(int approxSites)
{
    // Materialize once: the original deferred query was enumerated twice
    // (Count() and then again as the seed), querying the database twice.
    var dbPages = database.GetAllPages().Select(p => new PrettyURL(p.url)).ToList();

    // NOTE(review): "politikken.dk" looks like a typo for "politiken.dk" — confirm before changing.
    var seed = dbPages.Count > 0
        ? (IEnumerable<PrettyURL>)dbPages
        : new PrettyURL[] { new PrettyURL("newz.dk"), new PrettyURL("aau.dk"), new PrettyURL("politikken.dk") };

    Crawler.SitesToCrawl = approxSites;

    // CountdownEvent is IDisposable — dispose it when the wait completes.
    // Coordination pattern: the initial count of 1 is held by this method;
    // each worker thread adds one count and signals it when finished.
    using (CountdownEvent CTE = new CountdownEvent(1))
    {
        var crawler = new Crawler(numFrontQueues, numBackQueues, timeBetweenHits, maxRobotAge, seed, ToBeIndexedQueue, CTE);
        var indexer = new MainIndexer(stopWords, charsToRemove, ToBeIndexedQueue, CTE);

        Thread crawlerThread = new Thread(crawler.Crawl);
        CTE.AddCount();
        Thread indexerThread = new Thread(() => indexer.CreateInverseIndexWriteToDB(false));
        CTE.AddCount();

        crawlerThread.Start();
        indexerThread.Start();

        // Release our own count, then block until both workers signal completion.
        CTE.Signal();
        CTE.Wait();
    }
}