/// <summary>
/// Console entry point: crawls from a fixed seed URL, reports how long the crawl took,
/// builds a <c>Ranker</c> over the shared index, and then answers search queries read
/// from standard input until an empty line (or end of input) is received.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Console.WindowWidth += 50;

    frontier = new Frontier(new Exclusions());
    frontier.Add(new URL("http://en.wikipedia.org/wiki/Teenage_Mutant_Ninja_Turtles"));

    // Stay on en.wikipedia.org and combine that with the extension filter for binary/media files.
    Filter filter = new DomainFilter("en.wikipedia.org")
        & new ExtentionFilter(false, "jpg", "jpeg", "gif", "png", "rar", "zip", "exe", "pdf");

    // Stopwatch is monotonic, so the measurement is not skewed by wall-clock adjustments
    // the way DateTime.Now differences can be.
    var timer = System.Diagnostics.Stopwatch.StartNew();
    // NOTE(review): the meaning of the literal 100 is not visible from here — confirm against Crawler.
    Crawler.StartAndWait(frontier, index, filter, 100);
    timer.Stop();

    double crawlSeconds = timer.Elapsed.TotalSeconds;
    Console.WriteLine(
        "Crawler done in {0:0.00} sec ({1:0.00} pages per sec).",
        crawlSeconds,
        index.SiteCount / crawlSeconds);

    Console.WriteLine("Press any key to start querying.");
    Console.ReadKey(true);
    Console.WriteLine();

    timer = System.Diagnostics.Stopwatch.StartNew();
    Ranker r = new Ranker(index, TrimmingStemmer.GetStemmer(PorterStemmer.StemTerm));
    timer.Stop();
    Console.WriteLine("Ranker created in {0:0.00} sec.", timer.Elapsed.TotalSeconds);

    while (true)
    {
        Console.WriteLine("Query for data below. Enter an empty string to quit.");
        Console.Write("Search for: ");
        string searchQuery = Console.ReadLine();

        // ReadLine returns null once standard input is exhausted (Ctrl+Z / redirected input).
        // Treat that like an empty query so a null is never handed to the ranker.
        if (string.IsNullOrEmpty(searchQuery))
        {
            break;
        }

        // The timed region covers both ranking and printing the hits, best rank first.
        timer = System.Diagnostics.Stopwatch.StartNew();
        foreach (var doc in r.GetHits(searchQuery).OrderByDescending(x => x.Item2))
        {
            Console.WriteLine("Rank: {1:0.000000} for: {0}", doc.Item1.URL.Address, doc.Item2);
        }
        timer.Stop();

        Console.WriteLine("Query completed in {0:0.00} sec", timer.Elapsed.TotalSeconds);
        Console.WriteLine();
    }

    frontier.Kill();
}
/// <summary>
/// Program entry point. Seeds the frontier with a single Wikipedia article, runs the crawler
/// to completion, prints crawl statistics, constructs a <c>Ranker</c>, and then serves an
/// interactive query prompt that ends on an empty line or when input is exhausted.
/// </summary>
/// <param name="args">Command-line arguments; ignored.</param>
static void Main(string[] args)
{
    Console.WindowWidth += 50;

    frontier = new Frontier(new Exclusions());
    frontier.Add(new URL("http://en.wikipedia.org/wiki/Teenage_Mutant_Ninja_Turtles"));

    Filter filter = new DomainFilter("en.wikipedia.org")
        & new ExtentionFilter(false, "jpg", "jpeg", "gif", "png", "rar", "zip", "exe", "pdf");

    // Elapsed time is taken from a Stopwatch rather than DateTime.Now differences, which
    // can jump when the system clock is adjusted.
    var crawlTimer = System.Diagnostics.Stopwatch.StartNew();
    // NOTE(review): what the trailing 100 configures is not shown in this file section — verify.
    Crawler.StartAndWait(frontier, index, filter, 100);
    crawlTimer.Stop();

    double crawlSeconds = crawlTimer.Elapsed.TotalSeconds;
    Console.WriteLine(
        "Crawler done in {0:0.00} sec ({1:0.00} pages per sec).",
        crawlSeconds,
        index.SiteCount / crawlSeconds);

    Console.WriteLine("Press any key to start querying.");
    Console.ReadKey(true);
    Console.WriteLine();

    var rankerTimer = System.Diagnostics.Stopwatch.StartNew();
    Ranker r = new Ranker(index, TrimmingStemmer.GetStemmer(PorterStemmer.StemTerm));
    rankerTimer.Stop();
    Console.WriteLine("Ranker created in {0:0.00} sec.", rankerTimer.Elapsed.TotalSeconds);

    string searchQuery;
    while (true)
    {
        Console.WriteLine("Query for data below. Enter an empty string to quit.");
        Console.Write("Search for: ");
        searchQuery = Console.ReadLine();

        // Console.ReadLine yields null at end of input; comparing only against "" would let
        // that null through to GetHits, so null and empty are both treated as "quit".
        if (string.IsNullOrEmpty(searchQuery))
        {
            break;
        }

        // Timing includes writing the results, which are listed in descending rank order.
        var queryTimer = System.Diagnostics.Stopwatch.StartNew();
        foreach (var doc in r.GetHits(searchQuery).OrderByDescending(x => x.Item2))
        {
            Console.WriteLine("Rank: {1:0.000000} for: {0}", doc.Item1.URL.Address, doc.Item2);
        }
        queryTimer.Stop();

        Console.WriteLine("Query completed in {0:0.00} sec", queryTimer.Elapsed.TotalSeconds);
        Console.WriteLine();
    }

    frontier.Kill();
}
/// <summary>
/// Crawl loop: takes documents from the frontier, tries to add each to the index, and for
/// every newly indexed document queues the links that pass the filter. Stops after
/// <c>SPIDER_PAGE_COUNT</c> documents have been processed or when the frontier returns
/// <c>null</c>, then invokes the callback with the index.
/// </summary>
public void Run()
{
    // Keep asking until the frontier hands out a first document.
    // NOTE(review): if Next() returns null without blocking, this loop spins — confirm against Frontier.
    Document doc = null;
    while (doc == null)
    {
        doc = frontier.Next();
    }

    int count = 0;
    while (doc != null)
    {
        // Links are only extracted for documents the index accepted.
        if (index.TryAddUrl(doc))
        {
            // Materialize the links before adding any of them to the frontier.
            var links = GetLinks(doc.URL, doc.HTML).ToArray();
            foreach (var link in links)
            {
                if (filter.Allow(link))
                {
                    frontier.Add(link);
                }
            }
        }

        Console.WriteLine("{0}", doc.URL);

        // Every processed document counts toward the limit, whether or not it was indexed.
        count++;
        if (count == SPIDER_PAGE_COUNT)
        {
            break;
        }

        doc = frontier.Next();
    }

    callback(this.index);
}