public void RunCrawler()
{
    Crawler crawler = new Crawler();
    // Route the crawler's status messages back to the UI.
    crawler.SetStatus = new Crawler.StatusOutput(AddOutput);

    // Seed the crawl with the site entered in the text box.
    List<URLInfo> badUrls;
    List<URLInfo> urlsToCrawl = new List<URLInfo>();
    URLInfo info = new URLInfo();
    info.URL = txtSite.Text;
    info.RootURL = txtSite.Text;
    info.LastCrawled = DateTime.MinValue;
    urlsToCrawl.Add(info);

    List<URLInfo> urls = crawler.Crawl(urlsToCrawl, out badUrls, Int32.MaxValue);
    AddOutput(urls.Count + " URLs found. " + badUrls.Count + " bad URLs.\r\n");

    // Write the results under %LocalAppData%\CrawlMapper, creating the folder if it doesn't exist yet.
    String folder = Path.Combine(
        Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
        "CrawlMapper");
    Directory.CreateDirectory(folder);

    using (TextWriter writer = new StreamWriter(Path.Combine(folder, "sitemap.txt")))
    {
        foreach (URLInfo str in urls)
        {
            writer.WriteLine(str.URL);
        }
    }

    using (TextWriter writer = new StreamWriter(Path.Combine(folder, "badurls.txt")))
    {
        foreach (URLInfo str in badUrls)
        {
            writer.WriteLine(str.URL);
        }
    }

    SetStatus("Crawl completed. Check sitemap.txt in the app data folder.\r\n");
    Process.Start(folder);
    EnableGoButton();
}
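RunCrawler leans on the Crawler and URLInfo types, which are not shown in this listing. The following is a minimal sketch of their assumed shapes, inferred purely from how the methods here use them; the member names come from the calls above, and the bodies are placeholders rather than the real CrawlMapper implementation.

// Hedged sketch of the types this code depends on (assumptions inferred from usage).
public class URLInfo
{
    public string URL { get; set; }
    public string RootURL { get; set; }
    public DateTime LastCrawled { get; set; }
    // Page metadata referenced by the console version's optional output line.
    public string PageTitle { get; set; }
    public string PageDescription { get; set; }
    public string PageKeywords { get; set; }
}

public class Crawler
{
    // Delegate used to push status text back to the caller (e.g. the form's AddOutput).
    public delegate void StatusOutput(string message);
    public StatusOutput SetStatus;

    // Crawls pages reachable from the seed list, up to maxUrls pages, returning the
    // URLs found and passing back any that could not be fetched.
    public List<URLInfo> Crawl(List<URLInfo> urlsToCrawl, out List<URLInfo> badUrls, int maxUrls)
    {
        // ...actual crawling logic is not part of this listing...
        badUrls = new List<URLInfo>();
        return new List<URLInfo>();
    }
}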
static int Main(string[] args)
{
    if (args.Length < 1)
    {
        Console.WriteLine("You need to supply the name of a site to crawl.");
        Console.WriteLine("Usage: CrawlMapper.exe <site> [outputfile] [maxurls]");
        return 0;
    }

    // Note: an earlier version read a list of sites to crawl from a file named on the
    // command line; this version crawls the single site given in args[0].

    // Optional second argument: output file name (defaults to sitemap.txt).
    String outputfile = "sitemap.txt";
    if (args.Length > 1)
    {
        outputfile = args[1];
    }

    // Optional third argument: maximum number of URLs to crawl.
    int maxurls = Int32.MaxValue;
    if (args.Length > 2 && !Int32.TryParse(args[2], out maxurls))
    {
        maxurls = Int32.MaxValue;
    }
    // Let the release settings cap the requested limit instead of discarding it.
    maxurls = ReleaseSettings.SetMaxUrls(maxurls);

    List<URLInfo> sitesToCrawl = new List<URLInfo>();
    URLInfo site = new URLInfo();
    site.RootURL = args[0];
    site.URL = args[0];
    site.LastCrawled = DateTime.MinValue;
    sitesToCrawl.Add(site);

    Console.WriteLine("Crawling Site: " + args[0]);
    Crawler crawler = new Crawler();
    List<URLInfo> badUrls;
    List<URLInfo> urls = crawler.Crawl(sitesToCrawl, out badUrls, maxurls);

    Console.WriteLine(urls.Count + " URLs found.");
    using (TextWriter writer = new StreamWriter(outputfile))
    {
        foreach (URLInfo str in urls)
        {
            writer.WriteLine(str.URL);
            // To include page metadata instead:
            //writer.WriteLine("URL: " + str.URL + ", Desc: " + str.PageDescription +
            //    ", Keyw: " + str.PageKeywords + ", Title: " + str.PageTitle);
        }
    }
    Console.WriteLine("Crawl completed. Check the " + outputfile + " file in the executable directory.");

    Console.WriteLine(badUrls.Count + " bad URLs.");
    using (TextWriter writer = new StreamWriter("badurls.txt"))
    {
        foreach (URLInfo str in badUrls)
        {
            writer.WriteLine(str.URL);
        }
    }
    Console.WriteLine("Check the badurls.txt file in the executable directory for any un-crawlable URLs.");

    return 0;
}
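For reference, here is how the console build would be invoked, assuming the executable is named CrawlMapper.exe as the usage message states; the site URL, file name, and limit below are purely illustrative values.

// Example invocations (illustrative values only):
//   CrawlMapper.exe http://www.example.com
//   CrawlMapper.exe http://www.example.com mymap.txt 500
// The first writes sitemap.txt and badurls.txt next to the executable; the second
// writes mymap.txt and stops after roughly 500 URLs, subject to whatever cap
// ReleaseSettings.SetMaxUrls applies.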