Example #1
        public void RunCrawler()
        {
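            // Create the crawler and route its status messages to the form's output handler.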
            Crawler crawler = new Crawler();
            crawler.SetStatus = new Crawler.StatusOutput(AddOutput);
            List<URLInfo> badUrls;
            List<URLInfo> urlsToCrawl = new List<URLInfo>();
            URLInfo info = new URLInfo();
            info.URL = txtSite.Text;
            info.RootURL = txtSite.Text;
            info.LastCrawled = DateTime.MinValue;
            urlsToCrawl.Add(info);
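            // Crawl starting from the seed URL with no practical limit on the number of URLs.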
            List<URLInfo> urls = crawler.Crawl(urlsToCrawl, out badUrls, Int32.MaxValue);
            AddOutput(urls.Count + " URLs found.  " + badUrls.Count + " bad URLs.\r\n");
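            // Write the discovered URLs to sitemap.txt under the local application data folder.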
            String path = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData) + "\\CrawlMapper\\sitemap.txt";
            TextWriter writer = new StreamWriter(path);
            foreach (URLInfo str in urls)
            {
                writer.WriteLine(str.URL);
            }
            writer.Flush();
            writer.Close();
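            // Write the URLs that could not be crawled to badurls.txt in the same folder.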
            path = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData) + "\\CrawlMapper\\badurls.txt";
            writer = new StreamWriter(path);
            foreach (URLInfo str in badUrls)
            {
                writer.WriteLine(str.URL);
            }
            writer.Flush();
            writer.Close();

            SetStatus("Crawl completed. Check sitemap.txt in the app data folder.\r\n");
            Process.Start(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData) + "\\CrawlMapper");
            EnableGoButton();
        }
Example #2
 static int Main(string[] args)
 {
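     // A site name is required as the first command-line argument.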
     if (args.Length < 1)
     {
         Console.WriteLine("You need to supply the name of a site to crawl.");
         Console.WriteLine("Usage:  CrawlMapper.exe <site>");
         return 1;
     }
     // An earlier batch mode that read a list of sites from a file has been disabled;
     // the crawler now takes a single site name directly from the command line.
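     // Optional second argument overrides the default output file name.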
     String outputfile = "sitemap.txt";
     if (args.Length > 1)
     {
         outputfile = args[1];
     }
     // Optional third argument caps the number of URLs to crawl.
     int maxurls = Int32.MaxValue;
     if (args.Length > 2 && !Int32.TryParse(args[2], out maxurls))
     {
         maxurls = Int32.MaxValue;
     }
     // Apply any release-build cap to the requested limit.
     maxurls = ReleaseSettings.SetMaxUrls(maxurls);
     List<URLInfo> sitesToCrawl = new List<URLInfo>();
     // Seed the crawl with the single site supplied on the command line.
     URLInfo site = new URLInfo();
     site.RootURL = args[0];
     site.URL = args[0];
     site.LastCrawled = DateTime.MinValue;
     sitesToCrawl.Add(site);
     Console.WriteLine("Crawling Site: " + args[0]);
     Crawler crawler = new Crawler();
     List<URLInfo> badUrls;
     List<URLInfo> urls = crawler.Crawl(sitesToCrawl, out badUrls, maxurls);
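     // Write every successfully crawled URL to the sitemap file.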
     TextWriter writer = new StreamWriter(outputfile);
     Console.WriteLine(urls.Count + " URLs found.");
     foreach (URLInfo str in urls)
     {
         writer.WriteLine(str.URL);
         //writer.WriteLine("URL: " + str.URL + ", Desc: " + str.PageDescription + ", Keyw: " + str.PageKeywords + ", Title: " + str.PageTitle);
     }
     Console.WriteLine("Crawl completed.  Check the " + outputfile + " file in the executable directory.");
     writer.Flush();
     writer.Close();
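     // Write the URLs that could not be crawled to badurls.txt.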
     writer = new StreamWriter("badurls.txt");
     Console.WriteLine(badUrls.Count + " bad URLs.");
     foreach (URLInfo str in badUrls)
     {
         writer.WriteLine(str.URL);
     }
     Console.WriteLine("Check the badurls.txt file in the executable directory for any un-crawlable URLs.");
     writer.Flush();
     writer.Close();
     return 0;
 }