// will still follow links in comments

/// <summary>
/// Builds a <c>WebCrawler</c> from two kamcord.com URL strings and writes every
/// link it returns to standard output, one per line.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    var crawler = new WebCrawler("http://kamcord.com", "http://kamcord.com/developers/");

    // Enumerate the crawl result directly; each item is printed as-is.
    foreach (var link in crawler.GetAllLinks())
    {
        System.Console.WriteLine(link);
    }
}
/// <summary>
/// Prints the page that is about to be loaded, creates the shared crawler
/// instance and registers the handler that runs when the crawler reports
/// completion.
/// </summary>
static void Init()
{
    const string url = @"http://www.bxwx3.org/txt/48595/169891/htm";
    Console.WriteLine("Now loading " + url);

    // NOTE(review): the crawler is built from MCrawlerParam[1], not from the
    // url printed above — confirm the two are meant to describe the same page.
    wc = new WebCrawler(MCrawlerParam[1]);

    // On completion: raise the stop flag, then stop and release the crawler.
    wc.OnDone += (sender, doneArgs) =>
    {
        isStop = true;
        wc.Stop();
        wc.Dispose();
    };
}
/// <summary>
/// Opens the author's page in a Chrome-driven crawler, works out how many
/// result pages exist, collects the author's publications and prints each one.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // The driver owns an external browser process. The using statement makes
    // sure it is shut down when the crawl finishes or any step below throws.
    using (var driver = new ChromeDriver(DriverPath.path))
    {
        // NOTE(review): the URL contains a space before "rel=" — looks like a
        // paste artifact, left untouched here; confirm against the real address.
        WebCrawler crawler = new WebCrawler(
            "http://suw.biblos.pk.edu.pl/userHomepage&uId=722& rel=BPP-author",
            driver);

        crawler.NavigateToPage();
        crawler.SetAuthorPublicationNumber();

        // Ceiling division: number of pages needed to hold all publications.
        crawler.totalPages = (crawler.publicationsNumber + crawler.pageSize - 1) / crawler.pageSize;

        crawler.GetAuthorPublications();

        foreach (var item in crawler.publications)
        {
            Console.WriteLine(item);
        }
    }
}
/// <summary>
/// Click handler for the start button: configures a crawler from the form's
/// inputs, seeds it with the start URL, runs the crawl and reports completion
/// in the status label.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data; not read by this method.</param>
private void start_Click(object sender, EventArgs e)
{
    testLable.Text = "开始爬行....";

    WebCrawler myCrawler = new WebCrawler(urls);

    // Accept only a count strictly between 0 and 15. Anything else — including
    // empty or non-numeric text, which previously threw from int.Parse — falls
    // back to the default of 10.
    int requestedCount;
    bool isNumber = int.TryParse(textBox1.Text, out requestedCount);
    if (isNumber && requestedCount > 0 && requestedCount < 15)
    {
        myCrawler.maxCount = requestedCount;
    }
    else
    {
        myCrawler.maxCount = 10;
    }

    myCrawler.label = testLable;

    string startUrl = textForWeb.Text;
    try
    {
        myCrawler.urls.Add(startUrl, false);
    }
    catch (ArgumentException)
    {
        // Deliberately ignored: Add rejected the key (e.g. the URL is already
        // queued), so there is nothing new to seed.
    }

    // One crawl pass while holding the lock on the shared url collection...
    lock (urls)
    {
        myCrawler.Crawl();
    }

    // ...followed by two crawl passes run in parallel.
    // NOTE(review): all of this runs synchronously inside the click handler —
    // confirm that blocking the caller for the whole crawl is intended.
    Parallel.Invoke(
        () => myCrawler.Crawl(),
        () => myCrawler.Crawl());

    testLable.Text += "\n爬行结束";
}
/// <summary>
/// Initializes a new instance of the <see cref="MainWindow"/> class:
/// initializes the window's components, creates a <c>Logger</c> over the
/// window's logging text block and creates the <c>WebCrawler</c> that uses
/// that logger.
/// </summary>
public MainWindow()
{
    // Runs first; presumably this is what creates loggingTextBlock, which the
    // next statement reads — keep this order.
    this.InitializeComponent();

    // The logger is constructed with the window's logging text block.
    this.logger = new Logger(this.loggingTextBlock);

    // The crawler receives the logger created above.
    this.webCrawler = new WebCrawler(this.logger);
}