/// <summary>
/// Requests <paramref name="url"/>, and when the response is HTML, queues every
/// accepted link found in it on the work queue so it is crawled by a later call
/// to this method. Afterwards raises <c>OnStatus</c> and <c>OnData</c>.
/// </summary>
/// <param name="url">Address to request; it is assigned to the request's Action.</param>
/// <remarks>
/// If <c>Lib.Trys</c> reports an exception it is rethrown to the caller. The request
/// object is disposed on every exit path, including failures and throwing event
/// subscribers.
/// </remarks>
private void go(string url) {
    int links = 0;
    TimeSpan time = TimeSpan.Zero;
    HttpRequest hr = new HttpRequest();
    try {
        hr.CookieContainer = _cc;

        // The literal 3 is presumably the number of attempts Lib.Trys makes — TODO confirm.
        Exception ex = Lib.Trys(delegate() {
            hr.Action = url;
            hr.Referer = hr.Address.AbsoluteUri;
            hr.Charset = "gb2312";
            // UTC timestamps keep the measured duration immune to DST / time-zone shifts.
            DateTime started = DateTime.UtcNow;
            hr.Send();
            time = DateTime.UtcNow - started;
        }, 3);
        if (ex != null) {
            // NOTE(review): "throw ex" resets the stack trace, but wrapping it would change
            // the exception type callers observe, so it is kept as is.
            throw ex;
        }

        // A response without a content type is treated as non-HTML instead of crashing.
        string contentType = hr.Response.ContentType;
        if (contentType != null && contentType.IndexOf("text/html", StringComparison.OrdinalIgnoreCase) != -1) {
            string html = hr.Response.Html;
            MatchCollection mc = _regexUrls.Matches(html);
            foreach (Match m in mc) {
                // Group 5 of _regexUrls is taken as the raw link target; strip quotes and whitespace.
                string url2 = m.Groups[5].Value.Trim('"', '\'', '\r', '\n', '\t', ' ');
                url2 = hr.Response.TranslateUrlToAbsolute(url2);

                Uri uri = null;
                if (!Uri.TryCreate(url2, UriKind.Absolute, out uri)
                    || !uri.Scheme.StartsWith("http", StringComparison.OrdinalIgnoreCase)) {
                    continue;
                }

                // Hosts and extensions are protocol tokens: compare them ordinally, not by culture.
                if (Array.FindIndex<string>(_acceptHosts, delegate(string host) {
                        return string.Equals(host, uri.Host, StringComparison.Ordinal);
                    }) == -1) {
                    continue;
                }
                if (Array.FindIndex<string>(_accpetExternal, delegate(string ext) {
                        return string.Equals(ext, Path.GetExtension(uri.AbsolutePath), StringComparison.Ordinal);
                    }) == -1) {
                    continue;
                }

                // url2 is declared inside the loop body, so each queued delegate captures its own copy.
                _wq.Enqueue(delegate() {
                    try {
                        go(url2);
                    } catch (Exception eex) {
                        Interlocked.Increment(ref _errors);
                        if (OnError != null) {
                            // NOTE(review): _errors is re-read after the increment, so under
                            // concurrency the reported count may already include later failures.
                            NicSpiderErrorEventArgs errorArgs = new NicSpiderErrorEventArgs(eex, _errors);
                            OnError(this, errorArgs);
                        }
                    }
                });
            }
            // Counts every regex match, including links that were filtered out above.
            links = mc.Count;
        }

        Interlocked.Increment(ref _completed);
        if (OnStatus != null) {
            NicSpiderStatusEventArgs statusArgs = new NicSpiderStatusEventArgs(_completed);
            OnStatus(this, statusArgs);
        }
        if (OnData != null) {
            NicSpiderDataEventArgs dataArgs = new NicSpiderDataEventArgs(hr.Head, hr.Response, links, time);
            OnData(this, dataArgs);
        }
    } finally {
        // Release the request even when sending, parsing or an event subscriber throws.
        hr.Dispose();
    }
}
/// <summary>
/// Handler for the spider's data event: passes the response's action, the link count,
/// the elapsed time, the status code text and a combined dump (request, response head
/// and response Xml, separated by blank lines) to <c>appendLog</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data supplying the request, response, link count and timing.</param>
private void spider_OnData(object sender, NicSpiderDataEventArgs e) {
    appendLog(
        e.Response.Action,
        e.Links,
        e.Time,
        e.Response.StatusCode.ToString(),
        string.Concat(e.Request, "\r\n\r\n", e.Response.Head, "\r\n\r\n", e.Response.Xml));
}