/// <summary>
/// Loads the request URI in a headless Chromium instance driven by Puppeteer
/// and returns the page content.
/// </summary>
/// <param name="request">Crawl Request; supplies the URI, optional proxy, optional credentials, headers and cookie.</param>
/// <returns>Crawl Response carrying the page HTML, the navigation response headers and its status code.</returns>
/// <exception cref="Exception">
/// Thrown with a "chromium error:" prefixed message when a step performed on the page fails.
/// The original exception is attached as <see cref="Exception.InnerException"/>.
/// </exception>
public async Task<Response> RequestAsync(Request request)
{
    var args = new List<string> { "--no-sandbox" };
    if (request.Proxy != null)
    {
        args.Add("--proxy-server=" + request.Proxy.Uri.ToString());
    }

    var launchOptions = new LaunchOptions
    {
        Headless = true,
        Args = args.ToArray()
    };

    using (var browser = await Puppeteer.LaunchAsync(launchOptions))
    using (var page = await browser.NewPageAsync())
    {
        try
        {
            // Authentication: only applied when both user name and password are present.
            if (!string.IsNullOrEmpty(request.Username) && !string.IsNullOrEmpty(request.Password))
            {
                await page.AuthenticateAsync(new Credentials
                {
                    Username = request.Username,
                    Password = request.Password
                });
            }

            // Cookies
            var cookie = GetCookie(request);
            var cookies = GenerateCookieParam(cookie);
            await page.SetCookieAsync(cookies);

            // Extra HTTP headers
            var dic = request.Headers.ToDictionary(h => h.Name, h => h.Value);
            await page.SetExtraHttpHeadersAsync(dic);

            // User agent: projecting to the value before SingleOrDefault avoids
            // dereferencing a missing header; the browser default is kept when
            // the request carries no User-Agent header.
            var ua = request.Headers
                .Where(m => m.Name == "User-Agent")
                .Select(m => m.Value)
                .SingleOrDefault();
            if (ua != null)
            {
                await page.SetUserAgentAsync(ua);
            }

            var res = await page.GoToAsync(Uri.EscapeUriString(request.Uri.ToString()));
            var htmlString = await page.GetContentAsync();

            var response = new Response();
            response.Headers = WebHeader.FromDictionary(res.Headers);
            response.Data = htmlString;
            response.StatusCode = res.Status;
            return response;
        }
        catch (Exception ex)
        {
            // Keep the original exception as InnerException so the stack trace
            // and concrete exception type are not lost.
            throw new Exception("chromium error:" + ex.Message, ex);
        }
    }
}
/// <summary>
/// Performs the crawl request over HTTP and converts the HTTP response into a crawl response.
/// </summary>
/// <param name="request">Crawl request; its Ip, Uri, IsRaw, Method, UseCookie and Charset values are read here.</param>
/// <returns>
/// The crawl response. When <c>request.Ip</c> is set but cannot be parsed or is rejected by
/// <c>IPHelper.IsHostIPAddress</c>, a BadRequest response is returned without issuing the HTTP request.
/// </returns>
public Response Request(Request request)
{
    if (!string.IsNullOrEmpty(request.Ip))
    {
        // A malformed address is reported the same way as a rejected one,
        // instead of letting IPAddress.Parse throw a FormatException.
        IPAddress ip;
        if (!IPAddress.TryParse(request.Ip, out ip) || !IPHelper.IsHostIPAddress(ip))
        {
            return new Response
            {
                IsRaw = false,
                StatusCode = HttpStatusCode.BadRequest,
                Data = "specified Ip is invalid!"
            };
        }
    }

    var httpResponse = GetHttpWebResponse(request);
    try
    {
        var buff = GetResponseBuff(httpResponse);

        var response = new Response();
        response.StatusCode = httpResponse.StatusCode;
        response.Headers = WebHeader.FromWebHeader(httpResponse.Headers);
        response.RequestUri = request.Uri;
        response.ResponseUri = httpResponse.ResponseUri;
        response.IsRaw = request.IsRaw;
        response.Method = request.Method;

        if (request.UseCookie)
        {
            SetCookie(request, httpResponse.Headers.Get("Set-Cookie"));
        }

        if (request.IsRaw)
        {
            // Raw requests get the undecoded buffer.
            response.Data = buff;
        }
        else
        {
            var result = Decoding.GetStringFromBuff(buff, httpResponse, request.Charset);
            response.Charset = result.CharSet;
            response.Data = result.Body;
            response.Cookie = GetCookie(request);
        }

        return response;
    }
    finally
    {
        // Close on every path so the response is released even when
        // buffering or decoding throws.
        if (httpResponse != null)
        {
            httpResponse.Close();
        }
    }
}
/// <summary>
/// Loads the request URI in a headless Chromium instance driven by Puppeteer
/// and returns the page content.
/// </summary>
/// <param name="request">
/// Crawl Request; supplies the URI, optional proxy, optional credentials, headers, cookie and
/// an optional <c>WaitDom</c> selector to wait for before the content is read.
/// </param>
/// <returns>Crawl Response carrying the page HTML, the navigation response headers and its status code.</returns>
/// <exception cref="Exception">
/// Thrown with a "chromium error:" prefixed message when a step performed on the page fails.
/// The original exception is attached as <see cref="Exception.InnerException"/>.
/// </exception>
public async Task<Response> RequestAsync(Request request)
{
    var args = new List<string> { "--no-sandbox" };
    if (request.Proxy != null)
    {
        args.Add("--proxy-server=" + request.Proxy.Uri.ToString());
    }

    var launchOptions = new LaunchOptions
    {
        Headless = true,
        Args = args.ToArray()
    };

    // On Windows and Linux the executable is taken from the "chromium" folder
    // under the application base directory. No path is set on other platforms
    // (including macOS), matching the previous behaviour.
    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        launchOptions.ExecutablePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "chromium", "chrome.exe");
    }
    else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
    {
        launchOptions.ExecutablePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "chromium", "chrome");
    }

    using (var browser = await Puppeteer.LaunchAsync(launchOptions))
    using (var page = await browser.NewPageAsync())
    {
        try
        {
            // Authentication: only applied when both user name and password are present.
            if (!string.IsNullOrEmpty(request.Username) && !string.IsNullOrEmpty(request.Password))
            {
                await page.AuthenticateAsync(new Credentials
                {
                    Username = request.Username,
                    Password = request.Password
                });
            }

            // Cookies
            var cookie = GetCookie(request);
            var cookies = GenerateCookieParam(cookie);
            await page.SetCookieAsync(cookies);

            // Extra HTTP headers
            var dic = request.Headers.ToDictionary(h => h.Name, h => h.Value);
            await page.SetExtraHttpHeadersAsync(dic);

            // User agent: projecting to the value before SingleOrDefault avoids
            // dereferencing a missing header; the browser default is kept when
            // the request carries no User-Agent header.
            var ua = request.Headers
                .Where(m => m.Name == "User-Agent")
                .Select(m => m.Value)
                .SingleOrDefault();
            if (ua != null)
            {
                await page.SetUserAgentAsync(ua);
            }

            var res = await page.GoToAsync(Uri.EscapeUriString(request.Uri.ToString()));

            // Wait for the selector the caller asked for. The previous code
            // ignored request.WaitDom and always waited for a hard-coded
            // "table.hpl_roomInfo" selector.
            if (!string.IsNullOrEmpty(request.WaitDom))
            {
                await page.WaitForSelectorAsync(request.WaitDom);
            }

            var htmlString = await page.GetContentAsync();

            var response = new Response();
            response.Headers = WebHeader.FromDictionary(res.Headers);
            response.Data = htmlString;
            response.StatusCode = res.Status;
            return response;
        }
        catch (Exception ex)
        {
            // Keep the original exception as InnerException so the stack trace
            // and concrete exception type are not lost.
            throw new Exception("chromium error:" + ex.Message, ex);
        }
    }
}
/// <summary>
/// Executes a crawl request, through <c>PhantomCrawler</c> when <c>request.RunJS</c> is set,
/// otherwise over plain HTTP.
/// </summary>
/// <param name="request">crawl request</param>
/// <returns>
/// crawl response; an exception raised while the request is processed is reported as a
/// BadRequest response whose Data starts with "response error " instead of being thrown
/// </returns>
public Response Request(Request request)
{
    Logger.GetLogger(request.Elect).Info("request " + request.Uri.ToString() + " with ip:" + request.Ip + (request.Proxy != null ? (" proxy:" + request.Proxy.Ip + ":" + request.Proxy.Port) : ""));
    SimulateBrowser(request);

    try
    {
        if (request.RunJS)
        {
            var p = new PhantomCrawler();
            var res = p.Request(request);

            if (request.UseCookie && res.Headers != null)
            {
                var cookies = res.Headers.Where(m => m.Name == "Set-Cookie").Select(m => m.Value).ToList();
                if (cookies.Count > 0)
                {
                    // NOTE(review): several Set-Cookie entries are concatenated with no
                    // separator; only embedded "\n" characters become commas. Confirm this
                    // matches what PhantomCrawler returns and what SetCookie expects.
                    var c = string.Join("", cookies).Replace("\n", ",");
                    SetCookie(request, c);
                    res.Cookie = GetCookie(request);
                }
            }

            res.ElectInfo = request.Elect;
            res.Request = request;
            Logger.GetLogger(request.Elect).Info(request.Uri.ToString() + " response status is " + res.StatusCode.ToString());
            return res;
        }

        if (!string.IsNullOrEmpty(request.Ip))
        {
            if (!IPHelper.IsHostIPAddress(IPAddress.Parse(request.Ip)))
            {
                return new Response
                {
                    IsRaw = false,
                    StatusCode = HttpStatusCode.BadRequest,
                    Data = "specified Ip is invalid!"
                };
            }
        }

        var httpResponse = GetHttpWebResponse(request);
        if (httpResponse == null)
        {
            var r = new Response();
            r.StatusCode = HttpStatusCode.BadRequest;
            r.Data = "httpResponse is null";
            return r;
        }

        try
        {
            var buff = GetResponseBuff(httpResponse);

            var response = new Response();
            response.StatusCode = httpResponse.StatusCode;
            response.Headers = WebHeader.FromWebHeader(httpResponse.Headers);
            response.Request = request;
            response.ResponseUri = httpResponse.ResponseUri;

            // Raw/text detection uses the content type when the server sent one,
            // otherwise the response URI.
            if (!string.IsNullOrEmpty(httpResponse.ContentType))
            {
                response.IsRaw = MimeDetect.IsRaw(httpResponse.ContentType);
            }
            else
            {
                response.IsRaw = MimeDetect.IsRaw(httpResponse.ResponseUri);
            }

            if (request.UseCookie)
            {
                SetCookie(request, httpResponse.Headers.Get("Set-Cookie"));
            }

            if (response.IsRaw)
            {
                response.Data = buff;
            }
            else
            {
                var result = Decoding.GetStringFromBuff(buff, httpResponse, request.Charset);
                response.Charset = result.CharSet;
                response.Data = result.Body;
                response.Cookie = GetCookie(request);
            }

            Logger.GetLogger(request.Elect).Info(request.Uri.ToString() + " response status is " + response.StatusCode.ToString());
            return response;
        }
        finally
        {
            // Close on every path: previously an exception while buffering or
            // decoding was swallowed by the outer catch and the response was
            // never closed.
            httpResponse.Close();
        }
    }
    catch (Exception ex)
    {
        var r = new Response();
        r.StatusCode = HttpStatusCode.BadRequest;
        r.Data = "response error " + ex.Message;
        return r;
    }
}