Beispiel #1
0
        /// <summary>
        /// Loads the request URI in a headless Chromium instance driven through Puppeteer
        /// and returns the resulting page as a crawl response.
        /// </summary>
        /// <param name="request">
        /// Crawl request. Its proxy, credentials, cookie and headers are applied to the page
        /// before navigating to <c>request.Uri</c>.
        /// </param>
        /// <returns>
        /// Crawl response carrying the page HTML as <c>Data</c>, plus the headers and status
        /// code of the navigation response.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown with a "chromium error:" prefix when configuring or navigating the page fails;
        /// the original exception is available as <see cref="Exception.InnerException"/>.
        /// </exception>
        public async Task<Response> RequestAsync(Request request)
        {
            // Chromium is always started with its sandbox disabled.
            var args = new List<string> { "--no-sandbox" };

            if (request.Proxy != null)
            {
                args.Add("--proxy-server=" + request.Proxy.Uri.ToString());
            }

            var launchOptions = new LaunchOptions
            {
                Headless = true,
                Args = args.ToArray()
            };

            using (var browser = await Puppeteer.LaunchAsync(launchOptions))
            using (var page = await browser.NewPageAsync())
            {
                try
                {
                    // HTTP authentication is configured only when both user name and password are given.
                    if (!string.IsNullOrEmpty(request.Username) && !string.IsNullOrEmpty(request.Password))
                    {
                        await page.AuthenticateAsync(new Credentials { Username = request.Username, Password = request.Password });
                    }

                    // Cookies stored for this request are handed to the page.
                    var cookie = GetCookie(request);
                    var cookies = GenerateCookieParam(cookie);
                    await page.SetCookieAsync(cookies);

                    // Every request header is forwarded as an extra HTTP header.
                    var headers = request.Headers.ToDictionary(h => h.Name, h => h.Value);
                    await page.SetExtraHttpHeadersAsync(headers);

                    // The user agent is overridden only when the request actually carries a
                    // User-Agent header; projecting the value before SingleOrDefault avoids
                    // dereferencing a missing header.
                    var userAgent = request.Headers
                        .Where(m => m.Name == "User-Agent")
                        .Select(m => m.Value)
                        .SingleOrDefault();
                    if (!string.IsNullOrEmpty(userAgent))
                    {
                        await page.SetUserAgentAsync(userAgent);
                    }

                    var res = await page.GoToAsync(Uri.EscapeUriString(request.Uri.ToString()));

                    var htmlString = await page.GetContentAsync();

                    var response = new Response();
                    response.Headers = WebHeader.FromDictionary(res.Headers);
                    response.Data = htmlString;
                    response.StatusCode = res.Status;

                    return response;
                }
                catch (Exception ex)
                {
                    // Keep the original exception as InnerException so its type and stack trace survive.
                    throw new Exception("chromium error:" + ex.Message, ex);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Sends the request over HTTP and converts the result into a crawl response.
        /// </summary>
        /// <param name="request">Crawl request describing the target URI, optional local IP, charset and cookie usage.</param>
        /// <returns>
        /// The crawl response. A <see cref="HttpStatusCode.BadRequest"/> response is returned when
        /// <c>request.Ip</c> is not a valid host address or when no HTTP response could be obtained.
        /// </returns>
        public Response Request(Request request)
        {
            if (!string.IsNullOrEmpty(request.Ip))
            {
                // TryParse turns a malformed address into the same "invalid" response
                // instead of letting a FormatException escape.
                IPAddress address;
                if (!IPAddress.TryParse(request.Ip, out address) || !IPHelper.IsHostIPAddress(address))
                {
                    return new Response
                    {
                        IsRaw = false,
                        StatusCode = HttpStatusCode.BadRequest,
                        Data = "specified Ip is invalid!"
                    };
                }
            }

            var httpResponse = GetHttpWebResponse(request);
            if (httpResponse == null)
            {
                return new Response
                {
                    StatusCode = HttpStatusCode.BadRequest,
                    Data = "httpResponse is null"
                };
            }

            try
            {
                var buff = GetResponseBuff(httpResponse);

                var response = new Response();

                response.StatusCode = httpResponse.StatusCode;
                response.Headers = WebHeader.FromWebHeader(httpResponse.Headers);
                response.RequestUri = request.Uri;
                response.ResponseUri = httpResponse.ResponseUri;
                response.IsRaw = request.IsRaw;
                response.Method = request.Method;

                if (request.UseCookie)
                {
                    SetCookie(request, httpResponse.Headers.Get("Set-Cookie"));
                }

                if (request.IsRaw)
                {
                    // Raw requests get the undecoded buffer.
                    response.Data = buff;
                }
                else
                {
                    var result = Decoding.GetStringFromBuff(buff, httpResponse, request.Charset);
                    response.Charset = result.CharSet;
                    response.Data = result.Body;
                    response.Cookie = GetCookie(request);
                }

                return response;
            }
            finally
            {
                // Release the connection even when reading or decoding the body throws.
                httpResponse.Close();
            }
        }
Beispiel #3
0
        /// <summary>
        /// Loads the request URI in a headless Chromium instance driven through Puppeteer
        /// and returns the resulting page as a crawl response.
        /// </summary>
        /// <param name="request">
        /// Crawl request. Its proxy, credentials, cookie and headers are applied to the page
        /// before navigating to <c>request.Uri</c>. When <c>request.WaitDom</c> is set, the page
        /// content is read only after an element matching that selector appears.
        /// </param>
        /// <returns>
        /// Crawl response carrying the page HTML as <c>Data</c>, plus the headers and status
        /// code of the navigation response.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown with a "chromium error:" prefix when configuring or navigating the page fails;
        /// the original exception is available as <see cref="Exception.InnerException"/>.
        /// </exception>
        public async Task<Response> RequestAsync(Request request)
        {
            // Chromium is always started with its sandbox disabled.
            var args = new List<string> { "--no-sandbox" };

            if (request.Proxy != null)
            {
                args.Add("--proxy-server=" + request.Proxy.Uri.ToString());
            }

            var launchOptions = new LaunchOptions
            {
                Headless = true,
                Args = args.ToArray()
            };

            // On Windows and Linux the browser binary is taken from the "chromium" folder
            // next to the application; on any other platform (including macOS)
            // ExecutablePath is left unset.
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                launchOptions.ExecutablePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "chromium", "chrome.exe");
            }
            else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
            {
                launchOptions.ExecutablePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "chromium", "chrome");
            }

            using (var browser = await Puppeteer.LaunchAsync(launchOptions))
            using (var page = await browser.NewPageAsync())
            {
                try
                {
                    // HTTP authentication is configured only when both user name and password are given.
                    if (!string.IsNullOrEmpty(request.Username) && !string.IsNullOrEmpty(request.Password))
                    {
                        await page.AuthenticateAsync(new Credentials { Username = request.Username, Password = request.Password });
                    }

                    // Cookies stored for this request are handed to the page.
                    var cookie = GetCookie(request);
                    var cookies = GenerateCookieParam(cookie);
                    await page.SetCookieAsync(cookies);

                    // Every request header is forwarded as an extra HTTP header.
                    var headers = request.Headers.ToDictionary(h => h.Name, h => h.Value);
                    await page.SetExtraHttpHeadersAsync(headers);

                    // The user agent is overridden only when the request actually carries a
                    // User-Agent header; projecting the value before SingleOrDefault avoids
                    // dereferencing a missing header.
                    var userAgent = request.Headers
                        .Where(m => m.Name == "User-Agent")
                        .Select(m => m.Value)
                        .SingleOrDefault();
                    if (!string.IsNullOrEmpty(userAgent))
                    {
                        await page.SetUserAgentAsync(userAgent);
                    }

                    var res = await page.GoToAsync(Uri.EscapeUriString(request.Uri.ToString()));

                    // Wait for the selector the caller asked for rather than a fixed one.
                    if (!string.IsNullOrEmpty(request.WaitDom))
                    {
                        await page.WaitForSelectorAsync(request.WaitDom);
                    }

                    var htmlString = await page.GetContentAsync();

                    var response = new Response();
                    response.Headers = WebHeader.FromDictionary(res.Headers);
                    response.Data = htmlString;
                    response.StatusCode = res.Status;

                    return response;
                }
                catch (Exception ex)
                {
                    // Keep the original exception as InnerException so its type and stack trace survive.
                    throw new Exception("chromium error:" + ex.Message, ex);
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Executes a crawl request, either through <c>PhantomCrawler</c> (when
        /// <c>request.RunJS</c> is set) or through a plain HTTP request.
        /// </summary>
        /// <param name="request">Crawl request describing the target URI, optional local IP, proxy, charset and cookie usage.</param>
        /// <returns>
        /// The crawl response. Failures do not escape as exceptions from the request logic:
        /// an invalid IP, a missing HTTP response or any exception is reported as a
        /// <see cref="HttpStatusCode.BadRequest"/> response whose <c>Data</c> holds the message.
        /// </returns>
        public Response Request(Request request)
        {
            Logger.GetLogger(request.Elect).Info("request " + request.Uri.ToString() + " with ip:" + request.Ip + (request.Proxy != null ? (" proxy:" + request.Proxy.Ip + ":" + request.Proxy.Port) : ""));

            SimulateBrowser(request);

            try
            {
                if (request.RunJS)
                {
                    var phantom = new PhantomCrawler();
                    var res = phantom.Request(request);
                    if (request.UseCookie && res.Headers != null)
                    {
                        var cookies = res.Headers.Where(m => m.Name == "Set-Cookie").Select(m => m.Value).ToList();
                        if (cookies.Count > 0)
                        {
                            // NOTE(review): several Set-Cookie entries are concatenated without a
                            // separator; only newlines inside a value become commas - confirm
                            // this is what SetCookie expects.
                            var joined = string.Join("", cookies).Replace("\n", ",");
                            SetCookie(request, joined);
                            res.Cookie = GetCookie(request);
                        }
                    }

                    res.ElectInfo = request.Elect;
                    res.Request = request;

                    Logger.GetLogger(request.Elect).Info(request.Uri.ToString() + " response status is " + res.StatusCode.ToString());

                    return res;
                }

                if (!string.IsNullOrEmpty(request.Ip))
                {
                    if (!IPHelper.IsHostIPAddress(IPAddress.Parse(request.Ip)))
                    {
                        return new Response
                        {
                            IsRaw = false,
                            StatusCode = HttpStatusCode.BadRequest,
                            Data = "specified Ip is invalid!"
                        };
                    }
                }

                var httpResponse = GetHttpWebResponse(request);
                if (httpResponse == null)
                {
                    var missing = new Response();
                    missing.StatusCode = HttpStatusCode.BadRequest;
                    missing.Data = "httpResponse is null";

                    return missing;
                }

                var response = new Response();

                try
                {
                    var buff = GetResponseBuff(httpResponse);

                    response.StatusCode = httpResponse.StatusCode;
                    response.Headers = WebHeader.FromWebHeader(httpResponse.Headers);
                    response.Request = request;
                    response.ResponseUri = httpResponse.ResponseUri;

                    // Raw-ness is decided from the content type when present, otherwise from the response URI.
                    if (!string.IsNullOrEmpty(httpResponse.ContentType))
                    {
                        response.IsRaw = MimeDetect.IsRaw(httpResponse.ContentType);
                    }
                    else
                    {
                        response.IsRaw = MimeDetect.IsRaw(httpResponse.ResponseUri);
                    }

                    if (request.UseCookie)
                    {
                        SetCookie(request, httpResponse.Headers.Get("Set-Cookie"));
                    }

                    if (response.IsRaw)
                    {
                        response.Data = buff;
                    }
                    else
                    {
                        var result = Decoding.GetStringFromBuff(buff, httpResponse, request.Charset);
                        response.Charset = result.CharSet;
                        response.Data = result.Body;
                        response.Cookie = GetCookie(request);
                    }
                }
                finally
                {
                    // Release the connection even when reading or decoding the body throws;
                    // the outer catch would otherwise return without closing it.
                    httpResponse.Close();
                }

                Logger.GetLogger(request.Elect).Info(request.Uri.ToString() + " response status is " + response.StatusCode.ToString());

                return response;
            }
            catch (Exception ex)
            {
                var failed = new Response();
                failed.StatusCode = HttpStatusCode.BadRequest;
                failed.Data = "response error " + ex.Message;

                return failed;
            }
        }