示例#1
0
        /// <include file='IWebClient.xml' path='/IWebClient/SendRequest_WebRequest/*'/>
        public WebResponse SendRequest(WebRequest webRequest)
        {
            // Overview:
            //   1. Build an HttpWebRequest for webRequest.Destination with automatic redirects turned off.
            //   2. Attach the cookies the jar holds for the destination and, for POST requests, write the body.
            //   3. Read the response, merge any offered cookies into the jar, then either follow/report a
            //      redirect (HTTP status or Meta refresh) or hand the response to WebResponseFactory.
            //   4. A WebException raised along the way is returned as an ExceptionWebResponse, not thrown.
            var httpWebRequest = (HttpWebRequest)System.Net.WebRequest.Create(webRequest.Destination);

            // The HTTP verb is the upper-cased name of the request type enum member.
            httpWebRequest.Method = webRequest.Type.ToString().ToUpperInvariant();

            // We shall handle redirects by hand so that we may capture cookies and properly
            // handle login forms.
            //
            // Automating Web Login With HttpWebRequest
            // https://www.stevefenton.co.uk/Content/Blog/Date/201210/Blog/Automating-Web-Login-With-HttpWebRequest/
            httpWebRequest.AllowAutoRedirect = false;

            // Default headers.
            httpWebRequest.Accept    = "*/*";
            httpWebRequest.UserAgent = UserAgent;

            // Set and/or override any provided headers.
            foreach (var headerName in webRequest.Headers.AllKeys)
            {
                ConfigureHeader(httpWebRequest, headerName, webRequest.Headers[headerName]);
            }

            // Each request gets its own container, seeded with the jar's cookies for the destination.
            httpWebRequest.CookieContainer = new CookieContainer();
            httpWebRequest.CookieContainer.Add(cookieJar.GetCookies(webRequest.Destination));

            if (webRequest.Type == WebRequestType.Post)
            {
                var postRequest = (PostWebRequest)webRequest;

                // The request data is sent UTF-8 encoded.
                var    requestDataBytes = Encoding.UTF8.GetBytes(postRequest.RequestData);
                Stream requestStream    = null;

                httpWebRequest.ContentLength = requestDataBytes.Length;
                httpWebRequest.ContentType   = postRequest.ContentType;
                httpWebRequest.ServicePoint.Expect100Continue = false;

                try {
                    requestStream = httpWebRequest.GetRequestStream();
                    requestStream.Write(requestDataBytes, 0, requestDataBytes.Length);
                }
                finally {
                    requestStream?.Close();
                }
            }

            // Note: for POST requests the body has already been written when this event is raised.
            OnSendingRequest(new SendingRequestEventArgs(webRequest));

            WebResponse     response;
            HttpWebResponse webResponse = null;

            try {
                webResponse = ( HttpWebResponse )httpWebRequest.GetResponse();

                OnProcessingResponse(new ProcessingResponseEventArgs(webResponse));

                if (httpWebRequest.HaveResponse)
                {
                    var responseCookies = new CookieCollection {
                        webResponse.Cookies
                    };

                    // Some cookies in the Set-Cookie header can be omitted from the response's CookieCollection. For example:
                    //   Set-Cookie:ADCDownloadAuth=[long token];Version=1;Comment=;Domain=apple.com;Path=/;Max-Age=108000;HttpOnly;Expires=Tue, 03 May 2016 13:30:57 GMT
                    //
                    // See also:
                    // http://stackoverflow.com/questions/15103513/httpwebresponse-cookies-empty-despite-set-cookie-header-no-redirect
                    //
                    // To catch these, we parse the header manually and add any cookie that is missing.
                    if (webResponse.Headers.AllKeys.Contains(CommonHeaders.SetCookie))
                    {
                        // Snapshot of the cookies the framework parsed; used only for the name comparison below.
                        var responseCookieList = responseCookies.OfType <Cookie>().ToList();

                        var cookies = NScrapeUtility.ParseSetCookieHeader(webResponse.Headers[CommonHeaders.SetCookie], httpWebRequest.Host);

                        foreach (var cookie in cookies)
                        {
                            if (responseCookieList.All(c => c.Name != cookie.Name))
                            {
                                responseCookies.Add(cookie);
                            }
                        }
                    }

                    // Handle cookies that are offered: a cookie whose name is already in the jar for this
                    // destination has its value updated; any other cookie is added unless an AddingCookie
                    // handler cancels it.
                    foreach (Cookie responseCookie in responseCookies)
                    {
                        var cookieFound = false;

                        foreach (Cookie existingCookie in cookieJar.GetCookies(webRequest.Destination))
                        {
                            if (responseCookie.Name.Equals(existingCookie.Name))
                            {
                                existingCookie.Value = responseCookie.Value;
                                cookieFound          = true;
                            }
                        }

                        if (!cookieFound)
                        {
                            var args = new AddingCookieEventArgs(responseCookie);

                            OnAddingCookie(args);

                            if (!args.Cancel)
                            {
                                cookieJar.Add(responseCookie);
                            }
                        }
                    }

                    if (redirectionStatusCodes.Contains(webResponse.StatusCode))
                    {
                        // We have a redirected response, so get the new location.
                        // NOTE(review): a redirect status without a Location header leaves 'location' null and
                        // the relative Uri construction below will throw - confirm whether that can occur.
                        var location = webResponse.Headers[CommonHeaders.Location];

                        // Locations should always be absolute, per the RFC (http://tools.ietf.org/html/rfc2616#section-14.30), but
                        // that won't always be the case.
                        Uri redirectUri;
                        if (Uri.IsWellFormedUriString(location, UriKind.Absolute))
                        {
                            redirectUri = new Uri(location);
                        }
                        else
                        {
                            // Resolve a relative location against the URL that was requested.
                            redirectUri = new Uri(webRequest.Destination, new Uri(location, UriKind.Relative));
                        }

                        if (webRequest.AutoRedirect)
                        {
                            // Release this response before following the redirect; otherwise its connection
                            // stays open until the whole chain of redirects has completed.
                            webResponse.Dispose();

                            // We are auto redirecting, so make a recursive call to perform the redirect by hand.
                            response = SendRequest(new GetWebRequest(redirectUri));
                        }
                        else
                        {
                            // Read what we need from the response, then release it.
                            var responseUri = webResponse.ResponseUri;
                            webResponse.Dispose();

                            // We are not auto redirecting, so send the caller a redirect response.
                            response = new RedirectedWebResponse(responseUri, webRequest, redirectUri);
                        }
                    }
                    else
                    {
                        // We have a non-redirected response.
                        response = WebResponseFactory.CreateResponse(webResponse);

                        if (response.ResponseType == WebResponseType.Html)
                        {
                            // We have an HTML response, so check for an old school Meta refresh tag
                            var metaRefreshUrl = GetMetaRefreshUrl((( HtmlWebResponse )response).Html);

                            if (!string.IsNullOrWhiteSpace(metaRefreshUrl))
                            {
                                // The page has a Meta refresh tag, so build the redirect Url
                                var redirectUri = new Uri(response.ResponseUrl, metaRefreshUrl);

                                if (webRequest.AutoRedirect)
                                {
                                    response.Dispose();

                                    // We are auto redirecting, so make a recursive call to perform the redirect
                                    // NOTE(review): AllowAutoRedirect was forced to false above, so 'false' is what
                                    // is passed here - confirm this is intended rather than webRequest.AutoRedirect.
                                    response = SendRequest(new GetWebRequest(redirectUri, httpWebRequest.AllowAutoRedirect));
                                }
                                else
                                {
                                    var responseUrl = response.ResponseUrl;

                                    response.Dispose();

                                    // We are not auto redirecting, so send the caller a redirect response
                                    response = new RedirectedWebResponse(responseUrl, webRequest, redirectUri);
                                }
                            }
                        }
                    }
                }
                else
                {
                    // The request reports that no response was received; surface that as an exception response.
                    response = new ExceptionWebResponse(webRequest.Destination, new WebException(NScrapeResources.NoResponse));

                    webResponse.Dispose();
                }
            }
            catch (WebException ex) {
                // Web failures are reported to the caller as a response object rather than thrown.
                response = new ExceptionWebResponse(webRequest.Destination, ex);

                webResponse?.Dispose();
            }

            return(response);
        }
示例#2
0
        /// <include file='IWebClient.xml' path='/IWebClient/SendRequest_WebRequest/*'/>
        public WebResponse SendRequest(WebRequest webRequest)
        {
            // Overview:
            //   1. Build an HttpWebRequest for webRequest.Destination with automatic redirects turned off.
            //   2. Attach the cookies the jar holds for the destination and, for POST requests, write the body.
            //   3. Read the response, merge any offered cookies into the jar, then either follow/report a
            //      redirect (HTTP status or Meta refresh) or hand the response to WebResponseFactory.
            //   4. A WebException raised while doing so is returned as an ExceptionWebResponse, not thrown.
            var httpWebRequest = (HttpWebRequest)System.Net.WebRequest.Create(webRequest.Destination);

            // The HTTP verb is the upper-cased name of the request type enum member.
            httpWebRequest.Method = webRequest.Type.ToString().ToUpperInvariant();

            // We shall handle redirects by hand so that we may capture cookies and properly
            // handle login forms.
            //
            // Automating Web Login With HttpWebRequest
            // https://www.stevefenton.co.uk/Content/Blog/Date/201210/Blog/Automating-Web-Login-With-HttpWebRequest/
            httpWebRequest.AllowAutoRedirect = false;

            // Default headers.
            httpWebRequest.Accept    = "*/*";
            httpWebRequest.UserAgent = UserAgent;

            // Set and/or override any provided headers.
            foreach (var headerName in webRequest.Headers.AllKeys)
            {
                ConfigureHeader(httpWebRequest, headerName, webRequest.Headers[headerName]);
            }

            // Each request gets its own container, seeded with the jar's cookies for the destination.
            httpWebRequest.CookieContainer = new CookieContainer();
            httpWebRequest.CookieContainer.Add(webRequest.Destination, cookieJar.GetCookies(webRequest.Destination));

            if (webRequest.Type == WebRequestType.Post)
            {
                var postRequest = (PostWebRequest)webRequest;

                // The request data is sent UTF-8 encoded.
                var    requestDataBytes = Encoding.UTF8.GetBytes(postRequest.RequestData);
                Stream requestStream    = null;

                httpWebRequest.ContentLength = requestDataBytes.Length;
                httpWebRequest.ContentType   = postRequest.ContentType;

                try {
                    requestStream = httpWebRequest.GetRequestStreamAsync().Result;
                    requestStream.Write(requestDataBytes, 0, requestDataBytes.Length);
                }
                finally {
                    requestStream?.Dispose();
                }
            }

            // Note: for POST requests the body has already been written when this event is raised.
            OnSendingRequest(new SendingRequestEventArgs(webRequest));

            WebResponse     response;
            HttpWebResponse webResponse = null;

            try {
                try {
                    webResponse = ( HttpWebResponse )httpWebRequest.GetResponseAsync().Result;
                }
                catch (AggregateException ex) {
                    // While the line above executes without exception under the .Net Framework, under
                    // .Net Core, it will throw an exception for non-successful (non-200) status codes.
                    // However, the response we need is buried within the exception, so pull it out and
                    // continue.
                    //
                    // See thread on the following page, notably the comment from davidsh on Sep 6, 2017:
                    //
                    // HttpWebRequest in .NET Core 2.0 throwing 301 Moved Permanently #23422
                    // https://github.com/dotnet/corefx/issues/23422
                    var webException = ex.InnerExceptions.Count == 1
                        ? ex.InnerExceptions[0] as WebException
                        : null;

                    if (webException == null)
                    {
                        // The exception was not as expected so we can't process it.
                        throw;
                    }

                    if (webException.Response is HttpWebResponse httpWebResponse)
                    {
                        webResponse = httpWebResponse;
                    }
                    else
                    {
                        // A web failure that carries no HTTP response (for example a connection failure).
                        // Rethrow the WebException itself, with its original stack trace, so that the outer
                        // handler turns it into an ExceptionWebResponse; rethrowing the AggregateException
                        // wrapper would bypass that handler and escape to the caller.
                        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(webException).Throw();
                    }
                }

                OnProcessingResponse(new ProcessingResponseEventArgs(webResponse));

                if (httpWebRequest.HaveResponse)
                {
                    var responseCookies = new CookieCollection {
                        webResponse.Cookies
                    };

                    // Some cookies in the Set-Cookie header can be omitted from the response's CookieCollection. For example:
                    //   Set-Cookie:ADCDownloadAuth=[long token];Version=1;Comment=;Domain=apple.com;Path=/;Max-Age=108000;HttpOnly;Expires=Tue, 03 May 2016 13:30:57 GMT
                    //
                    // See also:
                    // http://stackoverflow.com/questions/15103513/httpwebresponse-cookies-empty-despite-set-cookie-header-no-redirect
                    //
                    // To catch these, we parse the header manually and add any cookie that is missing.
                    if (webResponse.Headers.AllKeys.Contains(CommonHeaders.SetCookie))
                    {
                        // Snapshot of the cookies the framework parsed; used only for the name comparison below.
                        var responseCookieList = responseCookies.OfType <Cookie>().ToList();

                        var host    = httpWebRequest.Host;
                        var cookies = NScrapeUtility.ParseSetCookieHeader(webResponse.Headers[CommonHeaders.SetCookie], host);

                        foreach (var cookie in cookies)
                        {
                            if (responseCookieList.All(c => c.Name != cookie.Name))
                            {
                                responseCookies.Add(cookie);
                            }
                        }
                    }

                    // Handle cookies that are offered: a cookie whose name is already in the jar for this
                    // destination has its value updated; any other cookie is added unless an AddingCookie
                    // handler cancels it.
                    foreach (Cookie responseCookie in responseCookies)
                    {
                        var cookieFound = false;

                        foreach (Cookie existingCookie in cookieJar.GetCookies(webRequest.Destination))
                        {
                            if (responseCookie.Name.Equals(existingCookie.Name))
                            {
                                existingCookie.Value = responseCookie.Value;
                                cookieFound          = true;
                            }
                        }

                        if (!cookieFound)
                        {
                            var args = new AddingCookieEventArgs(responseCookie);

                            OnAddingCookie(args);

                            if (!args.Cancel)
                            {
                                Uri cookieUri;

                                if (string.IsNullOrEmpty(responseCookie.Domain))
                                {
                                    // The cookie names no domain, so there is nothing to build a URL from
                                    // (and Substring(1) on an empty string would throw). File it under the
                                    // URL that was requested instead.
                                    cookieUri = webRequest.Destination;
                                }
                                else
                                {
                                    // .NET Core seems to enforce the fact that the cookie domain _must_ start with a dot,
                                    // so let's make sure that's the case.
                                    if (!responseCookie.Domain.StartsWith(".", StringComparison.Ordinal))
                                    {
                                        responseCookie.Domain = "." + responseCookie.Domain;
                                    }

                                    // Build the URL the cookie is stored under: scheme from the Secure flag,
                                    // host from the domain without its leading dot.
                                    var scheme = responseCookie.Secure ? "https://" : "http://";

                                    cookieUri = new Uri(scheme + responseCookie.Domain.Substring(1));
                                }

                                cookieJar.Add(cookieUri, responseCookie);
                            }
                        }
                    }

                    if (redirectionStatusCodes.Contains(webResponse.StatusCode))
                    {
                        // We have a redirected response, so get the new location.
                        // NOTE(review): a redirect status without a Location header leaves 'location' null and
                        // the relative Uri construction below will throw - confirm whether that can occur.
                        var location = webResponse.Headers[CommonHeaders.Location];

                        // Locations should always be absolute, per the RFC (http://tools.ietf.org/html/rfc2616#section-14.30), but
                        // that won't always be the case.
                        Uri redirectUri;
                        if (Uri.IsWellFormedUriString(location, UriKind.Absolute))
                        {
                            redirectUri = new Uri(location);
                        }
                        else
                        {
                            // Resolve a relative location against the URL that was requested.
                            redirectUri = new Uri(webRequest.Destination, new Uri(location, UriKind.Relative));
                        }

                        if (webRequest.AutoRedirect)
                        {
                            // Dispose webResponse before auto redirect, otherwise the connection will not be closed until all the auto redirect finished.
                            // http://www.wadewegner.com/2007/08/systemnetwebexception-when-issuing-more-than-two-concurrent-webrequests/
                            webResponse.Dispose();

                            // We are auto redirecting, so make a recursive call to perform the redirect by hand.
                            response = SendRequest(new GetWebRequest(redirectUri));
                        }
                        else
                        {
                            // Read what we need from the response, then release it.
                            var responseUri = webResponse.ResponseUri;
                            webResponse.Dispose();

                            // We are not auto redirecting, so send the caller a redirect response.
                            response = new RedirectedWebResponse(responseUri, webRequest, redirectUri);
                        }
                    }
                    else
                    {
                        // We have a non-redirected response.
                        response = WebResponseFactory.CreateResponse(webResponse);

                        if (response.ResponseType == WebResponseType.Html)
                        {
                            // We have an HTML response, so check for an old school Meta refresh tag
                            var metaRefreshUrl = GetMetaRefreshUrl((( HtmlWebResponse )response).Html);

                            if (!string.IsNullOrWhiteSpace(metaRefreshUrl))
                            {
                                // The page has a Meta refresh tag, so build the redirect URL
                                var redirectUri = new Uri(response.ResponseUrl, metaRefreshUrl);

                                if (webRequest.AutoRedirect)
                                {
                                    // Dispose the response before auto redirect, otherwise the connection will not be closed until all the auto redirect finished.
                                    // http://www.wadewegner.com/2007/08/systemnetwebexception-when-issuing-more-than-two-concurrent-webrequests/
                                    response.Dispose();

                                    // We are auto redirecting, so make a recursive call to perform the redirect
                                    // NOTE(review): AllowAutoRedirect was forced to false above, so 'false' is what
                                    // is passed here - confirm this is intended rather than webRequest.AutoRedirect.
                                    response = SendRequest(new GetWebRequest(redirectUri, httpWebRequest.AllowAutoRedirect));
                                }
                                else
                                {
                                    var responseUrl = response.ResponseUrl;

                                    response.Dispose();

                                    // We are not auto redirecting, so send the caller a redirect response
                                    response = new RedirectedWebResponse(responseUrl, webRequest, redirectUri);
                                }
                            }
                        }
                    }
                }
                else
                {
                    // The request reports that no response was received; surface that as an exception response.
                    response = new ExceptionWebResponse(webRequest.Destination, new WebException(Properties.Resources.NoResponse));

                    webResponse.Dispose();
                }
            }
            catch (WebException ex) {
                // Web failures are reported to the caller as a response object rather than thrown.
                response = new ExceptionWebResponse(webRequest.Destination, ex);

                webResponse?.Dispose();
            }

            return(response);
        }