Example #1
0
 /// <summary>
 /// Builds the <see cref="PropertyBag"/> describing a finished request and hands it to
 /// <c>requestState.CallComplete</c> with a null exception argument.
 /// </summary>
 /// <param name="requestState">State of the request that is being completed.</param>
 /// <param name="response">The received response, or null when no response is available.</param>
 private static void CallComplete <T>(RequestState <T> requestState, HttpWebResponse response)
 {
     PropertyBag result;

     if (response == null)
     {
         // No response object: report placeholder values (status is set to Forbidden).
         result = new PropertyBag
         {
             Step                    = requestState.CrawlStep,
             CharacterSet            = string.Empty,
             ContentEncoding         = null,
             ContentType             = null,
             Headers                 = null,
             IsMutuallyAuthenticated = false,
             IsFromCache             = false,
             LastModified            = DateTime.Now,
             Method                  = string.Empty,
             ProtocolVersion         = null,
             ResponseUri             = null,
             Server                  = string.Empty,
             StatusCode              = HttpStatusCode.Forbidden,
             StatusDescription       = string.Empty,

             // Fall back to an empty stream when nothing was buffered.
             GetResponse = requestState.ResponseBuffer.IsNull()
                 ? (Func <Stream>)(() => new MemoryStream())
                 : requestState.ResponseBuffer.GetReaderStream,
             DownloadTime = requestState.DownloadTimer.Elapsed,
         };
     }
     else
     {
         // Copy the response metadata verbatim.
         result = new PropertyBag
         {
             Step                    = requestState.CrawlStep,
             CharacterSet            = response.CharacterSet,
             ContentEncoding         = response.ContentEncoding,
             ContentType             = response.ContentType,
             Headers                 = response.Headers,
             IsMutuallyAuthenticated = response.IsMutuallyAuthenticated,
             IsFromCache             = response.IsFromCache,
             LastModified            = response.LastModified,
             Method                  = response.Method,
             ProtocolVersion         = response.ProtocolVersion,
             ResponseUri             = response.ResponseUri,
             Server                  = response.Server,
             StatusCode              = response.StatusCode,
             StatusDescription       = response.StatusDescription,

             // Fall back to an empty stream when nothing was buffered.
             GetResponse = requestState.ResponseBuffer.IsNull()
                 ? (Func <Stream>)(() => new MemoryStream())
                 : requestState.ResponseBuffer.GetReaderStream,
             DownloadTime = requestState.DownloadTimer.Elapsed,
         };
     }

     requestState.CallComplete(result, null);
 }
Example #2
0
        /// <summary>
        /// Issues a new web request for <paramref name="requestState"/> while retries remain;
        /// once they are used up, completes the request with <paramref name="exception"/>.
        /// </summary>
        /// <param name="requestState">State of the request to (re)start.</param>
        /// <param name="exception">
        /// Failure that led to this call, or null. A non-null value triggers the
        /// configured retry wait before anything else happens.
        /// </param>
        private void DownloadAsync <T>(RequestState <T> requestState, Exception exception)
        {
            // Back off only when this call follows a failure and a wait is configured.
            if (!exception.IsNull() && RetryWaitDuration.HasValue)
            {
                Thread.Sleep(RetryWaitDuration.Value);
            }

            // The retry counter is decremented on every call, including the final one.
            if (requestState.Retry-- <= 0)
            {
                requestState.CallComplete(null, exception);
                return;
            }

            requestState.Clean();
            requestState.Request        = (HttpWebRequest)WebRequest.Create(requestState.CrawlStep.Uri);
            requestState.Request.Method = requestState.Method.ToString();
            SetDefaultRequestProperties(requestState.Request);

            IAsyncResult pending = requestState.Request.BeginGetResponse(null, requestState);
            pending.FromAsync((result, timedOut) =>
            {
                if (!timedOut)
                {
                    ResponseCallback <T>(result);
                    return;
                }

                // Timed out: go through this method again, consuming another retry.
                DownloadAsync(requestState, new TimeoutException("Connection Timeout"));
            }, ConnectionTimeout);
        }
        /// <summary>
        /// Builds the <see cref="PropertyBag"/> describing a finished request and hands it to
        /// <c>requestState.CallComplete</c> with a null exception argument.
        /// </summary>
        /// <param name="requestState">State of the request that is being completed.</param>
        /// <param name="response">
        /// The received response, or null when none is available; in that case placeholder
        /// values are reported and the status code is set to Forbidden.
        /// </param>
        private static void CallComplete <T>(RequestState <T> requestState, HttpResponseMessage response)
        {
            PropertyBag propertyBag;

            if (response != null)
            {
                // Content-Type is optional: HttpContentHeaders.ContentType is null when the
                // header is absent, so CharSet/MediaType must be read null-safely.
                // The Content guard is defensive for responses carrying no content object.
                var contentHeaders = response.Content?.Headers;
                var contentType    = contentHeaders?.ContentType;

                propertyBag = new PropertyBag
                {
                    Step            = requestState.CrawlStep,
                    CharacterSet    = contentType?.CharSet,
                    ContentEncoding = contentHeaders?.ContentEncoding.FirstOrDefault(),
                    ContentType     = contentType?.MediaType,
                    Headers         = response.Headers,

                    // Mutually authenticated requests not supported
                    IsMutuallyAuthenticated = false,

                    // Data is never reported as coming from a cache.
                    IsFromCache       = false,
                    LastModified      = contentHeaders?.LastModified,
                    Method            = response.RequestMessage.Method.ToString().ToUpperInvariant(),
                    ProtocolVersion   = response.RequestMessage.Version,
                    ResponseUri       = response.RequestMessage.RequestUri,

                    // First Server header entry rendered as "<product name> <product version>".
                    Server            = response.Headers.Server.Select(_ => _.Product?.Name + " " + _.Product?.Version).FirstOrDefault(),
                    StatusCode        = response.StatusCode,
                    StatusDescription = response.ReasonPhrase,

                    // Fall back to an empty stream when nothing was buffered.
                    GetResponse       = requestState.ResponseBuffer.IsNull()
                                ? (Func <Stream>)(() => new MemoryStream())
                                : requestState.ResponseBuffer.GetReaderStream,
                    DownloadTime = requestState.DownloadTimer.Elapsed,
                };
            }
            else
            {
                propertyBag = new PropertyBag
                {
                    Step                    = requestState.CrawlStep,
                    CharacterSet            = string.Empty,
                    ContentEncoding         = null,
                    ContentType             = null,
                    Headers                 = null,
                    IsMutuallyAuthenticated = false,
                    IsFromCache             = false,
                    LastModified            = DateTime.Now,
                    Method                  = string.Empty,
                    ProtocolVersion         = null,
                    ResponseUri             = null,
                    Server                  = string.Empty,
                    StatusCode              = HttpStatusCode.Forbidden,
                    StatusDescription       = string.Empty,

                    // Fall back to an empty stream when nothing was buffered.
                    GetResponse             = requestState.ResponseBuffer.IsNull()
                                ? (Func <Stream>)(() => new MemoryStream())
                                : requestState.ResponseBuffer.GetReaderStream,
                    DownloadTime = requestState.DownloadTimer.Elapsed,
                };
            }

            requestState.CallComplete(propertyBag, null);
        }
        /// <summary>
        /// Performs one download attempt for <paramref name="requestState"/>: sends the request,
        /// copies the response body into <c>requestState.ResponseBuffer</c> and completes the request.
        /// </summary>
        /// <remarks>
        /// When no retries remain the request is completed with a <see cref="TimeoutException"/>.
        /// An <see cref="HttpRequestException"/> completes the request with whatever response was
        /// obtained (possibly null). Any other exception, including the
        /// <see cref="InvalidOperationException"/> raised when the response has no Content-Length,
        /// is handed over to <c>DownloadWithRetryAsync</c>.
        /// </remarks>
        /// <param name="requestState">State of the request to download.</param>
        /// <returns>The same <paramref name="requestState"/> instance, or the result of the retry.</returns>
        private async Task <RequestState <T> > DownloadAsync <T>(RequestState <T> requestState)
        {
            // The retry counter is decremented on every call, including the final one.
            if (requestState.Retry-- <= 0)
            {
                requestState.CallComplete(null, new TimeoutException("Connection Timeout"));
                return(requestState);
            }

            requestState.Clean();

            Exception unhandledException = null;
            var       clientHandler      = new HttpClientHandler();

            // A client/handler pair is created per attempt, so it must be released per attempt;
            // disposing the HttpClient also disposes the handler it was constructed with.
            using (var client = new HttpClient(clientHandler))
            {
                var request = new HttpRequestMessage(requestState.Method, requestState.CrawlStep.Uri);

                requestState.Request = request;
                SetDefaultRequestProperties(clientHandler, request);

                HttpResponseMessage httpResponse = null;

                try
                {
                    httpResponse = await client.SendAsync(request).WithTimeout(this.ConnectionTimeout);

                    var response           = httpResponse.Content;
                    var downloadBufferSize = this.DownloadBufferSize ?? DefaultDownloadBufferSize;
                    var contentLength      = response.Headers.ContentLength ?? throw new InvalidOperationException("Content length not specified");
                    requestState.ResponseBuffer = new MemoryStreamWithFileBackingStore((int)contentLength,
                                                                                       this.MaximumDownloadSizeInRam ?? int.MaxValue,
                                                                                       (int)downloadBufferSize);

                    // Read the response into a Stream object.
                    var responseStream = await response.ReadAsStreamAsync().WithTimeout(this.ReadTimeout);

                    await responseStream.CopyToAsync(requestState.ResponseBuffer,
                                                     (source, dest, exception) =>
                    {
                        // NOTE(review): CallComplete is invoked again after the await below; if
                        // CopyToAsync runs this callback on success, completion fires twice - confirm.
                        if (exception.IsNull())
                        {
                            CallComplete(requestState, httpResponse);
                        }
                        else
                        {
                            // Put delay here in case of error.
                            //DownloadWithRetryAsync(requestState, exception);
                        }
                    },
                                                     bd =>
                    {
                        requestState.DownloadProgress?.Invoke(new DownloadProgressEventArgs
                        {
                            Referrer            = requestState.Referrer,
                            Step                = requestState.CrawlStep,
                            BytesReceived       = bd,
                            TotalBytesToReceive = (uint)contentLength,

                            // Elapsed time is "now minus start"; the reverse order yields a negative duration.
                            DownloadTime        = DateTime.UtcNow - requestState.StartTime,
                        });
                    },
                                                     downloadBufferSize, this.MaximumContentSize, this.ReadTimeout);

                    CallComplete(requestState, httpResponse);
                }
                catch (HttpRequestException)
                {
                    // Transport-level failure: complete with whatever response exists (possibly null).
                    CallComplete(requestState, httpResponse);
                }
                catch (Exception e)
                {
                    // Remembered so the retry can be started after the client has been released.
                    unhandledException = e;
                }
            }

            if (unhandledException != null)
            {
                return(await DownloadWithRetryAsync(requestState));
            }

            return(requestState);
        }