예제 #1
0
        /// <summary>
        /// Iterates over the resources in <paramref name="packet"/> and passes each part
        /// (request, response headers and stream, exception) to <c>Consumer</c> when the
        /// corresponding <c>Processes*</c> flag on the consumer is set.
        /// </summary>
        /// <remarks>
        /// An exception thrown while handling a single resource is logged and the loop moves on;
        /// <c>ResourcesProcessedCount</c> is incremented for that resource regardless.
        /// Any exception that escapes the loop (including the runaway-packet guard) is reported
        /// through <c>Consumer.ReadingException</c>. <c>Consumer.PacketClosed</c> is always called.
        /// </remarks>
        /// <param name="packet">Reader positioned on the first resource of the packet.</param>
        public async Task Process(WebDataPacketReader packet)
        {
            try
            {
                Consumer.PacketOpened();

                do
                {
                    try
                    {
                        Consumer.NewResource();

                        if (Consumer.ProcessesRequest)
                        {
                            await Consumer.ProcessRequest(packet.GetRequestString()).ConfigureAwait(false);
                        }

                        if (Consumer.ProcessesResponse)
                        {
                            // Headers are read and handed over before the response stream is opened.
                            await Consumer.ProcessResponseHeaders(packet.GetResponseHeaders()).ConfigureAwait(false);

                            using (var body = packet.GetResponseStream())
                            {
                                await Consumer.ProcessResponseStream(body).ConfigureAwait(false);
                            }
                        }

                        if (Consumer.ProcessesException)
                        {
                            await Consumer.ProcessException(packet.GetException()).ConfigureAwait(false);
                        }
                    }
                    catch (Exception resourceError)
                    {
                        // Per-resource failures are logged only, so the remaining resources still get visited.
                        log.Error(resourceError);
                    }

                    ResourcesProcessedCount++;

                    // Guard against a reader that never reports the end of the packet.
                    bool tooManyResources = packet.ResourceCountSeen > WebDataPacketReader.MaxResourcesInAFile;
                    if (tooManyResources)
                    {
                        throw new FetchoException("Something wrong with packet - it keeps spinning");
                    }
                }
                while (packet.NextResource());
            }
            catch (Exception readError)
            {
                Consumer.ReadingException(readError);
            }
            finally
            {
                Consumer.PacketClosed();
            }
        }
예제 #2
0
        /// <summary>
        /// Updates the response-header statistics for the current resource: adds the header
        /// length to <c>CountOfHeaderBytes</c>, records <c>ContentType</c>, and tallies the
        /// <c>content-encoding</c> and <c>content-language</c> header values.
        /// </summary>
        /// <remarks>
        /// Header values are lower-cased with the invariant culture so that the tally keys do not
        /// depend on the machine's current culture. A header that is absent is tallied under
        /// <c>"(not specified)"</c>.
        /// The method contains no awaits; <c>async</c> is kept so that exceptions continue to be
        /// delivered through the returned task.
        /// </remarks>
        /// <param name="responseHeaders">Raw response header text; must not be null.</param>
        public override async Task ProcessResponseHeaders(string responseHeaders)
        {
            CountOfHeaderBytes += (ulong)responseHeaders.Length;
            ContentType = WebDataPacketReader.GetContentTypeFromResponseHeaders(responseHeaders);

            var headers = WebDataPacketReader.GetHeaders(responseHeaders);
            string headerValue;

            // Single lookup per header instead of ContainsKey followed by the indexer.
            Increment(
                ContentEncoding,
                headers.TryGetValue("content-encoding", out headerValue)
                    ? headerValue.ToLowerInvariant()
                    : "(not specified)");

            Increment(
                ContentLanguage,
                headers.TryGetValue("content-language", out headerValue)
                    ? headerValue.ToLowerInvariant()
                    : "(not specified)");
        }
예제 #3
0
        /// <summary>
        /// Tallies the exception text recorded for the current resource.
        /// </summary>
        /// <remarks>
        /// Nothing is counted unless <c>WebDataPacketReader.IsException</c> accepts the text.
        /// Otherwise <c>ExceptionCount</c> is incremented and the text is tallied in
        /// <c>ExceptionCounts</c> under the name returned by <c>ExceptionClassifier.Classify</c>.
        /// </remarks>
        /// <param name="exception">Exception text read from the packet.</param>
        public override async Task ProcessException(string exception)
        {
            if (!WebDataPacketReader.IsException(exception))
            {
                return;
            }

            ExceptionCount++;

            Increment(ExceptionCounts, ExceptionClassifier.Classify(exception).ToString());
        }
예제 #4
0
        /// <summary>
        /// Downloads and parses the robots file located at the URI that
        /// <c>MakeRobotsUri</c> derives from <paramref name="anyUri"/>.
        /// </summary>
        /// <param name="anyUri">URI from which the robots file URI is derived.</param>
        /// <param name="lastFetched">Passed to the fetcher unchanged; may be null.</param>
        /// <returns>
        /// An empty <c>RobotsFile</c> when the fetched packet has no response stream, a
        /// <c>RobotsFile</c> built from that stream otherwise, or null when any step inside
        /// the download threw (the exception is logged, not propagated).
        /// </returns>
        public static async Task<RobotsFile> DownloadRobots(Uri anyUri, DateTime? lastFetched)
        {
            var robotsUri = MakeRobotsUri(anyUri);
            RobotsFile result = null;

            try
            {
                // The resolved address is not used at the moment: the
                // FetchoConfiguration.Current.HostCache.WaitToFetch(ip, 60000) throttle that
                // consumed it is disabled. The lookup itself still runs, and anything it throws
                // is handled by the catch below.
                await Utility.GetHostIPAddress(robotsUri);

                // Fetch is handed its writer through a BufferBlock, so one is created solely to
                // carry this single writer (author's note: robots should ideally go through the
                // standard flow instead).
                var writerQueue = new BufferBlock<IWebResourceWriter>();

                using (var buffer = new MemoryStream())
                {
                    using (var writer = new WebDataPacketWriter(buffer))
                    {
                        await writerQueue.SendAsync(writer);

                        await new HttpResourceFetcher().Fetch(null, robotsUri, null, lastFetched, writerQueue);
                    }

                    // The writer is disposed before the buffer is rewound and read back.
                    buffer.Seek(0, SeekOrigin.Begin);

                    using (var reader = new WebDataPacketReader(CreateXmlReader(buffer)))
                    using (var body = reader.GetResponseStream())
                    {
                        result = body == null
                            ? new RobotsFile()
                            : new RobotsFile(robotsUri, body);
                    }
                }
            }
            catch (Exception ex)
            {
                Utility.LogInfo("Fetching {0}:", robotsUri);
                Utility.LogException(ex);
            }

            return result;
        }
예제 #5
0
        /// <summary>
        /// Updates the request statistics for the current resource: sets <c>CurrentUri</c>,
        /// increments <c>ResourceCount</c>, adds the request length to
        /// <c>CountOfRequestBytes</c>, tallies the TLD and host, and accumulates the
        /// <c>responsetime</c> header into <c>ResponseTimeMilliseconds</c>.
        /// </summary>
        /// <remarks>
        /// When no URI can be extracted from the request, only the resource and byte counters
        /// are updated. A <c>responsetime</c> value that is not a valid <see cref="TimeSpan"/>
        /// still throws, as before.
        /// The method contains no awaits; <c>async</c> is kept so that exceptions continue to be
        /// delivered through the returned task.
        /// </remarks>
        /// <param name="request">Raw request text; must not be null.</param>
        public override async Task ProcessRequest(string request)
        {
            CurrentUri = WebDataPacketReader.GetUriFromRequestString(request);

            ResourceCount++;
            CountOfRequestBytes += (ulong)request.Length;

            if (CurrentUri == null)
            {
                return;
            }

            // CurrentUri is known to be non-null here, so read Host once and reuse it.
            string host = CurrentUri.Host;
            var domain = domainParser.Get(host);

            Increment(TLDCounts, domain == null ? "(blank)" : domain.TLD);
            Increment(HostCounts, host);

            var headers = WebDataPacketReader.GetHeaders(request);
            string responseTime;

            // Single lookup; the value is machine-written, so parse it culture-independently.
            if (headers.TryGetValue("responsetime", out responseTime))
            {
                ResponseTimeMilliseconds += TimeSpan
                    .Parse(responseTime, System.Globalization.CultureInfo.InvariantCulture)
                    .TotalMilliseconds;
            }
        }