示例#1
0
        /// <summary>
        /// Feeds every resource in <paramref name="packet"/> to <c>Consumer</c>, forwarding only the
        /// parts (request, response, exception) that the consumer reports it processes.
        /// </summary>
        /// <remarks>
        /// A failure while handling a single resource is logged and the loop moves on to the next
        /// resource. Any exception raised outside that per-resource handling (including the
        /// <c>FetchoException</c> thrown by the runaway guard) is passed to
        /// <c>Consumer.ReadingException</c> rather than propagated. <c>Consumer.PacketClosed</c> is
        /// always called before the method completes.
        /// </remarks>
        /// <param name="packet">
        /// Packet reader to consume. The resource it currently exposes is processed before
        /// <c>NextResource</c> is called for the first time.
        /// </param>
        /// <returns>A task that completes once the packet has been walked and closed.</returns>
        public async Task Process(WebDataPacketReader packet)
        {
            try
            {
                Consumer.PacketOpened();

                // The body always runs at least once; the flag is refreshed at the bottom of each pass.
                bool moreResources = true;
                while (moreResources)
                {
                    try
                    {
                        Consumer.NewResource();

                        if (Consumer.ProcessesRequest)
                        {
                            string request = packet.GetRequestString();
                            await Consumer.ProcessRequest(request).ConfigureAwait(false);
                        }

                        if (Consumer.ProcessesResponse)
                        {
                            string headers = packet.GetResponseHeaders();
                            await Consumer.ProcessResponseHeaders(headers).ConfigureAwait(false);

                            using (var body = packet.GetResponseStream())
                            {
                                await Consumer.ProcessResponseStream(body).ConfigureAwait(false);
                            }
                        }

                        if (Consumer.ProcessesException)
                        {
                            string recordedException = packet.GetException();
                            await Consumer.ProcessException(recordedException).ConfigureAwait(false);
                        }
                    }
                    catch (Exception resourceError)
                    {
                        // One bad resource must not abort the rest of the packet.
                        log.Error(resourceError);
                    }

                    // Counted even when the resource failed above.
                    ResourcesProcessedCount++;

                    // Guard against a reader that never reports the end of the packet.
                    bool limitExceeded = packet.ResourceCountSeen > WebDataPacketReader.MaxResourcesInAFile;
                    if (limitExceeded)
                    {
                        throw new FetchoException("Something wrong with packet - it keeps spinning");
                    }

                    moreResources = packet.NextResource();
                }
            }
            catch (Exception readError)
            {
                Consumer.ReadingException(readError);
            }
            finally
            {
                Consumer.PacketClosed();
            }
        }
示例#2
0
        /// <summary>
        /// Downloads the robots file for the host of <paramref name="anyUri"/> and parses it.
        /// </summary>
        /// <remarks>
        /// The fetch result is written into an in-memory packet, which is then rewound and read back
        /// to obtain the response stream. Exceptions are logged and swallowed, never propagated.
        /// </remarks>
        /// <param name="anyUri">URI handed to <c>MakeRobotsUri</c> to derive the robots file address.</param>
        /// <param name="lastFetched">
        /// Forwarded unchanged to <c>HttpResourceFetcher.Fetch</c>; may be <c>null</c>.
        /// NOTE(review): presumably the time of the previous fetch, used for a conditional request - confirm.
        /// </param>
        /// <returns>
        /// A <c>RobotsFile</c> built from the response stream; an empty <c>RobotsFile</c> when the
        /// packet has no response stream; or <c>null</c> when an exception occurred before a
        /// result could be assigned.
        /// </returns>
        public static async Task<RobotsFile> DownloadRobots(Uri anyUri, DateTime? lastFetched)
        {
            var robotsUri = MakeRobotsUri(anyUri);
            RobotsFile result = null;

            try
            {
                // Resolving the host may throw; the address itself is only needed by the
                // host-cache throttling below, which is currently switched off:
                //   while (!await FetchoConfiguration.Current.HostCache.WaitToFetch(hostAddress, 60000))
                //       Utility.LogInfo("IP Congestion {0}", hostAddress);
                var hostAddress = await Utility.GetHostIPAddress(robotsUri);

                var writerQueue = new BufferBlock<IWebResourceWriter>();

                using (var buffer = new MemoryStream())
                {
                    using (var writer = new WebDataPacketWriter(buffer))
                    {
                        // The fetcher takes its writer from a buffer block, so one has to be set up
                        // even for a single robots download (robots is not part of the standard flow).
                        await writerQueue.SendAsync(writer);

                        var fetcher = new HttpResourceFetcher();
                        await fetcher.Fetch(null, robotsUri, null, lastFetched, writerQueue);
                    }

                    // The writer is disposed at this point; rewind so the packet can be read back.
                    buffer.Seek(0, SeekOrigin.Begin);

                    using (var reader = new WebDataPacketReader(CreateXmlReader(buffer)))
                    using (var responseBody = reader.GetResponseStream())
                    {
                        result = responseBody == null
                            ? new RobotsFile()
                            : new RobotsFile(robotsUri, responseBody);
                    }
                }
            }
            catch (Exception failure)
            {
                Utility.LogInfo("Fetching {0}:", robotsUri);
                Utility.LogException(failure);
            }

            return result;
        }