/// <summary>
/// Walks the resources of <paramref name="packet"/> one at a time and hands the
/// request, response and exception sections of each to <c>Consumer</c>, depending
/// on which of its <c>Processes*</c> flags are set.
/// </summary>
/// <remarks>
/// A failure while handling a single resource is logged through <c>log.Error</c> and
/// the loop moves on; <c>ResourcesProcessedCount</c> is incremented either way.
/// Any exception escaping the loop (including the spin guard below) is reported to
/// <c>Consumer.ReadingException</c>, and <c>Consumer.PacketClosed</c> always runs last.
/// </remarks>
/// <param name="packet">Reader positioned on the first resource to process.</param>
/// <returns>A task that completes once the packet has been walked and closed.</returns>
public async Task Process(WebDataPacketReader packet)
{
    try
    {
        Consumer.PacketOpened();

        bool moreResources = true;
        while (moreResources)
        {
            try
            {
                Consumer.NewResource();

                if (Consumer.ProcessesRequest)
                {
                    string request = packet.GetRequestString();
                    await Consumer.ProcessRequest(request).ConfigureAwait(false);
                }

                if (Consumer.ProcessesResponse)
                {
                    string headers = packet.GetResponseHeaders();
                    await Consumer.ProcessResponseHeaders(headers).ConfigureAwait(false);

                    using (var body = packet.GetResponseStream())
                    {
                        await Consumer.ProcessResponseStream(body).ConfigureAwait(false);
                    }
                }

                if (Consumer.ProcessesException)
                {
                    string exceptionText = packet.GetException();
                    await Consumer.ProcessException(exceptionText).ConfigureAwait(false);
                }
            }
            catch (Exception resourceError)
            {
                // One bad resource must not stop the rest of the packet.
                log.Error(resourceError);
            }

            ResourcesProcessedCount++;

            // Guard against a reader that never reports the end of the packet.
            bool spinning = packet.ResourceCountSeen > WebDataPacketReader.MaxResourcesInAFile;
            if (spinning)
            {
                throw new FetchoException("Something wrong with packet - it keeps spinning");
            }

            moreResources = packet.NextResource();
        }
    }
    catch (Exception readError)
    {
        Consumer.ReadingException(readError);
    }
    finally
    {
        Consumer.PacketClosed();
    }
}
/// <summary>
/// Downloads a robots file. The URI actually fetched is derived from
/// <paramref name="anyUri"/> by <c>MakeRobotsUri</c>.
/// </summary>
/// <param name="anyUri">URI from which the robots URI is derived.</param>
/// <param name="lastFetched">
/// Passed unchanged to <c>HttpResourceFetcher.Fetch</c>; presumably the time of the
/// previous fetch, or <c>null</c> if there was none (confirm against the fetcher).
/// </param>
/// <returns>
/// A <c>RobotsFile</c> built from the response stream; an empty <c>RobotsFile</c> when
/// the fetched packet has no response stream; or <c>null</c> when any step inside the
/// fetch throws (the exception is logged, not propagated).
/// </returns>
public static async Task<RobotsFile> DownloadRobots(Uri anyUri, DateTime? lastFetched)
{
    RobotsFile robots = null;
    var robotsUri = MakeRobotsUri(anyUri);

    try
    {
        // The resolved address is not used here, but the lookup is kept so that a
        // failure to resolve the host still aborts the download as before.
        // NOTE: the HostCache.WaitToFetch congestion wait that used the address is
        // currently disabled.
        await Utility.GetHostIPAddress(robotsUri).ConfigureAwait(false);

        var bb = new BufferBlock<IWebResourceWriter>();

        using (var ms = new MemoryStream())
        {
            using (var writer = new WebDataPacketWriter(ms))
            {
                // this is annoying, I shouldn't have to create a buffer block to get a robots file
                // or we should put robots into the standard flow of things
                await bb.SendAsync(writer).ConfigureAwait(false);
                await new HttpResourceFetcher()
                    .Fetch(null, robotsUri, null, lastFetched, bb)
                    .ConfigureAwait(false);
            }

            // Rewind so the packet that was just written can be read back.
            ms.Seek(0, SeekOrigin.Begin);

            using (var reader = new WebDataPacketReader(CreateXmlReader(ms)))
            using (var stream = reader.GetResponseStream())
            {
                robots = stream == null
                    ? new RobotsFile()
                    : new RobotsFile(robotsUri, stream);
            }
        }
    }
    catch (Exception ex)
    {
        Utility.LogInfo("Fetching {0}:", robotsUri);
        Utility.LogException(ex);
    }

    return robots;
}