/// <summary>
/// Fetch a robots file for a uri
/// </summary>
/// <remarks>
/// The site record for the robots URI is looked up in the database. If the record says the
/// robots file does not need visiting, the stored file is returned; otherwise the file is
/// downloaded and the site record is saved with the new file and fetch time.
/// Exceptions raised during lookup, download or save are logged and not rethrown.
/// </remarks>
/// <param name="anyUri">Any URI for which you want the robots file for</param>
/// <returns>
/// The robots file for the site, or <c>null</c> when the site is blocked or when an error
/// occurred before a file could be obtained.
/// </returns>
public static async Task<RobotsFile> GetFile(Uri anyUri)
{
    var robotsUri = MakeRobotsUri(anyUri);
    RobotsFile robotsFile = null;

    try
    {
        // Look up the site, always handing the pooled database back even if the query fails.
        Site site;
        var db = await DatabasePool.GetDatabaseAsync();
        try
        {
            site = await db.GetSite(robotsUri);
        }
        finally
        {
            await DatabasePool.GiveBackToPool(db);
        }

        // A site we have never seen before always needs its robots file fetched.
        bool needsVisiting = true;
        if (site != null)
        {
            needsVisiting = site.RobotsNeedsVisiting;
        }
        else
        {
            site = MakeNewSite(anyUri);
        }

        if (!needsVisiting)
        {
            return site.RobotsFile;
        }

        if (site != null && site.IsBlocked)
        {
            Utility.LogInfo("Can't get robots file as site is blocked by policy: " + robotsUri);
            return null;
        }

        // The database is not held while the download is in progress.
        robotsFile = await DownloadRobots(robotsUri, site.LastRobotsFetched);
        site.LastRobotsFetched = DateTime.UtcNow;
        site.RobotsFile = robotsFile;

        db = await DatabasePool.GetDatabaseAsync();
        try
        {
            await db.SaveSite(site);
        }
        finally
        {
            await DatabasePool.GiveBackToPool(db);
        }
    }
    catch (Exception ex)
    {
        Utility.LogException(ex);
    }

    // If the save failed after a successful download, the downloaded file is still returned.
    return robotsFile;
}
/// <summary>
/// Passes a chunk of response data to the writer and, when the current queue item is an
/// <c>ImmediateWorkspaceQueueItem</c>, also builds a workspace result from it and stores
/// both the result and the raw bytes in the database.
/// </summary>
/// <remarks>
/// This method is <c>async void</c>, so callers cannot await it or observe its failures;
/// every exception is therefore caught and logged here rather than propagated.
/// </remarks>
/// <param name="response">The web response whose status line and headers are recorded.</param>
/// <param name="buffer">Buffer holding the downloaded bytes.</param>
/// <param name="bytesRead">Number of bytes in <paramref name="buffer"/> that are valid.</param>
public async void OutputResponse(WebResponse response, byte[] buffer, int bytesRead)
{
    try
    {
        Writer.OutputResponse(response, buffer, bytesRead);

        // bail if we dont get anything
        if (bytesRead == 0)
        {
            return;
        }

        // only immediate workspace items are pushed to a workspace
        if (!(queueItem is ImmediateWorkspaceQueueItem wqi))
        {
            return;
        }

        var headerText = new StringBuilder();
        if (response is HttpWebResponse httpWebResponse)
        {
            headerText.AppendFormat("status: {0} {1}\n", httpWebResponse.StatusCode, httpWebResponse.StatusDescription);
        }

        foreach (string key in response.Headers)
        {
            headerText.AppendFormat("{0}: {1}\n", key, response.Headers[key]);
        }

        responseHeaders = headerText.ToString();

        // Only the first bytesRead bytes of the buffer are valid. Slice once and use the same
        // bytes for parsing, hashing and caching so the hash always describes the cached data.
        byte[] payload = buffer.Take(bytesRead).ToArray();

        using (var ms = new MemoryStream(payload))
        {
            var builder = new WorkspaceResultBuilder();
            var result = builder.Build(ms, requestString, responseHeaders, out string _);
            result.Tags.AddRange(wqi.Tags);

            var hash = MD5Hash.Compute(payload);

            var db = await DatabasePool.GetDatabaseAsync();
            try
            {
                await db.AddWorkspaceResults(wqi.DestinationWorkspaceId, new[] { result });
                await db.AddWebResourceDataCache(hash, payload);
            }
            catch (Exception ex)
            {
                Utility.LogException(ex);
            }
            finally
            {
                // always hand the pooled database back, even when a write failed
                await DatabasePool.GiveBackToPool(db);
            }
        }
    }
    catch (Exception ex)
    {
        Utility.LogException(ex);
    }
}