Esempio n. 1
0
        /// <summary>
        /// Gets the robots file for the site that <paramref name="anyUri"/> belongs to.
        /// The site record is looked up by its robots URI. When there is no record yet, or the record
        /// says its robots file needs visiting, the file is downloaded and saved back with the site;
        /// otherwise the robots file already held on the site record is returned.
        /// </summary>
        /// <param name="anyUri">Any URI for which you want the robots file for</param>
        /// <returns>
        /// The robots file, or <c>null</c> when the site is blocked by policy. Exceptions raised while
        /// loading, downloading or saving are logged rather than thrown, so the result may also be
        /// <c>null</c> if a failure happened before a robots file was obtained.
        /// </returns>
        public static async Task <RobotsFile> GetFile(Uri anyUri)
        {
            RobotsFile robotsFile = null;
            var        robotsUri  = MakeRobotsUri(anyUri);

            try
            {
                Site site;
                var  db = await DatabasePool.GetDatabaseAsync();

                try
                {
                    site = await db.GetSite(robotsUri);
                }
                finally
                {
                    // Hand the database back even when the lookup throws; the outer catch
                    // swallows the exception, so without this the pool entry would leak silently.
                    await DatabasePool.GiveBackToPool(db);
                }

                bool needsVisiting = true;

                if (site != null)
                {
                    needsVisiting = site.RobotsNeedsVisiting;
                }
                else
                {
                    // No record for this site yet: start a fresh one, which always needs visiting
                    site = MakeNewSite(anyUri);
                }

                if (needsVisiting)
                {
                    if (site != null && site.IsBlocked)
                    {
                        Utility.LogInfo("Can't get robots file as site is blocked by policy: " + robotsUri);
                        return null;
                    }

                    robotsFile = await DownloadRobots(robotsUri, site.LastRobotsFetched);

                    site.LastRobotsFetched = DateTime.UtcNow;
                    site.RobotsFile        = robotsFile;

                    // The database is not held across the download; re-acquire it only for the save
                    db = await DatabasePool.GetDatabaseAsync();

                    try
                    {
                        await db.SaveSite(site);
                    }
                    finally
                    {
                        await DatabasePool.GiveBackToPool(db);
                    }
                }
                else
                {
                    robotsFile = site.RobotsFile;
                }
            }
            catch (Exception ex)
            {
                // Best effort: log and fall through with whatever was obtained so far
                Utility.LogException(ex);
            }

            return robotsFile;
        }
        /// <summary>
        /// Forwards a received response to <c>Writer</c> and, when the current queue item is an
        /// <c>ImmediateWorkspaceQueueItem</c>, also builds a workspace result from the received bytes
        /// and stores both the result and the bytes in the database.
        /// </summary>
        /// <remarks>
        /// This method is <c>async void</c>, so callers cannot await it or observe its failures.
        /// Every exception is therefore caught and logged here instead of being allowed to escape.
        /// </remarks>
        /// <param name="response">The response the data belongs to</param>
        /// <param name="buffer">Buffer holding the received bytes</param>
        /// <param name="bytesRead">Number of bytes at the start of <paramref name="buffer"/> that hold response data</param>
        public async void OutputResponse(WebResponse response, byte[] buffer, int bytesRead)
        {
            try
            {
                Writer.OutputResponse(response, buffer, bytesRead);

                // bail if we dont get anything
                if (bytesRead == 0)
                {
                    return;
                }

                // if we need to push it to this workspace
                if (queueItem is ImmediateWorkspaceQueueItem wqi)
                {
                    var sb = new StringBuilder();

                    if (response is HttpWebResponse httpWebResponse)
                    {
                        sb.AppendFormat("status: {0} {1}\n", httpWebResponse.StatusCode, httpWebResponse.StatusDescription);
                    }

                    foreach (string key in response.Headers)
                    {
                        sb.AppendFormat("{0}: {1}\n", key, response.Headers[key]);
                    }

                    responseHeaders = sb.ToString();

                    // Only the first bytesRead bytes are response data. Slice once and use the same
                    // bytes for the result stream, the hash and the cached data, so the cache key
                    // always matches the cached content even when the buffer is larger than the read.
                    var payload = buffer.Take(bytesRead).ToArray();

                    using (var ms = new MemoryStream(payload))
                    {
                        var builder = new WorkspaceResultBuilder();
                        var result  = builder.Build(ms, requestString, responseHeaders, out string evalText);
                        result.Tags.AddRange(wqi.Tags);

                        var hash = MD5Hash.Compute(payload);
                        var db   = await DatabasePool.GetDatabaseAsync();

                        try
                        {
                            await db.AddWorkspaceResults(wqi.DestinationWorkspaceId, new[] { result });
                            await db.AddWebResourceDataCache(hash, payload);
                        }
                        catch (Exception ex)
                        {
                            Utility.LogException(ex);
                        }
                        finally
                        {
                            // Always return the database to the pool, whether or not the writes succeeded
                            await DatabasePool.GiveBackToPool(db);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Last line of defence: nothing may escape an async void method
                Utility.LogException(ex);
            }
        }