/// <summary>
        /// Seeds <see cref="Richness"/> entities from the richness CSV file: one entity per
        /// usable line, named after the first non-empty field of that line. All entities are
        /// persisted with a single <c>SaveChangesAsync</c> call after the file has been read.
        /// </summary>
        /// <param name="dbContext">Database context the entities are added to and saved through.</param>
        /// <returns>A task that completes once the changes have been saved.</returns>
        private static async Task SeedRichnessAsync(EveEchoesPlanetaryProductionApiDbContext dbContext)
        {
            await foreach (var line in CsvFileService.ReadCsvDataLineByLineAsync(GlobalConstants.FilePaths.RichnessCsvFilePath))
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                var lineArgs = line.Split(GlobalConstants.CsvDelimiter, StringSplitOptions.RemoveEmptyEntries);

                // A line made up solely of delimiters is not blank, yet RemoveEmptyEntries
                // leaves no fields; skip it like a blank line instead of failing on lineArgs[0].
                if (lineArgs.Length == 0)
                {
                    continue;
                }

                var richness = new Richness()
                {
                    Name = lineArgs[0],
                };

                await dbContext.AddAsync(richness);
            }

            await dbContext.SaveChangesAsync();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Extracts the objects embedded in an HTML page, keeps those permitted by the
        /// blacklist and the richness setting, and hands them to the parallel downloader.
        /// Nothing is downloaded when the handler has been told to stop or the remaining
        /// quota is below the low watermark.
        /// </summary>
        /// <param name="rcRequest">Request of the page the embedded objects belong to.</param>
        /// <param name="baseUri">The Uri of the website the embedded objects are extracted against.</param>
        /// <param name="htmlContent">The HTML content of the website.</param>
        /// <param name="richness">Richness setting: Normal keeps every non-blacklisted object,
        /// Low keeps only those recognised as text pages, any other value keeps none.</param>
        /// <returns>List of RCRequests of the embedded objects that were downloaded.</returns>
        private LinkedList<RCRequest> DownloadEmbeddedObjects(RCRequest rcRequest, Uri baseUri, string htmlContent, Richness richness)
        {
            // Bail out before doing any extraction work.
            if (_killYourself || _quota < DEFAULT_LOW_WATERMARK)
            {
                return new LinkedList<RCRequest>();
            }

            LinkedList<Uri> accepted = new LinkedList<Uri>();

            // XXX: refactor into filter class/method.
            foreach (Uri candidate in HtmlUtils.ExtractEmbeddedObjects(baseUri, htmlContent))
            {
                string candidateText = candidate.ToString();

                // Blacklisted domains are never downloaded, whatever the richness.
                if (IsBlacklisted(candidateText))
                {
                    continue;
                }

                bool allowedByRichness =
                    richness == Richness.Normal ||
                    (richness == Richness.Low && IsATextPage(candidateText));

                if (allowedByRichness)
                {
                    accepted.AddLast(candidate);
                }
            }

            return DownloadObjectsInParallel(rcRequest, accepted);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Recursively downloads a page, its embedded objects, and its outlinks, adding
        /// what was downloaded to the package.
        /// </summary>
        /// <param name="rcRequest">Requested page to start from.</param>
        /// <param name="richness">Richness setting, forwarded to the embedded-object download
        /// and to every recursive call.</param>
        /// <param name="depth">Current recursion depth; 0 is the top level, which is the only
        /// level at which error pages are sent.</param>
        /// <returns>
        /// <c>false</c> if the request was aborted, did not fit in the quota, or the page
        /// itself could not be downloaded; <c>true</c> otherwise. Results of the recursive
        /// calls for outlinks do not influence the return value.
        /// </returns>
        public bool RecursivelyDownloadPage(RCRequest rcRequest, Richness richness, int depth)
        {
            if (_killYourself || _quota < DEFAULT_LOW_WATERMARK)
            {
                // Send error page if we're on top level
                if (depth == 0)
                {
                    SendErrorPage(HttpStatusCode.InternalServerError, "Request aborted or it does not fit in quota.");
                }
                return false;
            }

            // Reduce the timer: only the time left until StartTime + default timeout remains.
            // NOTE(review): this sets the timeout on the RCRequest member, not on the
            // rcRequest parameter that is downloaded below — confirm this is intended.
            DateTime currTime = DateTime.Now;
            DateTime endTime = StartTime.AddMilliseconds(RequestHandler.WEB_REQUEST_DEFAULT_TIMEOUT);
            if (endTime.CompareTo(currTime) > 0)
            {
                RCRequest.GenericWebRequest.Timeout = (int)(endTime.Subtract(currTime)).TotalMilliseconds;
            }
            else
            {
                RCRequest.GenericWebRequest.Timeout = 0;
            }

            // Only download for POST/... or not already existing items
            if (!IsGetOrHeadHeader() || !_proxy.ProxyCacheManager.IsCached(rcRequest.RelCacheFileName))
            {
                // Download!
                try
                {
                    // There is no index on the remote side anyway
                    rcRequest.DownloadToCache(false);
                }
                catch (Exception e)
                {
                    Logger.Warn("[depth = " + depth + "] error downloading: " + rcRequest.Uri + " " + e.Message);
                    // Send error page if we're on top level
                    if (depth == 0)
                    {
                        // Use the HTTP status of the failed response when there is one;
                        // every other failure is reported as an internal server error.
                        WebException webException = e as WebException;
                        HttpWebResponse response = webException != null
                            ? webException.Response as HttpWebResponse
                            : null;
                        SendErrorPage(response != null ? response.StatusCode : HttpStatusCode.InternalServerError, e.Message);
                    }
                    return false;
                }
            }
            else
            {
                Logger.Debug("Already existed: " + rcRequest.Uri);
            }

            // add to the package
            if (_package.Pack(this, rcRequest, ref _quota))
            {
                Logger.Debug("[depth = " + depth + "] packed: " + rcRequest.Uri + " " + rcRequest.FileSize + " bytes, " + _quota + " left");
            }

            // add a new request for the old location if it was redirected. This will then
            // get the 301 file from the cache, so the local proxy does not need to send
            // another request to the remote proxy to find that out.
            if (rcRequest.UriBeforeRedirect != null)
            {
                Logger.Debug("Redirected: Also packing old URI with a 301 file.");
                RCRequest rc301 = new RCRequest(_proxy, (HttpWebRequest)WebRequest.Create(rcRequest.UriBeforeRedirect));
                _package.Pack(this, rc301, ref _quota);
            }

            // Getting embedded objects and recursing only makes sense for html pages.
            if (!_proxy.ProxyCacheManager.IsHTMLFile(rcRequest.RelCacheFileName))
            {
                return true;
            }

            Uri baseUri = new Uri(rcRequest.Uri);
            // The cached page is lower-cased before embedded objects and links are extracted.
            string htmlContent = Utils.ReadFileAsString(rcRequest.CacheFileName).ToLower();

            // get the embedded content of the search result page
            DownloadEmbeddedObjects(rcRequest, baseUri, htmlContent, richness);

            // Don't recurse if we're on (or beyond) the deepest layer allowed. Using >=
            // rather than == keeps the depth limit effective when DEFAULT_DEPTH is
            // configured as 0 or less, or when a caller starts past the limit.
            if (depth >= Properties.Settings.Default.DEFAULT_DEPTH - 1)
            {
                return true;
            }

            // recurse
            LinkedList<Uri> resultLinkUris = HtmlUtils.ExtractLinks(baseUri, htmlContent);
            foreach (Uri uri in resultLinkUris)
            {
                RCRequest currRequest = new RCRequest(_proxy, (HttpWebRequest)WebRequest.Create(uri));
                RecursivelyDownloadPage(currRequest, richness, depth + 1);
            }
            return true;
        }