/// <summary>
/// Decides whether <paramref name="crawledPage"/> should be requested again and, when it
/// should, records the delay advertised by the response's Retry-After header.
/// </summary>
/// <remarks>
/// The decision from <c>_crawlDecisionMaker</c> is consulted first. If it allows the recrawl,
/// the optional <c>_shouldRecrawlPageDecisionMaker</c> callback (when not null) produces the
/// final decision. The final decision is always passed to <c>SignalCrawlStopIfNeeded</c>.
/// </remarks>
/// <param name="crawledPage">The page that has just been crawled.</param>
/// <returns>The <c>Allow</c> flag of the final recrawl decision.</returns>
protected virtual bool ShouldRecrawlPage(CrawledPage crawledPage)
{
    //TODO No unit tests cover these lines
    var shouldRecrawlPageDecision = _crawlDecisionMaker.ShouldRecrawlPage(crawledPage, _crawlContext);
    if (shouldRecrawlPageDecision.Allow)
    {
        // The built-in check passed; defer to the optional callback, or allow by default.
        shouldRecrawlPageDecision = (_shouldRecrawlPageDecisionMaker != null)
            ? _shouldRecrawlPageDecisionMaker.Invoke(crawledPage, _crawlContext)
            : new CrawlDecision { Allow = true };
    }

    if (!shouldRecrawlPageDecision.Allow)
    {
        _logger.LogDebug($"Page [{crawledPage.Uri.AbsoluteUri}] not recrawled, [{shouldRecrawlPageDecision.Reason}]");
    }
    else
    {
        // Look for the Retry-After header in the response.
        // Reset first so a value from an earlier response is never carried over.
        crawledPage.RetryAfter = null;
        if (crawledPage.HttpWebResponse != null && crawledPage.HttpWebResponse.Headers != null)
        {
            var value = crawledPage.HttpWebResponse.GetResponseHeader("Retry-After");
            if (!String.IsNullOrEmpty(value))
            {
                // Retry-After is a machine-readable header, so it is parsed with the invariant
                // culture: the result must not depend on the current thread's culture settings.
                var invariant = System.Globalization.CultureInfo.InvariantCulture;

                // Try to convert to DateTime first, then to double.
                DateTime date;
                double seconds;
                if (crawledPage.LastRequest.HasValue
                    && DateTime.TryParse(value, invariant, System.Globalization.DateTimeStyles.None, out date))
                {
                    // Absolute date form: store the delay in seconds relative to the last request.
                    // NOTE(review): assumes LastRequest and the parsed date are in the same time zone - confirm.
                    crawledPage.RetryAfter = (date - crawledPage.LastRequest.Value).TotalSeconds;
                }
                else if (double.TryParse(
                    value,
                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                    invariant,
                    out seconds))
                {
                    // Delay form: the header already holds a number of seconds.
                    crawledPage.RetryAfter = seconds;
                }
            }
        }
    }

    SignalCrawlStopIfNeeded(shouldRecrawlPageDecision);
    return shouldRecrawlPageDecision.Allow;
}
/// <summary>
/// Decides whether <paramref name="crawledPage"/> should be requested again and, when it
/// should, records the delay advertised by the response's Retry-After header.
/// </summary>
/// <remarks>
/// The decision from <c>_crawlDecisionMaker</c> is consulted first. If it allows the recrawl,
/// the optional <c>ShouldRecrawlPageDecisionMaker</c> callback (when not null) produces the
/// final decision. The final decision is always passed to <c>SignalCrawlStopIfNeeded</c>.
/// </remarks>
/// <param name="crawledPage">The page that has just been crawled.</param>
/// <returns>The <c>Allow</c> flag of the final recrawl decision.</returns>
protected virtual bool ShouldRecrawlPage(CrawledPage crawledPage)
{
    //TODO No unit tests cover these lines
    var shouldRecrawlPageDecision = _crawlDecisionMaker.ShouldRecrawlPage(crawledPage, _crawlContext);
    if (shouldRecrawlPageDecision.Allow)
    {
        // The built-in check passed; defer to the optional callback, or allow by default.
        shouldRecrawlPageDecision = (ShouldRecrawlPageDecisionMaker != null)
            ? ShouldRecrawlPageDecisionMaker(crawledPage, _crawlContext)
            : new CrawlDecision { Allow = true };
    }

    if (!shouldRecrawlPageDecision.Allow)
    {
        Log.DebugFormat("Page [{0}] not recrawled, [{1}]", crawledPage.Uri.AbsoluteUri, shouldRecrawlPageDecision.Reason);
    }
    else
    {
        // Look for the Retry-After header in the response.
        // Reset first so a value from an earlier response is never carried over.
        crawledPage.RetryAfter = null;
        var value = crawledPage.HttpResponseMessage?.Headers?.RetryAfter?.ToString();
        if (!String.IsNullOrEmpty(value))
        {
            // Retry-After is a machine-readable header, so it is parsed with the invariant
            // culture: the result must not depend on the current thread's culture settings.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Try to convert to DateTime first, then to double.
            DateTime date;
            double seconds;
            if (crawledPage.LastRequest.HasValue
                && DateTime.TryParse(value, invariant, System.Globalization.DateTimeStyles.None, out date))
            {
                // Absolute date form: store the delay in seconds relative to the last request.
                // NOTE(review): assumes LastRequest and the parsed date are in the same time zone - confirm.
                crawledPage.RetryAfter = (date - crawledPage.LastRequest.Value).TotalSeconds;
            }
            else if (double.TryParse(
                value,
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                invariant,
                out seconds))
            {
                // Delay form: the header already holds a number of seconds.
                crawledPage.RetryAfter = seconds;
            }
        }
    }

    SignalCrawlStopIfNeeded(shouldRecrawlPageDecision);
    return shouldRecrawlPageDecision.Allow;
}
/// <summary>
/// Decides whether <paramref name="crawledPage"/> should be requested again.
/// </summary>
/// <remarks>
/// <c>_crawlDecisionMaker</c> is asked first. Only when it allows the recrawl is the optional
/// <c>_shouldRecrawlPageDecisionMaker</c> callback consulted; with no callback set the recrawl
/// is allowed. A refusal is logged at debug level, and the final decision is always handed to
/// <c>SignalCrawlStopIfNeeded</c>.
/// </remarks>
/// <param name="crawledPage">The page that has just been crawled.</param>
/// <returns>The <c>Allow</c> flag of the final recrawl decision.</returns>
protected virtual bool ShouldRecrawlPage(CrawledPage crawledPage)
{
    //TODO No unit tests cover these lines
    CrawlDecision decision = _crawlDecisionMaker.ShouldRecrawlPage(crawledPage, _crawlContext);

    if (decision.Allow)
    {
        if (_shouldRecrawlPageDecisionMaker == null)
        {
            // No callback registered: nothing can veto the built-in decision.
            decision = new CrawlDecision { Allow = true };
        }
        else
        {
            // The callback's answer replaces the built-in decision.
            decision = _shouldRecrawlPageDecisionMaker.Invoke(crawledPage, _crawlContext);
        }
    }

    if (!decision.Allow)
    {
        _logger.DebugFormat(
            "Page [{0}] not recrawled, [{1}]",
            crawledPage.Uri.AbsoluteUri,
            decision.Reason);
    }

    SignalCrawlStopIfNeeded(decision);

    return decision.Allow;
}
/// <summary>
/// Decides whether <paramref name="crawledPage"/> should be requested again.
/// </summary>
/// <remarks>
/// <c>_crawlDecisionMaker</c> is asked first. Only when it allows the recrawl is the optional
/// <c>_shouldRecrawlPageDecisionMaker</c> callback consulted; with no callback set,
/// <c>CrawlDecision.AllowCrawl()</c> supplies the decision.
/// </remarks>
/// <param name="crawledPage">The page that has just been crawled.</param>
/// <returns>The <c>Allow</c> flag of the final recrawl decision.</returns>
protected virtual bool ShouldRecrawlPage(CrawledPage crawledPage)
{
    //TODO No unit tests cover these lines
    CrawlDecision builtInDecision = _crawlDecisionMaker.ShouldRecrawlPage(crawledPage, _crawlContext);
    if (!builtInDecision.Allow)
    {
        // The built-in check refused; the callback is never consulted in this case.
        return false;
    }

    if (_shouldRecrawlPageDecisionMaker == null)
    {
        // No callback registered: fall back to the default decision.
        return CrawlDecision.AllowCrawl().Allow;
    }

    // NOTE: the debug log for refused pages and the Retry-After header handling
    // (which populated crawledPage.RetryAfter) were previously commented out here
    // and remain disabled.
    return _shouldRecrawlPageDecisionMaker.Invoke(crawledPage, _crawlContext).Allow;
}