/// <summary>
/// Normalizes and validates <paramref name="domain"/>, then hands it to the crawler.
/// </summary>
/// <param name="domain">Domain to process; it is normalized before validation.</param>
/// <returns>The value returned by the crawler's <c>ProcessSite</c> call.</returns>
/// <exception cref="ArgumentException">
/// The normalized domain is rejected by <c>SiteInfo.IsValidDomain</c>.
/// </exception>
public bool ProcessSite(string domain)
{
    domain = SiteInfo.NormalizeDomain(domain);

    if (SiteInfo.IsValidDomain(domain))
    {
        var accepted = _crawler.ProcessSite(domain);
        return accepted;
    }

    throw new ArgumentException("Invalid domain");
}
/// <summary>
/// Handles a submitted site address. When the model state is valid and the address
/// normalizes to a valid domain, redirects to the <c>ShowMap</c> action of the <c>Map</c>
/// controller with that normalized domain; otherwise re-renders the view with the
/// submitted model.
/// </summary>
/// <param name="siteAddress">Model carrying the address entered by the user.</param>
/// <returns>
/// A redirect to <c>Map/ShowMap</c> on success, or the current view (with an
/// "Invalid domain" model error when the address is missing or invalid).
/// </returns>
public ActionResult Index(SiteAddressModel siteAddress)
{
    if (!ModelState.IsValid)
    {
        return View(siteAddress);
    }

    if (siteAddress.Address != null)
    {
        // Normalize once and reuse the result for both validation and the redirect,
        // instead of normalizing the same input twice.
        string normalizedDomain = SiteInfo.NormalizeDomain(siteAddress.Address);
        if (SiteInfo.IsValidDomain(normalizedDomain))
        {
            return RedirectToAction("ShowMap", "Map", new { domain = normalizedDomain });
        }
    }

    // Missing address or a domain that failed validation.
    ModelState.AddModelError("", "Invalid domain");
    return View(siteAddress);
}
/// <summary>
/// Looks up the site stored for <paramref name="domain"/> and, depending on its state,
/// either replaces it with a freshly saved <c>Site</c> and starts the crawler, or just
/// starts the crawler for the existing record.
/// </summary>
/// <param name="domain">Domain to look up; it is normalized before validation.</param>
/// <param name="includeContents">Passed through to the repository lookup.</param>
/// <param name="contentsTimeStamp">Passed through to the repository lookup.</param>
/// <param name="createIfNecessary">
/// When <c>true</c>, a site that is not found in the repository is created.
/// </param>
/// <returns>
/// The stored site, the newly saved replacement, or <c>null</c> when nothing is stored and
/// creation was not requested, or when saving the replacement fails.
/// </returns>
/// <exception cref="ArgumentException">
/// The normalized domain is rejected by <c>SiteInfo.IsValidDomain</c>.
/// </exception>
public Site GetSite(string domain, bool includeContents, long? contentsTimeStamp = null, bool createIfNecessary = false)
{
    domain = SiteInfo.NormalizeDomain(domain);
    if (!SiteInfo.IsValidDomain(domain))
    {
        throw new ArgumentException("Invalid domain");
    }

    var site = _siteRepository.GetSite(domain, includeContents, contentsTimeStamp);

    bool recreate;
    bool reprocess;
    if (site == null)
    {
        // Nothing stored: create only on request; there is nothing to re-process.
        recreate = createIfNecessary;
        reprocess = false;
    }
    else
    {
        var info = site.Info;
        var status = info.Status;

        // Status is older than the refresh period.
        recreate = DateTime.UtcNow - info.StatusTime > TimeSpan.FromDays(RefreshPeriodInDays);

        // Last attempt ended with a connection or robots.txt problem more than 10 minutes ago.
        if (!recreate && ((status == SiteStatus.ConnectionProblem) || (status == SiteStatus.RobotsTxtProblem)))
        {
            recreate = DateTime.UtcNow - info.StatusTime > TimeSpan.FromMinutes(10);
        }

        // Site is still marked as added/processing, so the crawler is started for it again.
        // NOTE(review): RefreshEnabled only suppresses this re-processing, not the
        // re-creation decided above - confirm that this is intended.
        reprocess = info.RefreshEnabled
                    && ((status == SiteStatus.Added) || (status == SiteStatus.Processing));
    }

    if (recreate)
    {
        site = new Site();
        site.Info.Domain = domain;
        if (_siteRepository.SaveSite(site, true))
        {
            _crawler.ProcessSite(domain);
        }
        else
        {
            // Saving failed: report "no site" to the caller.
            site = null;
        }
    }
    else if (reprocess)
    {
        _crawler.ProcessSite(domain);
    }

    return site;
}
/// <summary>
/// Looks up the site stored for <paramref name="domain"/>. When no site is stored, or the
/// stored one is due for processing, a fresh <c>Site</c> is saved in its place and the
/// domain is queued for processing.
/// </summary>
/// <param name="domain">Domain to look up; it is normalized before validation.</param>
/// <param name="includeContents">Passed through to the repository lookup.</param>
/// <param name="contentsTimeStamp">Passed through to the repository lookup.</param>
/// <returns>
/// The stored site when no processing is needed, the newly saved site when processing was
/// queued, or <c>null</c> when saving the new site fails.
/// </returns>
/// <exception cref="ArgumentException">
/// The normalized domain is rejected by <c>SiteInfo.IsValidDomain</c>.
/// </exception>
public Site GetSite(string domain, bool includeContents, long? contentsTimeStamp = null)
{
    domain = SiteInfo.NormalizeDomain(domain);
    if (!SiteInfo.IsValidDomain(domain))
    {
        throw new ArgumentException("Invalid domain");
    }

    var existing = _siteRepository.GetSite(domain, includeContents, contentsTimeStamp);
    if (existing != null)
    {
        var info = existing.Info;

        // Refresh disabled: the stored site is returned untouched.
        if (!info.RefreshEnabled)
        {
            return existing;
        }

        var status = info.Status;

        // Info is too old.
        bool due = DateTime.UtcNow - info.StatusTime > TimeSpan.FromDays(RefreshPeriodInDays);

        // There was a connection or robots.txt error more than 10 minutes ago.
        if (!due && ((status == SiteStatus.ConnectionProblem) || (status == SiteStatus.RobotsTxtProblem)))
        {
            due = DateTime.UtcNow - info.StatusTime > TimeSpan.FromMinutes(10);
        }

        // Processing seems to have been interrupted: no status change for over an hour.
        if (!due && ((status == SiteStatus.Added) || (status == SiteStatus.Processing)))
        {
            due = DateTime.UtcNow - info.StatusTime > TimeSpan.FromHours(1);
        }

        if (!due)
        {
            return existing;
        }
    }

    // Either nothing is stored or the stored site is due: save a fresh record and queue it.
    var fresh = new Site();
    fresh.Info.Domain = domain;
    if (!_siteRepository.SaveSite(fresh, true))
    {
        return null;
    }

    _siteRepository.QueueSiteForProcessing(domain);
    return fresh;
}