/// <summary>
/// Handles an incoming <see cref="DomainStat"/>: looks up the matching domain,
/// merges the statistic into it and forwards the updated domain through <c>_hub</c>.
/// </summary>
/// <param name="domainStat">Statistic carrying the domain URL and its indexing data.</param>
/// <remarks>
/// Any exception raised while processing is logged (message text only) and then rethrown.
/// </remarks>
private void HandleDomainStat(DomainStat domainStat)
{
    try
    {
        _logger.Information($"Currently {domainStat.IndexingData.PagesNumber} pages indexed for the domain {domainStat.DomainURL}");

        // Load the stored domain, fold the new statistic into it, then publish the outcome.
        var storedDomain = _domainService.GetDomainByUrl(domainStat.DomainURL);
        var updatedDomain = ProcessDomainWithStat(storedDomain, domainStat);
        _hub.SendDomain(updatedDomain);

        _logger.Information($"Finished processing of the domain {domainStat.DomainURL}");
    }
    catch (Exception ex)
    {
        _logger.Error($"Error happened while processing the domain: {ex.Message}");

        // Rethrow with the original stack trace so the caller still sees the failure.
        throw;
    }
}
/// <summary>
/// Merges a freshly collected indexing statistic into <paramref name="domain"/>
/// and persists the modified domain through <c>_domainService.UpdateDomain</c>.
/// </summary>
/// <param name="domain">Domain whose statistics list and de-indexed flag are updated in place.</param>
/// <param name="domainStat">Statistic to merge; its <c>IndexingData</c> is stored on the domain.</param>
/// <returns>The value returned by <c>_domainService.UpdateDomain</c> for the modified domain.</returns>
private DomainDTO ProcessDomainWithStat(DomainDTO domain, DomainStat domainStat)
{
    var indexingData = domainStat.IndexingData;
    var processingDay = indexingData.ProcessingDate.Date;

    // Keep a single entry per calendar day: remove the entries that actually belong
    // to this day (wherever they sit in the list) rather than blindly dropping the
    // last element. Iterating backwards keeps the remaining indices valid.
    for (var i = domain.IndexingStats.Count - 1; i >= 0; i--)
    {
        if (domain.IndexingStats[i].ProcessingDate.Date.Equals(processingDay))
        {
            domain.IndexingStats.RemoveAt(i);
        }
    }

    domain.IndexingStats.Add(indexingData);

    // Set the flag from the page count instead of toggling it: toggling flipped an
    // already de-indexed domain back to "indexed" when it reported zero pages again.
    domain.IsDeindexed = indexingData.PagesNumber == 0;

    return _domainService.UpdateDomain(domain);
}
/// <summary>
/// Downloads the search page built from <c>GoogleSearchUrl</c> for the given domain,
/// reads the page count from the element with id <c>resultStats</c> and replies to the sender.
/// </summary>
/// <param name="message">Input carrying the domain URL and whether a proxy must be used.</param>
/// <remarks>
/// Replies with a <see cref="DomainStat"/> holding 0 pages when the <c>resultStats</c> element
/// is missing or empty, with a <see cref="DomainStat"/> holding the parsed count when a number
/// is found, and with a <see cref="ParsingError"/> when no number can be extracted or parsed.
/// Download failures are not handled here and propagate to the caller.
/// </remarks>
private void HandleDomainUrl(ParserInput message)
{
    string pageData;

    // The client is only needed for the download, so release it before parsing.
    using (var webClient = new WebClient())
    {
        if (message.UseProxy)
        {
            webClient.Proxy = _proxyProvider.GetProxy();
        }

        var url = _proxyProvider.GetRequestUrl(string.Format(GoogleSearchUrl, message.Url));

        // NOTE(review): presumably requested so the result text comes back in English - confirm.
        webClient.Headers.Add("Accept-Language", "en-US");
        pageData = webClient.DownloadString(url);
    }

    var page = new HtmlDocument();
    page.LoadHtml(pageData);
    HtmlNode resultStatsNode = page.GetElementbyId("resultStats");

    // A missing or empty stats element is reported as zero indexed pages.
    long indexedPagesNumber = 0;

    if (resultStatsNode != null && resultStatsNode.InnerHtml != string.Empty)
    {
        var resultStats = HtmlEntity.DeEntitize(resultStatsNode.InnerHtml);

        // The number spans from the first non-zero digit to the last digit of any kind.
        var firstNumberIndex = resultStats.IndexOfAny("123456789".ToCharArray());
        var lastNumberIndex = resultStats.LastIndexOfAny("0123456789".ToCharArray());

        string errorMessage = null;
        if (firstNumberIndex >= 0 && lastNumberIndex >= firstNumberIndex)
        {
            var match = resultStats.Substring(firstNumberIndex, lastNumberIndex - firstNumberIndex + 1);

            // Strip group separators (comma, dot, any whitespace) before parsing.
            var numberString = match.Replace(",", string.Empty).Replace(".", string.Empty);
            numberString = Regex.Replace(numberString, @"\s+", "");

            // TryParse instead of Parse: unexpected text between the digits is reported
            // to the sender as a parsing error instead of throwing out of the handler.
            var parsed = long.TryParse(
                numberString,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out indexedPagesNumber);

            if (!parsed)
            {
                errorMessage = "Could not parse the pages number! Result stats: " + resultStats;
            }
        }
        else
        {
            errorMessage = "Match was not successful! Result stats: " + resultStats;
        }

        if (errorMessage != null)
        {
            var parsingError = new ParsingError { DomainURL = message.Url, ErrorMessage = errorMessage };
            Sender.Tell(parsingError, Self);
            return;
        }
    }

    var indexingData = new IndexingData { PagesNumber = indexedPagesNumber, ProcessingDate = DateTime.Now.Date };
    var domainStat = new DomainStat { DomainURL = message.Url, IndexingData = indexingData };
    Sender.Tell(domainStat, Self);
}
/// <summary>
/// Invokes <c>SendDomainStat</c> on all clients of the <see cref="DomainStatSenderHub"/>
/// hub context, passing the given statistic.
/// </summary>
/// <param name="domainStat">Statistic handed to the clients.</param>
public void SendDomainStat(DomainStat domainStat)
{
    // Resolve the hub context through the connection manager exposed by Startup.
    IHubContext hubContext = Startup.ConnectionManager.GetHubContext<DomainStatSenderHub>();

    hubContext.Clients.All.SendDomainStat(domainStat);
}