/// <summary>
/// Hands the job's URL to <c>ProcessProductLink</c> when <c>CheckIsProduct</c>
/// reports that the URL is a product.
/// </summary>
/// <param name="htmlDocument">Parsed page for the job; not read by this method.</param>
/// <param name="jobNodeCrawler">Crawl job whose <c>Url</c> is examined.</param>
private void AnalysicProduct(HtmlDocument htmlDocument, JobNodeCrawler jobNodeCrawler)
{
    var pageUrl = jobNodeCrawler.Url;

    // Nothing to do for URLs that are not recognised as products.
    if (!CheckIsProduct(pageUrl))
    {
        return;
    }

    ProcessProductLink(pageUrl);
}
/// <summary>
/// Drains the crawl queue: for each dequeued job the page HTML is fetched with
/// <c>GetHtml</c>, parsed, passed to <c>Extraction</c> and then to
/// <c>AnalysicProduct</c>. The loop ends when the queue is empty.
/// </summary>
public void Start()
{
    while (_queueCrawler.Count > 0)
    {
        JobNodeCrawler currentJob = _queueCrawler.Dequeue();
        string pageHtml = GetHtml(currentJob.Url);

        // Skip jobs for which no HTML came back.
        if (string.IsNullOrEmpty(pageHtml))
        {
            continue;
        }

        var document = new HtmlDocument();
        document.LoadHtml(pageHtml);

        // Extraction may enqueue further jobs, which keeps this loop running.
        Extraction(document, currentJob);
        AnalysicProduct(document, currentJob);
    }
}
/// <summary>
/// Scans every <c>&lt;a&gt;</c> element of the document and enqueues a new crawl
/// job for each non-empty <c>href</c> value that <c>CheckAllowVisit</c> accepts.
/// </summary>
/// <param name="htmlDocument">Parsed page whose anchors are scanned.</param>
/// <param name="jobNodeCrawler">
/// Job of the page being scanned; new jobs get its <c>Deep</c> value plus one.
/// </param>
private void Extraction(HtmlDocument htmlDocument, JobNodeCrawler jobNodeCrawler)
{
    var anchorNodes = htmlDocument.DocumentNode.SelectNodes("//a");

    // The selection result can be null (no anchors found), so bail out early.
    if (anchorNodes == null)
    {
        return;
    }

    foreach (var anchorNode in anchorNodes)
    {
        string urlNew = anchorNode.GetAttributeValue("href", "");

        if (string.IsNullOrEmpty(urlNew) || !CheckAllowVisit(urlNew))
        {
            continue;
        }

        // Enqueue the discovered link itself. Previously the URL of the page
        // being scanned was enqueued, so discovered links were never visited
        // and the current page was re-queued once per accepted link.
        _queueCrawler.Enqueue(new JobNodeCrawler()
        {
            Deep = jobNodeCrawler.Deep + 1,
            Url = urlNew
        });
    }
}