예제 #1
0
 /// <summary>
 /// Hands the crawled page's URL to <c>ProcessProductLink</c> when
 /// <c>CheckIsProduct</c> accepts it; otherwise does nothing.
 /// </summary>
 /// <param name="htmlDocument">Parsed page for the job; not read by this method.</param>
 /// <param name="jobNodeCrawler">Crawl job whose <c>Url</c> is checked and processed.</param>
 private void AnalysicProduct(HtmlDocument htmlDocument, JobNodeCrawler jobNodeCrawler)
 {
     var pageUrl = jobNodeCrawler.Url;

     // Nothing to do for pages that do not pass the product check.
     if (!CheckIsProduct(pageUrl))
     {
         return;
     }

     ProcessProductLink(pageUrl);
 }
예제 #2
0
 /// <summary>
 /// Runs the crawl loop: dequeues jobs until the queue is empty, downloads each
 /// job's page via <c>GetHtml</c>, and for every non-empty response parses it and
 /// passes it to <c>Extraction</c> and then <c>AnalysicProduct</c>.
 /// </summary>
 public void Start()
 {
     while (_queueCrawler.Count > 0)
     {
         JobNodeCrawler currentJob = _queueCrawler.Dequeue();
         string         pageSource = GetHtml(currentJob.Url);

         // A null or empty download means there is nothing to parse for this job.
         if (string.IsNullOrEmpty(pageSource))
         {
             continue;
         }

         var parsedPage = new HtmlDocument();
         parsedPage.LoadHtml(pageSource);

         // Link extraction runs before the product check, same order as before.
         Extraction(parsedPage, currentJob);
         AnalysicProduct(parsedPage, currentJob);
     }
 }
예제 #3
0
        /// <summary>
        /// Scans every <c>&lt;a&gt;</c> element of the page and, for each non-empty
        /// <c>href</c> accepted by <c>CheckAllowVisit</c>, enqueues a new crawl job
        /// for that link one level deeper than the current job.
        /// </summary>
        /// <param name="htmlDocument">Parsed page whose anchors are inspected.</param>
        /// <param name="jobNodeCrawler">Job the page belongs to; supplies the current depth.</param>
        private void Extraction(HtmlDocument htmlDocument, JobNodeCrawler jobNodeCrawler)
        {
            var anchorNodes = htmlDocument.DocumentNode.SelectNodes("//a");

            // SelectNodes can return null rather than an empty collection when nothing matches.
            if (anchorNodes == null)
            {
                return;
            }

            foreach (var anchorNode in anchorNodes)
            {
                string urlNew = anchorNode.GetAttributeValue("href", "");

                // Skip anchors without an href and links the crawler is not allowed to visit.
                if (string.IsNullOrEmpty(urlNew) || !CheckAllowVisit(urlNew))
                {
                    continue;
                }

                _queueCrawler.Enqueue(new JobNodeCrawler()
                {
                    Deep = jobNodeCrawler.Deep + 1,
                    // Enqueue the discovered link itself. Using the current page's URL here
                    // would re-queue the same page for every anchor and never follow links.
                    Url  = urlNew
                });
            }
        }