Exemple #1
0
        /// <summary>
        /// Console entry point: loads a fixed start page, then keeps following the
        /// links it discovers, printing every link the first time it is processed.
        /// Only links whose resolved address starts with the start page's
        /// <c>DomainUrl</c> are downloaded and scanned for further links.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Alternative start page that was previously tried: http://www.bjztb.gov.cn/
            string url = "http://www.ifeng.com/";

            Url urlHelper = new Url(url);
            var domainUrl = urlHelper.DomainUrl;
            var sp        = new WebPageLoader();
            var doc       = sp.GetPage(url);

            // Work list of links still to be processed. A queue replaces the original
            // "remove from the list while indexing it" loop, which skipped the entry
            // following every removal and needed empty catch blocks to hide the
            // resulting out-of-range accesses.
            var pendingUrls = new Queue<string>(new HtmlParser(doc.DocumentNode.InnerHtml).GetUrlList(url));

            // Links already handled. A hash set gives constant-time membership
            // checks instead of scanning a list for every link.
            var visitedUrls = new HashSet<string>();

            visitedUrls.Add(url);
            Console.WriteLine(url);
            int count = 0;

            while (pendingUrls.Count > 0)
            {
                var link = pendingUrls.Dequeue();

                // Add returns false when the link was already recorded, so each
                // distinct link is processed exactly once.
                if (!visitedUrls.Add(link))
                {
                    continue;
                }

                // Links are resolved against the start page, as in the original code.
                var fixUrl = Url.GetObsluteUrl(url, link);
                if (fixUrl.StartsWith(domainUrl))
                {
                    var doc1 = sp.GetPage(fixUrl);

                    // NOTE(review): the raw link (not fixUrl) is passed as the base,
                    // exactly as before - confirm what GetUrlList expects here.
                    foreach (var found in new HtmlParser(doc1.DocumentNode.InnerHtml).GetUrlList(link))
                    {
                        // Skip links that are already done to keep the queue small;
                        // duplicates that slip through are filtered on dequeue.
                        if (!visitedUrls.Contains(found))
                        {
                            pendingUrls.Enqueue(found);
                        }
                    }
                }

                // Report the link that was actually processed (the original printed
                // whatever element had shifted into its slot after the removal).
                Console.WriteLine(count + "---" + link);
                count++;
            }

            Console.WriteLine("完毕!");
            Console.ReadKey();
        }