Пример #1
0
        /// <summary>
        /// Reads the crawl settings from the UI controls, records the base URL of
        /// <paramref name="url"/> in globalBaseUrl, and starts surfing with either
        /// SurfingByCEF or SurfingByFCL.
        /// </summary>
        /// <param name="url">The URL to start surfing from.</param>
        /// <param name="act">Callback handed through unchanged to the selected surfing routine.</param>
        public void Surfing(string url, Action<string> act)
        {
            // Read the values from the UI. When the text is not a valid integer,
            // TryParse sets recursionDepth to 0, so the return value is not checked.
            int.TryParse(this.tbx_RecursionDepth.Text, out recursionDepth);

            // Mirror the checkbox state on every call so that a previous "checked" run
            // does not leave the flag stuck at true. Comparing against true (instead of
            // reading .Value) treats an unset (null) state as unchecked rather than throwing.
            isGrabCurrentPageUrl = this.cbx_CurrentPage.IsChecked == true;

            globalBaseUrl = UrlUtil.ExtractBaseUrl(url);

            if (recursionDepth == StartDepth)
            {
                // Use CEF
                SurfingByCEF(url, act);
            }
            else
            {
                SurfingByFCL(url, act);
            }
        }
Пример #2
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, registers every link found in its
        /// anchor elements through AddToCollection, and then crawls each link one level deeper.
        /// </summary>
        /// <remarks>
        /// Nothing is done once <paramref name="depth"/> exceeds the recursionDepth field.
        /// Any exception raised while processing the page is reported through ShowStatusText
        /// instead of being propagated. Nested crawls are started without being awaited, so
        /// this method can complete while they are still running.
        /// </remarks>
        /// <param name="url">Address of the page to scan; it is passed through UrlUtil.FixUrl first.</param>
        /// <param name="depth">Recursion level of this call.</param>
        private async void GetUrlRecursion(string url, int depth)
        {
            // Pause inserted after each registered link before its own crawl is started.
            const int DelayBetweenLinksMs = 3000;

            if (depth > recursionDepth)
            {
                return;
            }

            try
            {
                url = UrlUtil.FixUrl(url);

                string html = await WebUtil.GetHtmlSource(url);

                var recursionBaseUrl = UrlUtil.ExtractBaseUrl(url);

                // Parse the document and walk its links off the calling thread.
                await Task.Run(async () =>
                {
                    var doc = new HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(html);
                    HtmlAgilityPack.HtmlNodeCollection nodeCollection = doc.DocumentNode.SelectNodes("//a");

                    // No anchor elements were selected: nothing to collect.
                    if (nodeCollection == null)
                    {
                        return;
                    }

                    for (int i = 0; i < nodeCollection.Count; i++)
                    {
                        var hrefAttribute = nodeCollection[i].Attributes["href"];
                        if (hrefAttribute == null)
                        {
                            continue;
                        }

                        string extractUrl = hrefAttribute.Value;
                        if (string.IsNullOrEmpty(extractUrl))
                        {
                            continue;
                        }

                        // Root-relative link: prefix it with the base URL of the current page.
                        if (extractUrl.StartsWith("/", StringComparison.Ordinal))
                        {
                            extractUrl = recursionBaseUrl + extractUrl;
                        }

                        AddToCollection(new UrlStruct()
                        {
                            Id = (i + 1), Status = "", Title = "", Url = extractUrl
                        }, globalBaseUrl);

                        // Throttle without blocking a thread-pool thread.
                        await Task.Delay(DelayBetweenLinksMs);

                        // Descend one level. Passing depth unchanged would never trip the
                        // depth guard at the top of this method, so the crawl could not stop.
                        GetUrlRecursion(extractUrl, depth + 1);
                    }
                });
            }
            catch (Exception ex)
            {
                ShowStatusText(ex.Message);
            }
        }