/// <summary>
/// Downloads every link in <c>_links</c> that is not yet marked <c>Checked</c>,
/// extracts the h1-h6 heading elements from the returned HTML and stores them
/// (prefixed by the link URL) in the link's <c>Content</c>.
/// </summary>
/// <remarks>
/// The work is offloaded with <c>Task.Run</c> and split across five
/// <c>Parallel.For</c> iterations that pull links from the shared list.
/// Each link is claimed under <c>_locker</c>, so it is handled by one worker only.
/// </remarks>
/// <returns>A task that completes when all workers have run out of unchecked links.</returns>
public async Task Perform()
{
    // Emitted before any work starts; presumably a header for the per-link
    // numbers reported below - TODO confirm the intended ordering.
    await Notify("Finished:", true);

    await Task.Run(() =>
    {
        Parallel.For(0, 5, i =>
        {
            while (true)
            {
                Link link;

                // Find-and-claim must be one atomic step. Testing for remaining
                // work outside the lock lets another worker take the last link
                // in between, which left 'link' null and crashed on link.Url.
                lock (_locker)
                {
                    link = _links.FirstOrDefault(x => !x.Checked);
                    if (link == null)
                    {
                        break;
                    }

                    link.Checked = true;
                }

                // The Parallel.For body is synchronous, so the async calls in
                // this loop are waited on by blocking rather than awaited.
                string content = HttpClientX.Find(link.Url).Result;

                var headings = new System.Text.StringBuilder();
                try
                {
                    HtmlAgilityPack.HtmlDocument html = new HtmlAgilityPack.HtmlDocument();
                    html.LoadHtml(content);

                    // SelectNodes yields null (not an empty collection) when nothing matches.
                    var nodes = html.DocumentNode.SelectNodes(".//*");
                    if (nodes != null)
                    {
                        foreach (var node in nodes)
                        {
                            string tag = node.Name;

                            // Exactly "h1" .. "h6".
                            if (tag.Length == 2 && tag[0] == 'h' && tag[1] >= '1' && tag[1] <= '6')
                            {
                                headings.Append("\r\n").Append(node.OuterHtml).Append("\r\n");
                            }
                        }
                    }
                }
                catch
                {
                    // Best effort: a page that cannot be parsed keeps whatever
                    // headings were collected so far (possibly none).
                }

                string result = headings.ToString();

                Notify(link.Number.ToString() + " ; ", false).Wait();

                // link.Content is assigned through the application dispatcher.
                Application.Current.Dispatcher.Invoke(() =>
                {
                    link.Content = link.Url + "\r\n" + result;
                });
            }
        });
    });
}
/// <summary>
/// Requests the first Google result page for <paramref name="key"/> and appends
/// the links found inside the element with id "foot" to <c>_pages</c>, each
/// prefixed with "http://google.com".
/// </summary>
/// <param name="key">Search text; it is appended to the query string as-is.</param>
/// <returns>The raw response body of the first result page.</returns>
private async Task<string> GetPages(string key)
{
    await Notify("getting google pages", true);

    // NOTE(review): key is not URL-encoded here, so characters such as '&' or '#'
    // alter the query. Left unchanged because callers may already pass an
    // encoded value - confirm before adding Uri.EscapeDataString.
    string searchurl = "http://google.com/search?q=" + key;
    string resp = await HttpClientX.Find(searchurl);

    var html = new HtmlAgilityPack.HtmlDocument();
    html.LoadHtml(resp);

    // GetElementbyId returns null when the page has no "foot" element; in that
    // case there are no further pages to record and only the first page is used.
    var footer = html.GetElementbyId("foot");
    if (footer != null)
    {
        foreach (var anchor in footer.QuerySelectorAll("a"))
        {
            string href = anchor.GetAttributeValue("href", "");

            // An anchor without href would otherwise add a bare "http://google.com" entry.
            if (href != "")
            {
                _pages.Add("http://google.com" + href);
            }
        }
    }

    return resp;
}
/// <summary>
/// Collects result links for <paramref name="key"/>: loads the first result page
/// via <c>GetPages</c>, then follows entries of <c>_pages</c> until at least
/// <c>Depth</c> hrefs have been gathered or no further page is available.
/// </summary>
/// <param name="key">Search text, passed through to <c>GetPages</c>.</param>
/// <returns>
/// The collected hrefs that contain "http:" or "https:". The list is not
/// truncated to <c>Depth</c>; <c>Depth</c> only decides when paging stops.
/// </returns>
public async Task<List<string>> GetTop(string key)
{
    string resp = await GetPages(key);
    List<string> result = new List<string>();
    int page = 0;

    while (true)
    {
        await Notify("getting website links", true);

        try
        {
            // Page 0 reuses the response already fetched by GetPages.
            // NOTE(review): later pages are read from _pages[page], so _pages[0]
            // is never requested - confirm this offset is intended.
            if (page != 0)
            {
                resp = await HttpClientX.Find(_pages[page]);
            }

            var html = new HtmlAgilityPack.HtmlDocument();
            html.LoadHtml(resp);

            var anchors = html.DocumentNode.QuerySelectorAll("#search div[class=\"g\"] div[class=\"r\"] a");
            foreach (var anchor in anchors)
            {
                // Only anchors that wrap an h3 are taken.
                if (anchor.Name == "a" && anchor.QuerySelectorAll("h3").Count != 0)
                {
                    string href = anchor.GetAttributeValue("href", "");
                    if (href != "")
                    {
                        result.Add(href);
                    }
                }
            }
        }
        catch
        {
            // A page that cannot be fetched or parsed ends paging; the links
            // gathered so far are still returned. Swallowing the error and
            // looping again (as before) retried the same page forever.
            break;
        }

        // The next index must exist before advancing. The previous check
        // (_pages.Count > page) allowed page to reach _pages.Count, and the
        // resulting out-of-range access was swallowed, looping endlessly.
        if (page + 1 < _pages.Count && result.Count < Depth)
        {
            page++;
        }
        else
        {
            break;
        }
    }

    await Notify("list created", true);
    return result.Where(x => x.Contains("http:") || x.Contains("https:")).ToList();
}