/// <summary>
/// Downloads a company detail page and scrapes its fields into a <see cref="CompanyDetails"/>.
/// </summary>
/// <param name="url">
/// Address of the detail page. The record id is the first run of digits in the URL that is
/// preceded by "/" and followed by ".html".
/// </param>
/// <returns>The populated company record.</returns>
/// <exception cref="Exception">
/// Thrown for any failure (download error, missing node, unparsable id) after a five second
/// pause. The message is the original message with the URL appended in parentheses; the
/// original exception is attached as <see cref="Exception.InnerException"/>.
/// </exception>
public static async Task<CompanyDetails> GetDetailContext(string url)
{
    var CDetails = new CompanyDetails();
    try
    {
        HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
        var result = await htmlWeb.LoadFromWebAsync(url);
        var DN = result.DocumentNode;

        var id = System.Text.RegularExpressions.Regex.Match(url, @"(?<=/)\d+(?=\.html)").Value;
        CDetails.Id = Convert.ToInt64(id);

        // Every field is addressed by an absolute XPath; if a node is missing, SelectSingleNode
        // returns null and the resulting exception is reported through the catch block below.
        CDetails.Name = DN.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[3]/div[2]/ul/li[1]/span").InnerText.Replace("公司名称:", "").Trim();
        CDetails.Address = DN.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[3]/div[2]/ul/li[3]/span").InnerText.Replace("公司地址:", "").Trim();
        CDetails.Contect = DN.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[3]/div[2]/ul/li[2]/span").InnerText.Replace("法人代表:", "").Trim();
        // NOTE(review): the label stripped here reads "company fax" but the value is stored in
        // Phone - confirm this is the intended source node.
        CDetails.Phone = DN.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[4]/div[2]/ul/li[7]/span").InnerText.Replace("公司传真:", "").Trim();
        CDetails.Details = DN.SelectSingleNode("/html/body/div[3]/div[2]/div[1]/div[2]/div[2]").InnerText.Replace(" ", "").Trim();
    }
    catch (Exception ex)
    {
        // Pause before reporting the failure, without blocking a thread while waiting.
        await Task.Delay(5000);
        // Keep the original exception as InnerException so its type and stack trace survive.
        throw new Exception(ex.Message + "(" + url + ")", ex);
    }

    return CDetails;
}
/// <summary>
/// Loads a video page and returns the download link found under the element with id
/// "html5video_base".
/// </summary>
/// <param name="videourl">
/// URL of the video page, e.g. one returned by getvideos or supplied by the user.
/// </param>
/// <param name="quality">
/// 0 selects the first link entry (described by the original note as the low resolution
/// version); any other value selects the second entry (described as the HD/SD version).
/// </param>
/// <returns>The value of the selected link's href attribute.</returns>
/// <remarks>
/// The call blocks the current thread until the page has been downloaded. Writes "ok" to the
/// console before returning.
/// </remarks>
public string getdownloadlink(string videourl, int quality)
{
    var loader = new HtmlAgilityPack.HtmlWeb();

    // Fetch the video info page.
    var videoPage = loader.LoadFromWebAsync(videourl).Result;
    var playerChildren = videoPage.GetElementbyId("html5video_base").ChildNodes;

    // The two variants sit side by side under the second child of the player element.
    int variantIndex = quality == 0 ? 0 : 1;
    var anchor = playerChildren[1].ChildNodes[variantIndex].ChildNodes[0];
    string href = anchor.Attributes["href"].Value;

    Console.WriteLine("ok");
    return href;
}
/// <summary>
/// Downloads the listing pages in parallel, collects every non-empty quote found on them and
/// prints the total followed by each quote to the console.
/// </summary>
/// <remarks>
/// One iteration is run for each index in [0, MingJuMaxPage). Each iteration blocks on its page
/// download, and additions to the shared list are serialized through <c>syncLock</c>. The summary
/// is printed only when the parallel loop reports that it ran to completion.
/// </remarks>
public static void Run()
{
    ConsoleHelper.Info("============================Start============================");

    var loader = new HtmlAgilityPack.HtmlWeb();
    var quotes = new List<MingJu>();

    var loopResult = Parallel.For(0, MingJuMaxPage, pageIndex =>
    {
        var taskLabel = $"Task#{pageIndex}";
        // First format argument is index + 1 (presumably a 1-based page number); the other two
        // placeholders are left empty.
        var pageUrl = string.Format(MingJu, pageIndex + 1, string.Empty, string.Empty);
        ConsoleHelper.Info($"Start: {taskLabel}");

        var document = loader.LoadFromWebAsync(pageUrl).Result;
        var entries = document.DocumentNode.SelectNodes("/html/body/div[2]/div[1]/div[2]/div");
        if (entries != null)
        {
            foreach (var entry in entries)
            {
                var text = entry.SelectSingleNode("a[1]")?.InnerText;
                var origin = entry.SelectSingleNode("a[2]")?.InnerText;

                // Entries without quote text are ignored.
                if (!string.IsNullOrEmpty(text))
                {
                    lock (syncLock)
                    {
                        quotes.Add(new Spider.MingJu(text, origin));
                    }
                }
            }
        }

        ConsoleHelper.Success($"Success: {taskLabel}");
    });

    if (!loopResult.IsCompleted)
    {
        return;
    }

    ConsoleHelper.Success($"Total: {quotes.Count}");
    ConsoleHelper.Info("=============================End=============================");
    foreach (var quote in quotes)
    {
        ConsoleHelper.Info($"{quote.Content} —— {quote.Source}");
    }
}
/// <summary>
/// Downloads the Tesouro Direto prices-and-rates page and converts its table rows into
/// <see cref="Titulo"/> instances.
/// </summary>
/// <param name="name">
/// Optional filter. When non-empty, only bonds whose <c>Nome</c> contains this text
/// (compared after invariant upper-casing of both sides) are returned.
/// </param>
/// <returns>
/// The parsed bonds. Rows with five or more cells are created as <c>TipoDeTitulo.Compra</c> using
/// cells 0-4; rows with exactly four cells are created as <c>TipoDeTitulo.Venda</c> using
/// cells 0-3 with a null fourth constructor argument. Rows with fewer than four cells are skipped.
/// </returns>
public static async Task<IEnumerable<Titulo>> FetchAsync(string name = "")
{
    var web = new HtmlAgilityPack.HtmlWeb();
    var doc = await web.LoadFromWebAsync("http://www.tesouro.fazenda.gov.br/tesouro-direto-precos-e-taxas-dos-titulos");

    var tables = doc
        .DocumentNode
        .Descendants()
        .Where(n => n.Name == "table" && n.HasClass("tabelaPrecoseTaxas"));

    var bonds = new List<Titulo>();
    foreach (var table in tables)
    {
        var rows = table
            .Descendants()
            .Where(n => n.Name == "tr" && n.HasClass("camposTesouroDireto"));

        foreach (var row in rows)
        {
            var cells = row
                .Descendants()
                .Where(d => d.Name == "td")
                .ToArray();

            // Both row layouts read at least cells[0..3]; a shorter row cannot be mapped and
            // would otherwise fail with an IndexOutOfRangeException.
            if (cells.Length < 4)
            {
                continue;
            }

            var bond = cells.Length >= 5
                ? new Titulo(
                    cells[0].InnerHtml,
                    cells[1].InnerHtml,
                    cells[2].InnerHtml,
                    StripCurrencyFormatting(cells[3].InnerHtml),
                    StripCurrencyFormatting(cells[4].InnerHtml),
                    TipoDeTitulo.Compra)
                : new Titulo(
                    cells[0].InnerHtml,
                    cells[1].InnerHtml,
                    cells[2].InnerHtml,
                    null,
                    StripCurrencyFormatting(cells[3].InnerHtml),
                    TipoDeTitulo.Venda);

            bonds.Add(bond);
        }
    }

    if (string.IsNullOrEmpty(name))
    {
        return bonds;
    }

    // Upper-case the filter once instead of once per bond.
    var wanted = name.ToUpperInvariant();
    return bonds.Where(t => t.Nome.ToUpperInvariant().Contains(wanted)).ToList();
}

/// <summary>
/// Removes every "R$" marker and every "." from a cell's markup (the dots are presumably
/// thousands separators - confirm against the page).
/// </summary>
private static string StripCurrencyFormatting(string cellHtml)
{
    return cellHtml.Replace("R$", string.Empty).Replace(".", string.Empty);
}
/// <summary>
/// Loads a property listing page and builds a <see cref="Property"/> from the address found on it.
/// </summary>
/// <param name="propertyUrl">
/// Absolute URL of the listing. Its host must equal the configured "TradeMeHostUrl" value.
/// </param>
/// <returns>
/// A property carrying the scraped address, the result of <c>GetHomesValue</c> for that address,
/// and a <c>Cv</c> that is currently the fixed value 900000.
/// </returns>
/// <exception cref="ArgumentException">The URL's host is not the configured host.</exception>
public async Task<Property> GetProperty(string propertyUrl)
{
    var listingUri = new Uri(propertyUrl);
    var actualHost = listingUri.Host;
    var allowedHost = Configuration["TradeMeHostUrl"];
    if (actualHost != allowedHost)
    {
        throw new ArgumentException("Only Trade Me listings are supported");
    }

    var loader = new HtmlAgilityPack.HtmlWeb();
    var page = await loader.LoadFromWebAsync(propertyUrl);

    var addressNode = page.DocumentNode.SelectSingleNode(TradeMeAddressXpath);
    var address = addressNode.InnerText;
    var estimate = GetHomesValue(address);

    return new Property
    {
        Address = address,
        Cv = 900000,
        HomesValue = estimate
    };
}
/// <summary>
/// Marks a URL as visited and, the first time it is seen, downloads and parses the page.
/// </summary>
/// <param name="theUrl">
/// Address to visit. It is lower-cased (culture-invariant) before being used both as the
/// visited-set key and as the request address.
/// </param>
/// <returns>
/// The further pages discovered on the page. Link extraction is not implemented yet, so the
/// list is currently always empty.
/// </returns>
/// <remarks>
/// NOTE(review): lower-casing the whole URL also lower-cases the path and query, which can
/// change which resource is requested on case-sensitive servers - confirm this is acceptable.
/// NOTE(review): a new HttpClient is created per call; a shared instance is generally preferred.
/// </remarks>
public async Task<List<string>> GetWeb(string theUrl)
{
    var morePages = new List<string>();

    // Invariant casing keeps the visited-set key stable regardless of the current culture.
    theUrl = theUrl.ToLowerInvariant();
    if (DoneUrls.Contains(theUrl))
    {
        return morePages;
    }

    DoneUrls.Add(theUrl);

    string pageContent;
    using (var client = new HttpClient())
    {
        pageContent = await client.GetStringAsync(theUrl).ConfigureAwait(continueOnCapturedContext: false);
    }

    // Parse the markup that was just downloaded instead of requesting the same page again.
    var hdoc = new HtmlAgilityPack.HtmlDocument();
    hdoc.LoadHtml(pageContent);

    // TODO: collect the links from hdoc into morePages.
    return morePages;
}
/// <summary>
/// Downloads a county listing page, collects the detail-page links found on it and queues them
/// for further processing.
/// </summary>
/// <param name="CountyName">Key under which the collected links (or the retry entry) are queued.</param>
/// <param name="SourceUrl">Address of the listing page; must not be null, empty or whitespace.</param>
/// <returns>The href values of the links found under the "colist_item_title" headings.</returns>
/// <remarks>
/// On success the links are enqueued on <c>CompanyDetailsQueue</c>. When the page contains no
/// matching heading, the page is re-enqueued on <c>CountyPageUrlQueue</c> and the call fails.
/// </remarks>
/// <exception cref="Exception">
/// Thrown for any failure after a five second pause (ten seconds in the re-queue case, which
/// pauses once before failing and once more while reporting). The message is the original
/// message with the URL appended in parentheses; the original exception is attached as
/// <see cref="Exception.InnerException"/>.
/// </exception>
public static async Task<List<string>> GetPageContextAsync(string CountyName, string SourceUrl)
{
    var urlList = new List<string>();
    try
    {
        HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
        if (string.IsNullOrWhiteSpace(SourceUrl))
        {
            throw new Exception("发现空地址");
        }

        var result = await htmlWeb.LoadFromWebAsync(SourceUrl);
        var list = result.DocumentNode.SelectNodes("//h2[@class='colist_item_title']");
        if (list == null)
        {
            // Nothing matched: treat the download as incomplete and schedule the page again.
            CountyPageUrlQueue.Enqueue(new KeyValuePair<string, string>(CountyName, SourceUrl));
            await Task.Delay(5000);
            throw new Exception("数据下载不完整,已重新入队.");
        }

        foreach (HtmlAgilityPack.HtmlNode item in list)
        {
            urlList.Add(item.SelectSingleNode("a").Attributes["href"].Value);
        }

        CompanyDetailsQueue.Enqueue(new KeyValuePair<string, string[]>(CountyName, urlList.ToArray()));
    }
    catch (Exception ex)
    {
        // Pause before reporting the failure, without blocking a thread while waiting.
        await Task.Delay(5000);
        // Keep the original exception as InnerException so its type and stack trace survive.
        throw new Exception(ex.Message + "(" + SourceUrl + ")", ex);
    }

    return urlList;
}
/// <summary>
/// Scrapes one or more listing pages and returns the videos found on them.
/// </summary>
/// <param name="pagecount">Number of page requests to perform.</param>
/// <param name="querry">
/// Search text; spaces are replaced with '+'. When it is "", the home listing is scraped instead
/// of a search result.
/// </param>
/// <param name="page">
/// When greater than 0, this page number is used for every request instead of the loop index.
/// </param>
/// <returns>
/// A pagedata whose <c>videos</c> holds one entry (link, thumb, title, duration) per video block
/// and whose <c>navigationmax</c> is the last numeric entry of the last pagination list on the
/// most recently loaded page, or 0 when no such entry is found.
/// </returns>
/// <remarks>
/// Blocks the calling thread while each page downloads and writes one console line per video.
/// Blocks whose link points to a pornstar channel, model channel or profile are skipped.
/// NOTE(review): when <paramref name="page"/> is greater than 0, or when the query is empty and
/// page is 0, every iteration requests the same URL, so a pagecount above 1 yields duplicate
/// entries - confirm whether the page number was meant to advance with the loop.
/// </remarks>
public Modals.pagedata getvideos(int pagecount, string querry = "", int page = 0)
{
    var videos = new List<Modals.videosmodels>();
    Modals.pagedata pagedataa = new Modals.pagedata();

    string baseurl;
    if (querry == "")
    {
        baseurl = "http://www.xvideos.com/";
    }
    else
    {
        baseurl = "http://www.xvideos.com/?k=" + querry.Replace(' ', '+');
    }

    for (int i = 0; i < pagecount; i++)
    {
        int pageno = page > 0 ? page : i;

        string pageUrl;
        if (querry != "")
        {
            pageUrl = baseurl + "&p=" + pageno;
        }
        else if (page > 0)
        {
            pageUrl = baseurl + "new/" + (pageno + 1) + "/";
        }
        else
        {
            pageUrl = baseurl;
        }

        // Fetch the listing page.
        var doc2 = new HtmlAgilityPack.HtmlWeb();
        HtmlAgilityPack.HtmlDocument htmlDoc2 = doc2.LoadFromWebAsync(pageUrl).Result;

        pagedataa.navigationmax = ReadNavigationMax(htmlDoc2);

        var elems = htmlDoc2.DocumentNode.SelectNodes("//*[contains(@class,'thumb-block')]");
        if (elems == null)
        {
            // SelectNodes yields null when nothing matches; this page contributes no videos.
            continue;
        }

        foreach (var xd in elems)
        {
            // Descendants() also yields nodes without a class attribute (text nodes, for
            // example), so the class is read with a default instead of being dereferenced.
            var anchor = xd.Descendants()
                .First(node => node.GetAttributeValue("class", "") == "thumb")
                .ChildNodes["a"];

            var elemento = new Modals.videosmodels();
            elemento.link = "http://www.xvideos.com" + anchor.Attributes["href"].Value;
            if (elemento.link.Contains("/pornstar-channels/")
                || elemento.link.Contains("/model-channels/")
                || elemento.link.Contains("/profiles/"))
            {
                continue;
            }

            // Prefer the data-src attribute and fall back to src when it is absent.
            var elemthumb = anchor.ChildNodes["img"];
            var lazySource = elemthumb.Attributes["data-src"];
            elemento.thumb = lazySource != null ? lazySource.Value : elemthumb.Attributes["src"].Value;

            elemento.title = WebUtility.HtmlDecode(xd.ChildNodes[1].ChildNodes["p"].ChildNodes["a"].Attributes["title"].Value);
            elemento.duration = xd.ChildNodes[1].ChildNodes[1].ChildNodes["span"].ChildNodes["span"].InnerText;
            videos.Add(elemento);
            Console.WriteLine(videos.Count - 1 + "===>" + elemento.title);
        }
    }

    pagedataa.videos = videos;
    return pagedataa;
}

/// <summary>
/// Reads the highest page number offered by the page's last pagination element; returns 0 when
/// the page has no pagination, only the "pagination-with-settings" variant, or no numeric entry.
/// </summary>
private static int ReadNavigationMax(HtmlAgilityPack.HtmlDocument document)
{
    var paginations = document.DocumentNode.SelectNodes("//*[contains(@class,'pagination')]");
    if (paginations == null)
    {
        return 0;
    }

    var lastPagination = paginations.Last();
    if (lastPagination.Attributes["class"].Value.Contains("pagination-with-settings"))
    {
        return 0;
    }

    var entryList = lastPagination.ChildNodes["ul"];
    if (entryList == null)
    {
        return 0;
    }

    // The last entry whose text is a number wins; non-numeric entries are ignored.
    int navigationMax = 0;
    foreach (var entry in entryList.ChildNodes)
    {
        int parsed;
        if (int.TryParse(entry.InnerText, out parsed))
        {
            navigationMax = parsed;
        }
    }

    return navigationMax;
}