/// <summary>
/// Loads the hard-coded TripAdvisor Newport Beach restaurant listing page and builds one
/// <see cref="WebScrapingModel"/> for every <c>div</c> whose <c>class</c> attribute is exactly "title".
/// </summary>
/// <returns>
/// One model per matching div. <c>Title</c> is the div's inner text; <c>Url</c> is the <c>href</c>
/// of the first anchor inside the div, or an empty string when the div contains no anchor or the
/// anchor has no <c>href</c> attribute.
/// </returns>
public List<WebScrapingModel> GetAll()
{
    // ServicePointManager.SecurityProtocol is a static setting, so this assignment is not scoped to this call.
    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;

    const string url = "https://www.tripadvisor.com/Restaurants-g32780-zfg9901-Newport_Beach_California.html";

    var htmlWeb = new HtmlWeb();
    HtmlDocument document = htmlWeb.Load(url);

    // Select every div whose class attribute is exactly "title" (a missing attribute reads as "").
    var titleNodes = document.DocumentNode.Descendants("div")
        .Where(node => node.GetAttributeValue("class", "") == "title")
        .ToList();

    var results = new List<WebScrapingModel>(titleNodes.Count);
    foreach (var node in titleNodes)
    {
        // A title div without a link must not abort the whole scrape, so fall back to an empty URL.
        var anchor = node.Descendants("a").FirstOrDefault();

        var item = new WebScrapingModel();
        item.Title = node.InnerText;
        item.Url = anchor == null ? "" : anchor.GetAttributeValue("href", "");
        results.Add(item);
    }

    return results;
}
/// <summary>
/// Downloads the page at <c>model.Url</c>, parses it, and returns the text content of the first
/// anchor inside the first element matching the <c>.biz-website</c> selector.
/// </summary>
/// <param name="model">Model whose <c>Url</c> identifies the page to download.</param>
/// <returns>The <c>TextContent</c> of the matched anchor.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="model"/> is null.</exception>
/// <exception cref="System.InvalidOperationException">
/// The downloaded page has no <c>.biz-website</c> element, or that element contains no anchor.
/// </exception>
public object ScrapeSite(WebScrapingModel model)
{
    if (model == null)
    {
        throw new System.ArgumentNullException("model");
    }

    // WebClient is IDisposable; release it as soon as the download has finished.
    string html;
    using (var webClient = new WebClient())
    {
        html = webClient.DownloadString(model.Url);
    }

    var parser = new HtmlParser();
    var document = parser.Parse(html);

    // QuerySelector returns null when nothing matches; report that explicitly instead of
    // letting a NullReferenceException surface from the next dereference.
    var map = document.QuerySelector(".biz-website");
    if (map == null)
    {
        throw new System.InvalidOperationException("No element matching '.biz-website' was found at " + model.Url + ".");
    }

    var link = map.QuerySelector("a");
    if (link == null)
    {
        throw new System.InvalidOperationException("The '.biz-website' element at " + model.Url + " does not contain an anchor.");
    }

    var url = link.TextContent;
    return url;
}
/// <summary>
/// Passes the supplied model to <c>_webScraping.ScrapeSite</c> and wraps the outcome in an HTTP response.
/// </summary>
/// <param name="model">Model forwarded unchanged to the scraping service.</param>
/// <returns>
/// A 200 OK response carrying the value returned by the service, or a 400 Bad Request error
/// response built from the exception when the service call or response creation throws.
/// </returns>
public HttpResponseMessage ScrapeSite(WebScrapingModel model)
{
    HttpResponseMessage response;

    try
    {
        // Kept as object so the response is created with the same generic type argument.
        object scraped = _webScraping.ScrapeSite(model);
        response = Request.CreateResponse(HttpStatusCode.OK, scraped);
    }
    catch (Exception error)
    {
        // Every failure is reported to the client as Bad Request.
        response = Request.CreateErrorResponse(HttpStatusCode.BadRequest, error);
    }

    return response;
}