/// <summary>
/// Downloads the resource located at <c>parameters.Url</c> and returns its content as text.
/// </summary>
/// <param name="parameters">Scraping parameters; only <c>Url</c> is read here.</param>
/// <returns>The downloaded content as a string.</returns>
public string GetContentPage(ScrapingParameters parameters)
{
    // The client is disposed as soon as the download has finished (or failed).
    using (WebClient client = new WebClient())
    {
        return client.DownloadString(parameters.Url);
    }
}
/// <summary>
/// Action that passes the request-body parameters to <c>Services.GetContentGitHub</c>.
/// </summary>
/// <param name="parameters">Scraping parameters bound from the request body.</param>
/// <returns>
/// An <c>Ok</c> result wrapping the service output when the model state is valid;
/// otherwise a <c>BadRequest</c> result carrying an explanatory message.
/// </returns>
public ActionResult GetContentGitHub([FromBody] ScrapingParameters parameters)
{
    // Reject invalid input first so the happy path reads straight through.
    if (!ModelState.IsValid)
    {
        return BadRequest("Model not valid, use PostMan to send data!");
    }

    var scraped = Services.GetContentGitHub(parameters);
    return Ok(scraped);
}
/// <summary>
/// Downloads the page at <c>parameters.Url</c>, parses it as HTML and fills a
/// <see cref="GitHub"/> object with the name, user name, bio and repository
/// titles found in the markup.
/// </summary>
/// <param name="parameters">Scraping parameters; only <c>Url</c> is read here.</param>
/// <returns>
/// A populated <see cref="GitHub"/> instance. <c>Name</c>, <c>UserName</c> and <c>Bio</c>
/// are <c>null</c> when the corresponding element is not present in the page;
/// <c>Repositories</c> is an empty list when no repository element is found.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="parameters"/> is null.</exception>
public GitHub GetContentGitHub(ScrapingParameters parameters)
{
    if (parameters == null)
    {
        throw new System.ArgumentNullException(nameof(parameters));
    }

    // NOTE(review): the URL is fetched as-is; if it originates from a client
    // request, consider restricting which hosts may be requested.
    string page;
    using (WebClient wc = new WebClient())
    {
        page = wc.DownloadString(parameters.Url);
    }

    var htmlDocument = new HtmlAgilityPack.HtmlDocument();
    htmlDocument.LoadHtml(page);
    HtmlNode root = htmlDocument.DocumentNode;

    GitHub git = new GitHub();

    // SelectSingleNode yields null when nothing matches, so use ?. instead of
    // dereferencing the result unconditionally.
    git.Name = root
        .SelectSingleNode("//span[@itemprop='name']")
        ?.InnerText;
    git.UserName = root
        .SelectSingleNode("//span[@itemprop='additionalName']")
        ?.InnerText;
    git.Bio = root
        .SelectSingleNode("//div[@class='p-note user-profile-bio mb-2 js-user-profile-bio']")
        ?.InnerText;

    // One Repository per element whose class attribute is exactly
    // "name codeRepository". GetAttributeValue supplies a default for nodes
    // that have no class attribute, so no null dereference can occur.
    // NOTE(review): an earlier attempt matched on itemprop='name codeRepository'
    // instead of class - confirm which attribute the target markup really uses.
    List<Repository> repositories = new List<Repository>();
    foreach (HtmlNode node in root.Descendants())
    {
        if (node.GetAttributeValue("class", string.Empty).Equals("name codeRepository"))
        {
            Repository repository = new Repository();
            repository.Title = node.InnerText;
            repositories.Add(repository);
        }
    }

    git.Repositories = repositories;
    return git;
}