//[HttpGet("get")] public async Task <IActionResult> Get(string url = "") { Response.Headers.Add("Access-Control-Allow-Origin", "*"); Response.Headers.Add("Access-Control-Allow-Credentials", "true"); WebScraperModel model = null; Response response = new Response(model, "Success", false); try { response.Data = await WebScraperModel.GET(url); if (response.Data == null) { response.Message = "Invalid URL"; response.Error = true; return(NotFound(response)); } } catch (Exception ex) { return(BadRequest(ex)); } return(Ok(response)); }
/// <summary>
/// Stores the posted article through the user service and reports the service's result to the caller.
/// </summary>
/// <param name="model">Article payload forwarded unchanged to <c>_userService.PostBlogs</c>.</param>
/// <returns>An HTTP 200 response whose body is an <c>ItemResponse&lt;int&gt;</c> carrying the value the service returned.</returns>
public HttpResponseMessage PostBlogs(WebScraperModel model)
{
    // The envelope is constructed first, then filled with the service result.
    var envelope = new ItemResponse<int>
    {
        Item = _userService.PostBlogs(model)
    };

    return Request.CreateResponse(HttpStatusCode.OK, envelope);
}
/// <summary>
/// Loads <paramref name="page"/> with <c>HtmlWeb</c> and builds one <see cref="WebScraperModel"/>
/// for every <c>article</c> element found in the document.
/// </summary>
/// <param name="page">Address of the page passed to <c>HtmlWeb.Load</c>.</param>
/// <returns>
/// The scraped articles, in the order the XPath query returned them. The list is empty when the
/// page contains no <c>article</c> element. A header, description or image that is missing from
/// an article is reported as an empty string.
/// </returns>
public List<WebScraperModel> ScrapeData(string page)
{
    List<WebScraperModel> scraped = new List<WebScraperModel>();

    var web = new HtmlWeb();
    var doc = web.Load(page);

    // SelectNodes yields null, not an empty collection, when nothing matches.
    var articles = doc.DocumentNode.SelectNodes("//article");
    if (articles == null)
    {
        return scraped;
    }

    foreach (var article in articles)
    {
        // SelectSingleNode also yields null when the element is absent, so each
        // lookup is checked before it is dereferenced.
        var titleNode = article.SelectSingleNode(".//a[@class = 'entry-title-link']");
        var summaryNode = article.SelectSingleNode(".//p");
        var imageNode = article.SelectSingleNode(".//img[@class = 'alignleft post-image entry-image']");

        var header = titleNode != null
            ? HttpUtility.HtmlDecode(titleNode.InnerText)
            : string.Empty;
        var description = summaryNode != null
            ? HttpUtility.HtmlDecode(summaryNode.InnerText)
            : string.Empty;
        var image = imageNode != null
            ? HttpUtility.HtmlDecode(imageNode.GetAttributeValue("src", ""))
            : string.Empty;

        scraped.Add(new WebScraperModel()
        {
            Header = header,
            Description = description,
            Image = image
        });
    }

    return scraped;
}
/// <summary>
/// Runs the <c>WebScraper_Insert</c> stored procedure for <paramref name="model"/> and returns
/// the value of the procedure's <c>@Id</c> output parameter.
/// </summary>
/// <param name="model">Article whose Header, Description and Image are sent as input parameters.</param>
/// <returns>The integer read from the <c>@Id</c> output parameter after the procedure ran.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="model"/> is null.</exception>
public int PostBlogs(WebScraperModel model)
{
    // Fail before touching the data provider instead of with a NullReferenceException
    // raised from inside the parameter-mapping delegate.
    if (model == null)
    {
        throw new System.ArgumentNullException("model");
    }

    int id = 0;

    _dataProvider.ExecuteNonQuery(
        "WebScraper_Insert",
        inputParamMapper: delegate(SqlParameterCollection paramCol)
        {
            SqlParameter idParam = new SqlParameter();
            idParam.ParameterName = "@Id";
            idParam.SqlDbType = System.Data.SqlDbType.Int;
            idParam.Direction = System.Data.ParameterDirection.Output;
            paramCol.Add(idParam);

            // A CLR null is not transmitted as SQL NULL by SqlClient; DBNull.Value is
            // required for that, so missing fields are mapped explicitly.
            paramCol.AddWithValue("@Header", (object)model.Header ?? System.DBNull.Value);
            paramCol.AddWithValue("@Description", (object)model.Description ?? System.DBNull.Value);
            paramCol.AddWithValue("@Image", (object)model.Image ?? System.DBNull.Value);
        },
        returnParameters: delegate(SqlParameterCollection paramCol)
        {
            id = (int)paramCol["@Id"].Value;
        }
    );

    return id;
}
/// <summary>
/// Collects the title, keyword/description meta tags, hyperlinks and a screenshot for the page at
/// <paramref name="url"/>.
/// </summary>
/// <remarks>
/// The markup is read through AngleSharp. The screenshot comes from a separate headless
/// ChromeDriver session and is also written to a new "&lt;guid&gt;.png" file using a relative path.
/// </remarks>
/// <param name="url">Address opened by both AngleSharp and ChromeDriver.</param>
/// <returns>
/// A populated <see cref="WebScraperModel"/>. <c>keywords</c> and <c>description</c> are null when
/// the corresponding meta tag is absent from the page.
/// </returns>
private async Task<WebScraperModel> GetPageData(string url)
{
    var config = Configuration.Default.WithDefaultLoader();
    var context = BrowsingContext.New(config);
    var document = await context.OpenAsync(url);

    WebScraperModel mymodel = new WebScraperModel();

    var parser = new AngleSharp.Html.Parser.HtmlParser();
    var headDocument = parser.ParseDocument(document.Head.InnerHtml);
    var fullDocument = parser.ParseDocument(document.DocumentElement.InnerHtml);

    var metaTags = headDocument.All.Where(x => x.LocalName == "meta");
    var anchorTags = fullDocument.QuerySelectorAll("a");

    // Pages frequently omit these tags; FirstOrDefault then yields null and must not be dereferenced.
    var keywordsTag = metaTags.FirstOrDefault(x => x.GetAttribute("Name") == "keywords");
    var descriptionTag = metaTags.FirstOrDefault(x => x.GetAttribute("Name") == "description");
    var metaKeywords = keywordsTag != null ? keywordsTag.GetAttribute("Content") : null;
    var metaDescription = descriptionTag != null ? descriptionTag.GetAttribute("Content") : null;

    // Anchors without an href attribute are skipped instead of throwing; OfType drops
    // any matched element that is not an HTML anchor rather than failing the cast.
    var linksPath = anchorTags
        .Where(x =>
        {
            var href = x.GetAttribute("href");
            return href != null && href.StartsWith("h", StringComparison.Ordinal);
        })
        .OfType<IHtmlAnchorElement>()
        .Select(m => m.Href)
        .ToList();

    mymodel.title = document.Title;
    mymodel.description = metaDescription;
    mymodel.keywords = metaKeywords;
    mymodel.AllHyperLinks = linksPath;

    ChromeOptions options = new ChromeOptions();
    options.AddArgument("headless");//Comment if we want to see the window.
    var driver = new ChromeDriver(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), options);
    try
    {
        driver.Navigate().GoToUrl(url);
        var screenshot = ((ITakesScreenshot)driver).GetScreenshot();
        screenshot.SaveAsFile(Guid.NewGuid() + ".png");
        mymodel.screenShot = screenshot;
    }
    finally
    {
        // Always end the browser session, even when navigation or the screenshot fails,
        // so no Chrome/chromedriver process is left running.
        driver.Quit();
    }

    return mymodel;
}