//[HttpGet("get")]
        /// <summary>
        /// Scrapes the page at <paramref name="url"/> through <c>WebScraperModel.GET</c> and returns
        /// the result wrapped in a <c>Response</c> envelope.
        /// </summary>
        /// <param name="url">Address of the page to scrape; defaults to an empty string.</param>
        /// <returns>
        /// 200 with the populated envelope on success, 404 with an "Invalid URL" envelope when the
        /// scraper yields no data, or 400 with an error envelope when the scraper throws.
        /// </returns>
        public async Task <IActionResult> Get(string url = "")
        {
            // Assign through the indexer: Headers.Add throws when the key is already present
            // (for example when CORS middleware has already written the header).
            // NOTE(review): browsers reject credentialed requests when the allowed origin is "*" -
            // confirm whether both headers are really intended together.
            Response.Headers["Access-Control-Allow-Origin"]      = "*";
            Response.Headers["Access-Control-Allow-Credentials"] = "true";

            WebScraperModel model    = null;
            Response        response = new Response(model, "Success", false);

            try
            {
                response.Data = await WebScraperModel.GET(url);

                if (response.Data == null)
                {
                    response.Message = "Invalid URL";
                    response.Error   = true;
                    return(NotFound(response));
                }
            }
            catch (Exception ex)
            {
                // Report the failure in the same envelope as the 404 path instead of serializing
                // the raw exception object (stack trace and internals) to the client.
                response.Message = ex.Message;
                response.Error   = true;
                return(BadRequest(response));
            }

            return(Ok(response));
        }
示例#2
0
        /// <summary>
        /// Forwards <paramref name="model"/> to <c>_userService.PostBlogs</c> and returns the
        /// resulting integer inside an <c>ItemResponse</c> with HTTP status 200.
        /// </summary>
        /// <param name="model">The scraped entry handed to the service.</param>
        /// <returns>An OK response whose body carries the value returned by the service.</returns>
        public HttpResponseMessage PostBlogs(WebScraperModel model)
        {
            var payload = new ItemResponse <int>
            {
                Item = _userService.PostBlogs(model)
            };

            return Request.CreateResponse(HttpStatusCode.OK, payload);
        }
示例#3
0
        /// <summary>
        /// Loads <paramref name="page"/> with <c>HtmlWeb</c> and builds one
        /// <c>WebScraperModel</c> per <c>&lt;article&gt;</c> element found in the document.
        /// </summary>
        /// <param name="page">Address of the page to load.</param>
        /// <returns>
        /// The scraped entries in document order; an empty list when the page has no
        /// <c>&lt;article&gt;</c> elements. Fields whose source node is missing are set to an
        /// empty string.
        /// </returns>
        public List <WebScraperModel> ScrapeData(string page)
        {
            List <WebScraperModel> results = new List <WebScraperModel>();

            var web = new HtmlWeb();
            var doc = web.Load(page);

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var articles = doc.DocumentNode.SelectNodes("//article");

            if (articles == null)
            {
                return(results);
            }

            foreach (var article in articles)
            {
                // SelectSingleNode also returns null on a miss, so each lookup is guarded.
                var titleNode     = article.SelectSingleNode(".//a[@class = 'entry-title-link']");
                var paragraphNode = article.SelectSingleNode(".//p");
                var imageNode     = article.SelectSingleNode(".//img[@class = 'alignleft post-image entry-image']");

                results.Add(new WebScraperModel()
                {
                    Header      = titleNode == null ? string.Empty : HttpUtility.HtmlDecode(titleNode.InnerText),
                    Description = paragraphNode == null ? string.Empty : HttpUtility.HtmlDecode(paragraphNode.InnerText),
                    // "" is the same fallback already used when the src attribute itself is absent.
                    Image       = imageNode == null ? string.Empty : HttpUtility.HtmlDecode(imageNode.GetAttributeValue("src", ""))
                });
            }

            return(results);
        }
示例#4
0
        /// <summary>
        /// Executes the <c>WebScraper_Insert</c> command (presumably a stored procedure) with the
        /// header, description and image of <paramref name="model"/>, and returns the value of
        /// its <c>@Id</c> output parameter.
        /// </summary>
        /// <param name="model">The entry to insert; must not be null.</param>
        /// <returns>
        /// The value of the <c>@Id</c> output parameter, or 0 when the command leaves it unset.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is null.</exception>
        public int PostBlogs(WebScraperModel model)
        {
            // Fail before touching the data provider rather than with a NullReferenceException
            // inside the parameter mapper.
            if (model == null)
            {
                throw new System.ArgumentNullException("model");
            }

            int Id = 0;

            _dataProvider.ExecuteNonQuery(
                "WebScraper_Insert",
                inputParamMapper : delegate(SqlParameterCollection paramCol)
            {
                SqlParameter parm  = new SqlParameter();
                parm.ParameterName = "@Id";
                parm.SqlDbType     = System.Data.SqlDbType.Int;
                parm.Direction     = System.Data.ParameterDirection.Output;
                paramCol.Add(parm);

                // A CLR null value makes ADO.NET omit the parameter; DBNull.Value sends SQL NULL.
                paramCol.AddWithValue("@Header", (object)model.Header ?? System.DBNull.Value);
                paramCol.AddWithValue("@Description", (object)model.Description ?? System.DBNull.Value);
                paramCol.AddWithValue("@Image", (object)model.Image ?? System.DBNull.Value);
            },
                returnParameters : delegate(SqlParameterCollection paramCol)
            {
                // The output stays null/DBNull if the command never assigns it; keep the 0 default
                // in that case instead of failing the cast.
                object rawId = paramCol["@Id"].Value;

                if (rawId != null && rawId != System.DBNull.Value)
                {
                    Id = (int)rawId;
                }
            }
                );
            return(Id);
        }
示例#5
0
        /// <summary>
        /// Opens <paramref name="url"/> with AngleSharp to collect the page title, the
        /// keywords/description meta tags and the anchor links, then opens the same address in a
        /// headless Chrome session to take a screenshot, which is also saved to a
        /// <c>&lt;guid&gt;.png</c> file.
        /// </summary>
        /// <param name="url">Address of the page to inspect.</param>
        /// <returns>
        /// A model holding the title, description, keywords, hyperlinks and screenshot.
        /// Description and keywords are null when the page has no matching meta tag.
        /// </returns>
        private async Task <WebScraperModel> GetPageData(string url)
        {
            var config  = Configuration.Default.WithDefaultLoader();
            var context = BrowsingContext.New(config);

            var document = await context.OpenAsync(url);

            // The head and full markup are re-parsed into standalone documents, as before, so
            // that link resolution via Href behaves exactly as it did.
            var parser       = new AngleSharp.Html.Parser.HtmlParser();
            var headDocument = parser.ParseDocument(document.Head.InnerHtml);
            var fullDocument = parser.ParseDocument(document.DocumentElement.InnerHtml);

            // Materialized once because it is searched twice below.
            var metaTags = headDocument.All.Where(x => x.LocalName == "meta").ToList();

            // A page may omit either meta tag; ?. yields null instead of throwing.
            var keywords    = metaTags.FirstOrDefault(x => x.GetAttribute("Name") == "keywords")?.GetAttribute("Content");
            var description = metaTags.FirstOrDefault(x => x.GetAttribute("Name") == "description")?.GetAttribute("Content");

            // Anchors without an href (e.g. named anchors) are skipped rather than dereferenced.
            var linksPath = fullDocument.QuerySelectorAll("a")
                            .Where(x => x.GetAttribute("href")?.StartsWith("h", StringComparison.Ordinal) == true)
                            .Cast <IHtmlAnchorElement>()
                            .Select(m => m.Href)
                            .ToList();

            WebScraperModel mymodel = new WebScraperModel();

            mymodel.title         = document.Title;
            mymodel.description   = description;
            mymodel.keywords      = keywords;
            mymodel.AllHyperLinks = linksPath;

            ChromeOptions options = new ChromeOptions();

            options.AddArgument("headless");// Remove this argument to see the browser window.
            var driver = new ChromeDriver(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), options);

            try
            {
                driver.Navigate().GoToUrl(url);
                var screenshot = ((ITakesScreenshot)driver).GetScreenshot();

                screenshot.SaveAsFile(Guid.NewGuid() + ".png");
                mymodel.screenShot = screenshot;
            }
            finally
            {
                // Always end the browser session, even when navigation or the capture throws;
                // otherwise the Chrome/chromedriver processes are left running.
                driver.Quit();
            }

            return(mymodel);
        }