/// <summary>
/// Persists a scraping result through the persister registered for its result object type.
/// </summary>
/// <param name="scrapingResult">
/// The result to persist. May be null, in which case nothing happens.
/// </param>
/// <returns>A task that completes once the persister has finished (or failed and been logged).</returns>
/// <remarks>
/// Failures reported by the persister's task are logged and not rethrown.
/// </remarks>
private async Task SaveScrapingResult(ScrapingResult scrapingResult)
        {
            if (scrapingResult == null)
            {
                // Some scrapers are only used to start other scrapers. Those don't return results.
                return;
            }

            _logger.LogInformation($"Saving scraping results for {scrapingResult.Url}");

            // Find appropriate persister
            var persister = _persisterFactory.GetPersister(scrapingResult.ResultObjectType);

            if (persister == null)
            {
                // No persister for this type of scraped object
                _logger.LogWarning($"No persister found for object {scrapingResult.ResultObjectType}");
                return;
            }

            // Use reflection to invoke method with generic parameter
            var method = persister.GetType().GetMethod("Persist");

            if (method == null)
            {
                // Without this guard the Invoke call below would fail with a NullReferenceException.
                _logger.LogWarning($"Persister {persister.GetType()} does not expose a Persist method");
                return;
            }

            var persisterTaskResult = (Task)method.Invoke(persister, new[] { scrapingResult.ResultObject });

            if (persisterTaskResult == null)
            {
                // Persist returned nothing to await (e.g. a void method); there is no outcome to observe.
                return;
            }

            try
            {
                // The task must be awaited: inspecting Task.Exception on a task that is still running
                // always yields null, so failures would never be logged and the caller would continue
                // before the data has actually been persisted.
                await persisterTaskResult;
            }
            catch (Exception ex)
            {
                _logger.LogError($"Error occured while persisting URL: {scrapingResult.Url}\n{ex}");
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Returns scraped data for the requested site.
        /// </summary>
        /// <param name="siteName">Site identifier read from the request body.</param>
        /// <returns>
        /// An OK response carrying the result of <c>CheckForUpdates</c> when
        /// <paramref name="siteName"/> is exactly "http://techbitsolution.com";
        /// otherwise an OK response with a null payload.
        /// </returns>
        public async Task<ActionResult<ScrapingResult>> GetScrapedData([FromBody] string siteName)
        {
            // Only this one site is handled; every other value yields an empty OK response.
            if (siteName == "http://techbitsolution.com")
            {
                ScrapingResult updates = await CheckForUpdates(siteName, "Web-Scraper updates");
                return Ok(updates);
            }

            return Ok(null);
        }
        /// <summary>
        /// Runs one user-profile scraping pass and wraps the outcome in a <c>ScrapingResult</c>.
        /// </summary>
        /// <param name="sc">Search criteria forwarded to the scraper.</param>
        /// <param name="pageNo">Page number forwarded to the scraper.</param>
        /// <param name="scraper">Scraper whose <c>StartUserProfilesScraping</c> is invoked.</param>
        /// <returns>
        /// A result holding the profiles returned by the scraper and the "more records" flag
        /// the scraper reported through its out parameter.
        /// </returns>
        private ScrapingResult ScrapingTaskOperation(SearchCriteria sc, int pageNo, LIScraper scraper)
        {
            var outcome = new ScrapingResult();
            bool hasMore;

            outcome.UserProfiles = scraper.StartUserProfilesScraping(sc, pageNo, out hasMore);
            outcome.AreThereMoreRecords = hasMore;

            return outcome;
        }
        /// <summary>
        /// Asynchronously scrapes one page by delegating to <c>StartScraingTask</c>.
        /// </summary>
        /// <param name="sc">Search criteria passed through unchanged.</param>
        /// <param name="pageNo">Page number passed through unchanged.</param>
        /// <param name="scraper">Scraper instance passed through unchanged.</param>
        /// <returns>The result produced by <c>StartScraingTask</c>.</returns>
        public async Task<ScrapingResult> StartScraping(SearchCriteria sc, int pageNo, LIScraper scraper)
        {
            return await StartScraingTask(sc, pageNo, scraper);
        }
Exemplo n.º 5
0
 /// <summary>
 /// Inserts the given scraping result via <c>_scarpingResult.InsertOne</c>.
 /// </summary>
 /// <param name="scapingData">The result to insert.</param>
 /// <returns>The same <paramref name="scapingData"/> instance that was passed in.</returns>
 public ScrapingResult Create(ScrapingResult scapingData)
 {
     // Store first, then hand the same instance back to the caller.
     _scarpingResult.InsertOne(scapingData);

     return scapingData;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Loads the page at <paramref name="url"/> and extracts its title, meta description,
        /// meta keywords, social media links and service hyperlinks.
        /// </summary>
        /// <param name="url">Address of the page to load; also stored as the result's site name.</param>
        /// <param name="results">Not used by this method; kept so existing callers keep compiling.</param>
        /// <returns>
        /// A populated <c>ScrapingResult</c>. <c>Title</c> is null when the page has no
        /// <c>title</c> element. <c>SocialMediaLinks</c> and <c>Hyperlinks</c> are de-duplicated,
        /// and <c>Hyperlinks</c> only keeps links containing
        /// "https://techbitsolution.com/services/".
        /// </returns>
        private async Task <ScrapingResult> GetPageData(string url, List <dynamic> results)
        {
            ScrapingResult result = new ScrapingResult();

            result.Hyperlinks       = new List <string>();
            result.SocialMediaLinks = new List <string>();
            var config   = Configuration.Default.WithDefaultLoader();
            var context  = BrowsingContext.New(config);
            var document = await context.OpenAsync(url);

            result.SiteName = url;

            // A page without a <title> element must not abort the whole scrape.
            var titleElement = document.GetElementsByTagName("title").FirstOrDefault();
            result.Title = titleElement?.InnerHtml;

            foreach (var ele in getAllElements(document.Head))
            {
                // Type check instead of comparing lower-cased tag names and hard-casting.
                var meta = ele as AngleSharp.Html.Dom.IHtmlMetaElement;
                if (meta == null)
                {
                    continue;
                }

                if (meta.Name == "description")
                {
                    result.MetaDescription = meta.Content;
                }
                else if (meta.Name == "keywords")
                {
                    result.MetaKeywords = meta.Content;
                }
            }

            foreach (var ele in getAllElements(document.Body))
            {
                var anchor = ele as AngleSharp.Html.Dom.IHtmlAnchorElement;
                if (anchor == null)
                {
                    continue;
                }

                string href = anchor.Href;
                Uri    parsed;

                // Anchors with an empty or malformed href cannot be classified; skip them
                // instead of letting Uri construction throw.
                if (!Uri.TryCreate(href, UriKind.Absolute, out parsed))
                {
                    continue;
                }

                var host = parsed.Authority;
                if (host == "www.facebook.com" || host == "www.youtube.com" || host == "twitter.com" || host == "www.linkedin.com")
                {
                    result.SocialMediaLinks.Add(href);
                }
                else
                {
                    result.Hyperlinks.Add(href);
                }
            }

            result.SocialMediaLinks = result.SocialMediaLinks.Distinct().ToList();

            // Only links into the services section are reported as hyperlinks.
            result.Hyperlinks = result.Hyperlinks
                                .Where(link => link.Contains("https://techbitsolution.com/services/"))
                                .Distinct()
                                .ToList();
            return(result);
        }