示例#1
0
        /// <summary>
        /// Finds every <c>img</c> element that has no <c>alt</c> attribute, or an empty one, and
        /// records its outer HTML in <c>ImgsWithNoAlt</c>, mapped to the URL of the response.
        /// </summary>
        /// <param name="response">Scraper response whose <c>Doc</c> is searched and whose <c>Url</c> is stored.</param>
        public void ImgsHelper(ScraperEngineResponse response)
        {
            var imagesMissingAlt = response.Doc.DocumentNode.SelectNodes("//img[not(@alt)] | //img[@alt='']");
            if (imagesMissingAlt == null)
            {
                // No matching image on this page.
                return;
            }

            // The scan is queued with Task.Run and never awaited by this method.
            // NOTE(review): ImgsWithNoAlt is mutated from that task; if it is a plain Dictionary
            // and other code touches it concurrently this is unsynchronised - confirm.
            Task scan = Task.Run(() =>
            {
                foreach (var image in imagesMissingAlt)
                {
                    string markup = image.OuterHtml;

                    // Only the first page on which a given markup string is seen gets recorded.
                    if (markup != null && !ImgsWithNoAlt.ContainsKey(markup))
                    {
                        ImgsWithNoAlt.Add(markup, response.Url);
                    }
                }

                // Notify once, after the whole batch has been processed.
                StateHasChangedDelegate?.Invoke();
            });

            // The task is only parked in this local list; nothing reads it afterwards.
            List<Task> tasks = new List<Task> { scan };
        }
示例#2
0
        /// <summary>
        /// Classifies every <c>meta</c> element named "description" by the length of its
        /// <c>content</c> attribute. Each result is added to the matching list
        /// (<c>EmptyDescriptions</c>, <c>LongDescriptions</c>, <c>ShortDescriptions</c> or
        /// <c>HealthyDescriptions</c>) and then to <c>AllDescriptions</c>.
        /// </summary>
        /// <param name="response">Scraper response whose <c>Doc</c> is searched and whose <c>Url</c> is stored.</param>
        public void DescriptionsHelper(ScraperEngineResponse response)
        {
            var metaNodes = response.Doc.DocumentNode.SelectNodes("//meta[@name='description']");
            if (metaNodes == null)
            {
                return;
            }

            foreach (var meta in metaNodes)
            {
                // "none" is the fallback value requested when the content attribute is absent.
                var content = meta.GetAttributeValue("content", "none");
                TitleDescCheck check;

                if (content == "none" || content == "")
                {
                    check = new TitleDescCheck("Missing description", content, response.Url);
                    EmptyDescriptions.Add(check);
                }
                else if (content.Length > 160)
                {
                    check = new TitleDescCheck("Too long", content, response.Url);
                    LongDescriptions.Add(check);
                }
                else if (content.Length <= 50)
                {
                    check = new TitleDescCheck("Too short", content, response.Url);
                    ShortDescriptions.Add(check);
                }
                else
                {
                    // 51..160 characters.
                    check = new TitleDescCheck("Good", content, response.Url);
                    HealthyDescriptions.Add(check);
                }

                // Every classification also lands in the combined list.
                AllDescriptions.Add(check);
                StateHasChangedDelegate?.Invoke();
            }
        }
示例#3
0
        /// <summary>
        /// Classifies every <c>title</c> element of the page by the length of its inner text.
        /// Each result is added to the matching list (<c>EmptyTitles</c>, <c>LongTitles</c>,
        /// <c>ShortTitles</c> or <c>HealthyTitles</c>) and then to <c>AllTitles</c>.
        /// </summary>
        /// <param name="response">Scraper response whose <c>Doc</c> is searched and whose <c>Url</c> is stored.</param>
        public void TitlesHelper(ScraperEngineResponse response)
        {
            var titleNodes = response.Doc.DocumentNode.SelectNodes("//title");
            if (titleNodes == null)
            {
                return;
            }

            foreach (var node in titleNodes)
            {
                var text = node.InnerText;
                TitleDescCheck check;

                if (node.OuterHtml == null || text == "")
                {
                    check = new TitleDescCheck("Missing title", text, response.Url);
                    EmptyTitles.Add(check);
                }
                else if (text.Length >= 60)
                {
                    check = new TitleDescCheck("Too long", text, response.Url);
                    LongTitles.Add(check);
                }
                else if (text.Length <= 40)
                {
                    check = new TitleDescCheck("Too short", text, response.Url);
                    ShortTitles.Add(check);
                }
                else
                {
                    // 41..59 characters.
                    check = new TitleDescCheck("Good", text, response.Url);
                    HealthyTitles.Add(check);
                }

                // Every classification also lands in the combined list.
                AllTitles.Add(check);
                StateHasChangedDelegate?.Invoke();
            }
        }
示例#4
0
        /// <summary>
        /// Extracts the adverts listed inside the element with id "tads" and appends one
        /// <c>Advert</c> per ad to <c>Reklame</c>, invoking <c>StateHasChangedDelegate</c> after each.
        /// </summary>
        /// <remarks>
        /// A missing cite, heading or description node no longer throws; the corresponding
        /// value is passed as an empty string. Cite text that cannot be parsed as a URL yields
        /// an empty host instead of aborting the whole loop.
        /// </remarks>
        /// <param name="response">Scraper response whose <c>Doc</c> is searched for ads.</param>
        private void GiveAds(ScraperEngineResponse response)
        {
            var doorstep = response.Doc.GetElementbyId("tads");
            if (doorstep == null)
            {
                return;
            }

            HtmlNodeCollection topads;
            try
            {
                // The leading "." keeps the search inside the "tads" container; a bare "//"
                // is evaluated from the document root even when called on a child node.
                topads = doorstep.SelectNodes(".//li[@class='ads-ad']");
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
                throw;
            }

            if (topads == null)
            {
                return;
            }

            foreach (var adresult in topads)
            {
                var url            = adresult.SelectSingleNode(".//cite[@class='UdQCqe']")?.InnerText;
                var filtrirana     = "";
                var filtereddomain = "";

                if (url != null)
                {
                    bool isHttps   = url.StartsWith("https://", StringComparison.Ordinal);
                    bool hasScheme = isHttps || url.StartsWith("http://", StringComparison.Ordinal);

                    // Uri needs a scheme; bare "www.example.com" style text gets "http://".
                    Uri uri;
                    if (Uri.TryCreate(hasScheme ? url : "http://" + url, UriKind.Absolute, out uri))
                    {
                        filtereddomain = uri.Host;

                        // NOTE(review): the two paths below deliberately keep the original,
                        // asymmetric output - https ads strip "www." from the host and leave
                        // the second value empty, all other ads keep the host and fill the
                        // second value with the host minus its first label (so "example.com"
                        // becomes "com"). Confirm which shape Advert actually expects.
                        if (isHttps)
                        {
                            if (filtereddomain.StartsWith("www.", StringComparison.Ordinal))
                            {
                                filtereddomain = filtereddomain.Substring(4);
                            }
                            Console.WriteLine(filtereddomain);
                        }
                        else
                        {
                            // Drop the first label and its dot; a single-label host has nothing left.
                            int firstDot = filtereddomain.IndexOf('.');
                            filtrirana = firstDot >= 0 ? filtereddomain.Substring(firstDot + 1) : "";
                        }
                    }
                }

                // A host never carries a scheme, so the prefix is always required here.
                filtereddomain = "https://" + filtereddomain;

                // InnerText is the raw markup text; HtmlDecode returns the decoded copy.
                var subject = WebUtility.HtmlDecode(adresult.SelectSingleNode(".//h3")?.InnerText ?? "");
                var desc    = adresult.SelectSingleNode(".//div[@class='ads-creative']")?.InnerText ?? "";

                Advert tempres = new Advert(filtereddomain, filtrirana, subject, desc);
                Reklame.Add(tempres);
                StateHasChangedDelegate?.Invoke();
            }
        }