コード例 #1
0
ファイル: TuttiParser.cs プロジェクト: abuhamsa/TuttiBot
        /// <summary>
        /// Loads the tutti.ch search page at <c>this.url</c> in headless Chrome, scrolls down the
        /// page step by step, and extracts the offers from the rendered HTML with HtmlAgilityPack.
        /// </summary>
        /// <remarks>
        /// The original author's note marks this method as obsolete and currently not working.
        /// The XPath expressions rely on hard-coded, generated-looking CSS class names
        /// (e.g. <c>_228GQ _3HmX7</c>), so they presumably break whenever the site markup changes.
        /// </remarks>
        /// <returns>
        /// The offers that could be parsed completely. The list is empty when no offer
        /// container is found in the page.
        /// </returns>
        public List <Offer> loadNextract()
        {
            // Number of scroll steps, pixels per step and pause between steps (milliseconds).
            const int scrollSteps       = 12;
            const int scrollPauseMillis = 300;

            //TODO: maybe move the Chrome handling into its own class
            var chromeOptions = new ChromeOptions();

            chromeOptions.AddArguments(new List<string>()
            {
                "--silent-launch",
                "--no-startup-window",
                "no-sandbox",
                "headless",
            });

            string pageSource;

            // The driver service and the driver each own an external process. Dispose both so
            // that no Chrome/chromedriver process is left behind, even when navigation throws.
            using (var chromeDriverService = ChromeDriverService.CreateDefaultService())
            {
                chromeDriverService.HideCommandPromptWindow = true;    // This is to hide the console.

                using (var driver = new ChromeDriver(chromeDriverService, chromeOptions))
                {
                    driver.Navigate().GoToUrl(this.url);

                    // Scroll down in small steps, pausing after each one, before the HTML is read.
                    IJavaScriptExecutor jse = (IJavaScriptExecutor)driver;

                    for (int step = 0; step < scrollSteps; step++)
                    {
                        jse.ExecuteScript("window.scrollBy(0, 480)", ""); // scroll 480px down
                        Thread.Sleep(scrollPauseMillis);
                    }

                    // Capture the rendered HTML while the browser is still alive.
                    pageSource = driver.PageSource;
                }
            }

            // Parse the captured HTML with HtmlAgilityPack.
            var doc = new HtmlDocument();

            doc.LoadHtml(pageSource);

            List<Offer> offers = new List<Offer>();

            HtmlNodeCollection htmlNodes = doc.DocumentNode.SelectNodes("//div[@class='_228GQ _3HmX7']");

            // SelectNodes yields null (not an empty collection) when nothing matches.
            if (htmlNodes == null)
            {
                return offers;
            }

            // Runs for every "class=_228GQ _3HmX7" div found in the document.
            foreach (HtmlNode node in htmlNodes)
            {
                Offer offer = new Offer();

                // Each such div holds the id, published date, price, thumbnail, title,
                // description and link of one offer.
                try
                {
                    offer.offer_id = sanitize(node.SelectSingleNode("./div[@class='_1abn1 _2KsM3']").Id);

                    // Quote the id in every lookup: unquoted, a non-numeric id would be parsed
                    // as an element name and never match.
                    string offerRoot = "//*[@id='" + offer.offer_id + "']";

                    offer.published   = sanitize(node.SelectSingleNode(offerRoot + "/div/div[1]/div[2]/span").InnerText);
                    offer.price       = sanitize(node.SelectSingleNode(offerRoot + "/div/div[3]/strong").InnerText);
                    offer.thumb_url   = sanitize(node.SelectSingleNode(offerRoot + "/a/div/img/@src").GetAttributeValue("src", string.Empty));
                    offer.title       = sanitize(node.SelectSingleNode(offerRoot + "/div/div[2]/div/a/h4").InnerText);
                    offer.description = sanitize(node.SelectSingleNode(offerRoot + "/div/div[2]/div/p").InnerText);
                    offer.link        = sanitize("https://tutti.ch" + node.SelectSingleNode(offerRoot + "/div/div[2]/div/a/@href").GetAttributeValue("href", string.Empty));

                    offers.Add(offer);
                    Console.WriteLine(offer.ToString());
                }
                catch (NullReferenceException e)
                {
                    // Per the original author's note, PRO entries use a different XPath
                    // ("./div[@class='_1abn1 _2KsM3 _2g4HX']"), so a lookup above returns null.
                    // Such entries are logged and skipped (best effort).
                    Console.WriteLine(e.ToString());
                }
            }

            return offers;
        }