コード例 #1
0
ファイル: Transform.cs プロジェクト: pkozak2/CeneoETL-Projekt
        /// <summary>
        /// Transforms every Skapiec opinion page of a product and appends the results to <c>listaOpinii</c>.
        /// </summary>
        /// <param name="html">HTML of the first product page. It is used both to read the opinion count and as page 1.</param>
        /// <param name="urlSkapiec">Product URL; page <c>n</c> (n &gt;= 2) is fetched from <c>urlSkapiec + "_komentarze/" + n</c>.</param>
        /// <returns>A task that completes once all pages have been transformed and appended.</returns>
        /// <remarks>
        /// When the opinion count cannot be located or parsed, only the first page is processed.
        /// </remarks>
        private async Task getAllCommentsSkapiec(string html, string urlSkapiec)
        {
            // The page count is derived by dividing the opinion count by this value
            // (presumably the number of opinions Skapiec shows per page).
            const int commentsPerPage = 30;

            HtmlDocument htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            int pagesCount = 1;

            // Locate <div class="opinion"> and, inside it, the <a href="#opinie"> link whose text carries the count.
            // A missing node is an expected situation, so it is handled with null checks instead of exceptions.
            var commentsDiv = htmlDocument.DocumentNode.Descendants("div")
                .FirstOrDefault(o => o.GetAttributeValue("class", null) == "opinion");
            if (commentsDiv != null)
            {
                var commentsLink = commentsDiv.Descendants("a")
                    .FirstOrDefault(o => o.GetAttributeValue("href", null) == "#opinie");
                if (commentsLink != null)
                {
                    // Take the first run of digits from the link text; an empty or overflowing value fails TryParse.
                    string digits = Regex.Match(commentsLink.InnerText.Trim(), @"\d+").Value;

                    int commentsCounter;
                    if (int.TryParse(digits, out commentsCounter) && commentsCounter >= commentsPerPage)
                    {
                        // Ceiling division: a partially filled last page still counts as a page.
                        pagesCount = (commentsCounter % commentsPerPage == 0)
                            ? commentsCounter / commentsPerPage
                            : commentsCounter / commentsPerPage + 1;
                    }
                }
            }

            // Page 1 is the HTML we already have; no extra request is needed.
            listaOpinii.AddRange(await transformSkapiec(html));

            // Remaining pages are downloaded one by one, in order.
            for (int page = 2; page <= pagesCount; page++)
            {
                Extract extractor = new Extract();
                listaOpinii.AddRange(await transformSkapiec(await extractor.ExtractHTML(urlSkapiec + "_komentarze/" + page)));
            }
        }
コード例 #2
0
ファイル: Transform.cs プロジェクト: pkozak2/CeneoETL-Projekt
        /// <summary>
        /// Transforms every Ceneo opinion page of a product and appends the results to <c>listaOpinii</c>.
        /// </summary>
        /// <param name="html">HTML of the first product page. It is used both to read the review count and as page 1.</param>
        /// <param name="urlCeneo">Product URL; page <c>n</c> (n &gt;= 2) is fetched from <c>urlCeneo + "/opinie-" + n</c>.</param>
        /// <returns>A task that completes once all pages have been transformed and appended.</returns>
        /// <remarks>
        /// When the review count cannot be located or parsed, only the first page is processed.
        /// </remarks>
        private async Task getAllCommentsCeneo(string html, string urlCeneo)
        {
            // The page count is derived by dividing the review count by this value
            // (presumably the number of opinions Ceneo shows per page).
            const int commentsPerPage = 10;

            HtmlDocument htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            int pagesCount = 1;

            // The count is read from <span itemprop="reviewCount">. A missing span or non-numeric text
            // is an expected situation, so it is handled explicitly instead of through a catch-all.
            var reviewCountSpan = htmlDocument.DocumentNode.Descendants("span")
                .FirstOrDefault(o => o.GetAttributeValue("itemprop", null) == "reviewCount");
            if (reviewCountSpan != null)
            {
                int commentsCounter;
                if (int.TryParse(reviewCountSpan.InnerText, out commentsCounter) && commentsCounter >= commentsPerPage)
                {
                    // Ceiling division: a partially filled last page still counts as a page.
                    pagesCount = (commentsCounter % commentsPerPage == 0)
                        ? commentsCounter / commentsPerPage
                        : commentsCounter / commentsPerPage + 1;
                }
            }

            // Page 1 is the HTML we already have; no extra request is needed.
            listaOpinii.AddRange(await transformCeneo(html));

            // Remaining pages are downloaded one by one, in order.
            for (int page = 2; page <= pagesCount; page++)
            {
                Extract extractor = new Extract();
                listaOpinii.AddRange(await transformCeneo(await extractor.ExtractHTML(urlCeneo + "/opinie-" + page)));
            }
        }