/// <summary>
/// Builds the shop menu from the child elements of the first element with class "cs-nav".
/// </summary>
/// <remarks>
/// The first child element is skipped (iteration starts at index 1) and a fixed "Auto"
/// entry is appended after the scraped items.
/// </remarks>
/// <returns>The scraped menu entries followed by the fixed "Auto" entry.</returns>
private IList <Pages.PageComponents.ShopMenu> prepareMenu()
        {
            IList <Pages.PageComponents.ShopMenu> menuDTOArray = new List <Pages.PageComponents.ShopMenu>();

            IElement menuList = document.GetElementsByClassName("cs-nav")[0];

            // Children contains element nodes only, so its indices line up with the element
            // count. Indexing ChildNodes with an element count breaks as soon as the markup
            // contains whitespace text nodes between the items.
            var menuItems = menuList.Children;

            for (int i = 1; i < menuItems.Length; i++)
            {
                // The anchor is taken as the first element child of the menu item.
                IElement aHrefelem = menuItems[i].FirstElementChild;
                if (aHrefelem == null)
                {
                    // Menu item without any element child: nothing to extract.
                    continue;
                }

                HTMLAnchorDetail ancdDetail = extractor.extractAchrorInfo(aHrefelem);

                menuDTOArray.Add(new Pages.PageComponents.ShopMenu {
                    Href = invokedPage.ShopUrl + ancdDetail.Href, ItemName = ancdDetail.Name
                });
            }

            menuDTOArray.Add(
                new Pages.PageComponents.ShopMenu {
                ItemName = "Auto", Href = "http://www.wartonlogo.com/Projectors/Gobos%20Auto-switch/"
            }
                );

            return(menuDTOArray);
        }
Пример #2
0
        /// <summary>
        /// Counts the elements on the page that carry one of the four directory
        /// search-result item classes (region, service, hospital, user).
        /// </summary>
        /// <param name="pageContent">Parsed page to inspect.</param>
        /// <returns>The sum of the four per-class element counts.</returns>
        private static int GetNumberOfDisplayedResults(IHtmlDocument pageContent)
        {
            string[] resultItemClasses =
            {
                "directory-region-search-result-item",
                "directory-service-search-result-item",
                "directory-hospital-search-result-item",
                "directory-user-search-result-item"
            };

            int total = 0;

            foreach (string className in resultItemClasses)
            {
                total += pageContent.GetElementsByClassName(className).Length;
            }

            return(total);
        }
Пример #3
0
        GetDisplayedResultsTextContent(IHtmlDocument pageContent)
        {
            // Collects the text content of every region, service, hospital and user
            // search-result item and returns the four sequences as a tuple, in that order.
            // Each sequence is a deferred query over the document.
            var regionTexts = from item in pageContent.GetElementsByClassName("directory-region-search-result-item")
                              select item.TextContent;

            var serviceTexts = from item in pageContent.GetElementsByClassName("directory-service-search-result-item")
                               select item.TextContent;

            var hospitalTexts = from item in pageContent.GetElementsByClassName("directory-hospital-search-result-item")
                                select item.TextContent;

            var userTexts = from item in pageContent.GetElementsByClassName("directory-user-search-result-item")
                            select item.TextContent;

            return(regionTexts, serviceTexts, hospitalTexts, userTexts);
        }
Пример #4
0
 /// <summary>
 /// Returns the elements of the document that carry the full set of option classes.
 /// </summary>
 /// <param name="document">Parsed page to search.</param>
 /// <returns>The collection of matching elements.</returns>
 private IHtmlCollection <IElement> GetOpcoes(IHtmlDocument document)
 {
     // Space-separated class list passed as a single class-name query.
     const string optionClasses =
         "collection-item waves-effect waves-on-white-bg grey-text " +
         "text-darken-4 d-flex flex-wrap flex-md-nowrap justify-between " +
         "align-items-center p-relative";

     return(document.GetElementsByClassName(optionClasses));
 }
        /// <summary>
        /// Reads the text of the first element with the "last booking" class.
        /// </summary>
        /// <param name="htmlDoc">Parsed page to search.</param>
        /// <returns>
        /// The element text, or an empty string when the element is missing or its text is null.
        /// </returns>
        public string ScrapeMostRecentBooking(IHtmlDocument htmlDoc)
        {
            var candidates  = htmlDoc.GetElementsByClassName(ScrapingConstants.LastBookingClass);
            var lastBooking = candidates.FirstOrDefault();

            if (lastBooking == null)
            {
                return(string.Empty);
            }

            var bookingText = lastBooking.GetElementText();

            return(bookingText ?? string.Empty);
        }
Пример #6
0
        /// <summary>
        /// Downloads <paramref name="url"/> through the proxies in <c>proxyList</c>, trying them
        /// in order until one succeeds, and returns the product tiles of the parsed page.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="htmlDocument">Receives the parsed document of the first successful download.</param>
        /// <returns>The elements with class "g-i-tile-catalog" from the downloaded page.</returns>
        /// <exception cref="Exception">
        /// No proxy produced a usable page. The failure of the last attempted proxy, if any,
        /// is attached as the inner exception.
        /// </exception>
        private IHtmlCollection <IElement> GetPageWithProduct(string url, out IHtmlDocument htmlDocument)
        {
            Exception lastError = null;

            foreach (var proxy in proxyList)
            {
                try
                {
                    // Proxy entries are expected in "host:port" form.
                    string[] fulladress = proxy.Split(":");
                    WebProxy myproxy    = new WebProxy(fulladress[0], int.Parse(fulladress[1]));
                    myproxy.BypassProxyOnLocal = false;

                    WebRequest request = WebRequest.Create(url);
                    request.Method = "GET";
                    request.Proxy  = myproxy;

                    string html;

                    // Dispose the response as well as the stream and reader so the underlying
                    // connection is released after every attempt.
                    using (WebResponse response = request.GetResponse())
                    using (Stream stream = response.GetResponseStream())
                    using (StreamReader reader = new StreamReader(stream))
                    {
                        html = reader.ReadToEnd();
                    }

                    HtmlParser parser = new HtmlParser();
                    htmlDocument = parser.ParseDocument(html);
                    return(htmlDocument.GetElementsByClassName("g-i-tile-catalog"));
                }
                catch (Exception ex)
                {
                    // Best effort: a failing proxy just means the next one is tried.
                    // Remember the cause so the final error is diagnosable.
                    lastError = ex;
                }
            }

            throw new Exception("Proxy isn`t working", lastError);
        }
Пример #7
0
        /// <summary>
        /// Extracts author, publication date, title and body text from the first
        /// "post__wrapper" element of the page.
        /// </summary>
        /// <param name="document">Parsed page to read.</param>
        /// <returns>
        /// The populated model, or <c>null</c> when the page has no "post__wrapper" element.
        /// Missing text fields are null; a missing or unparseable date is <see cref="DateTime.MinValue"/>.
        /// </returns>
        public HabraModel Parse(IHtmlDocument document)
        {
            var post = document.GetElementsByClassName("post__wrapper").FirstOrDefault();

            if (post == null)
            {
                return(null);
            }

            // The publication time is read from the data-time_published attribute.
            var publishedAt = DateTime.MinValue;
            var rawDate     = post.GetElementsByClassName("post__time")
                              .FirstOrDefault()
                              ?.GetAttribute("data-time_published")
                              ?.Trim();

            if (!string.IsNullOrEmpty(rawDate))
            {
                DateTime.TryParse(rawDate, out publishedAt);
            }

            // The body is the first div whose id starts with "post-content-body".
            var bodyElement = post.QuerySelectorAll("div")
                              .FirstOrDefault(div => div.LocalName == "div" &&
                                              div.HasAttribute("id") &&
                                              div.GetAttribute("id").StartsWith("post-content-body"));

            return(new HabraModel()
            {
                Author = post.GetElementsByClassName("user-info__nickname user-info__nickname_small")
                         .FirstOrDefault()?.TextContent?.Trim(),
                Content  = bodyElement?.TextContent?.Trim(),
                PostDate = publishedAt,
                Title    = post.GetElementsByClassName("post__title-text")
                           .FirstOrDefault()?.TextContent?.Trim()
            });
        }
Пример #8
0
        /// <summary>
        /// Extracts the synopsis text from the page, one line per br-separated fragment.
        /// </summary>
        /// <param name="document">Parsed page to read.</param>
        /// <returns>The synopsis fragments, stripped of HTML and trimmed, joined with new lines.</returns>
        /// <exception cref="Exception">No synopsis element was found on the page.</exception>
        private static string ParseSiteContent_Cinemagia(IHtmlDocument document)
        {
            // The wrapper id depends on whether the page has an "expand_sinopsis" element.
            var    hasExpandableSynopsis = document.GetElementsByClassName("expand_sinopsis").Length > 0;
            string wrapperId;

            if (hasExpandableSynopsis)
            {
                wrapperId = "body_sinopsis";
            }
            else
            {
                wrapperId = "short_body_sinopsis";
            }

            // Prefer a paragraph directly inside the wrapper; otherwise use the wrapper itself.
            var synopsis = document.QuerySelectorAll("p").FirstOrDefault(x => x.ParentElement.Id == wrapperId)
                           ?? document.GetElementById(wrapperId);

            if (synopsis == null)
            {
                throw new Exception("Element not found on page!");
            }

            var fragments = synopsis.InnerHtml
                            .Split(new string[] { "<br>" }, StringSplitOptions.None)
                            .Select(fragment => fragment.StripHtml().Trim());

            return(string.Join(Environment.NewLine, fragments));
        }
        /// <summary>
        /// Reads the review summary (score word, score, best possible score, review count)
        /// from the first gallery-review element of the page.
        /// </summary>
        /// <param name="htmlDoc">Parsed page to read.</param>
        /// <returns>
        /// The review, or <c>null</c> when the page has no gallery-review element.
        /// Numeric fields that are missing or fail to parse are null.
        /// </returns>
        public HotelReview ScrapeHotelReview(IHtmlDocument htmlDoc)
        {
            var galleryReview = htmlDoc.GetElementsByClassName(ScrapingConstants.GalleryReviewClass)
                                .FirstOrDefault();

            if (galleryReview == null)
            {
                return(null);
            }

            // Text of the first descendant with the given class, or null when there is none.
            string TextOf(string className)
            {
                return(galleryReview.GetElementsByClassName(className).FirstOrDefault()?.GetElementText());
            }

            var hasScore       = double.TryParse(TextOf(ScrapingConstants.ReviewScoreValClass), out var score);
            var hasOutOf       = int.TryParse(TextOf(ScrapingConstants.ReviewBestScoreClass), out var outOf);
            var hasReviewCount = int.TryParse(TextOf(ScrapingConstants.ReviewCountClass), out var reviewCount);

            return(new HotelReview {
                Scoreword = TextOf(ScrapingConstants.ReviewScorewordClass),
                Score = hasScore ? score : null,
                ScoreOutOf = hasOutOf ? outOf : null,
                ReviewCount = hasReviewCount ? reviewCount : null
            });
        }
        /// <summary>
        /// Derives the hotel star rating from the class of a child of the rating container.
        /// </summary>
        /// <param name="htmlDoc">Parsed page to read.</param>
        /// <returns>
        /// The first integer found in the matching child class, or <c>null</c> when the
        /// container or the matching class is missing.
        /// </returns>
        public int? ScrapeHotelRatingStars(IHtmlDocument htmlDoc)
        {
            var container = htmlDoc.GetElementsByClassName(ScrapingConstants.HotelRatingContainerClass)
                            .FirstOrDefault();

            if (container == null)
            {
                return(null);
            }

            var starClass = container.GetClassForChildWhereClassStartsWith(ScrapingConstants.StarRatingClassWildcard);

            return(starClass?.GetFirstInteger());
        }
Пример #11
0
        /// <summary>
        /// Collects the href of the first anchor inside every element with class "g".
        /// </summary>
        /// <param name="doc">Parsed page to read.</param>
        /// <returns>
        /// The collected hrefs. Elements without an anchor, or whose first anchor has no
        /// href attribute, are skipped.
        /// </returns>
        private List <string> scrapeDoc(IHtmlDocument doc)
        {
            // Materialize once: the query is both counted and returned.
            var links = doc.GetElementsByClassName("g")
                        .Select(result => result.GetElementsByTagName("a").FirstOrDefault()?.GetAttribute("href"))
                        .Where(href => href != null)
                        .ToList();

            Console.WriteLine(links.Count);

            // Return what was scraped; an always-empty list made the scrape pointless.
            return(links);
        }
Пример #12
0
 /// <summary>
 /// Reads the total result count from the single "case-manager-results-summary" element.
 /// </summary>
 /// <param name="firstPageContent">Parsed first page of the results.</param>
 /// <returns>The second space-separated word of the summary text, converted to an integer.</returns>
 private static int GetTotalActualResults(IHtmlDocument firstPageContent)
 {
     var summary = firstPageContent.GetElementsByClassName("case-manager-results-summary").Single();

     string[] words = summary.TextContent.Split(" ", StringSplitOptions.RemoveEmptyEntries);

     return(Convert.ToInt32(words[1]));
 }
Пример #13
0
 /// <summary>
 /// Builds absolute chapter URLs from the href of every "chapterLink" element.
 /// </summary>
 /// <param name="doc">Parsed page to read; may be null.</param>
 /// <returns>
 /// One URL per "chapterLink" element, or an empty sequence when <paramref name="doc"/> is null.
 /// </returns>
 public IEnumerable <string> ChapterUrls(IHtmlDocument doc)
 {
     if (doc is null)
     {
         return(Array.Empty <string>());
     }

     var chapterUrls = (from link in doc.GetElementsByClassName("chapterLink")
                        select "https://www.mangaeden.com" + link.GetAttribute("href")).ToList();

     return(chapterUrls);
 }
Пример #14
0
        /// <summary>
        /// HTML parser method: reads every "plate-box" element of the listing page, downloads
        /// the product page it links to and adds the resulting product to
        /// <c>ManagerShope.listProduct</c>.
        /// </summary>
        /// <param name="document">HTML source of the listing page.</param>
        /// <param name="countProduct">
        /// Running count of added products; incremented per added product. Processing stops
        /// once it reaches 135.
        /// </param>
        /// <exception cref="Exception">
        /// Any failure while processing; carries the original message and the original
        /// exception as inner exception.
        /// </exception>
        public void Parser(string document, ref int countProduct)
        {
            HtmlParser    htmlParser   = new HtmlParser();
            IHtmlDocument htmlDocument = htmlParser.Parse(document);

            try
            {
                var elements = htmlDocument.GetElementsByClassName("plate-box").ToList();
                for (int item = 0; item < elements.Count; item++)
                {
                    if (countProduct >= 135)
                    {
                        return;
                    }

                    var plate = elements[item];

                    string id = plate.GetElementsByClassName("id")
                                .ToList()[0].QuerySelector("span").TextContent;
                    if (ManagerShope.listProduct.Exists(e => e.id == id))
                    {
                        // Product already known: skip it without counting.
                        continue;
                    }

                    var el = plate.QuerySelectorAll("div")
                             .Where(elem => elem.ClassName == "title")
                             .ToList()[0].QuerySelector("a");

                    // The relative link is cut out of the anchor's outer HTML: drop everything
                    // up to and including the character after the first quote, then drop the
                    // tail starting one character before the first '>'.
                    string url = el.OuterHtml.Remove(0, el.OuterHtml.IndexOf('\"') + 2);
                    url = urlShope + "/" + url.Remove(url.IndexOf('>') - 1);

                    string        sourse    = ConnectorShope.GetContentSimplePage(url).GetAwaiter().GetResult();
                    IHtmlDocument document1 = htmlParser.Parse(sourse);

                    string        nameProduct = GetNameProductProduct(el);
                    string        price       = GetPriceProduct(plate);
                    string        description = GetNameDescriptionProduct(plate);
                    List <string> listPhoto   = GetNameListPhotoProduct(document1);

                    Product product = new Product(price, description, listPhoto, nameProduct, id);
                    product.dataTime = GetDatePrices(price);
                    ManagerShope.listProduct.Add(product);

                    countProduct++;
                }
            }
            catch (Exception e)
            {
                // Same exception type and message as before, but keep the original exception
                // (type and stack trace) reachable through InnerException.
                throw new Exception(e.Message, e);
            }
        }
Пример #15
0
        /// <summary>
        /// Fetches the AllMusic album page for <paramref name="id"/> and returns its site
        /// rating as JSON.
        /// </summary>
        /// <param name="id">Album identifier appended to the AllMusic album base URL.</param>
        /// <returns>
        /// On success: <c>success = true</c>, the parsed <c>site_rating</c> and <c>max_rating = 10</c>.
        /// When the rating element lookup throws or the text is not a number:
        /// <c>success = false</c> with a message. For a non-200 response: the result of
        /// <c>FailRetuenJson()</c>.
        /// </returns>
        public JsonResult AllMusicRatings(string id)
        {
            string rymURL = "https://www.allmusic.com/album/";
            string url    = rymURL + id;

            // One request is enough: the page is parsed straight from this response's body.
            using (HttpResponseMessage response = client.GetAsync(url).Result)
            {
                if (response.StatusCode != HttpStatusCode.OK) /* anything but 200 */
                {
                    return(FailRetuenJson());
                }

                IHtmlDocument document;
                using (Stream body = response.Content.ReadAsStreamAsync().Result)
                {
                    HtmlParser parser = new HtmlParser();
                    document = parser.ParseDocument(body);
                }

                AngleSharp.Dom.IElement allMusicRateElement;
                try
                {
                    allMusicRateElement = document.GetElementsByClassName("allmusic-rating")[0];
                }
                catch (Exception e)
                {
                    return(Json(new { success = false,
                                      result = e.Message }));
                }

                string siteRateString = allMusicRateElement.TextContent.Trim();

                /* check if the number is valid */
                if (!float.TryParse(siteRateString, out float siteRate))
                {
                    return(Json(new
                    {
                        success = false,
                        result = "invalid number" + siteRateString
                    }));
                }

                return(Json(new
                {
                    success = true,
                    site_rating = siteRate,
                    max_rating = 10
                }));
            }
        }
Пример #16
0
        /// <summary>
        /// Downloads a profile page and extracts portrait URL, job name, rank and points.
        /// </summary>
        /// <param name="siteUrl">Address of the profile page.</param>
        /// <returns>
        /// The scraped data, or <c>null</c> when the download was cancelled
        /// (<see cref="OperationCanceledException"/>). Rank and points are -1 when the
        /// corresponding element is missing or its text is not an integer.
        /// </returns>
        internal async Task <RankData> ScrapeProfileData(string siteUrl)
        {
            try
            {
                var response = await GetDataFromUrl(siteUrl);

                HtmlParser    parser   = new HtmlParser();
                IHtmlDocument document = parser.Parse(response);

                var pic = document.GetElementById("character-portrait-image")?.Attributes?.FirstOrDefault(a => a.Name == "src")?.Value;

                // The job name is what follows the 13-character "actor-sprite-" prefix.
                var job = AddSpacesAfterCapitals(document.GetElementsByClassName("allstar-header-icon")?.FirstOrDefault()?.ClassList?.FirstOrDefault(s => s.Contains("actor-sprite-"))?.Substring(13));

                var rankText = document.GetElementsByClassName("header-zone-positions")?.FirstOrDefault()?.GetElementsByClassName("header-rank")?.FirstOrDefault()?.TextContent;
                var aspText  = document.GetElementsByClassName("header-zone-points")?.FirstOrDefault()?.GetElementsByClassName("header-rank")?.FirstOrDefault()?.TextContent;

                // int.TryParse writes 0 to its out argument on failure, so the -1 sentinel
                // has to be restored explicitly.
                int rank;
                if (!int.TryParse(rankText, out rank))
                {
                    rank = -1;
                }

                int asp;
                if (!int.TryParse(aspText, out asp))
                {
                    asp = -1;
                }

                return(new RankData(pic, job, rank, asp));
            }
            catch (OperationCanceledException)
            {
                //Network related error
                return(null);
            }
        }
Пример #17
0
        /// <summary>
        /// Reads the href and data-testid attributes of the first anchor inside every
        /// "pttcd-subnav__item" element.
        /// </summary>
        /// <param name="doc">Parsed page to read.</param>
        /// <returns>One (Href, TestId) pair per sub-navigation item, in document order.</returns>
        private IReadOnlyList <(string Href, string TestId)> GetSubNavLinks(IHtmlDocument doc)
        {
            var subNavItems = doc.GetElementsByClassName("pttcd-subnav__item");
            var links       = new List <(string Href, string TestId)>();

            foreach (var subNavItem in subNavItems)
            {
                var link = subNavItem.GetElementsByTagName("a")[0];

                links.Add((link.GetAttribute("href"), link.GetAttribute("data-testid")));
            }

            return(links);
        }
        /// <summary>
        /// Concatenates the text of every paragraph inside the first hotel-summary container.
        /// </summary>
        /// <param name="htmlDoc">Parsed page to read.</param>
        /// <returns>
        /// The paragraph texts joined without a separator, or an empty string when the
        /// container is missing.
        /// </returns>
        public string ScrapeHotelSummary(IHtmlDocument htmlDoc)
        {
            var container = htmlDoc.GetElementsByClassName(ScrapingConstants.HotelSummaryContainerClass)
                            .FirstOrDefault();

            if (container == null)
            {
                return(string.Empty);
            }

            var summary = string.Empty;

            foreach (var paragraph in container.GetElementsByTagName("p"))
            {
                summary += paragraph.GetElementText();
            }

            return(summary);
        }
Пример #19
0
        /// <summary>
        /// Captures the raw inner HTML of the first three "maincounter-number" elements
        /// as the total cases, deaths and recoveries blobs.
        /// </summary>
        /// <param name="document">Parsed page to read.</param>
        /// <returns>An unprocessed <c>CaseBlobs</c> stamped with the current local time.</returns>
        private static CaseBlobs GetCasesFromHtmlDoc(IHtmlDocument document)
        {
            var counters = document.GetElementsByClassName("maincounter-number");

            // Counters are taken by position: cases, deaths, recoveries.
            return(new CaseBlobs
            {
                DateScraped         = DateTime.Now,
                TotalCasesBlob      = counters[0].InnerHtml,
                TotalDeathsBlob     = counters[1].InnerHtml,
                TotalRecoveriesBlob = counters[2].InnerHtml,
                Processed           = false
            });
        }
Пример #20
0
        /// <summary>
        /// Throws when the page contains a "feedbackPanelERROR" element; does nothing otherwise.
        /// </summary>
        /// <param name="dom">Parsed page to inspect.</param>
        /// <exception cref="CritialPageErrorException">
        /// The error text contains "查無訂位紀錄" (roughly "no booking record found").
        /// </exception>
        /// <exception cref="ArgumentException">Any other error text.</exception>
        private void CheckPageError(IHtmlDocument dom)
        {
            var errorPanels = dom.GetElementsByClassName("feedbackPanelERROR");

            if (!errorPanels.Any())
            {
                return;
            }

            // The message is the text of the first child element of the first error panel.
            var message = errorPanels.First().FirstElementChild.TextContent;

            if (message.Contains("查無訂位紀錄"))
            {
                throw new CritialPageErrorException(message);
            }

            throw new ArgumentException(message);
        }
 /// <summary>
 /// Fills the header fields of <paramref name="shipping"/> from the paragraphs of the first
 /// "col-xs-12 col-sm-7 col-md-8" element of the page.
 /// </summary>
 /// <remarks>
 /// Each value is the paragraph text after its first ": " separator. Any exception is
 /// swallowed, so fields assigned before the failure keep their new values.
 /// </remarks>
 /// <param name="sourse">HTML source of the page.</param>
 /// <param name="shipping">Shipping record to update.</param>
 private void SetHeadInform(string sourse, ref Shipping shipping)
 {
     try
     {
         IHtmlDocument page       = htmlParser.Parse(sourse);
         var           paragraphs = page.GetElementsByClassName("col-xs-12 col-sm-7 col-md-8")[0]
                                    .GetElementsByTagName("p");

         // The first paragraph feeds both the id and the order id.
         string idLine = paragraphs[0].TextContent;
         string loadId = idLine.Remove(0, idLine.IndexOf(": ") + 2);
         shipping.Id      = loadId;
         shipping.idOrder = loadId;

         // The status is fixed; the second paragraph is not read.
         shipping.CurrentStatus = "NewLoad";

         string updatedLine = paragraphs[2].TextContent;
         shipping.LastUpdated = updatedLine.Remove(0, updatedLine.IndexOf(": ") + 2);

         string referenceLine = paragraphs[3].TextContent;
         shipping.CDReference = referenceLine.Remove(0, referenceLine.IndexOf(": ") + 2);
     }
     catch (Exception)
     {
         // Failures are ignored here.
     }
 }
Пример #22
0
        /// <summary>
        /// Extracts the plain-text summary from the first "summary_text" element.
        /// </summary>
        /// <param name="document">Parsed page to read.</param>
        /// <returns>The summary with HTML stripped and surrounding whitespace trimmed.</returns>
        /// <exception cref="Exception">The page has no "summary_text" element.</exception>
        private static string ParseSiteContent_Imdb(IHtmlDocument document)
        {
            var summaryElement = document.GetElementsByClassName("summary_text").FirstOrDefault();

            if (summaryElement == null)
            {
                throw new Exception("Element not found on page!");
            }

            // Turn line-break tags into new lines and decode the angle-bracket entities
            // before the remaining markup is stripped.
            var markup = summaryElement.InnerHtml;
            markup = markup.Replace("<br>", Environment.NewLine);
            markup = markup.Replace("&lt;", "<");
            markup = markup.Replace("&gt;", ">");

            return(markup.StripHtml().Trim());
        }
Пример #23
0
        /// <summary>
        /// Extracts the plain-text description from the first "synopsis" element.
        /// </summary>
        /// <param name="document">Parsed page to read.</param>
        /// <returns>The description with HTML stripped and surrounding whitespace trimmed.</returns>
        /// <exception cref="Exception">The page has no "synopsis" element.</exception>
        private static string ParseSiteContent_Filmvandaag(IHtmlDocument document)
        {
            var synopsisElement = document.GetElementsByClassName("synopsis").FirstOrDefault();

            if (synopsisElement == null)
            {
                throw new Exception("Element not found on page!");
            }

            // Turn line-break tags into new lines and decode the angle-bracket entities
            // before the remaining markup is stripped.
            var markup = synopsisElement.InnerHtml;
            markup = markup.Replace("<br>", Environment.NewLine);
            markup = markup.Replace("&lt;", "<");
            markup = markup.Replace("&gt;", ">");

            return(markup.StripHtml().Trim());
        }
Пример #24
0
        /// <summary>
        /// Extracts title, date, author, category, body text and tags from the first
        /// "post" element of the page.
        /// </summary>
        /// <param name="document">Parsed page to read.</param>
        /// <returns>
        /// The populated model, or <c>null</c> when the page has no "post" element.
        /// Missing text fields and tags are null; a missing or unparseable date is
        /// <see cref="DateTime.MinValue"/>.
        /// </returns>
        public VisualCronModel Parse(IHtmlDocument document)
        {
            var post = document.GetElementsByClassName("post").FirstOrDefault();

            if (post == null)
            {
                return(null);
            }

            // Post date: stays at MinValue unless the text is present and parses.
            var postDate = DateTime.MinValue;
            var rawDate  = post.GetElementsByClassName("post-date").FirstOrDefault()?.TextContent?.Trim();

            if (!string.IsNullOrEmpty(rawDate))
            {
                DateTime.TryParse(rawDate, out postDate);
            }

            // Tags are the trimmed texts of the anchors inside the "post-tags" element.
            var tagContainer = post.GetElementsByClassName("post-tags").FirstOrDefault();

            return(new VisualCronModel()
            {
                Author   = post.GetElementsByClassName("post-author").FirstOrDefault()?.TextContent?.Trim(),
                Content  = post.GetElementsByClassName("post-body text").FirstOrDefault()?.TextContent?.Trim(),
                PostDate = postDate,
                Title    = post.GetElementsByClassName("post-title").FirstOrDefault()?.TextContent?.Trim(),
                Category = post.GetElementsByClassName("post-category").FirstOrDefault()?.TextContent?.Trim(),
                Tags     = tagContainer?.QuerySelectorAll("a").Select(tag => tag.TextContent?.Trim()).ToArray()
            });
        }
Пример #25
0
        /// <summary>
        /// Returns the label of the page that follows the active one in the first
        /// "pagination-number-list" element.
        /// </summary>
        /// <param name="parseElement">Parsed page to read.</param>
        /// <returns>
        /// The text of the span inside the anchor of the list item after the active item,
        /// or an empty string when there is no pagination, no active item, or the active
        /// item is the last one.
        /// </returns>
        private string GetPage(IHtmlDocument parseElement)
        {
            var pagination = parseElement.GetElementsByClassName("pagination-number-list");

            if (pagination.Length == 0)
            {
                return("");
            }

            var pages = pagination[0].GetElementsByTagName("li");

            for (int i = 0; i < pages.Length; i++)
            {
                if (!pages[i].ClassList.Contains("active"))
                {
                    continue;
                }

                int next = i + 1;
                if (next >= pages.Length)
                {
                    // The active page is the last list item: there is no following page.
                    return("");
                }

                return(pages[next].GetElementsByTagName("a")[0].GetElementsByTagName("span")[0].TextContent);
            }

            return("");
        }
Пример #26
0
 /// <summary>
 /// Fills the header fields of <paramref name="shipping"/> from the paragraphs of the first
 /// "col-xs-12 col-sm-7 col-md-8" element of the page.
 /// </summary>
 /// <remarks>
 /// Each value is the paragraph text after its first ": " separator. Any exception is
 /// logged through <c>LogEr.Logerr</c> and not rethrown, so fields assigned before the
 /// failure keep their new values.
 /// </remarks>
 /// <param name="sourse">HTML source of the page.</param>
 /// <param name="shipping">Shipping record to update.</param>
 private void SetHeadInform(string sourse, ref Shipping shipping)
 {
     try
     {
         IHtmlDocument page       = htmlParser.Parse(sourse);
         var           paragraphs = page.GetElementsByClassName("col-xs-12 col-sm-7 col-md-8")[0]
                                    .GetElementsByTagName("p");

         // The first paragraph feeds both the id and the order id.
         string idLine = paragraphs[0].TextContent;
         string loadId = idLine.Remove(0, idLine.IndexOf(": ") + 2);
         shipping.Id      = loadId;
         shipping.idOrder = loadId;

         // The status is fixed; the second paragraph is not read.
         shipping.CurrentStatus = "NewLoad";

         string updatedLine = paragraphs[2].TextContent;
         shipping.LastUpdated = updatedLine.Remove(0, updatedLine.IndexOf(": ") + 2);

         string referenceLine = paragraphs[3].TextContent;
         shipping.CDReference = referenceLine.Remove(0, referenceLine.IndexOf(": ") + 2);
     }
     catch (Exception)
     {
         LogEr.Logerr("Error", $"some data is not added, Load id {shipping.Id}, Url: {shipping.UrlReqvest}", "SetHeadInform", DateTime.Now.ToShortTimeString());
     }
 }
Пример #27
0
        /// <summary>
        /// Downloads the Billboard current-albums chart and returns title and artist of each
        /// chart row as JSON.
        /// </summary>
        /// <returns>
        /// <c>success = true</c> and an <c>albums</c> array of 100 slots; slots beyond the
        /// number of rows found on the page stay null, and rows beyond the 100th are ignored.
        /// </returns>
        public JsonResult ScrapeAlbumChart()
        {
            string topAlbumsUrl = "https://www.billboard.com/charts/current-albums";

            Billboard_Album[] topAlbums = new Billboard_Album[100];

            IHtmlDocument document;

            // Release the HTTP response and its stream as soon as the page is parsed.
            using (HttpResponseMessage request = client.GetAsync(topAlbumsUrl).Result)
            using (Stream response = request.Content.ReadAsStreamAsync().Result)
            {
                HtmlParser parser = new HtmlParser();
                document = parser.ParseDocument(response);
            }

            AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> albums = document.GetElementsByClassName("chart-list-item__first-row chart-list-item__cursor-pointer");

            // Bound by the array too: a page with more rows than slots must not overflow it.
            for (int i = 0; i < albums.Length && i < topAlbums.Length; i++)
            {
                AngleSharp.Dom.IElement album = albums[i];

                string title = album.GetElementsByClassName("chart-list-item__title-text")[0].TextContent.Trim();

                // Some albums wrap the artist name in a child element (e.g. a link), some don't.
                AngleSharp.Dom.IElement artistElement = album.GetElementsByClassName("chart-list-item__artist")[0];
                string artist;

                if (artistElement.ChildElementCount > 0)
                {
                    artist = artistElement.FirstElementChild.TextContent.Trim();
                }
                else
                {
                    artist = artistElement.TextContent.Trim();
                }

                topAlbums[i] = new Billboard_Album
                {
                    Title  = title,
                    Artist = artist
                };
            }

            return(Json(new
            {
                success = true,
                albums = topAlbums
            }));
        }
Пример #28
0
        /// <summary>
        /// Decides whether another result page follows, based on the paging text of the
        /// first "col-xs-6 text-center" element.
        /// </summary>
        /// <remarks>
        /// Compares the token between the first '-' and the next space (the last shown
        /// item) with the run of digits that follows "of " (the total).
        /// </remarks>
        /// <param name="sourse">HTML source of the page.</param>
        /// <returns>
        /// <c>true</c> when the two values differ; <c>false</c> when they are equal, when
        /// the paging element is missing, or when the text contains no "of " marker.
        /// </returns>
        public bool CheckIsNextPage(string sourse)
        {
            IHtmlDocument htmlDocument = htmlParser.Parse(sourse);
            var           elements     = htmlDocument.GetElementsByClassName("col-xs-6 text-center");

            // An empty collection means there is no paging element at all.
            if (elements == null || elements.Length == 0)
            {
                return(false);
            }

            string elementCountPageStr = elements[0].InnerHtml;

            int totalMarker = elementCountPageStr.IndexOf("of ", System.StringComparison.Ordinal);
            if (totalMarker < 0)
            {
                // Unrecognized paging text: the total cannot be determined.
                return(false);
            }

            // Last shown item: text after the first '-' up to the next space (or the end).
            string countOrderStr = elementCountPageStr.Substring(elementCountPageStr.IndexOf('-') + 1);
            int    firstSpace    = countOrderStr.IndexOf(' ');
            if (firstSpace >= 0)
            {
                countOrderStr = countOrderStr.Substring(0, firstSpace);
            }

            // Total: all consecutive digits after "of ", not a fixed two characters, so
            // totals with one digit or with three and more digits compare correctly.
            int totalStart = totalMarker + 3;
            int totalEnd   = totalStart;
            while (totalEnd < elementCountPageStr.Length && char.IsDigit(elementCountPageStr[totalEnd]))
            {
                totalEnd++;
            }
            string fullCountOrderStr = elementCountPageStr.Substring(totalStart, totalEnd - totalStart);

            return(fullCountOrderStr != countOrderStr);
        }
Пример #29
0
        /// <summary>
        /// Extracts the numeric characters of the price shown in the first span of the
        /// first "price" element.
        /// </summary>
        /// <param name="html">HTML source of the page.</param>
        /// <returns>
        /// The numeric characters of the span text in their original order, or an empty
        /// string when the page has no "price" element.
        /// </returns>
        public string Scrap(string html)
        {
            var page          = new HtmlParser().Parse(html);
            var priceElements = page.GetElementsByClassName("price");

            if (!priceElements.Any())
            {
                return("");
            }

            var priceSpan = priceElements.First().GetElementsByTagName("span").First();
            var priceText = priceSpan.TextContent;

            // Keep only the characters classified as numbers; separators and currency are dropped.
            return(new string(priceText.Where(character => char.IsNumber(character)).ToArray()));
        }
Пример #30
0
        /// <summary>
        /// Returns the id of the meal stored for the current <c>url</c>, creating it first
        /// when it does not exist yet.
        /// </summary>
        /// <param name="doc">Parsed recipe page; its first "recipeTitle" element names a new meal.</param>
        /// <returns>The id of the existing or newly saved meal.</returns>
        private int AddMeal(IHtmlDocument doc)
        {
            // The title is read up front, before the lookup.
            var recipeTitle = doc.GetElementsByClassName("recipeTitle").First().TextContent.Trim();

            var storedMeal = context.Meal.SingleOrDefault(m => m.Url == url);

            if (storedMeal == null)
            {
                context.Add(new Meal {
                    Name = recipeTitle, Url = url
                });
                context.SaveChanges();

                // Re-read the saved row to obtain its id.
                storedMeal = context.Meal.Single(m => m.Url == url);
            }

            return(storedMeal.Id);
        }