/// <summary>
/// Builds the shop menu from the children of the first element carrying the
/// "cs-nav" class in <c>document</c>, then appends a fixed "Auto" entry.
/// </summary>
/// <returns>The scraped menu entries in document order, followed by the "Auto" entry.</returns>
private IList <Pages.PageComponents.ShopMenu> prepareMenu()
{
    var menu = new List<Pages.PageComponents.ShopMenu>();
    IElement navigation = document.GetElementsByClassName("cs-nav")[0];
    int childCount = navigation.ChildElementCount;

    // Iteration starts at index 1, so child node 0 is never turned into a menu entry.
    int index = 1;
    while (index < childCount)
    {
        var entryNode = (IElement)navigation.ChildNodes[index];
        var anchorNode = (IElement)entryNode.ChildNodes[0];
        HTMLAnchorDetail anchor = extractor.extractAchrorInfo(anchorNode);

        var entry = new Pages.PageComponents.ShopMenu
        {
            Href = invokedPage.ShopUrl + anchor.Href,
            ItemName = anchor.Name
        };
        menu.Add(entry);
        index++;
    }

    // Fixed extra entry that is always appended after the scraped ones.
    var autoEntry = new Pages.PageComponents.ShopMenu
    {
        ItemName = "Auto",
        Href = "http://www.wartonlogo.com/Projectors/Gobos%20Auto-switch/"
    };
    menu.Add(autoEntry);

    return menu;
}
/// <summary>
/// Counts the search-result items shown on a directory page.
/// </summary>
/// <param name="pageContent">Parsed page to inspect.</param>
/// <returns>
/// The combined number of elements carrying any of the region, service,
/// hospital or user result-item classes.
/// </returns>
private static int GetNumberOfDisplayedResults(IHtmlDocument pageContent)
{
    string[] resultItemClasses =
    {
        "directory-region-search-result-item",
        "directory-service-search-result-item",
        "directory-hospital-search-result-item",
        "directory-user-search-result-item"
    };

    int total = 0;
    foreach (string resultItemClass in resultItemClasses)
    {
        total += pageContent.GetElementsByClassName(resultItemClass).Length;
    }

    return total;
}
// Projects the TextContent of every element matching each of the four search-result
// item classes (region, service, hospital, user) and returns the four sequences as a
// tuple, in that order.
// NOTE(review): the return type and modifiers of this method are not visible in this
// chunk; the Select projections are presumably LINQ and therefore lazily evaluated - confirm.
GetDisplayedResultsTextContent(IHtmlDocument pageContent) { var regionResults = pageContent.GetElementsByClassName("directory-region-search-result-item") .Select(e => e.TextContent); var serviceResults = pageContent.GetElementsByClassName("directory-service-search-result-item") .Select(e => e.TextContent); var hospitalResults = pageContent.GetElementsByClassName("directory-hospital-search-result-item") .Select(e => e.TextContent); var userResults = pageContent.GetElementsByClassName("directory-user-search-result-item") .Select(e => e.TextContent); return(regionResults, serviceResults, hospitalResults, userResults); }
/// <summary>
/// Returns the elements of <paramref name="document"/> that carry the full set of
/// CSS classes listed below.
/// </summary>
/// <param name="document">Parsed page to search.</param>
/// <returns>The matching elements as returned by <c>GetElementsByClassName</c>.</returns>
private IHtmlCollection <IElement> GetOpcoes(IHtmlDocument document)
{
    // One class token per entry; joined with single spaces into the class-name query.
    string[] classTokens =
    {
        "collection-item",
        "waves-effect",
        "waves-on-white-bg",
        "grey-text",
        "text-darken-4",
        "d-flex",
        "flex-wrap",
        "flex-md-nowrap",
        "justify-between",
        "align-items-center",
        "p-relative"
    };
    string classQuery = string.Join(" ", classTokens);

    return document.GetElementsByClassName(classQuery);
}
/// <summary>
/// Reads the text of the first element marked with
/// <c>ScrapingConstants.LastBookingClass</c>.
/// </summary>
/// <param name="htmlDoc">Parsed page to search.</param>
/// <returns>
/// The element text, or an empty string when no such element exists or its text is null.
/// </returns>
public string ScrapeMostRecentBooking(IHtmlDocument htmlDoc)
{
    var bookingNode = htmlDoc
        .GetElementsByClassName(ScrapingConstants.LastBookingClass)
        .FirstOrDefault();

    if (bookingNode == null)
    {
        return string.Empty;
    }

    var bookingText = bookingNode.GetElementText();
    return bookingText ?? string.Empty;
}
/// <summary>
/// Downloads <paramref name="url"/> through each proxy in <c>proxyList</c> in turn and
/// returns the product tiles of the first page that could be fetched and parsed.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="htmlDocument">Receives the parsed document of the successful download.</param>
/// <returns>The elements carrying the "g-i-tile-catalog" class.</returns>
/// <exception cref="Exception">
/// Thrown when no proxy yields a page; the failure of the last attempted proxy, if any,
/// is attached as the inner exception.
/// </exception>
private IHtmlCollection <IElement> GetPageWithProduct(string url, out IHtmlDocument htmlDocument)
{
    // Remember why the most recent proxy failed so the final error is diagnosable.
    Exception lastFailure = null;

    foreach (var proxy in proxyList)
    {
        try
        {
            WebRequest request = WebRequest.Create(url);
            request.Method = "GET";

            // Proxy entries are "host:port".
            string[] hostAndPort = proxy.Split(":");
            var (host, port) = (hostAndPort[0], int.Parse(hostAndPort[1]));
            WebProxy webProxy = new WebProxy(host, port);
            webProxy.BypassProxyOnLocal = false;
            request.Proxy = webProxy;

            string html;
            // Dispose the response as well as the stream/reader so the connection is released.
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream))
            {
                html = reader.ReadToEnd();
            }

            HtmlParser parser = new HtmlParser();
            htmlDocument = parser.ParseDocument(html);
            return htmlDocument.GetElementsByClassName("g-i-tile-catalog");
        }
        catch (Exception ex)
        {
            // Best effort: a failing proxy is skipped and the next one is tried.
            lastFailure = ex;
        }
    }

    throw new Exception("Proxy isn`t working", lastFailure);
}
/// <summary>
/// Extracts author, publication date, title and body text from the first
/// "post__wrapper" element of <paramref name="document"/>.
/// </summary>
/// <param name="document">Parsed post page.</param>
/// <returns>
/// The populated model, or <c>null</c> when the page has no "post__wrapper" element.
/// Fields whose source element is missing are left null; the date is
/// <see cref="DateTime.MinValue"/> when absent or unparsable.
/// </returns>
public HabraModel Parse(IHtmlDocument document)
{
    var postWrapper = document.GetElementsByClassName("post__wrapper").FirstOrDefault();
    if (postWrapper == null)
    {
        return null;
    }

    // Locate the source elements first.
    var nicknameNode = postWrapper
        .GetElementsByClassName("user-info__nickname user-info__nickname_small")
        .FirstOrDefault();
    var timeNode = postWrapper.GetElementsByClassName("post__time").FirstOrDefault();
    var titleNode = postWrapper.GetElementsByClassName("post__title-text").FirstOrDefault();
    var bodyNode = postWrapper
        .QuerySelectorAll("div")
        .FirstOrDefault(d => d.LocalName == "div"
                             && d.HasAttribute("id")
                             && d.GetAttribute("id").StartsWith("post-content-body"));

    // The publication timestamp lives in an attribute, not in the element text.
    var publishedRaw = timeNode?.GetAttribute("data-time_published")?.Trim();
    var published = DateTime.MinValue;
    if (!string.IsNullOrEmpty(publishedRaw))
    {
        DateTime.TryParse(publishedRaw, out published);
    }

    return new HabraModel
    {
        Author = nicknameNode?.TextContent?.Trim(),
        Content = bodyNode?.TextContent?.Trim(),
        PostDate = published,
        Title = titleNode?.TextContent?.Trim()
    };
}
/// <summary>
/// Extracts the synopsis text from a parsed page.
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>
/// The synopsis, one line per "&lt;br&gt;"-separated fragment, each fragment passed through
/// <c>StripHtml</c> and trimmed, joined with <see cref="Environment.NewLine"/>.
/// </returns>
/// <exception cref="Exception">Thrown when no synopsis element is found.</exception>
private static string ParseSiteContent_Cinemagia(IHtmlDocument document)
{
    // The wrapper id depends on whether the page has an "expand_sinopsis" element.
    bool hasExpandableSynopsis = document.GetElementsByClassName("expand_sinopsis").Any();
    string wrapperId = hasExpandableSynopsis ? "body_sinopsis" : "short_body_sinopsis";

    // Prefer a <p> directly inside the wrapper; otherwise fall back to the wrapper itself.
    var synopsisNode = document.QuerySelectorAll("p")
                           .FirstOrDefault(p => p.ParentElement.Id == wrapperId)
                       ?? document.GetElementById(wrapperId);

    if (synopsisNode == null)
    {
        throw new Exception("Element not found on page!");
    }

    var fragments = synopsisNode.InnerHtml
        .Split(new string[] { "<br>" }, StringSplitOptions.None)
        .Select(fragment => fragment.StripHtml().Trim());

    return string.Join(Environment.NewLine, fragments);
}
/// <summary>
/// Reads the review summary (score word, score, best possible score, review count)
/// from the first element marked with <c>ScrapingConstants.GalleryReviewClass</c>.
/// </summary>
/// <param name="htmlDoc">Parsed page to search.</param>
/// <returns>
/// The review data, or <c>null</c> when the review element is absent. Numeric fields
/// are null when their text is missing or cannot be parsed.
/// </returns>
public HotelReview ScrapeHotelReview(IHtmlDocument htmlDoc)
{
    var reviewRoot = htmlDoc
        .GetElementsByClassName(ScrapingConstants.GalleryReviewClass)
        .FirstOrDefault();
    if (reviewRoot == null)
    {
        return null;
    }

    // Locate the four sub-elements inside the review container.
    var wordNode = reviewRoot.GetElementsByClassName(ScrapingConstants.ReviewScorewordClass).FirstOrDefault();
    var valueNode = reviewRoot.GetElementsByClassName(ScrapingConstants.ReviewScoreValClass).FirstOrDefault();
    var bestNode = reviewRoot.GetElementsByClassName(ScrapingConstants.ReviewBestScoreClass).FirstOrDefault();
    var countNode = reviewRoot.GetElementsByClassName(ScrapingConstants.ReviewCountClass).FirstOrDefault();

    var word = wordNode?.GetElementText();
    bool hasScore = double.TryParse(valueNode?.GetElementText(), out var score);
    bool hasBest = int.TryParse(bestNode?.GetElementText(), out var best);
    bool hasCount = int.TryParse(countNode?.GetElementText(), out var count);

    var review = new HotelReview
    {
        Scoreword = word,
        Score = hasScore ? score : null,
        ScoreOutOf = hasBest ? best : null,
        ReviewCount = hasCount ? count : null
    };
    return review;
}
/// <summary>
/// Derives the star rating from the class name of a child of the first element marked
/// with <c>ScrapingConstants.HotelRatingContainerClass</c>.
/// </summary>
/// <param name="htmlDoc">Parsed page to search.</param>
/// <returns>
/// The result of <c>GetFirstInteger</c> on the matching class name, or <c>null</c> when
/// the container or the class name is missing.
/// </returns>
public int?ScrapeHotelRatingStars(IHtmlDocument htmlDoc)
{
    var container = htmlDoc
        .GetElementsByClassName(ScrapingConstants.HotelRatingContainerClass)
        .FirstOrDefault();
    if (container == null)
    {
        return null;
    }

    var starClassName = container.GetClassForChildWhereClassStartsWith(ScrapingConstants.StarRatingClassWildcard);
    if (starClassName == null)
    {
        return null;
    }

    return starClassName.GetFirstInteger();
}
/// <summary>
/// Collects the target of the first anchor inside every element with class "g".
/// </summary>
/// <param name="doc">Parsed page to search.</param>
/// <returns>
/// The non-empty <c>href</c> values in document order. Elements without an anchor, or
/// whose first anchor has no <c>href</c>, are skipped.
/// </returns>
private List <string> scrapeDoc(IHtmlDocument doc)
{
    // FirstOrDefault instead of First: an element without any anchor must not abort
    // the whole scrape.
    var links = doc.GetElementsByClassName("g")
        .Select(result => result.GetElementsByTagName("a").FirstOrDefault()?.GetAttribute("href"))
        .Where(href => !string.IsNullOrEmpty(href))
        .ToList();

    Console.WriteLine(links.Count);

    // Previously the collected links were discarded and an empty list was returned.
    return links;
}
/// <summary>
/// Reads the total result count from the results-summary element of the first page.
/// </summary>
/// <param name="firstPageContent">Parsed first results page.</param>
/// <returns>
/// The second whitespace-separated token of the single
/// "case-manager-results-summary" element, converted to an integer.
/// </returns>
private static int GetTotalActualResults(IHtmlDocument firstPageContent)
{
    // Single() enforces that exactly one summary element exists.
    var summary = firstPageContent
        .GetElementsByClassName("case-manager-results-summary")
        .Single();

    string[] words = summary.TextContent.Split(" ", StringSplitOptions.RemoveEmptyEntries);

    return Convert.ToInt32(words[1]);
}
/// <summary>
/// Builds absolute chapter URLs from every element with class "chapterLink".
/// </summary>
/// <param name="doc">Parsed page, or <c>null</c>.</param>
/// <returns>
/// One URL per matching element (site prefix followed by the element's <c>href</c>),
/// or an empty sequence when <paramref name="doc"/> is null.
/// </returns>
public IEnumerable <string> ChapterUrls(IHtmlDocument doc)
{
    if (doc is null)
    {
        return Array.Empty<string>();
    }

    var urls = new List<string>();
    foreach (var link in doc.GetElementsByClassName("chapterLink"))
    {
        urls.Add("https://www.mangaeden.com" + link.GetAttribute("href"));
    }

    return urls;
}
// HTML parser method.
/// <summary>
/// Parses a listing page, and for every "plate-box" tile whose id is not yet known,
/// downloads the product page and adds a new product to <c>ManagerShope.listProduct</c>.
/// </summary>
/// <param name="document">HTML source of the listing page.</param>
/// <param name="countProduct">
/// Running count of added products; incremented for each new product. Processing stops
/// once it reaches the product limit.
/// </param>
/// <exception cref="Exception">
/// Thrown when a tile cannot be processed; carries the original message and the
/// original exception as inner exception.
/// </exception>
public void Parser(string document, ref int countProduct)
{
    // Upper bound on the number of products collected.
    const int maxProducts = 135;

    HtmlParser htmlParser = new HtmlParser();
    IHtmlDocument htmlDocument = htmlParser.Parse(document);
    try
    {
        var elements = htmlDocument.GetElementsByClassName("plate-box").ToList();
        for (int item = 0; item < elements.Count; item++)
        {
            if (countProduct >= maxProducts)
            {
                return;
            }

            var tile = elements[item];
            string id = tile.GetElementsByClassName("id")
                .ToList()[0].QuerySelector("span").TextContent;

            // Skip products that were already collected.
            if (ManagerShope.listProduct.Exists(known => known.id == id))
            {
                continue;
            }

            var titleLink = tile.QuerySelectorAll("div")
                .Where(elem => elem.ClassName == "title")
                .ToList()[0].QuerySelector("a");

            // Cut the relative address out of the anchor's markup: drop everything up to
            // two characters past the first quote, then everything from one character
            // before the first '>'.
            string url = titleLink.OuterHtml.Remove(0, titleLink.OuterHtml.IndexOf('\"') + 2);
            url = urlShope + "/" + url.Remove(url.IndexOf('>') - 1);

            // The method is synchronous (ref parameter), so the download is awaited by blocking.
            string sourse = ConnectorShope.GetContentSimplePage(url).GetAwaiter().GetResult();
            IHtmlDocument productDocument = htmlParser.Parse(sourse);

            string nameProduct = GetNameProductProduct(titleLink);
            string price = GetPriceProduct(tile);
            string description = GetNameDescriptionProduct(tile);
            List <string> listPhoto = GetNameListPhotoProduct(productDocument);

            Product product = new Product(price, description, listPhoto, nameProduct, id);
            product.dataTime = GetDatePrices(price);
            ManagerShope.listProduct.Add(product);
            countProduct++;
        }
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so the stack trace is not lost.
        throw new Exception(ex.Message, ex);
    }
}
/// <summary>
/// Fetches the album page for <paramref name="id"/> and returns its rating as JSON.
/// </summary>
/// <param name="id">Album identifier appended to the album base URL.</param>
/// <returns>
/// <c>{ success = true, site_rating, max_rating = 10 }</c> on success;
/// <c>{ success = false, result }</c> when the rating element is missing or its text is
/// not a number; the result of <c>FailRetuenJson()</c> when the response status is not 200.
/// </returns>
public JsonResult AllMusicRatings(string id)
{
    string baseUrl = "https://www.allmusic.com/album/";
    string url = baseUrl + id;

    // The page is requested once; the response and its stream are disposed when done.
    using (HttpResponseMessage response = client.GetAsync(url).Result)
    {
        if (response.StatusCode != HttpStatusCode.OK) /* anything but status 200 */
        {
            return(FailRetuenJson());
        }

        IHtmlDocument document;
        using (Stream body = response.Content.ReadAsStreamAsync().Result)
        {
            HtmlParser parser = new HtmlParser();
            document = parser.ParseDocument(body);
        }

        AngleSharp.Dom.IElement allMusicRateElement;
        try
        {
            allMusicRateElement = document.GetElementsByClassName("allmusic-rating")[0];
        }
        catch (Exception e)
        {
            return(Json(new { success = false, result = e.Message }));
        }

        // Guard against an indexer that yields null instead of throwing for a missing element.
        if (allMusicRateElement == null)
        {
            return(Json(new { success = false, result = "rating element not found" }));
        }

        string siteRateString = allMusicRateElement.TextContent.Trim();

        /* check if the number is valid */
        if (!float.TryParse(siteRateString, out float siteRate))
        {
            return(Json(new { success = false, result = "invalid number" + siteRateString }));
        }

        return(Json(new { success = true, site_rating = siteRate, max_rating = 10 }));
    }
}
/// <summary>
/// Downloads a profile page and extracts portrait URL, job name, rank and points.
/// </summary>
/// <param name="siteUrl">Address of the profile page.</param>
/// <returns>
/// The scraped data, with rank and points set to -1 when their text is missing or not
/// an integer; <c>null</c> when the download was cancelled.
/// </returns>
internal async Task <RankData> ScrapeProfileData(string siteUrl)
{
    try
    {
        var response = await GetDataFromUrl(siteUrl);
        HtmlParser parser = new HtmlParser();
        IHtmlDocument document = parser.Parse(response);

        var pic = document.GetElementById("character-portrait-image")
            ?.Attributes
            ?.FirstOrDefault(a => a.Name == "src")
            ?.Value;

        // The job name is the part of the "actor-sprite-" class after the
        // 13-character prefix.
        var job = AddSpacesAfterCapitals(
            document.GetElementsByClassName("allstar-header-icon")
                ?.FirstOrDefault()
                ?.ClassList
                ?.FirstOrDefault(s => s.Contains("actor-sprite-"))
                ?.Substring(13));

        var rankText = document.GetElementsByClassName("header-zone-positions")
            ?.FirstOrDefault()
            ?.GetElementsByClassName("header-rank")
            ?.FirstOrDefault()
            ?.TextContent;
        var pointsText = document.GetElementsByClassName("header-zone-points")
            ?.FirstOrDefault()
            ?.GetElementsByClassName("header-rank")
            ?.FirstOrDefault()
            ?.TextContent;

        // TryParse writes 0 to its out argument on failure, so the -1 "unknown"
        // marker has to be restored explicitly.
        if (!int.TryParse(rankText, out int rank))
        {
            rank = -1;
        }
        if (!int.TryParse(pointsText, out int asp))
        {
            asp = -1;
        }

        return new RankData(pic, job, rank, asp);
    }
    catch (OperationCanceledException)
    {
        // Network related error
        return(null);
    }
}
/// <summary>
/// Collects the link target and test id of the first anchor inside every
/// "pttcd-subnav__item" element.
/// </summary>
/// <param name="doc">Parsed page to search.</param>
/// <returns>One (href, data-testid) pair per sub-navigation item, in document order.</returns>
private IReadOnlyList <(string Href, string TestId)> GetSubNavLinks(IHtmlDocument doc)
{
    var links = new List<(string Href, string TestId)>();

    foreach (var navItem in doc.GetElementsByClassName("pttcd-subnav__item"))
    {
        // Only the first anchor of each item is considered.
        var link = navItem.GetElementsByTagName("a")[0];
        links.Add((link.GetAttribute("href"), link.GetAttribute("data-testid")));
    }

    return links;
}
/// <summary>
/// Concatenates the text of all paragraphs inside the first element marked with
/// <c>ScrapingConstants.HotelSummaryContainerClass</c>.
/// </summary>
/// <param name="htmlDoc">Parsed page to search.</param>
/// <returns>
/// The paragraph texts joined without a separator, or an empty string when the
/// container is missing.
/// </returns>
public string ScrapeHotelSummary(IHtmlDocument htmlDoc)
{
    var container = htmlDoc
        .GetElementsByClassName(ScrapingConstants.HotelSummaryContainerClass)
        .FirstOrDefault();

    if (container == null)
    {
        return string.Empty;
    }

    var paragraphTexts = container
        .GetElementsByTagName("p")
        .Select(paragraph => paragraph.GetElementText());

    return string.Concat(paragraphTexts);
}
/// <summary>
/// Captures the raw markup of the first three "maincounter-number" elements as an
/// unprocessed <c>CaseBlobs</c> record stamped with the current local time.
/// </summary>
/// <param name="document">Parsed statistics page.</param>
/// <returns>A record holding the cases, deaths and recoveries blobs, in that order.</returns>
private static CaseBlobs GetCasesFromHtmlDoc(IHtmlDocument document)
{
    var counters = document.GetElementsByClassName("maincounter-number");

    return new CaseBlobs
    {
        DateScraped = DateTime.Now,
        // Counters are read by position: 0 = cases, 1 = deaths, 2 = recoveries.
        TotalCasesBlob = counters[0].InnerHtml,
        TotalDeathsBlob = counters[1].InnerHtml,
        TotalRecoveriesBlob = counters[2].InnerHtml,
        Processed = false
    };
}
/// <summary>
/// Throws when the page contains an error panel ("feedbackPanelERROR").
/// </summary>
/// <param name="dom">Parsed page to inspect.</param>
/// <exception cref="CritialPageErrorException">
/// The error text contains "查無訂位紀錄".
/// </exception>
/// <exception cref="ArgumentException">Any other error text.</exception>
private void CheckPageError(IHtmlDocument dom)
{
    var errorPanel = dom.GetElementsByClassName("feedbackPanelERROR").FirstOrDefault();
    if (errorPanel == null)
    {
        // No error panel on the page: nothing to report.
        return;
    }

    // The message is the text of the panel's first child element.
    string message = errorPanel.FirstElementChild.TextContent;

    if (message.Contains("查無訂位紀錄"))
    {
        throw new CritialPageErrorException(message);
    }

    throw new ArgumentException(message);
}
/// <summary>
/// Fills the header fields of <paramref name="shipping"/> from the paragraphs of the
/// first "col-xs-12 col-sm-7 col-md-8" element of the page.
/// </summary>
/// <param name="sourse">HTML source of the load page.</param>
/// <param name="shipping">Record to populate; fields set before a failure keep their values.</param>
/// <remarks>
/// Best effort: a page that cannot be read does not throw, but the failure is written
/// to the trace output instead of being silently discarded.
/// </remarks>
private void SetHeadInform(string sourse, ref Shipping shipping)
{
    // Returns the part of "Label: value" after the first ": "; when the separator is
    // missing the text is returned unchanged instead of losing its first character.
    string ValueAfterSeparator(string text)
    {
        int separatorIndex = text.IndexOf(": ");
        return separatorIndex < 0 ? text : text.Substring(separatorIndex + 2);
    }

    try
    {
        IHtmlDocument htmlDocument = htmlParser.Parse(sourse);
        var paragraphs = htmlDocument.GetElementsByClassName("col-xs-12 col-sm-7 col-md-8")[0]
            .GetElementsByTagName("p");

        // Id and idOrder are both taken from the first paragraph.
        string loadId = ValueAfterSeparator(paragraphs[0].TextContent);
        shipping.Id = loadId;
        shipping.idOrder = loadId;

        // The status is fixed; the second paragraph is not read.
        shipping.CurrentStatus = "NewLoad";
        shipping.LastUpdated = ValueAfterSeparator(paragraphs[2].TextContent);
        shipping.CDReference = ValueAfterSeparator(paragraphs[3].TextContent);
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceWarning("SetHeadInform: header data is incomplete: " + ex.Message);
    }
}
/// <summary>
/// Extracts the plot summary from the first "summary_text" element.
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>
/// The summary with "&lt;br&gt;" turned into <see cref="Environment.NewLine"/>, passed
/// through <c>StripHtml</c> and trimmed.
/// </returns>
/// <exception cref="Exception">Thrown when the summary element is missing.</exception>
private static string ParseSiteContent_Imdb(IHtmlDocument document)
{
    var summaryNode = document.GetElementsByClassName("summary_text").FirstOrDefault();
    if (summaryNode == null)
    {
        throw new Exception("Element not found on page!");
    }

    string markup = summaryNode.InnerHtml.Replace("<br>", Environment.NewLine);

    // NOTE(review): both calls replace a string with itself and therefore change
    // nothing; presumably they were meant to decode escaped angle brackets - confirm.
    markup = markup.Replace("<", "<");
    markup = markup.Replace(">", ">");

    return markup.StripHtml().Trim();
}
/// <summary>
/// Extracts the description from the first "synopsis" element.
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>
/// The description with "&lt;br&gt;" turned into <see cref="Environment.NewLine"/>, passed
/// through <c>StripHtml</c> and trimmed.
/// </returns>
/// <exception cref="Exception">Thrown when the synopsis element is missing.</exception>
private static string ParseSiteContent_Filmvandaag(IHtmlDocument document)
{
    var synopsisNode = document.GetElementsByClassName("synopsis").FirstOrDefault();
    if (synopsisNode == null)
    {
        throw new Exception("Element not found on page!");
    }

    string markup = synopsisNode.InnerHtml.Replace("<br>", Environment.NewLine);

    // NOTE(review): both calls replace a string with itself and therefore change
    // nothing; presumably they were meant to decode escaped angle brackets - confirm.
    markup = markup.Replace("<", "<");
    markup = markup.Replace(">", ">");

    return markup.StripHtml().Trim();
}
/// <summary>
/// Extracts title, date, author, category, body text and tags from the first "post"
/// element of <paramref name="document"/>.
/// </summary>
/// <param name="document">Parsed post page.</param>
/// <returns>
/// The populated model, or <c>null</c> when the page has no "post" element. Fields whose
/// source element is missing are left null; the date is <see cref="DateTime.MinValue"/>
/// when absent or unparsable.
/// </returns>
public VisualCronModel Parse(IHtmlDocument document)
{
    var postNode = document.GetElementsByClassName("post").FirstOrDefault();
    if (postNode == null)
    {
        return null;
    }

    // Locate the source element for each field.
    var titleNode = postNode.GetElementsByClassName("post-title").FirstOrDefault();
    var dateNode = postNode.GetElementsByClassName("post-date").FirstOrDefault();
    var authorNode = postNode.GetElementsByClassName("post-author").FirstOrDefault();
    var categoryNode = postNode.GetElementsByClassName("post-category").FirstOrDefault();
    var bodyNode = postNode.GetElementsByClassName("post-body text").FirstOrDefault();
    var tagsNode = postNode.GetElementsByClassName("post-tags").FirstOrDefault();

    // Post date: stays at MinValue unless the text is present and parsable.
    var postedRaw = dateNode?.TextContent?.Trim();
    var posted = DateTime.MinValue;
    if (!string.IsNullOrEmpty(postedRaw))
    {
        DateTime.TryParse(postedRaw, out posted);
    }

    // Tags: the text of every anchor inside the tags container, if there is one.
    string[] tagNames = null;
    if (tagsNode != null)
    {
        tagNames = tagsNode.QuerySelectorAll("a")
            .Select(tag => tag.TextContent?.Trim())
            .ToArray();
    }

    return new VisualCronModel
    {
        Author = authorNode?.TextContent?.Trim(),
        Content = bodyNode?.TextContent?.Trim(),
        PostDate = posted,
        Title = titleNode?.TextContent?.Trim(),
        Category = categoryNode?.TextContent?.Trim(),
        Tags = tagNames
    };
}
/// <summary>
/// Returns the label of the page that follows the active one in the first
/// "pagination-number-list" element.
/// </summary>
/// <param name="parseElement">Parsed page to inspect.</param>
/// <returns>
/// The text of the first span inside the first anchor of the list item after the
/// active one; an empty string when there is no pagination, no active item, the active
/// item is the last one, or the following item has no anchor/span.
/// </returns>
private string GetPage(IHtmlDocument parseElement)
{
    var pagination = parseElement.GetElementsByClassName("pagination-number-list");
    if (pagination.Length == 0)
    {
        return("");
    }

    var pages = pagination[0].GetElementsByTagName("li");
    for (int i = 0; i < pages.Length; i++)
    {
        if (!pages[i].ClassList.Contains("active"))
        {
            continue;
        }

        // The active item may be the last one: then there is no following page.
        int nextIndex = i + 1;
        if (nextIndex >= pages.Length)
        {
            return("");
        }

        var anchors = pages[nextIndex].GetElementsByTagName("a");
        if (anchors.Length == 0)
        {
            return("");
        }

        var spans = anchors[0].GetElementsByTagName("span");
        return(spans.Length == 0 ? "" : spans[0].TextContent);
    }

    return("");
}
/// <summary>
/// Fills the header fields of <paramref name="shipping"/> from the paragraphs of the
/// first "col-xs-12 col-sm-7 col-md-8" element of the page.
/// </summary>
/// <param name="sourse">HTML source of the load page.</param>
/// <param name="shipping">Record to populate; fields set before a failure keep their values.</param>
/// <remarks>
/// Best effort: a page that cannot be read does not throw; the failure is logged
/// through <c>LogEr.Logerr</c>.
/// </remarks>
private void SetHeadInform(string sourse, ref Shipping shipping)
{
    // Returns the part of "Label: value" after the first ": "; when the separator is
    // missing the text is returned unchanged instead of losing its first character.
    string ValueAfterSeparator(string text)
    {
        int separatorIndex = text.IndexOf(": ");
        return separatorIndex < 0 ? text : text.Substring(separatorIndex + 2);
    }

    try
    {
        IHtmlDocument htmlDocument = htmlParser.Parse(sourse);
        var paragraphs = htmlDocument.GetElementsByClassName("col-xs-12 col-sm-7 col-md-8")[0]
            .GetElementsByTagName("p");

        // Id and idOrder are both taken from the first paragraph.
        string loadId = ValueAfterSeparator(paragraphs[0].TextContent);
        shipping.Id = loadId;
        shipping.idOrder = loadId;

        // The status is fixed; the second paragraph is not read.
        shipping.CurrentStatus = "NewLoad";
        shipping.LastUpdated = ValueAfterSeparator(paragraphs[2].TextContent);
        shipping.CDReference = ValueAfterSeparator(paragraphs[3].TextContent);
    }
    catch (Exception)
    {
        LogEr.Logerr("Error", $"some data is not added, Load id {shipping.Id}, Url: {shipping.UrlReqvest}", "SetHeadInform", DateTime.Now.ToShortTimeString());
    }
}
/// <summary>
/// Downloads the current-albums chart page and returns title and artist of each chart row.
/// </summary>
/// <returns>
/// <c>{ success = true, albums }</c>, where <c>albums</c> is a 100-slot array filled in
/// chart order; slots beyond the number of rows found stay null, and rows beyond the
/// 100th are ignored.
/// </returns>
public JsonResult ScrapeAlbumChart()
{
    string topAlbumsUrl = "https://www.billboard.com/charts/current-albums";
    Billboard_Album[] topAlbums = new Billboard_Album[100];

    // Parse inside the using blocks so response and stream are released afterwards.
    IHtmlDocument document;
    using (HttpResponseMessage request = client.GetAsync(topAlbumsUrl).Result)
    using (Stream response = request.Content.ReadAsStreamAsync().Result)
    {
        HtmlParser parser = new HtmlParser();
        document = parser.ParseDocument(response);
    }

    AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> albums =
        document.GetElementsByClassName("chart-list-item__first-row chart-list-item__cursor-pointer");

    // Bounded by the array size as well, so an unexpected extra row cannot overflow it.
    for (int i = 0; i < albums.Length && i < topAlbums.Length; i++)
    {
        AngleSharp.Dom.IElement row = albums[i];
        string title = row.GetElementsByClassName("chart-list-item__title-text")[0].TextContent.Trim();

        // Some albums wrap the artist in a child element (a link), some don't.
        AngleSharp.Dom.IElement artistElement = row.GetElementsByClassName("chart-list-item__artist")[0];
        string artist = artistElement.ChildElementCount > 0
            ? artistElement.FirstElementChild.TextContent.Trim()
            : artistElement.TextContent.Trim();

        topAlbums[i] = new Billboard_Album {
            Title = title, Artist = artist
        };
    }

    return(Json(new { success = true, albums = topAlbums }));
}
/// <summary>
/// Decides whether another result page follows, based on the paging text of the first
/// "col-xs-6 text-center" element.
/// </summary>
/// <param name="sourse">HTML source of the current page.</param>
/// <returns>
/// <c>true</c> when the number after "-" (last item shown) differs from the number after
/// "of " (total); <c>false</c> when they match, or when the paging element or either
/// marker is missing.
/// </returns>
public bool CheckIsNextPage(string sourse)
{
    // Returns the run of digits at the start of the text, ignoring leading whitespace.
    string LeadingDigits(string text)
    {
        string trimmed = text.TrimStart();
        int end = 0;
        while (end < trimmed.Length && char.IsDigit(trimmed[end]))
        {
            end++;
        }
        return trimmed.Substring(0, end);
    }

    IHtmlDocument htmlDocument = htmlParser.Parse(sourse);
    var elements = htmlDocument.GetElementsByClassName("col-xs-6 text-center");

    // The collection itself is never null; an empty one means there is no paging info.
    if (elements == null || elements.Length == 0)
    {
        return(false);
    }

    string pagingText = elements[0].InnerHtml;
    int dashIndex = pagingText.IndexOf("-");
    int ofIndex = pagingText.IndexOf("of ");
    if (dashIndex < 0 || ofIndex < 0)
    {
        return(false);
    }

    // Read the whole number in both places; the total is no longer cut to two
    // characters, so totals below 10 or of 100 and above compare correctly.
    string lastShown = LeadingDigits(pagingText.Substring(dashIndex + 1));
    string total = LeadingDigits(pagingText.Substring(ofIndex + 3));

    return(total != lastShown);
}
/// <summary>
/// Extracts the numeric characters of the price shown on a page.
/// </summary>
/// <param name="html">HTML source of the page.</param>
/// <returns>
/// The characters of the first span inside the first "price" element for which
/// <see cref="char.IsNumber(char)"/> is true, concatenated in order; an empty string
/// when the page has no "price" element or that element has no span.
/// </returns>
public string Scrap(string html)
{
    HtmlParser parser = new HtmlParser();
    IHtmlDocument doc = parser.Parse(html);

    // A price element without a span is treated like a missing price instead of throwing.
    var priceSpan = doc.GetElementsByClassName("price")
        .FirstOrDefault()
        ?.GetElementsByTagName("span")
        .FirstOrDefault();
    if (priceSpan == null)
    {
        return("");
    }

    char[] numericChars = priceSpan.TextContent
        .Where(c => char.IsNumber(c))
        .ToArray();

    return(new string(numericChars));
}
/// <summary>
/// Returns the id of the meal stored for the current <c>url</c>, creating it from the
/// page's first "recipeTitle" element when it does not exist yet.
/// </summary>
/// <param name="doc">Parsed recipe page.</param>
/// <returns>The id of the existing or newly saved meal.</returns>
private int AddMeal(IHtmlDocument doc)
{
    // The title is read first, so a page without a "recipeTitle" element fails here
    // even when the meal already exists.
    string mealName = doc.GetElementsByClassName("recipeTitle").First().TextContent.Trim();

    var storedMeal = context.Meal.SingleOrDefault(m => m.Url == url);
    if (storedMeal == null)
    {
        context.Add(new Meal {
            Name = mealName, Url = url
        });
        context.SaveChanges();

        // Re-read the saved row to obtain its id.
        storedMeal = context.Meal.Single(m => m.Url == url);
    }

    return storedMeal.Id;
}