/// <summary>
/// Extracts the synopsis from a page and returns it as plain text, one output line
/// per <c>&lt;br&gt;</c>-separated fragment of the synopsis element's inner HTML.
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>The fragments, HTML-stripped and trimmed, joined with <see cref="Environment.NewLine"/>.</returns>
/// <exception cref="InvalidOperationException">
/// Neither a paragraph inside the synopsis wrapper nor the wrapper itself exists on the page.
/// </exception>
private static string ParseSiteContent_Cinemagia(IHtmlDocument document)
{
    // The wrapper id depends on whether the page contains an "expand_sinopsis" element.
    var expandableSinopsis = document.GetElementsByClassName("expand_sinopsis").Any();
    var synopsisWrapper = expandableSinopsis ? "body_sinopsis" : "short_body_sinopsis";

    // Prefer a <p> whose parent is the wrapper; otherwise fall back to the wrapper element itself.
    var synopsisParagraph =
        document.QuerySelectorAll("p").FirstOrDefault(x => x.ParentElement?.Id == synopsisWrapper)
        ?? document.GetElementById(synopsisWrapper);

    if (synopsisParagraph == null)
    {
        throw new InvalidOperationException("Element not found on page!");
    }

    var lines = synopsisParagraph.InnerHtml
        .Split(new string[] { "<br>" }, StringSplitOptions.None)
        .Select(fragment => fragment.StripHtml().Trim());

    return string.Join(Environment.NewLine, lines);
}
/// <summary>
/// Reads the "nobr &gt; a" links under the "Whereami" element of a post page and
/// populates <c>Forum</c> and <c>ForumGroup</c> from them.
/// </summary>
/// <param name="document">Parsed post page.</param>
private void ParseForum(IHtmlDocument document)
{
    IHtmlCollection<IElement> breadcrumbLinks = document
        .GetElementById("ctl00_cphRoblox_PostView1_ctl00_Whereami1")
        .QuerySelectorAll("nobr > a");

    // The last link supplies the forum URL and name.
    IElement forumLink = breadcrumbLinks[breadcrumbLinks.Length - 1];
    string forumHref = forumLink.Attributes["href"].Value;
    string forumTitle = forumLink.TextContent;
    MatchCollection forumNumbers = Regex.Matches(forumHref, "[0-9]+", RegexOptions.IgnoreCase);

    string groupTitle;
    int groupId;
    if (breadcrumbLinks.Length == 2)
    {
        // With exactly two links a fixed group name and id 0 are used.
        groupTitle = "ROBLOX Forum";
        groupId = 0;
    }
    else
    {
        // Otherwise the second link supplies the group name and the first number in its URL.
        IElement groupLink = breadcrumbLinks[1];
        string groupHref = groupLink.Attributes["href"].Value;
        MatchCollection groupNumbers = Regex.Matches(groupHref, "[0-9]+", RegexOptions.IgnoreCase);
        groupTitle = groupLink.TextContent;
        groupId = int.Parse(groupNumbers[0].Value);
    }

    // NOTE(review): Forum receives the group id and ForumGroup receives the number taken
    // from the forum URL - the ids look crossed; confirm against the constructors.
    Forum = new RobloxForum(groupId, forumTitle);
    ForumGroup = new RobloxForumGroup(int.Parse(forumNumbers[0].Value), groupTitle);
}
/// <summary>
/// Builds a <see cref="WorldsEndMusic"/> from the element with id "inner".
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>
/// The parsed result, or <c>null</c> when <c>IsValidDocument</c> rejects the document
/// or the "inner" element is missing.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is <c>null</c>.</exception>
public override WorldsEndMusic Parse(IHtmlDocument document)
{
    if (document == null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    // The element is looked up only for documents that pass validation.
    var inner = IsValidDocument(document) ? document.GetElementById("inner") : null;
    if (inner == null)
    {
        return null;
    }

    return new WorldsEndMusic { Units = GetUnits(inner) };
}
/// <summary>
/// Fills the delivery fields of <paramref name="shipping"/> (contact name, address, city,
/// state, zip, phone) from the second "col-xs-12 col-sm-6 col-md-4" column under the
/// "sheetBottom" element of the page.
/// </summary>
/// <param name="sourse">HTML source of the page.</param>
/// <param name="shipping">Record to update; fields assigned before a failure keep their new values.</param>
/// <remarks>
/// Best effort: a parsing failure is written to the trace output instead of being thrown.
/// </remarks>
private void SetDeliveryInform(string sourse, ref Shipping shipping)
{
    try
    {
        IHtmlDocument htmlDocument = htmlParser.Parse(sourse);
        var element = htmlDocument.GetElementById("sheetBottom")
            .GetElementsByClassName("col-xs-12 col-sm-6 col-md-4")[1]
            .GetElementsByClassName("panel panel-default")[0]
            .GetElementsByClassName("panel-body")[0].Children;

        // All address parts are sliced out of the text of the first child.
        string blockText = element[0].TextContent;

        // Contact name: first line of the trimmed text.
        shipping.ContactNameD = blockText.Trim();
        shipping.ContactNameD = shipping.ContactNameD.Remove(shipping.ContactNameD.IndexOf('\n'));

        // Address: the line following the name, after dropping an optional leading "(...)" group.
        shipping.AddresD = blockText.Trim();
        shipping.AddresD = shipping.AddresD.Remove(0, shipping.ContactNameD.Length).Trim();
        if (shipping.AddresD[0] == '(')
        {
            shipping.AddresD = shipping.AddresD.Remove(0, shipping.AddresD.IndexOf(')') + 2).Trim();
        }
        shipping.AddresD = shipping.AddresD.Remove(shipping.AddresD.IndexOf('\n')).Trim();

        // City: text between the address and the next comma.
        shipping.CityD = blockText
            .Remove(0, blockText.IndexOf(shipping.AddresD, StringComparison.Ordinal) + shipping.AddresD.Length)
            .Trim();
        shipping.CityD = shipping.CityD.Remove(shipping.CityD.IndexOf(',')).Trim();
        if (shipping.CityD.IndexOf(shipping.AddresD, StringComparison.Ordinal) != -1)
        {
            shipping.CityD = shipping.CityD.Replace(shipping.AddresD, "").Trim();
        }

        // State: the two characters after the city and its two-character separator.
        shipping.StateD = blockText
            .Remove(0, blockText.IndexOf(shipping.CityD, StringComparison.Ordinal) + shipping.CityD.Length + 2)
            .Trim();
        shipping.StateD = shipping.StateD.Remove(2);

        // Zip: whatever follows the last occurrence of the state code.
        shipping.ZipD = blockText
            .Remove(0, blockText.LastIndexOf(shipping.StateD, StringComparison.Ordinal) + 2)
            .Trim();

        // Phone: text of the last child of the panel body.
        shipping.PhoneD = element[element.Length - 1].TextContent;
    }
    catch (Exception ex)
    {
        // Keep the method non-throwing, but leave a record of why the data is incomplete.
        System.Diagnostics.Trace.TraceError("SetDeliveryInform: delivery data is incomplete: " + ex);
    }
}
/// <summary>
/// Replaces <c>shipping.VehiclwInformations</c> with one entry per row of the vehicle table
/// found under the "sheetBottom" element.
/// </summary>
/// <param name="sourse">HTML source of the page.</param>
/// <param name="shipping">Record to update; rows added before a failure are kept.</param>
/// <remarks>
/// Best effort: a parsing failure is written to the trace output instead of being thrown.
/// </remarks>
private void SetVehicleInform(string sourse, ref Shipping shipping)
{
    try
    {
        shipping.VehiclwInformations = new List<VehiclwInformation>();
        IHtmlDocument htmlDocument = htmlParser.Parse(sourse);
        var vehicles = htmlDocument.GetElementById("sheetBottom")
            .GetElementsByClassName("panel panel-default")[0]
            .GetElementsByClassName("panel-body table-responsive")[0]
            .GetElementsByClassName("table table-striped table-hover")[0]
            .GetElementsByTagName("tbody")[0].GetElementsByTagName("tr");

        foreach (var vehicle in vehicles)
        {
            // Cell 0 is not used; cells 1..9 map to the fields below in order.
            var cells = vehicle.Children;
            shipping.VehiclwInformations.Add(new VehiclwInformation
            {
                Year = cells[1].TextContent.Trim(),
                Make = cells[2].TextContent.Trim(),
                Model = cells[3].TextContent.Trim(),
                Type = cells[4].TextContent.Trim(),
                Color = cells[5].TextContent.Trim(),
                Plate = cells[6].TextContent.Trim(),
                VIN = cells[7].TextContent.Trim(),
                Lot = cells[8].TextContent.Trim(),
                AdditionalInfo = cells[9].TextContent.Trim(),
            });
        }
    }
    catch (Exception ex)
    {
        // Keep the method non-throwing, but leave a record of why the data is incomplete.
        System.Diagnostics.Trace.TraceError("SetVehicleInform: vehicle data is incomplete: " + ex);
    }
}
/// <summary>
/// Builds a <see cref="MusicRecent"/> whose units are read from the element with id "inner".
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>
/// The parsed result, or <c>null</c> when the document is not valid or has no "inner" element.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is <c>null</c>.</exception>
public override MusicRecent Parse(IHtmlDocument document)
{
    if (document == null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    if (!IsValidDocument(document))
    {
        return null;
    }

    var inner = document.GetElementById("inner");
    return inner == null
        ? null
        : new MusicRecent { Units = GetUnits(inner) };
}
/// <summary>
/// Builds an <see cref="AimeList"/> whose units are read from the element with id "inner".
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>
/// The parsed list, or <c>null</c> when the document is not valid or has no "inner" element.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is <c>null</c>.</exception>
public override AimeList Parse(IHtmlDocument document)
{
    if (document == null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    AimeList result = null;
    if (IsValidDocument(document))
    {
        var inner = document.GetElementById("inner");
        if (inner != null)
        {
            result = new AimeList { Units = GetUnits(inner) };
        }
    }

    return result;
}
/// <summary>
/// Reads the hidden form fields of the page and stores them in <c>Params</c>.
/// </summary>
/// <param name="document">Parsed page.</param>
/// <remarks>
/// "__VIEWSTATE", "__VIEWSTATEGENERATOR" and "__EVENTVALIDATION" are read unconditionally.
/// "__EVENTTARGET" is optional; an empty string is used when it is absent.
/// NOTE(review): the previous comment named __EVENTARGUMENT, but the element actually read
/// is "__EVENTTARGET" - confirm which field is intended.
/// </remarks>
private void ParseRequestInputParams(IHtmlDocument document)
{
    string viewState = document.GetElementById("__VIEWSTATE").Attributes["value"].Value;
    string viewStateGenerator = document.GetElementById("__VIEWSTATEGENERATOR").Attributes["value"].Value;
    string eventValidation = document.GetElementById("__EVENTVALIDATION").Attributes["value"].Value;

    IElement eventTargetNode = document.GetElementById("__EVENTTARGET");
    string eventArgument = eventTargetNode != null
        ? eventTargetNode.Attributes["value"].Value
        : "";

    Params = new RobloxRequestParams(eventArgument, viewState, viewStateGenerator, eventValidation);
}
/// <summary>
/// Downloads every file listed under the "mw-category-media" element into
/// <c>imageDirectoryPath</c>/<paramref name="type"/>, skipping excluded names and files
/// that already exist on disk. Progress is written to the console.
/// </summary>
/// <param name="htmlDocument">Parsed category page.</param>
/// <param name="type">Sub-directory name the files are stored under.</param>
private static async Task DownloadImagesAsync(IHtmlDocument htmlDocument, string type)
{
    var gallery = htmlDocument.GetElementById("mw-category-media");
    var entries = gallery.GetElementsByClassName("galleryfilename");

    foreach (var entry in entries)
    {
        var fileName = entry.TextContent.Replace(" ", "_");

        // "(Small)" variants and one specific file are never downloaded.
        var excluded = fileName.Contains("(Small)") || fileName == "100018_01_r05.png";
        if (excluded)
        {
            Console.WriteLine($"{fileName} skipped.");
            continue;
        }

        var filePath = Path.Combine(imageDirectoryPath, type, fileName);
        if (File.Exists(filePath))
        {
            Console.WriteLine($"{fileName} exists.");
            continue;
        }

        // The entry links to a file page; the image location is resolved from that page.
        var filePageUri = new Uri(gamepediaUrl, entry.GetAttribute("href"));
        var imageUri = await GetImagePathAsync(filePageUri);
        await DownloadImageAsync(imageUri, filePath).ConfigureAwait(false);
        Console.WriteLine($"{fileName} downloaded.");

        // Wait a little so as not to put load on the server.
        await Task.Delay(TimeSpan.FromSeconds(1)).ConfigureAwait(false);
    }
}
/// <summary>
/// Fills the delivery fields of <paramref name="shipping"/> (contact name, address, city,
/// state, zip, phone) from the second "col-xs-12 col-sm-6 col-md-4" column under the
/// "sheetBottom" element of the page.
/// </summary>
/// <param name="sourse">HTML source of the page.</param>
/// <param name="shipping">Record to update; fields assigned before a failure keep their new values.</param>
/// <remarks>
/// Best effort: a parsing failure is logged through <c>LogEr.Logerr</c> instead of being thrown.
/// </remarks>
private void SetDeliveryInform(string sourse, ref Shipping shipping)
{
    try
    {
        IHtmlDocument htmlDocument = htmlParser.Parse(sourse);
        var element = htmlDocument.GetElementById("sheetBottom")
            .GetElementsByClassName("col-xs-12 col-sm-6 col-md-4")[1]
            .GetElementsByClassName("panel panel-default")[0]
            .GetElementsByClassName("panel-body")[0].Children;

        // All address parts are sliced out of the text of the first child.
        string blockText = element[0].TextContent;

        // Contact name: first line of the trimmed text.
        shipping.ContactNameD = blockText.Trim();
        shipping.ContactNameD = shipping.ContactNameD.Remove(shipping.ContactNameD.IndexOf('\n'));

        // Address: the line following the name, after dropping an optional leading "(...)" group.
        shipping.AddresD = blockText.Trim();
        shipping.AddresD = shipping.AddresD.Remove(0, shipping.ContactNameD.Length).Trim();
        if (shipping.AddresD[0] == '(')
        {
            shipping.AddresD = shipping.AddresD.Remove(0, shipping.AddresD.IndexOf(')') + 2).Trim();
        }
        shipping.AddresD = shipping.AddresD.Remove(shipping.AddresD.IndexOf('\n')).Trim();

        // City: text between the address and the next comma.
        shipping.CityD = blockText
            .Remove(0, blockText.IndexOf(shipping.AddresD, StringComparison.Ordinal) + shipping.AddresD.Length)
            .Trim();
        shipping.CityD = shipping.CityD.Remove(shipping.CityD.IndexOf(',')).Trim();
        if (shipping.CityD.IndexOf(shipping.AddresD, StringComparison.Ordinal) != -1)
        {
            shipping.CityD = shipping.CityD.Replace(shipping.AddresD, "").Trim();
        }

        // State: the two characters after the city and its two-character separator.
        shipping.StateD = blockText
            .Remove(0, blockText.IndexOf(shipping.CityD, StringComparison.Ordinal) + shipping.CityD.Length + 2)
            .Trim();
        shipping.StateD = shipping.StateD.Remove(2);

        // Zip: whatever follows the last occurrence of the state code.
        shipping.ZipD = blockText
            .Remove(0, blockText.LastIndexOf(shipping.StateD, StringComparison.Ordinal) + 2)
            .Trim();

        // Phone: text of the last child of the panel body.
        shipping.PhoneD = element[element.Length - 1].TextContent;
    }
    catch (Exception ex)
    {
        // Include the cause so the log entry explains why the data is incomplete.
        LogEr.Logerr("Error", $"some data is not added, Load id {shipping.Id}, Url: {shipping.UrlReqvest}, Reason: {ex.Message}", "SetDeliveryInform", DateTime.Now.ToShortTimeString());
    }
}
/// <summary>
/// Replaces <c>shipping.VehiclwInformations</c> with one entry per row of the vehicle table
/// found under the "sheetBottom" element.
/// </summary>
/// <param name="sourse">HTML source of the page.</param>
/// <param name="shipping">Record to update; rows added before a failure are kept.</param>
/// <remarks>
/// Best effort: a parsing failure is logged through <c>LogEr.Logerr</c> instead of being thrown.
/// </remarks>
private void SetVehicleInform(string sourse, ref Shipping shipping)
{
    try
    {
        shipping.VehiclwInformations = new List<VehiclwInformation>();
        IHtmlDocument htmlDocument = htmlParser.Parse(sourse);
        var vehicles = htmlDocument.GetElementById("sheetBottom")
            .GetElementsByClassName("panel panel-default")[0]
            .GetElementsByClassName("panel-body table-responsive")[0]
            .GetElementsByClassName("table table-striped table-hover")[0]
            .GetElementsByTagName("tbody")[0].GetElementsByTagName("tr");

        foreach (var vehicle in vehicles)
        {
            // Cell 0 is not used; cells 1..9 map to the fields below in order.
            var cells = vehicle.Children;
            shipping.VehiclwInformations.Add(new VehiclwInformation
            {
                Year = cells[1].TextContent.Trim(),
                Make = cells[2].TextContent.Trim(),
                Model = cells[3].TextContent.Trim(),
                Type = cells[4].TextContent.Trim(),
                Color = cells[5].TextContent.Trim(),
                Plate = cells[6].TextContent.Trim(),
                VIN = cells[7].TextContent.Trim(),
                Lot = cells[8].TextContent.Trim(),
                AdditionalInfo = cells[9].TextContent.Trim(),
            });
        }
    }
    catch (Exception ex)
    {
        // Include the cause so the log entry explains why the data is incomplete.
        LogEr.Logerr("Error", $"some data is not added, Load id {shipping.Id}, Url: {shipping.UrlReqvest}, Reason: {ex.Message}", "SetVehicleInform", DateTime.Now.ToShortTimeString());
    }
}
/// <summary>
/// Downloads <paramref name="baseUrl"/>, finds the anchors nested as <c>ul &gt; li &gt; a</c>
/// inside the "mw-content-text" element, and turns them into chapter links via
/// <c>CollectChapterLinks</c>.
/// </summary>
/// <param name="baseUrl">Address of the page to fetch.</param>
/// <param name="token">Cancellation token passed to the download and the parser.</param>
/// <returns>The collected links, or <c>null</c> when the page has no "mw-content-text" element.</returns>
public override async Task<IEnumerable<ChapterLink>> GetChapterLinksAsync(string baseUrl, CancellationToken token = default(CancellationToken))
{
    string pageSource = await GetWebPageAsync(baseUrl, token);
    IHtmlDocument page = await Parser.ParseAsync(pageSource, token);

    IElement content = page.GetElementById("mw-content-text");
    if (content == null)
    {
        return null;
    }

    // Keep only anchors whose parent is an <li> that itself sits inside a <ul>.
    var candidateAnchors = content.Descendents<IElement>()
        .Where(e => e.LocalName == "a")
        .Where(e => e.ParentElement != null && e.ParentElement.LocalName == "li")
        .Where(e => e.ParentElement.ParentElement != null && e.ParentElement.ParentElement.LocalName == "ul");

    return CollectChapterLinks(baseUrl, candidateAnchors);
}
/// <summary>
/// When the "__DESCRIPTION__" element contains a <c>&lt;b&gt;</c> tag, adds
/// <c>font-weight: bold</c> to that element's own <c>style</c> attribute.
/// </summary>
/// <param name="document">Document to modify in place.</param>
/// <remarks>
/// The declaration is inserted at the first semicolon of the existing style, as before.
/// A missing element is ignored, and a style that is absent or has no semicolon is now
/// handled instead of throwing. The style is read from the attribute itself rather than
/// matched in the outer HTML, so a child's style can no longer be picked up by mistake.
/// </remarks>
static void TestMod(IHtmlDocument document)
{
    var description = document.GetElementById("__DESCRIPTION__");
    if (description == null || !description.InnerHtml.Contains("<b>"))
    {
        return;
    }

    string style = description.GetAttribute("style") ?? string.Empty;
    int semiIndex = style.IndexOf(';'); // index of the first semicolon, -1 when there is none

    string updatedStyle;
    if (semiIndex >= 0)
    {
        // Same placement as before: directly in front of the first semicolon.
        updatedStyle = style.Insert(semiIndex, "; font-weight: bold ");
    }
    else if (style.Trim().Length == 0)
    {
        updatedStyle = "font-weight: bold";
    }
    else
    {
        updatedStyle = style + "; font-weight: bold";
    }

    description.SetAttribute("style", updatedStyle);
}
/// <summary>
/// Builds a <see cref="MusicDetail"/> (name, artist, image and units) from the element
/// with id "inner".
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>
/// The parsed detail, or <c>null</c> when the document is not valid or has no "inner" element.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is <c>null</c>.</exception>
public override MusicDetail Parse(IHtmlDocument document)
{
    if (document == null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    if (!IsValidDocument(document))
    {
        return null;
    }

    var inner = document.GetElementById("inner");
    if (inner == null)
    {
        return null;
    }

    return new MusicDetail
    {
        Name = GetName(inner),
        ArtistName = GetArtistName(inner),
        ImageName = GetImageName(inner),
        Units = GetUnits(inner),
    };
}
/// <summary>
/// Reads the alternate-hotel suggestions from the table row identified by
/// <c>ScrapingConstants.AlternateHotelsTableRowId</c>, one hotel per <c>td</c> cell.
/// </summary>
/// <param name="htmlDoc">Parsed hotel page.</param>
/// <returns>
/// One entry per cell; empty when the row is missing. Text fields fall back to an empty
/// string and numeric review fields to <c>null</c> when the element is absent or not parseable.
/// </returns>
public List<AlternateHotel> ScrapeAlternateHotels(IHtmlDocument htmlDoc)
{
    var hotels = new List<AlternateHotel>();

    var row = htmlDoc.GetElementById(ScrapingConstants.AlternateHotelsTableRowId);
    if (row == null)
    {
        return hotels;
    }

    foreach (var cell in row.GetElementsByTagName("td"))
    {
        // Name block: link text is the hotel name, a child class encodes the star rating.
        var nameBlock = cell.GetElementsByClassName(ScrapingConstants.AlternateHotelNameClass).FirstOrDefault();
        var nameLink = nameBlock?.GetElementsByClassName(ScrapingConstants.AlternateHotelNameLinkClass).FirstOrDefault();
        var starsClass = nameBlock?.GetClassForChildWhereClassStartsWith(ScrapingConstants.StarRatingClassWildcard);

        var descBlock = cell.GetElementsByClassName(ScrapingConstants.AlternateHotelDescClass).FirstOrDefault();
        var urgencyBlock = cell.GetElementsByClassName(ScrapingConstants.AlternateHotelUrgencyClass).FirstOrDefault();

        // Review data all lives under the info block.
        var infoBlock = cell.GetElementsByClassName(ScrapingConstants.AlternateHotelInfoClass).FirstOrDefault();
        var countNode = infoBlock?.GetElementsByClassName(ScrapingConstants.ReviewCountClass).FirstOrDefault();
        var wordNode = infoBlock?.GetElementsByClassName(ScrapingConstants.ReviewScorewordClass).FirstOrDefault();
        var valueNode = infoBlock?.GetElementsByClassName(ScrapingConstants.ReviewScoreValClass).FirstOrDefault();
        var bestNode = infoBlock?.GetElementsByClassName(ScrapingConstants.ReviewBestScoreClass).FirstOrDefault();

        var hasCount = int.TryParse(countNode?.GetElementText(), out var count);
        var hasValue = double.TryParse(valueNode?.GetElementText(), out var value);
        var hasBest = int.TryParse(bestNode?.GetElementText(), out var best);

        hotels.Add(new AlternateHotel
        {
            Name = nameLink?.GetElementText() ?? string.Empty,
            RatingStars = starsClass?.GetFirstInteger(),
            ShortSummary = descBlock?.GetElementText() ?? string.Empty,
            Urgency = urgencyBlock?.GetElementText() ?? string.Empty,
            Review = new HotelReview
            {
                ReviewCount = hasCount ? count : null,
                Scoreword = wordNode?.GetElementText() ?? string.Empty,
                Score = hasValue ? value : null,
                ScoreOutOf = hasBest ? best : null
            }
        });
    }

    return hotels;
}
/// <summary>
/// Collects the placeholder elements of the document: the elements with id
/// "__DESCRIPTION__" and "__MANUFACTURER__", followed by every element whose name is
/// "__DESCRIPTION__", "__FEATURES__" or "__MANUFACTURER__".
/// </summary>
/// <param name="document">Document to search.</param>
/// <returns>The elements found, in lookup order, with missing (null) results removed.</returns>
private static List<IElement> ListElementForModification(IHtmlDocument document)
{
    // NOTE(review): "__FEATURES__" is looked up by name only, not by id - confirm that is intended.
    var foundById = new[] { "__DESCRIPTION__", "__MANUFACTURER__" }
        .Select(id => document.GetElementById(id));

    var foundByName = new String[] { "__DESCRIPTION__", "__FEATURES__", "__MANUFACTURER__" }
        .SelectMany(name => document.GetElementsByName(name).AsEnumerable());

    return foundById
        .Concat(foundByName)
        .Where(element => element != null)
        .ToList();
}
/// <summary>
/// Returns the <c>src</c> of the cover image: the <c>img</c> inside the child <c>div</c>
/// with class "cover" of the "series_info" element.
/// </summary>
/// <param name="page">Parsed series page.</param>
/// <returns>The image address, or <c>null</c> when the page has no "series_info" element.</returns>
public string CoverUrl(IHtmlDocument page)
{
    var seriesInfo = page.GetElementById("series_info");
    if (seriesInfo == null)
    {
        return null;
    }

    // First() is kept on purpose: a missing cover div or src attribute still throws.
    var coverDiv = seriesInfo.Elements("div").First(e => e.HasClass("cover"));
    var image = coverDiv.Element("img");
    var source = image.Attributes.First(a => a.Name == "src");
    return source.Value;
}
/// <summary>
/// Builds a <see cref="PlaylogDetail"/> from the element with id "inner" by delegating
/// each field to its dedicated getter.
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>
/// The parsed detail, or <c>null</c> when the document is not valid or has no "inner" element.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is <c>null</c>.</exception>
public override PlaylogDetail Parse(IHtmlDocument document)
{
    if (document == null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    if (!IsValidDocument(document))
    {
        return null;
    }

    var inner = document.GetElementById("inner");
    if (inner == null)
    {
        return null;
    }

    // IsChallengePiece is not populated here.
    return new PlaylogDetail
    {
        Name = GetName(inner),
        ImageName = GetImageName(inner),
        Difficulty = GetDifficulty(inner),
        Score = GetScore(inner),
        Rank = GetRank(inner),
        IsNewRecord = GetIsNewRecord(inner),
        IsClear = GetIsClear(inner),
        ComboStatus = GetComboStatus(inner),
        ChainStatus = GetChainStatus(inner),
        Track = GetTrack(inner),
        PlayDate = GetPlayDate(inner),
        StoreName = GetStoreName(inner),
        CharacterName = GetCharacterName(inner),
        SkillLevel = GetSkillLevel(inner),
        SkillName = GetSkillName(inner),
        SkillResult = GetSkillResult(inner),
        MaxCombo = GetMaxCombo(inner),
        JusticeCriticalCount = GetJusticeCriticalCount(inner),
        JusticeCount = GetJusticeCount(inner),
        AttackCount = GetAttackCount(inner),
        MissCount = GetMissCount(inner),
        TapPercentage = GetTapPercentage(inner),
        HoldPercentage = GetHoldPercentage(inner),
        SlidePercentage = GetSlidePercentage(inner),
        AirPercentage = GetAirPercentage(inner),
        FlickPercentage = GetFlickPercentage(inner),
    };
}
/// <summary>
/// Verifies that the document was loaded by checking for the element with id "gs_bdy_ccl".
/// </summary>
/// <param name="document">Parsed response.</param>
/// <param name="siteUrl">Address the document came from; used in the error message.</param>
/// <exception cref="ApplicationException">The element is missing.</exception>
private void EnsureValidResponse(IHtmlDocument document, string siteUrl)
{
    var mainBodyPresent = document.GetElementById("gs_bdy_ccl") != null;
    if (mainBodyPresent)
    {
        return;
    }

    throw new ApplicationException($"{siteUrl} could not be loaded");
}
/// <summary>
/// Fills the order fields of <paramref name="shipping"/> (dates, ship-via, condition and
/// payment terms) from the second "panel panel-default" panel under the "sheetDetails" element.
/// </summary>
/// <param name="sourse">HTML source of the page.</param>
/// <param name="shipping">Record to update; fields assigned before a failure keep their new values.</param>
/// <remarks>
/// Best effort: a parsing failure is logged through <c>LogEr.Logerr</c> instead of being thrown.
/// <c>CompanyOwesCarrier</c> is not populated here.
/// </remarks>
private void SetOrderInform(string sourse, ref Shipping shipping)
{
    try
    {
        IHtmlDocument htmlDocument = htmlParser.Parse(sourse);
        var element1 = htmlDocument.GetElementById("sheetDetails")
            .GetElementsByClassName("panel panel-default")[1]
            .GetElementsByClassName("panel-body")[0];
        var element = element1.GetElementsByClassName("col-xs-12 col-sm-6");
        var el = element[0].GetElementsByTagName("p");

        // First paragraph: "Label: value" lines for the three dates.
        string datesText = el[0].TextContent;

        shipping.DispatchDate = RemoveThroughMarker(datesText, "Dispatch Date: ");
        shipping.DispatchDate = shipping.DispatchDate.Remove(shipping.DispatchDate.IndexOf('\n'));

        shipping.PickupExactly = RemoveThroughMarker(datesText, "Pickup Exactly: ");
        shipping.PickupExactly = shipping.PickupExactly.Remove(shipping.PickupExactly.IndexOf('\n')).Trim();
        if (shipping.PickupExactly.IndexOf("Dispatch Date: ", StringComparison.Ordinal) != -1)
        {
            shipping.PickupExactly = shipping.PickupExactly.Replace("Dispatch Date: ", "");
        }

        shipping.DeliveryEstimated = RemoveThroughMarker(datesText, "Delivery Estimated: ");
        shipping.DeliveryEstimated = shipping.DeliveryEstimated.Remove(shipping.DeliveryEstimated.IndexOf('\n')).Trim();
        if (shipping.DeliveryEstimated.IndexOf("Dispatch Date: ", StringComparison.Ordinal) != -1)
        {
            shipping.DeliveryEstimated = shipping.DeliveryEstimated.Replace("Dispatch Date: ", "");
        }

        // Second and third paragraphs: the value is whatever follows the first ": ".
        shipping.ShipVia = RemoveThroughMarker(el[1].TextContent, ": ");
        shipping.Condition = RemoveThroughMarker(el[2].TextContent, ": ");

        // Second column: payment information.
        string paymentText = element[1].TextContent;

        // The marker is searched without its trailing space, but the skipped length includes it.
        shipping.PriceListed = paymentText.Remove(
            0,
            paymentText.IndexOf("Total Payment to Carrier:", StringComparison.Ordinal) + "Total Payment to Carrier: ".Length);
        shipping.PriceListed = shipping.PriceListed.Remove(shipping.PriceListed.IndexOf('\n'));

        shipping.TotalPaymentToCarrier = RemoveThroughMarker(paymentText, "On Delivery").Trim();
        shipping.TotalPaymentToCarrier = RemoveThroughMarker(shipping.TotalPaymentToCarrier, "to Carrier:").Trim();
        shipping.TotalPaymentToCarrier = shipping.TotalPaymentToCarrier.Remove(shipping.TotalPaymentToCarrier.IndexOf('\n'));

        shipping.OnDeliveryToCarrier = RemoveThroughMarker(element1.TextContent, "Company* owes Carrier:");
        shipping.OnDeliveryToCarrier = RemoveThroughMarker(shipping.OnDeliveryToCarrier, shipping.PriceListed)
            .Trim()
            .Replace("\n", "");

        // Collapse runs of spaces into one. The pattern must be TWO spaces: replacing a single
        // space with a single space never changes the string and would loop forever.
        while (shipping.OnDeliveryToCarrier.Contains("  "))
        {
            shipping.OnDeliveryToCarrier = shipping.OnDeliveryToCarrier.Replace("  ", " ");
        }

        if (shipping.TotalPaymentToCarrier != "None")
        {
            // Keep only what follows the first '*'.
            shipping.TotalPaymentToCarrier = shipping.TotalPaymentToCarrier.Remove(0, shipping.TotalPaymentToCarrier.IndexOf('*') + 1);
        }
        else
        {
            // "None": use the first word after "within" from the owes-carrier text, suffixed with " days".
            shipping.TotalPaymentToCarrier = RemoveThroughMarker(shipping.OnDeliveryToCarrier, "within").Trim();
            shipping.TotalPaymentToCarrier = shipping.TotalPaymentToCarrier.Remove(shipping.TotalPaymentToCarrier.IndexOf(' ')) + " days";
        }
    }
    catch (Exception ex)
    {
        // Include the cause so the log entry explains why the data is incomplete.
        LogEr.Logerr("Error", $"some data is not added, Load id {shipping.Id}, Url: {shipping.UrlReqvest}, Reason: {ex.Message}", "SetOrderInform", DateTime.Now.ToShortTimeString());
    }
}

// Returns the part of text that follows the first ordinal occurrence of marker.
private static string RemoveThroughMarker(string text, string marker)
{
    return text.Remove(0, text.IndexOf(marker, StringComparison.Ordinal) + marker.Length);
}
/// <summary>
/// Reads the rooms table identified by <c>ScrapingConstants.HotelRoomsTableId</c> and
/// returns one <see cref="RoomDetails"/> per <c>tr</c> of its first <c>tbody</c>.
/// </summary>
/// <param name="htmlDoc">Parsed hotel page.</param>
/// <returns>
/// The rooms found; empty when the table or its body is missing. A row without the
/// expected occupancy element yields a room with no occupancy figures instead of an exception.
/// </returns>
public List<RoomDetails> ScrapeHotelRooms(IHtmlDocument htmlDoc)
{
    var roomsList = new List<RoomDetails>();

    var tableBody = htmlDoc.GetElementById(ScrapingConstants.HotelRoomsTableId)
        ?.GetElementsByTagName("tbody")
        .FirstOrDefault();
    if (tableBody == null)
    {
        return roomsList;
    }

    foreach (var tableRow in tableBody.GetElementsByTagName("tr"))
    {
        var roomTypeElement = tableRow.GetElementsByClassName(ScrapingConstants.RoomTypeClass)
            .FirstOrDefault();
        var roomType = roomTypeElement?.GetElementText() ?? string.Empty;

        // The occupancy marker is the first element inside the row's first cell; either may be absent.
        var firstOccupancyElement = tableRow.FirstElementChild?.FirstElementChild;

        var childrenAllowed = false;
        string? adultOccupancyClass = null, childrenOccupancyClass = null;

        if (firstOccupancyElement != null)
        {
            if (firstOccupancyElement.ClassList.Any(c => c == ScrapingConstants.KidsAllowedRoomClass))
            {
                // Kids-allowed rooms carry the occupancy classes on child elements.
                childrenAllowed = true;
                adultOccupancyClass = firstOccupancyElement
                    .GetClassForChildWhereClassStartsWith(ScrapingConstants.AdultOccupancyClassWildcard);
                childrenOccupancyClass = firstOccupancyElement
                    .GetClassForChildWhereClassStartsWith(ScrapingConstants.KidOccupancyClassWildcard);
            }
            else
            {
                // Otherwise the adult occupancy class sits on the element itself.
                adultOccupancyClass = firstOccupancyElement.ClassList
                    .SingleOrDefault(c => c.StartsWith(ScrapingConstants.AdultOccupancyClassWildcard));
            }
        }

        roomsList.Add(new RoomDetails
        {
            RoomType = roomType,
            AdultOccupancy = adultOccupancyClass?.GetFirstInteger(),
            ChildrenAllowed = childrenAllowed,
            ChildrenOccupancy = childrenOccupancyClass?.GetFirstInteger()
        });
    }

    return roomsList;
}
/// <summary>
/// Fills the header fields of <paramref name="store"/> (name, start of sales, the three
/// detailed seller ratings, the feedback-history counts and the update timestamp) from a
/// feedback page.
/// </summary>
/// <param name="store">Store record to update.</param>
/// <param name="htmlDocument">Parsed feedback page.</param>
/// <remarks>
/// Each detailed rating falls back to "0%" on its own when it cannot be read. Any other
/// failure stops the method and is appended to "log/SetHeadInfo.txt"; the "log" directory
/// is created first so the error handler cannot itself fail on a missing directory.
/// </remarks>
private void SetHeadInfo(Store store, IHtmlDocument htmlDocument)
{
    try
    {
        var summaryRows = htmlDocument.GetElementById("feedback-summary").GetElementsByTagName("tr");
        store.Name = summaryRows[0].TextContent.Replace("Seller:", "").Trim();
        store.StartOfSales = summaryRows[2].Children[1].TextContent.Trim();

        store.ItemAsDescribed = ReadDetailedRating(htmlDocument, 0);
        store.Communication = ReadDetailedRating(htmlDocument, 1);
        store.ShippingSpeed = ReadDetailedRating(htmlDocument, 2);

        // Rows 1..3 of the history table hold the positive, neutral and negative counts.
        var historyRows = htmlDocument.GetElementById("feedback-history").Children[1].Children[0].Children[0];
        store.Positive4_5Stars = ReadHistoryCount(historyRows, 1);
        store.Neutral3Stars = ReadHistoryCount(historyRows, 2);
        store.Negative1_2Stars = ReadHistoryCount(historyRows, 3);

        store.DateUpdate = DateTime.Now.ToString();
    }
    catch (Exception e)
    {
        string storeIsNulOrId = store != null ? store.IDShope.ToString() : "null";
        Directory.CreateDirectory("log");
        File.AppendAllText("log/SetHeadInfo.txt", $"{e.Message}, Store {storeIsNulOrId} {Environment.NewLine}");
    }
}

// Returns the text between the first ':' and the following ';' in the outer HTML of the
// rating element of the given "feedback-dsr" row, or "0%" when it cannot be read.
private static string ReadDetailedRating(IHtmlDocument htmlDocument, int rowIndex)
{
    try
    {
        string markup = htmlDocument.GetElementById("feedback-dsr")
            .GetElementsByTagName("tr")[rowIndex]
            .GetElementsByTagName("td")[0]
            .Children[0].Children[0].OuterHtml;
        markup = markup.Remove(0, markup.IndexOf(':') + 1);
        return markup.Remove(markup.IndexOf(';'));
    }
    catch
    {
        // Deliberate best-effort default for a rating that is absent or shaped differently.
        return "0%";
    }
}

// Returns the text of the fourth cell of the given history row, mapping the " -" placeholder to "0".
private static string ReadHistoryCount(IElement historyRows, int rowIndex)
{
    string count = historyRows.Children[rowIndex].Children[3].TextContent;
    return count == " -" ? "0" : count;
}
/// <summary>
/// Returns the element whose id is "LC" followed by this element's line number plus
/// <paramref name="skipAfterLines"/>.
/// </summary>
/// <param name="element">Element whose id contains the current line number.</param>
/// <param name="document">Document to look the target element up in.</param>
/// <param name="skipAfterLines">Number of lines to move by.</param>
/// <returns>The element with the computed id, as returned by <c>GetElementById</c>.</returns>
public static IElement SkipLine(this IElement element, IHtmlDocument document, int skipAfterLines)
{
    // Everything from the first digit of the id to its end is parsed as the line number.
    var numberPart = string.Concat(element.Id.SkipWhile(ch => !ch.IsDigit()));
    var targetLine = int.Parse(numberPart) + skipAfterLines;
    var targetId = $"LC{targetLine}";
    return document.GetElementById(targetId);
}
/// <summary>
/// Builds a <see cref="MusicGenre"/>: the counters come from the "scoreList_result"
/// element and the units from the "inner" element.
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>
/// The parsed genre, or <c>null</c> when the document is not valid or either element is missing.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is <c>null</c>.</exception>
public override MusicGenre Parse(IHtmlDocument document)
{
    if (document == null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    if (!IsValidDocument(document))
    {
        return null;
    }

    var summary = document.GetElementById("scoreList_result");
    if (summary == null)
    {
        return null;
    }

    var inner = document.GetElementById("inner");
    if (inner == null)
    {
        return null;
    }

    return new MusicGenre
    {
        MusicCount = GetMusicCount(summary),
        ClearCount = GetClearCount(summary),
        SCount = GetSCount(summary),
        SsCount = GetSsCount(summary),
        SssCount = GetSssCount(summary),
        FullComboCount = GetFullComboCount(summary),
        AllJusticeCount = GetAllJusticeCount(summary),
        FullChainGoldCount = GetFullChainGoldCount(summary),
        FullChainPlatinumCount = GetFullChainPlatinumCount(summary),
        Units = GetUnits(inner),
    };
}
/// <summary>
/// Returns the text of the element with id "product-price-20866".
/// </summary>
/// <param name="document">Parsed product page.</param>
/// <returns>The element's text, or a fixed fallback message when the element is missing.</returns>
public string Parse(IHtmlDocument document)
{
    var priceContainer = document.GetElementById("product-price-20866");
    if (priceContainer == null)
    {
        return "бэд реквест";
    }

    return priceContainer.TextContent;
}
/// <summary>
/// Lists the chapter addresses found under the "chapters" element: for every
/// <c>ul &gt; li</c>, the link inside the <c>h3</c> (or, failing that, <c>h4</c>) of the
/// item's <c>div</c>, prefixed with "http:".
/// </summary>
/// <param name="doc">Parsed series page.</param>
/// <returns>The addresses in page order; empty when the page has no "chapters" element.</returns>
public IEnumerable<string> ChapterUrls(IHtmlDocument doc)
{
    var urls = new List<string>();

    var chapters = doc.GetElementById("chapters");
    if (chapters == null)
    {
        return urls;
    }

    foreach (var list in chapters.Elements("ul"))
    {
        foreach (var item in list.Elements("li"))
        {
            var wrapper = item.Element("div");
            var heading = wrapper.Element("h3") ?? wrapper.Element("h4");
            var link = heading.Element("a");
            var href = link.GetAttribute("href");
            urls.Add($"http:{href}");
        }
    }

    return urls;
}
/// <summary>
/// Lists the chapter addresses found in the "listing" table: for every row that is not
/// the "table_head" row, the link inside its first cell, prefixed with the site root.
/// </summary>
/// <param name="doc">Parsed series page.</param>
/// <returns>
/// The addresses in page order. Empty when the page has no "listing" element; rows
/// without a cell or a link are skipped.
/// </returns>
public IEnumerable<string> ChapterUrls(IHtmlDocument doc)
{
    var listing = doc.GetElementById("listing");
    if (listing == null)
    {
        return new List<string>();
    }

    return listing
        .Elements("tr")
        .Where(row => !row.HasClass("table_head"))
        .Select(row => row.Element("td")?.Element("a"))
        .Where(link => link != null)
        .Select(link => link.GetAttribute("href"))
        .Select(url => $"http://mangapanda.com{url}")
        .ToList();
}
/// <summary>
/// Sets <c>PageNumber</c> to the first number found in the pager label, minus one.
/// </summary>
/// <param name="document">Parsed thread page.</param>
/// <exception cref="Exception">The page has no pager element.</exception>
/// <remarks>
/// NOTE(review): the previous comment described this as parsing the total number of
/// pages; the code stores the first number of the label, less one, as the current page.
/// </remarks>
private void ParseCurrentPageNumber(IHtmlDocument document)
{
    IElement pager = document.GetElementById("ctl00_cphRoblox_PostView1_ctl00_Pager");
    if (pager == null)
    {
        throw new Exception($"No pager for Page in Thread: {thread_id}");
    }

    IElement label = pager.QuerySelector("table tr:nth-child(1) td:nth-child(1) span");
    MatchCollection numbers = Regex.Matches(label.TextContent, "[0-9]+", RegexOptions.IgnoreCase);

    int firstNumber = int.Parse(numbers[0].Value);
    PageNumber = firstNumber - 1;
}
/// <summary>
/// Reads the series metadata: the blurb from the "mangaDescription" element, and the
/// author, artist and genres from the labelled siblings that follow the "rating" element.
/// </summary>
/// <param name="doc">Parsed series page.</param>
/// <returns>The metadata; author and artist are <c>null</c> when their label is not found.</returns>
public MetaData GetMetaData(IHtmlDocument doc)
{
    string blurb = doc.GetElementById("mangaDescription").TextContent;
    var rating = doc.GetElementById("rating");

    // Each search starts from the previously found label, or from "rating" when that label was missing.
    var authorLabel = rating.GetNextSiblingWithText("Author");
    var artistLabel = (authorLabel ?? rating).GetNextSiblingWithText("Artist");
    var genresLabel = (artistLabel ?? rating).GetNextSiblingWithText("Genres");

    return new MetaData
    {
        Blurb = blurb,
        Author = authorLabel?.NextElementSibling?.TextContent,
        Artist = artistLabel?.NextElementSibling?.TextContent,
        Genres = GetGenreATags(genresLabel)
            .Select(e => e.TextContent)
            .Select(t => t.ParseAsGenre())
            .Where(t => t != Genre.None)
            .Merge(),
    };
}
/// <summary>
/// Reports whether the page exists, judged by the message-title element.
/// </summary>
/// <param name="document">Parsed page.</param>
/// <returns>
/// <c>false</c> when the "MessageTitle" element is present and its lower-cased text
/// contains "error"; <c>true</c> otherwise.
/// </returns>
private bool PageExists(IHtmlDocument document)
{
    IElement titleNode = document.GetElementById("ctl00_cphRoblox_Message1_ctl00_MessageTitle");
    return titleNode == null || !titleNode.TextContent.ToLower().Contains("error");
}