/// <summary>
/// Builds an entry from a single entry element.
/// </summary>
/// <param name="entryElement">Element to read the head word, label and definitions from.</param>
/// <returns>
/// The populated entry, or <c>null</c> when neither <c>.headword &gt; span.hw</c> nor
/// <c>.headword &gt; span.phrase</c> matches anything.
/// </returns>
private static CambridgeWordInfo.Entry ParseEntryFromEntryElement(IParentNode entryElement)
{
    var result = new CambridgeWordInfo.Entry();

    // Try the "hw" span first and only fall back to the "phrase" span when it is absent.
    var heading = entryElement.QuerySelector(".headword > span.hw")
                  ?? entryElement.QuerySelector(".headword > span.phrase");
    if (heading == null)
    {
        return null;
    }

    result.ActualWord = heading.TextContent;

    // The label is optional; when missing, Label keeps whatever value the constructor gave it.
    var labelNode = entryElement.QuerySelector(".posgram > span.pos");
    if (labelNode != null)
    {
        result.Label = labelNode.TextContent;
    }

    result.Definitions = ParseDefBlocksFromEntryElement(entryElement);
    return result;
}
/// <summary>
/// Asserts that the group element <c>#{id}-group</c> carries the error class and that the
/// <c>#{id}-error</c> element found near <c>#{field}</c> shows <paramref name="errorMessage"/>.
/// </summary>
/// <param name="responseContent">Parsed response to inspect.</param>
/// <param name="id">Prefix used for the <c>-group</c> and <c>-error</c> element ids.</param>
/// <param name="field">Id of the element whose grandparent hosts the error element.</param>
/// <param name="errorMessage">Exact text expected in the error element.</param>
private static void AssertError(IParentNode responseContent, string id, string field, string errorMessage)
{
    // The error element is searched for inside the grandparent of "#field".
    var fieldContainer = responseContent.QuerySelector($"#{field}").ParentElement.ParentElement;
    var formGroup = responseContent.QuerySelector($"#{id}-group");

    formGroup.ClassName.Should().Contain("govuk-form-group--error");

    var shownMessage = fieldContainer.QuerySelector($"#{id}-error").TextContent;
    shownMessage.Should().Be(errorMessage);
}
/// <summary>
/// Returns the first element within <paramref name="node"/> that matches the
/// specified XPath query or CSS selector.
/// </summary>
/// <param name="node">A node to search within.</param>
/// <param name="path">
/// The XPath query or CSS selector. A value starting with <c>/</c> is treated as XPath;
/// anything else is passed through as a CSS selector.
/// </param>
/// <returns>The result of the underlying <c>QuerySelector</c> call.</returns>
public static IElement QuerySelectorOrXPath(this IParentNode node, string path)
{
    // Ordinal comparison: the leading slash is a syntactic marker, not linguistic text,
    // so the decision must not depend on the current culture.
    if (path.StartsWith("/", StringComparison.Ordinal))
    {
        // The XPath is embedded in a CSS selector because IParentNode has no
        // SelectSingleNode method.
        // NOTE(review): the path is placed inside single quotes without escaping, so an
        // XPath that itself contains a single quote may produce an invalid selector - confirm.
        return node.QuerySelector($"*[xpath>'{path}']");
    }

    // Otherwise assume a CSS selector.
    return node.QuerySelector(path);
}
/// <summary>
/// Creates a <c>CarModel</c> from the first anchor found under <paramref name="node"/>.
/// </summary>
/// <param name="node">Node containing the anchor with the title, image, tip and order-count elements.</param>
/// <returns>A model filled from the matched elements.</returns>
/// <remarks>No selector result is null-checked, so a missing element surfaces as a null-reference failure.</remarks>
private static CarModel CreateModelWithAngleSharp(IParentNode node)
{
    // Small readers keep the initializer below focused on the field-to-selector mapping.
    string TextOf(string selector) => node.QuerySelector(selector).TextContent;
    string AttributeOf(string selector, string attribute) => node.QuerySelector(selector).GetAttribute(attribute);

    return new CarModel
    {
        Title = TextOf("a div.carbox-title"),
        ImageUrl = AttributeOf("a div.carbox-carimg img", "src"),
        ProductUrl = AttributeOf("a", "href"),
        Tip = TextOf("a div.carbox-tip"),
        OrdersNumber = TextOf("a div.carbox-number span")
    };
}
/// <summary>
/// Maps the games listed under <c>#lcp_instance_0</c> to their page URLs.
/// </summary>
/// <param name="parentNode">Node containing the <c>#lcp_instance_0</c> list.</param>
/// <returns>A dictionary keyed by anchor text whose values carry the anchor's href as <c>PageUrl</c>.</returns>
private static Dictionary<string, GameInfo> ListGamesInNode(IParentNode parentNode)
{
    // Unordered list containing all games on the page.
    var gameList = parentNode.QuerySelector("#lcp_instance_0");

    // The first child of every list item is taken and cast to an anchor.
    var anchors = gameList.Children
        .Select(item => item.Children[0])
        .Cast<IHtmlAnchorElement>();

    return anchors.ToDictionary(
        anchor => anchor.Text,
        anchor => new GameInfo { PageUrl = anchor.Href });
}
/// <summary>
/// Reads a rating from the children of the <c>div.ratings</c> element.
/// </summary>
/// <param name="node">Node containing the <c>div.ratings</c> element.</param>
/// <returns>
/// A rating on a scale of 5. The value is 5 minus the index of the first child with the
/// <c>selected</c> class, minus a further 0.5 when that child also has the <c>half</c> class.
/// When no child is selected the value is 0.
/// </returns>
public Rating Parse(IParentNode node)
{
    const int max = 5;
    var stars = node.QuerySelector("div.ratings").Children;

    // Default: nothing selected, so the whole scale is subtracted.
    double diff = max;

    // Strictly less than Length: the previous "<=" bound indexed one element past the end
    // whenever no child carried the "selected" class.
    for (var i = 0; i < stars.Length; i++)
    {
        var classes = stars[i].ClassList;
        if (!classes.Contains("selected"))
        {
            continue;
        }

        diff = classes.Contains("half") ? i + 0.5 : i;
        break;
    }

    double rating = max - diff;
    return new Rating(rating, max);
}
/// <summary>
/// Returns the text content of the <c>.juno-title</c> element inside a listing item.
/// </summary>
/// <param name="listingItem">Listing item to read from.</param>
/// <returns>The text content of the first <c>.juno-title</c> match.</returns>
private static string GetTitle(IParentNode listingItem) =>
    listingItem.QuerySelector(".juno-title").TextContent;
/// <summary>
/// Returns the visible validation error for an input, asserting that its error span exists.
/// </summary>
/// <param name="document">Document to search; may be null, in which case the assertion fails.</param>
/// <param name="input">Input id; the span looked up is <c>span#{input}-error</c> after escaping.</param>
/// <returns>The span's text, or <c>null</c> when the span has the <c>hidden</c> class.</returns>
public static string GetError(this IParentNode document, string input)
{
    var messageSpan = document?.QuerySelector(EscapeQuerySelector($"span#{input}-error"));
    Assert.NotNull(messageSpan);

    // A hidden span means no error is being shown.
    if (messageSpan.ClassList.Contains("hidden"))
    {
        return null;
    }

    return messageSpan.TextContent;
}
/// <summary>
/// Asserts that the validation span for a field has the error classes and the expected text.
/// </summary>
/// <param name="responseContent">Parsed response to inspect.</param>
/// <param name="fieldValMsg">Value of the span's <c>data-valmsg-for</c> attribute.</param>
/// <param name="errorMessage">Exact text expected in the span.</param>
private static void AssertError(IParentNode responseContent, string fieldValMsg, string errorMessage)
{
    var validationSpan = responseContent.QuerySelector($"span[data-valmsg-for='{fieldValMsg}']");

    var cssClasses = validationSpan.ClassName;
    cssClasses.Should().Be("govuk-error-message field-validation-error");

    var shownMessage = validationSpan.TextContent;
    shownMessage.Should().Be(errorMessage);
}
/// <summary>
/// Reads the price from the <c>div.price ins.num</c> element.
/// </summary>
/// <param name="node">Node containing the price element.</param>
/// <returns>A price whose currency is always <c>"RUB"</c> and whose value is the parsed element text.</returns>
public Price ParsePrice(IParentNode node)
{
    var amountText = node.QuerySelector("div.price ins.num").TextContent;

    // NOTE(review): double.Parse without a format provider uses the current culture, so the
    // accepted decimal/group separators depend on the machine - confirm against the page format.
    var amount = double.Parse(amountText);

    return new Price
    {
        Currency = "RUB",
        Value = amount
    };
}
/// <summary>
/// Asserts that the parent of <c>#{field}</c> is an error form group showing the expected message.
/// </summary>
/// <param name="responseContent">Parsed response to inspect.</param>
/// <param name="field">Id of the input element.</param>
/// <param name="errorMessage">Exact text expected in the group's <c>.govuk-error-message</c> element.</param>
private static void AssertError(IParentNode responseContent, string field, string errorMessage)
{
    var formGroup = responseContent.QuerySelector($"#{field}").ParentElement;
    formGroup.ClassName.Should().Be("govuk-form-group govuk-form-group--error");

    var shownMessage = formGroup.QuerySelector(".govuk-error-message").TextContent;
    shownMessage.Should().Be(errorMessage);
}
/// <summary>
/// Asserts that the parent of the label for a field is an error form group showing the expected message.
/// </summary>
/// <param name="responseContent">Parsed response to inspect.</param>
/// <param name="labelFor">Value of the label's <c>for</c> attribute.</param>
/// <param name="errorMessage">Exact text expected in the group's <c>.govuk-error-message</c> element.</param>
private static void AssertError(IParentNode responseContent, string labelFor, string errorMessage)
{
    var formGroup = responseContent.QuerySelector($"label[for='{labelFor}']").ParentElement;
    formGroup.ClassName.Should().Be("govuk-form-group govuk-form-group--error");

    var shownMessage = formGroup.QuerySelector(".govuk-error-message").TextContent;
    shownMessage.Should().Be(errorMessage);
}
/// <summary>
/// Selects the first element matching <c>Selector</c> and delegates to the element-based
/// <c>Parse</c> overload.
/// </summary>
/// <param name="node">Node to search within.</param>
/// <param name="valueType">Target type; must be a primitive type or <see cref="string"/>.</param>
/// <returns>Whatever the element-based overload returns for the matched element.</returns>
/// <exception cref="NotSupportedException">
/// <paramref name="valueType"/> is neither primitive nor <see cref="string"/>.
/// </exception>
public override object Parse(IParentNode node, Type valueType)
{
    var isSupported = valueType.GetTypeInfo().IsPrimitive || valueType.Equals(typeof(string));
    if (!isSupported)
    {
        // The message reads: "HtmlValueQueryAttribute only supports primitive types".
        throw new NotSupportedException("HtmlValueQueryAttribute 只支持基元类型");
    }

    var matched = node.QuerySelector(this.Selector);
    return this.Parse(matched, valueType);
}
/// <summary>
/// Fills a spell from the <c>.pagetitle</c> and <c>#frmPrint</c> elements of a page.
/// </summary>
/// <param name="document">the html page</param>
/// <param name="spell">An almost empty spell</param>
/// <returns>The same <paramref name="spell"/> instance, filled in place.</returns>
/// <remarks>
/// The inner HTML of <c>#frmPrint</c> is split on <c>&lt;br&gt;&lt;br&gt;</c>: the first part is
/// read as a list of <c>&lt;br&gt;</c>-separated lines, the remaining parts are concatenated into
/// the description.
/// </remarks>
private static Spell ParseSpellFromDocument(IParentNode document, Spell spell)
{
    spell.Name = document.QuerySelector(".pagetitle").TextContent;

    var printArea = document.QuerySelector("#frmPrint");
    printArea.RemoveChild(printArea.LastChild); // per the original note, this drops a script node

    var sections = printArea.InnerHtml.Split("<br><br>");
    var parser = new HtmlParser();

    // Each line has everything up to and including the first "</b> " cut off, then its
    // remaining markup is reduced to plain text.
    // NOTE(review): when a line has no "</b> ", IndexOf yields -1 and the first four
    // characters are dropped instead - confirm this is intended.
    string[] content = sections
        .FirstOrDefault()
        ?.Trim()
        .Split("<br>", StringSplitOptions.RemoveEmptyEntries)
        .Select(line => parser.Parse(line.Substring(line.IndexOf("</b> ") + 5)).DocumentElement.TextContent)
        .ToArray();

    var schoolAndLevel = content[0].Split(" ; Niveau ", StringSplitOptions.RemoveEmptyEntries);
    var schoolFull = schoolAndLevel[0];
    var firstSpace = schoolFull.IndexOf(' ');

    // School is the text before the first space; SchoolFull keeps the whole segment.
    spell.School = firstSpace >= 0 ? schoolFull.Remove(firstSpace) : schoolFull;
    spell.SchoolFull = schoolFull;

    // Level parsing is not implemented: an earlier draft split the second segment on ", "
    // and then on ' ', but was disabled because the space is sometimes missing (e.g. "Bard6").

    spell.CastingTime = content[1];
    spell.Components = content[2];
    spell.Range = content[3];

    // According to the original notes, target / effect / zone "should be line 4".
    spell.Target = Find(content, "Cibles ", "Cible ");
    spell.Effect = Find(content, "Effet ");
    spell.Zone = Find(content, "Zone d'effet ");

    spell.Duration = content[5];

    var saveAndResistance = content[6].Split(" ; Résistance à la magie ", StringSplitOptions.RemoveEmptyEntries);
    spell.SavingThrow = saveAndResistance[0];
    spell.SpellResistance = saveAndResistance[1];

    var descriptionParts = sections
        .Skip(1)
        .Select(part => parser.Parse(part).DocumentElement.TextContent);
    spell.Description = string.Concat(descriptionParts).Trim();

    return spell;
}
/// <summary>
/// Asserts that the parent of <c>#{field}</c> is an error form group that shows no error message span.
/// </summary>
/// <param name="responseContent">Parsed response to inspect.</param>
/// <param name="field">Id of the input element.</param>
private static void AssertError(IParentNode responseContent, string field)
{
    var formGroup = responseContent.QuerySelector($"#{field}").ParentElement;
    formGroup.ClassName.Should().Be("govuk-form-group govuk-form-group--error");

    // No error message is shown on this page, just the error border.
    var messageSpan = formGroup.QuerySelector(".govuk-error-message") as IHtmlSpanElement;
    messageSpan.Should().BeNull();
}
/// <summary>
/// Converts one result row into zero or more releases.
/// </summary>
/// <param name="row">Row containing the details anchor and the <c>td</c> cells.</param>
/// <param name="query">Query supplying the search term, requested categories and episode.</param>
/// <param name="matchWords">When true, rows whose title fails <c>CheckTitleMatchWords</c> are skipped.</param>
/// <returns>
/// The releases parsed from the row; empty when the row is filtered out by title or category,
/// or when an episode is requested and the row is not a series.
/// </returns>
private async Task<List<ReleaseInfo>> ParseReleasesAsync(IParentNode row, TorznabQuery query, bool matchWords)
{
    var releases = new List<ReleaseInfo>();

    var anchor = row.QuerySelector("a");
    var title = anchor.TextContent.Trim();

    // Match the words in the query with the title.
    if (matchWords && !CheckTitleMatchWords(query.SearchTerm, title))
    {
        return releases;
    }

    var detailsStr = anchor.GetAttribute("href");

    // The tracker category is the fourth '/'-separated segment of the details link.
    var cat = detailsStr.Split('/')[3];
    var categories = MapTrackerCatToNewznab(cat);

    // Return results only for requested categories.
    if (query.Categories.Any() && !query.Categories.Contains(categories.First()))
    {
        return releases;
    }

    // Query the cells once; both the date and the size are read from the same collection.
    var cells = row.QuerySelectorAll("td");
    var publishStr = cells[2].TextContent.Trim();
    var publishDate = TryToParseDate(publishStr, DateTime.Now);
    var sizeStr = cells[3].TextContent.Trim();

    // Parsing is different for each category.
    if (cat == DivxTotalCategories.Series)
    {
        var seriesReleases = await ParseSeriesReleaseAsync(query, detailsStr, cat, publishDate);
        releases.AddRange(seriesReleases);
        return releases;
    }

    // For an episode search, categories other than series are not returned.
    if (query.Episode != null)
    {
        return releases;
    }

    var isMovie = cat == DivxTotalCategories.Peliculas
                  || cat == DivxTotalCategories.PeliculasHd
                  || cat == DivxTotalCategories.Peliculas3D
                  || cat == DivxTotalCategories.PeliculasDvdr;

    if (isMovie)
    {
        releases.Add(ParseMovieRelease(query, title, detailsStr, cat, publishDate, sizeStr));
    }
    else
    {
        var size = TryToParseSize(sizeStr, DivxTotalFizeSizes.Otros);
        releases.Add(GenerateRelease(title, detailsStr, detailsStr, cat, publishDate, size));
    }

    return releases;
}
/// <summary>
/// Returns the artist names of a listing item as a comma-separated string.
/// </summary>
/// <param name="listingItem">Listing item to read from.</param>
/// <returns>
/// The text of every <c>.juno-artist a</c> link joined with <c>", "</c>; when that is empty,
/// the text content of the <c>.juno-artist</c> element itself.
/// </returns>
private static string GetArtists(IParentNode listingItem)
{
    var linkTexts = listingItem.QuerySelectorAll(".juno-artist a").Select(link => link.TextContent);
    string joined = String.Join(", ", linkTexts);

    // "Various" as artists case: no links, so fall back to the container text.
    return String.IsNullOrEmpty(joined)
        ? listingItem.QuerySelector(".juno-artist").TextContent
        : joined;
}
/// <summary>
/// Returns the first element matching the selectors with the provided
/// type, or null.
/// </summary>
/// <typeparam name="TElement">The type to look for.</typeparam>
/// <param name="parent">The parent of the nodes to gather.</param>
/// <param name="selectors">The group of selectors to use.</param>
/// <returns>
/// The element, if there is any. Null is also returned when the first match is not a
/// <typeparamref name="TElement"/>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="parent"/> or <paramref name="selectors"/> is null.
/// </exception>
public static TElement QuerySelector<TElement>(this IParentNode parent, String selectors)
    where TElement : class, IElement
{
    // nameof keeps the reported parameter names in sync with the signature.
    if (parent == null)
    {
        throw new ArgumentNullException(nameof(parent));
    }

    if (selectors == null)
    {
        throw new ArgumentNullException(nameof(selectors));
    }

    return parent.QuerySelector(selectors) as TElement;
}
/// <summary>
/// Finds the first element matching the selectors and returns it as
/// <typeparamref name="TElement"/>, or null.
/// </summary>
/// <typeparam name="TElement">The element type to return.</typeparam>
/// <param name="parent">The node to search within.</param>
/// <param name="selectors">The group of selectors to use.</param>
/// <returns>
/// The first match when it is a <typeparamref name="TElement"/>; otherwise null.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="parent"/> or <paramref name="selectors"/> is null.
/// </exception>
public static TElement? QuerySelector<TElement>(this IParentNode parent, String selectors)
    where TElement : class, IElement
{
    // Arguments are validated in declaration order: parent first, then selectors.
    var source = parent ?? throw new ArgumentNullException(nameof(parent));
    var query = selectors ?? throw new ArgumentNullException(nameof(selectors));

    return source.QuerySelector(query) as TElement;
}
/// <summary>
/// Returns the normalized title of an item, trying each known title selector in turn.
/// </summary>
/// <param name="node">Node to search within.</param>
/// <returns>The normalized text of the first selector that matches.</returns>
/// <exception cref="InvalidOperationException">None of the selectors matched.</exception>
private static string GetItemName(IParentNode node)
{
    string[] selectors = { "span.item_title", "h4.menu-list__item-title" };

    for (var index = 0; index < selectors.Length; index++)
    {
        IElement match = node.QuerySelector(selectors[index]);
        if (match == null)
        {
            continue;
        }

        return Normalize(match.Text());
    }

    // No selectors matched, throw exception
    throw new InvalidOperationException("Title doesn't exist in node");
}
/// <summary>
/// Tries to read the release date of a listing item.
/// </summary>
/// <param name="listingItem">Listing item to read from.</param>
/// <param name="date">
/// The parsed date on success; <see cref="DateTimeOffset.MinValue"/> otherwise.
/// </param>
/// <returns>
/// <c>true</c> when the <c>.text-right &gt; .text-sm</c> element exists, its inner HTML splits
/// into exactly three <c>&lt;br&gt;</c>-separated parts, and the middle part is a
/// <c>dd MMM yy</c> date; otherwise <c>false</c>.
/// </returns>
private static bool TryGetDate(IParentNode listingItem, out DateTimeOffset date)
{
    date = DateTimeOffset.MinValue;

    string[] releaseDate = listingItem.QuerySelector(".text-right > .text-sm")
        ?.InnerHtml
        ?.Split("<br>");

    if (releaseDate == null || releaseDate.Length != 3)
    {
        return false;
    }

    // TryParseExact keeps the Try-pattern contract: a malformed date yields false instead of
    // a FormatException. On failure it leaves date at DateTimeOffset.MinValue.
    return DateTimeOffset.TryParseExact(
        releaseDate[1],
        "dd MMM yy",
        CultureInfo.InvariantCulture,
        DateTimeStyles.None,
        out date);
}
/// <summary>
/// Finds the first item whose <c>Path</c> matches an element under <paramref name="element"/>.
/// </summary>
/// <param name="element">Node to search within.</param>
/// <param name="attributes">Candidate items, tried in order.</param>
/// <returns>
/// The matched element together with the item that matched, or <c>(null, null)</c> when
/// nothing matches.
/// </returns>
private static (IElement Element, IHtmlItem HtmlItem) GetFirstOfDefaultNode(IParentNode element, IEnumerable<IHtmlItem> attributes)
{
    foreach (var candidate in attributes)
    {
        var match = element.QuerySelector(candidate.Path);
        if (match != null)
        {
            return (match, candidate);
        }
    }

    return (null, null);
}
/// <summary>
/// Finds the first item whose <c>Selector</c> matches an element under
/// <paramref name="element"/> and, when the item names an attribute in <c>Attr</c>, whose
/// matched element has that attribute.
/// </summary>
/// <param name="element">Node to search within.</param>
/// <param name="attributes">Candidate items, tried in order.</param>
/// <returns>
/// The matched element together with the item that matched, or <c>(null, null)</c> when no
/// item qualifies.
/// </returns>
private static (IElement Element, IHtmlItem HtmlItem) GetFirstOfDefaultNode(IParentNode element, IEnumerable<IHtmlItem> attributes)
{
    foreach (var attribute in attributes)
    {
        var candidate = element.QuerySelector(attribute.Selector);
        if (candidate == null)
        {
            continue;
        }

        // An item that names an attribute only qualifies when the element carries it.
        var requiresAttribute = !string.IsNullOrEmpty(attribute.Attr);
        if (requiresAttribute && !candidate.HasAttribute(attribute.Attr))
        {
            continue;
        }

        return (candidate, attribute);
    }

    // Both parts are null together. Previously an element rejected for a missing attribute
    // could be returned alongside a null item when it was the last candidate tried.
    return (null, null);
}
/// <summary>
/// Returns the inner HTML of <c>#event-description</c> after removing the share, promo and
/// note elements from it.
/// </summary>
/// <param name="document">Document containing the <c>#event-description</c> element.</param>
/// <returns>The remaining inner HTML of the description element.</returns>
private static string GetDescription(IParentNode document)
{
    var description = document.QuerySelector("#event-description");

    // All three lookups happen before anything is removed.
    var unwanted = new[]
    {
        description.QuerySelector(".b-page-share"),
        description.QuerySelector(".b-prmplace-media"),
        description.QuerySelector(".note")
    };

    foreach (var element in unwanted)
    {
        if (element != null)
        {
            description.RemoveElement(element);
        }
    }

    return description.InnerHtml;
}
/// <summary>
/// Lists the download mirrors found after the "download mirrors" marker inside
/// <c>.entry-content</c>.
/// </summary>
/// <param name="parentNode">Node containing the <c>.entry-content</c> element.</param>
/// <returns>
/// One mirror per anchor whose text is not <c>"JDownloader2"</c>; the mirror name is the
/// anchor text with any <c>filehoster: </c> / <c>filehosters: </c> prefix text removed
/// (case-insensitive) and the URL is the anchor's href.
/// </returns>
private static List<Mirror> ListMirrorsInNode(IParentNode parentNode)
{
    // The mirror list is the element that follows the first child mentioning "download mirrors".
    IElement marker = parentNode.QuerySelector(".entry-content")
        .Children
        .First(e => e.Text().Contains("download mirrors", StringComparison.CurrentCultureIgnoreCase));
    IElement mirrorListElement = marker.NextElementSibling;

    // "li" children contribute their own children; any other child contributes itself.
    var candidates = mirrorListElement.Children
        .SelectMany(e => e.LocalName == "li" ? (IEnumerable<IElement>)e.Children : new[] { e });

    return candidates
        .OfType<IHtmlAnchorElement>()
        .Where(a => a.Text != "JDownloader2")
        .Select(a => new Mirror
        {
            MirrorName = Regex.Replace(a.Text, "filehoster(s?): ", "", RegexOptions.IgnoreCase),
            MirrorUrl = a.Href
        })
        .ToList();
}
/// <summary>
/// Shorthand for <c>QuerySelector</c>.
/// </summary>
/// <param name="b">Node to search within.</param>
/// <param name="selector">CSS selector to match.</param>
/// <returns>The result of <c>QuerySelector</c> for <paramref name="selector"/>.</returns>
public static IElement El(this IParentNode b, string selector)
{
    return b.QuerySelector(selector);
}
/// <summary>
/// Returns the href of the first anchor whose href contains <c>DownloadLink</c>.
/// </summary>
/// <param name="dom">Node to search within.</param>
/// <returns>The anchor's <c>href</c> attribute, or <c>null</c> when no such anchor exists.</returns>
private static string GetDownloadLink(IParentNode dom)
{
    var anchor = dom.QuerySelector($"a[href*=\"{DownloadLink}\"]");
    return anchor == null ? null : anchor.GetAttribute("href");
}
/// <summary>
/// Returns the first element matching a selector, failing loudly when there is none.
/// </summary>
/// <param name="parent">Node to search within.</param>
/// <param name="selector">CSS selector to match.</param>
/// <returns>The first matching element; never null.</returns>
/// <exception cref="InvalidOperationException">No element matches <paramref name="selector"/>.</exception>
public static IElement QuerySelectorOrThrow(this IParentNode parent, string selector)
{
    var match = parent.QuerySelector(selector);
    if (match == null)
    {
        throw new InvalidOperationException($"Can't find any element matching selector '{selector}'.");
    }

    return match;
}
/// <summary>
/// Returns an attribute of the first element matching a selector.
/// </summary>
/// <param name="b">Node to search within.</param>
/// <param name="selector">CSS selector to match.</param>
/// <param name="attribute">Name of the attribute to read.</param>
/// <returns>The attribute value, or <c>null</c> when no element matches.</returns>
public static string QsAttr(this IParentNode b, string selector, string attribute)
{
    var match = b.QuerySelector(selector);
    return match == null ? null : match.GetAttribute(attribute);
}
/// <summary>
/// Returns the text content of the <c>div.product_header h1</c> element.
/// </summary>
/// <param name="node">Node containing the product header.</param>
/// <returns>The heading's text content.</returns>
public string ParseName(IParentNode node) =>
    node.QuerySelector("div.product_header h1").TextContent;
/// <summary>
/// Builds properties from a table: one <c>Property</c> per body row, with each cell assigned
/// to the property named after its column header.
/// </summary>
/// <param name="table">Node containing a <c>thead</c> with <c>th</c> headers and a <c>tbody</c> with <c>tr</c> rows.</param>
/// <returns>The list that <c>AddSingleOrMultipleProperties</c> was asked to fill, one call per row.</returns>
private static IEnumerable<Property> BuildSvgPropertyList(IParentNode table)
{
    var properties = new List<Property>();

    var headers = table.QuerySelector("thead")
        .QuerySelectorAll("th")
        .Select(header => header.TextContent.Trim())
        .ToList();

    foreach (var row in table.QuerySelector("tbody").QuerySelectorAll("tr"))
    {
        var p = new Property();

        // Query the row's cells once instead of once per column.
        var cells = row.QuerySelectorAll("th, td");

        for (var i = 0; i < headers.Count; i++)
        {
            var value = cells[i].TextContent.Trim();

            // The header text, passed through FixPropertyName, selects the target property by name.
            p.GetType().GetProperty(FixPropertyName(headers[i])).SetValue(p, value, null);
        }

        AddSingleOrMultipleProperties(p, properties);
    }

    return properties;
}
/// <summary>
/// Returns the first element matching a selector as <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Element type to return.</typeparam>
/// <param name="b">Node to search within.</param>
/// <param name="selector">CSS selector to match.</param>
/// <returns>
/// The first match when it is a <typeparamref name="T"/>; otherwise <c>null</c>.
/// </returns>
public static T Qs<T>(this IParentNode b, string selector) where T : class, IElement
{
    var match = b.QuerySelector(selector);
    return match as T;
}