/// <summary>
/// Re-arms this iterator so the same instance can be reused for the next sibling tag instead
/// of allocating a new one. No parent reference is touched here; only the per-tag state is
/// returned to its starting values.
/// </summary>
public void Reset()
{
    // Back to the first parsing step.
    this.Step = 0;

    // The next tag's HTML starts wherever the scan position currently is.
    this.HtmlStart = this.Pos;

    this.ReadTextOnly = false;
    this.Object = null;
}
/// <summary>
/// Determines whether an element is the last of its parent's child elements that share its
/// node name (i.e. the last child of its type).
/// </summary>
///
/// <param name="element">
/// The element to test.
/// </param>
///
/// <returns>
/// true when the final same-named child element of the parent is the element itself, false
/// otherwise.
/// </returns>
public override bool Matches(IDomObject element)
{
    // Compare by node-name id; the last sibling that shares it wins.
    var lastOfSameType = element.ParentNode.ChildElements
        .LastOrDefault(sibling => sibling.NodeNameID == element.NodeNameID);

    return lastOfSameType == element;
}
/// <summary>
/// Writes a single node to the supplied writer, picking the rendering routine from the node's
/// <c>NodeType</c>.
/// </summary>
///
/// <exception cref="NotImplementedException">
/// Thrown when the node's type is not one of the types handled below.
/// </exception>
///
/// <param name="node">
/// The node to render.
/// </param>
/// <param name="writer">
/// The writer that receives the output.
/// </param>
public void Render(IDomObject node, TextWriter writer)
{
    switch (node.NodeType)
    {
        // Documents and fragments have no markup of their own: only their children are rendered.
        case NodeType.DOCUMENT_NODE:
        case NodeType.DOCUMENT_FRAGMENT_NODE:
            RenderElements(node.ChildNodes, writer);
            return;

        case NodeType.ELEMENT_NODE:
            RenderElement(node, writer, true);
            return;

        case NodeType.TEXT_NODE:
            RenderTextNode(node, writer, false);
            return;

        case NodeType.CDATA_SECTION_NODE:
            RenderCdataNode(node, writer);
            return;

        case NodeType.COMMENT_NODE:
            RenderCommentNode(node, writer);
            return;

        case NodeType.DOCUMENT_TYPE_NODE:
            RenderDocTypeNode(node, writer);
            return;

        default:
            throw new NotImplementedException("An unknown node type was found while rendering the CsQuery document.");
    }
}
/// <summary>
/// Determines whether an element is the only child element of its parent with its node name
/// (i.e. the only element of its type among its siblings).
/// </summary>
///
/// <param name="element">
/// The element to test.
/// </param>
///
/// <returns>
/// true when <c>SingleOrDefaultAlways</c> yields a non-null result for the same-named
/// siblings, false otherwise.
/// </returns>
public override bool Matches(IDomObject element)
{
    var sameTypeSiblings = element.ParentNode.ChildElements
        .Where(sibling => sibling.NodeNameID == element.NodeNameID);

    // NOTE(review): SingleOrDefaultAlways presumably returns the default value (null) unless
    // the sequence holds exactly one item - confirm against its definition.
    var onlyOfType = sameTypeSiblings.SingleOrDefaultAlways();

    return onlyOfType != null;
}
/// <summary>
/// Determines whether an element is a parent, i.e. it has children and
/// <c>Empty.IsEmpty</c> does not consider it empty.
/// </summary>
///
/// <param name="element">
/// The element to test.
/// </param>
///
/// <returns>
/// true if the element has at least one child and is not empty, false otherwise.
/// </returns>
public override bool Matches(IDomObject element)
{
    // Short-circuit: the emptiness scan only runs when there are children at all.
    return element.HasChildren && !Empty.IsEmpty(element);
}
/// <summary>
/// Re-arms this iterator so the same instance can be reused for the next sibling tag instead
/// of allocating a new one. No parent reference is touched here; only the per-tag state is
/// returned to its defaults.
/// </summary>
public void Reset()
{
    this.TokenizerState = TokenizerState.Default;

    // The next tag's HTML starts wherever the scan position currently is.
    this.HtmlStart = this.Pos;

    this.InsertionMode = InsertionMode.Default;
    this.Element = null;
}
/// <summary>
/// Determines whether an element has no meaningful children. Child nodes may exist, but as
/// long as none of them is an element and every text node has a null or empty value, the
/// element counts as empty. Other node types are ignored.
/// </summary>
///
/// <param name="element">
/// The element to test.
/// </param>
///
/// <returns>
/// true if the element is empty, false if not.
/// </returns>
public static bool IsEmpty(IDomObject element)
{
    // Every child must be "ignorable": not an element, and not a text node carrying a value.
    return element.ChildNodes.All(child =>
        child.NodeType != NodeType.ELEMENT_NODE
        && (child.NodeType != NodeType.TEXT_NODE || String.IsNullOrEmpty(child.NodeValue)));
}
/// <summary>
/// Renders a node into an in-memory buffer and returns the result as a string.
/// </summary>
///
/// <param name="node">
/// The node to render.
/// </param>
///
/// <returns>
/// Everything the writer-based overload wrote for the node.
/// </returns>
public string Render(IDomObject node)
{
    using (var buffer = new StringWriter())
    {
        // Delegate to the writer-based overload, then hand back what it produced.
        Render(node, buffer);
        string rendered = buffer.ToString();
        return rendered;
    }
}
/// <summary>
/// Renders a node to the supplied TextWriter. The content is first accumulated in a
/// StringBuilder via <c>AddContents</c> and then written out in a single call.
/// </summary>
///
/// <param name="node">
/// The node to render.
/// </param>
/// <param name="writer">
/// The writer that receives the output.
/// </param>
public void Render(IDomObject node, TextWriter writer)
{
    // A fresh string-info helper is created for every render pass.
    stringInfo = CharacterData.CreateStringInfo();

    var output = new StringBuilder();
    AddContents(output, node, true);

    string rendered = output.ToString();
    writer.Write(rendered);
}
/// <summary>
/// Determines whether an element is a header (H1-H6).
/// </summary>
///
/// <param name="element">
/// The element to test.
/// </param>
///
/// <returns>
/// true if the node name is exactly "H" followed by a digit from 1 to 6, false otherwise
/// (including a null, empty or one-character node name).
/// </returns>
public override bool Matches(IDomObject element)
{
    var nodeName = element.NodeName;

    // The length is verified before any character is indexed, so short names cannot throw.
    // The comparison is against an upper-case 'H', exactly as before.
    // The digit range starts at '1' because "H0" is not a header element.
    return nodeName != null
        && nodeName.Length == 2
        && nodeName[0] == 'H'
        && nodeName[1] >= '1'
        && nodeName[1] <= '6';
}
/// <summary>
/// Adds an element to the set of selected elements. The element is expected to belong to this
/// DOM, but that is not enforced: an ownership check that used to throw
/// InvalidOperationException for unbound or foreign elements is disabled.
/// </summary>
/// <param name="element">The element to add to the selection.</param>
/// <returns>Whatever <c>SelectionSet.Add</c> returns for the element.</returns>
protected bool AddSelection(IDomObject element)
{
    bool added = SelectionSet.Add(element);
    return added;
}
/// <summary>
/// Yields the parent of a node, but only when that parent exists and is an element node.
/// </summary>
/// <param name="input">The node whose parent is requested.</param>
/// <returns>A sequence holding the parent element, or an empty sequence.</returns>
private IEnumerable<IDomObject> ParentImpl(IDomObject input)
{
    var parent = input.ParentNode;

    // Parentless nodes and non-element parents (e.g. a document) produce nothing.
    if (parent == null || parent.NodeType != NodeType.ELEMENT_NODE)
    {
        yield break;
    }

    yield return parent;
}
/// <summary>
/// Reads the text of the <c>td.date</c> cell inside an event and parses it as a date.
/// </summary>
/// <param name="dom">The document used to run the selector.</param>
/// <param name="event">The node that scopes the selector.</param>
/// <returns>
/// The parsed date, or whatever value <c>DateTimeOffset.TryParse</c> leaves in its out
/// argument when the text cannot be parsed.
/// </returns>
static DateTimeOffset ParseEventDate(CQ dom, IDomObject @event)
{
    var rawDate = dom.Select("td.date", @event).Text().Trim();

    // The success flag is not inspected: the out value is returned either way.
    DateTimeOffset parsed;
    DateTimeOffset.TryParse(rawDate, out parsed);

    return parsed;
}
/// <summary>
/// Extracts the team ("command") names from an event by selecting the member-name nodes and
/// taking their trimmed text, with <c>Nbsp</c> replaced by a regular space.
/// </summary>
/// <param name="dom">The document used to run the selector.</param>
/// <param name="event">The node that scopes the selector.</param>
/// <returns>The extracted names; debug builds assert that there are exactly two.</returns>
static string[] ParseCommandsNames(CQ dom, IDomObject @event)
{
    var memberNodes = dom.Select(
        "td.today-name > span.command > div.today-member-name, td.name > span.command div.member-name",
        @event);

    var commands = memberNodes
        .Map(node => node.Cq().Text().Trim().Replace(Nbsp, ' '))
        .ToArray();

    Debug.Assert(commands.Length == 2, "Teams names have not been recognized: " + @event.InnerHTML);

    return commands;
}
/// <summary>
/// Builds an album item from a fragment of HTML: the title and link come from the first
/// <c>.title a</c> match, the year from the first <c>.year</c> match, and the ID from the part
/// of the URL taken by <c>SubstringFromLastIndex('/', 1)</c>.
/// </summary>
/// <param name="htmlData">The node holding the album markup.</param>
public ArtistAlbumItem(IDomObject htmlData)
{
    var s = CQ.Create(htmlData);

    // Each selector is resolved once; a missing node simply flows through as null.
    var titleLink = s[".title a"]?.FirstOrDefault();
    var yearNode = s[".year"]?.FirstOrDefault();

    // Fallbacks: "Unknown" for a missing title, "-1" for a missing year.
    Title = HttpUtility.HtmlDecode(titleLink?.InnerHTML?.Trim() ?? "Unknown");
    Year = HttpUtility.HtmlDecode(yearNode?.InnerHTML?.Trim() ?? "-1").ToInt();

    // FirstOrDefault yields a default pair (null Value) when there is no href attribute.
    Url = HttpUtility.HtmlDecode(titleLink?.Attributes?.FirstOrDefault(x => x.Key == "href").Value);

    // NOTE(review): Url is null when no link/href was found; whether SubstringFromLastIndex
    // tolerates a null receiver is not visible here - confirm.
    ID = Url.SubstringFromLastIndex('/', 1);
}
/// <summary>
/// Creates a CSS source for a link tag by working out the URI its stylesheet is downloaded
/// from.
/// </summary>
/// <param name="node">The link tag; it must carry an href attribute.</param>
/// <param name="baseUri">Base URI used to resolve a relative href; may be null.</param>
public LinkTagCssSource(IDomObject node, Uri baseUri)
{
    // First() throws when no href attribute (matched case-insensitively) is present.
    var href = node.Attributes
        .First(a => a.Key.Equals("href", StringComparison.OrdinalIgnoreCase))
        .Value;

    // A relative href is resolved against the base only when a base is available;
    // in every other case the href is treated as an absolute URI.
    bool resolveAgainstBase = baseUri != null && Uri.IsWellFormedUriString(href, UriKind.Relative);

    _downloadUri = resolveAgainstBase
        ? new Uri(baseUri, href)
        : new Uri(href);
}
/// <summary>
/// Runs a selector against this object's document, starting from a context node.
/// </summary>
///
/// <param name="selector">
/// A string containing a selector expression.
/// </param>
/// <param name="context">
/// The point in the document at which matching begins; comparable to the context argument of
/// the CQ.Create(selector, context) method.
/// </param>
///
/// <returns>
/// A new CQ object holding the matches, with its Selector set to the parsed expression.
/// </returns>
///
/// <url>
/// http://api.jquery.com/jQuery/#jQuery1
/// </url>
public CQ Select(string selector, IDomObject context)
{
    var parsedSelector = new Selector(selector);

    CQ result = NewInstance(parsedSelector.Select(Document, context), this);
    result.Selector = parsedSelector;

    return result;
}
/// <summary>
/// Finds the linked attribute rules that apply to an element. A rule applies when the element
/// has the rule's attribute (the rule key) and the style class defines the rule's CSS
/// property (the rule value).
/// </summary>
/// <param name="domobject">The element whose attributes are checked.</param>
/// <param name="styles">The style class supplying the CSS values.</param>
/// <returns>One entry per applicable rule, pairing the attribute name with the CSS value.</returns>
public static IList<AttributeToCss> FindEquivalent(IDomObject domobject, StyleClass styles)
{
    return _linkedAttributes
        .Where(rule => domobject.HasAttribute(rule.Key)
                       && styles.Attributes.ContainsKey(rule.Value))
        .Select(rule => new AttributeToCss
        {
            AttributeName = rule.Key,
            CssValue = styles.Attributes[rule.Value].Value
        })
        .ToList();
}
/// <summary>
/// Converts every <c>td.js-price</c> cell of an event into a <c>ParsedEventData</c>.
/// </summary>
/// <param name="dom">The document used to run the selector.</param>
/// <param name="event">The node that scopes the selector.</param>
/// <returns>One item per price cell.</returns>
static IEnumerable<ParsedEventData> ParseEvents(CQ dom, IDomObject @event)
{
    var priceCells = dom.Select("td.js-price", @event);

    return priceCells.Map(node =>
    {
        var cell = node.Cq();

        // The coefficient must be read first: the child elements are removed right after.
        // double.Parse throws when the text is not a number.
        var coefficientText = cell.Find("span.selection-link").Text().Trim();
        var coefficient = double.Parse(coefficientText);

        // With the child elements removed, the cell's remaining text is the specification.
        var specification = cell.Children().Remove().End().Text().Trim();

        return new ParsedEventData
        {
            Coefficient = coefficient,
            Specification = specification
        };
    });
}
/// <summary>
/// Queues all children of an element for rendering by pushing them onto the output stack in
/// reverse order. Does nothing when the element has no children.
/// </summary>
///
/// <param name="element">
/// The element whose children are queued.
/// </param>
protected virtual void ParseChildren(IDomObject element)
{
    if (!element.HasChildren)
    {
        return;
    }

    // Reverse order: presumably so the first child ends up on top of the stack - confirm.
    foreach (IDomObject child in element.ChildNodes.Reverse())
    {
        // Flags a text node whose parent's node name is reported by HtmlChildrenNotAllowed.
        bool textUnderRestrictedParent = child.NodeType == NodeType.TEXT_NODE
            && HtmlData.HtmlChildrenNotAllowed(element.NodeNameID);

        OutputStack.Push(new NodeStackElement(child, textUnderRestrictedParent, false));
    }
}
/// <summary>
/// Attaches an element to this collection's owner: sets its parent and position and, when the
/// element is indexed, registers it in its document's index.
/// </summary>
/// <param name="element">The element being attached; must be a <c>DomObject</c>.</param>
/// <param name="index">The position of the element among the owner's children.</param>
/// <exception cref="InvalidCastException">
/// Thrown when <paramref name="element"/> is not a <c>DomObject</c>.
/// </exception>
private void AddParent(IDomObject element, int index)
{
    // A direct cast reports a wrong element type as InvalidCastException here, instead of
    // the NullReferenceException an unchecked "as" cast produced on the next line.
    DomObject item = (DomObject)element;

    item.ParentNode = Owner;
    item.Index = index;

    if (element.IsIndexed)
    {
        item.Document.DocumentIndex.AddToIndex((IDomIndexedNode)element);
    }
}
/// <summary>
/// Finds the nearest following sibling element whose node name equals the given tag name
/// (culture-invariant, case-insensitive).
/// </summary>
/// <param name="domObj">The node whose following siblings are searched.</param>
/// <param name="tagType">The tag name to look for.</param>
/// <returns>
/// The first matching following sibling, or null when <paramref name="domObj"/> is null or no
/// following sibling matches.
/// </returns>
private IDomObject GetNext(IDomObject domObj, string tagType)
{
    if (domObj == null)
    {
        return null;
    }

    // Walk the sibling chain iteratively. The previous recursive form dereferenced a null
    // NextElementSibling (NullReferenceException) once the siblings were exhausted.
    for (var sibling = domObj.NextElementSibling; sibling != null; sibling = sibling.NextElementSibling)
    {
        if (sibling.NodeName.Equals(tagType, StringComparison.InvariantCultureIgnoreCase))
        {
            return sibling;
        }
    }

    return null;
}
/// <summary>
/// Creates a new CQ object from a single element. When the element is a document, the new
/// object adopts it as its Document and selects the document's child nodes; otherwise a new
/// fragment is created from the element.
/// </summary>
///
/// <param name="element">
/// The element to wrap.
/// </param>
///
/// <returns>
/// A new CQ object.
/// </returns>
public static CQ Create(IDomObject element)
{
    CQ csq = new CQ();

    var document = element as IDomDocument;
    if (document == null)
    {
        // Not a document: build a fresh fragment around the single element.
        csq.CreateNewFragment(Objects.Enumerate(element));
        return csq;
    }

    csq.Document = document;
    csq.AddSelection(csq.Document.ChildNodes);
    return csq;
}
/// <summary>
/// Returns the result of <c>SingleOrDefaultAlways</c> over the parent's child elements, or
/// null outright when the parent is a document node.
/// </summary>
/// <param name="parent">The node whose child elements are inspected.</param>
/// <returns>The single child element, or null.</returns>
private IDomObject OnlyChildOrNull(IDomObject parent)
{
    // Children of a document node never qualify.
    return parent.NodeType == NodeType.DOCUMENT_NODE
        ? null
        : parent.ChildElements.SingleOrDefaultAlways();
}
/// <summary>
/// Best-effort wrapper around <c>GetParagraphText</c>: any exception raised while extracting
/// the text is swallowed and an empty string is returned instead.
/// </summary>
/// <param name="x">The node to extract text from.</param>
/// <returns>The paragraph text, or an empty string when extraction fails.</returns>
private static string DeEntitize(IDomObject x)
{
    string paragraphText;

    try
    {
        paragraphText = GetParagraphText(x);
    }
    catch
    {
        // Failures are swallowed here and the text falls back to empty.
        paragraphText = String.Empty;
    }

    return paragraphText;
}
/// <summary>
/// Detaches an element from its parent. A connected element that is an indexed node is removed
/// from its document's index first. Elements without a parent are left alone.
/// </summary>
/// <param name="element">The element to detach.</param>
protected void RemoveParent(IDomObject element)
{
    if (element.ParentNode == null)
    {
        return;
    }

    // The index entry must go while the element can still reach its document.
    bool mustUnindex = !element.IsDisconnected && element is IDomIndexedNode;
    if (mustUnindex)
    {
        element.Document.RemoveFromIndex((IDomIndexedNode)element);
    }

    ((DomObject)element).ParentNode = null;
}
/// <summary>
/// Produces the text of a node by passing it to <c>GetText</c> with a renderer that uses
/// <c>OutputFormatters.HtmlEncodingMinimum</c>, then applies two string replacements and trims
/// the result.
/// </summary>
/// <param name="x">The node to extract text from.</param>
/// <returns>
/// An empty string when the node's InnerText is null, empty or whitespace; otherwise the
/// processed text.
/// </returns>
private static string GetParagraphText(IDomObject x)
{
    // Nothing worth rendering: bail out before doing any work.
    if (string.IsNullOrWhiteSpace(x.InnerText))
    {
        return(String.Empty);
    }

    // NOTE(review): the search literals of the two Replace calls may contain special
    // whitespace (e.g. a non-breaking space or a double space) that is not
    // distinguishable when reading this code - confirm before editing them.
    return(GetText(x, y => y.Render(OutputFormatters.HtmlEncodingMinimum)).Replace(" ", " ")
           .Replace(" ", " ")
           .Trim());
}
/// <summary>
/// Attaches an element to this collection's owner: sets its parent and position and, when the
/// element is an indexed node that is connected, adds it to its document's index.
/// </summary>
/// <param name="element">The element being attached; must be a <c>DomObject</c>.</param>
/// <param name="index">The position of the element among the owner's children.</param>
protected void AddParent(IDomObject element, int index)
{
    var attached = (DomObject)element;
    attached.ParentNode = Owner;
    attached.Index = index;

    // Only connected, indexable nodes belong in the document index.
    if (!(element is IDomIndexedNode) || element.IsDisconnected)
    {
        return;
    }

    element.Document.AddToIndex((IDomIndexedNode)element);
}
/// <summary>
/// Renders a comment node, unless the rendering options request that comments be removed.
/// </summary>
///
/// <param name="element">
/// The comment node to render.
/// </param>
/// <param name="writer">
/// The writer to which output is written.
/// </param>
protected void RenderCommentNode(IDomObject element, TextWriter writer)
{
    bool stripComments = DomRenderingOptions.HasFlag(DomRenderingOptions.RemoveComments);

    if (!stripComments)
    {
        // The node value is written verbatim between the comment delimiters.
        writer.Write("<!--" + element.NodeValue + "-->");
    }
}
/// <summary>
/// Lazily walks the element siblings of a node in one direction, nearest sibling first.
/// </summary>
/// <param name="input">The node whose siblings are enumerated; it is not itself returned.</param>
/// <param name="next">true to walk following siblings, false to walk preceding siblings.</param>
/// <returns>Every element sibling in the chosen direction.</returns>
private IEnumerable<IDomObject> nextPrevAllImpl(IDomObject input, bool next)
{
    IDomObject current = input;

    while (true)
    {
        // Step one sibling over in the requested direction.
        current = next ? current.NextElementSibling : current.PreviousElementSibling;

        if (current == null)
        {
            yield break;
        }

        yield return current;
    }
}
/// <summary>
/// Lazily walks the element siblings of a node in one direction, stopping just before the
/// first sibling contained in the "until" set.
/// </summary>
/// <param name="input">The node whose siblings are enumerated.</param>
/// <param name="untilEls">Elements that terminate the walk; none of them is returned.</param>
/// <param name="next">true to walk following siblings, false to walk preceding siblings.</param>
/// <returns>The siblings found before the first terminating element.</returns>
private IEnumerable<IDomObject> nextPrevUntilFilterImpl(IDomObject input, HashSet<IDomElement> untilEls, bool next)
{
    foreach (IDomElement sibling in nextPrevAllImpl(input, next))
    {
        if (untilEls.Contains(sibling))
        {
            // Reached a terminator: the walk ends here.
            yield break;
        }

        yield return sibling;
    }
}
/// <summary>
/// Renders a text node, either verbatim or passed through the HTML encoder.
/// </summary>
///
/// <param name="textNode">
/// The text node.
/// </param>
/// <param name="writer">
/// The writer to which output is written.
/// </param>
/// <param name="raw">
/// true to write the node value as-is; false to encode it with <c>HtmlEncoder</c>.
/// </param>
protected virtual void RenderTextNode(IDomObject textNode, TextWriter writer, bool raw)
{
    if (!raw)
    {
        HtmlEncoder.Encode(textNode.NodeValue, writer);
        return;
    }

    writer.Write(textNode.NodeValue);
}
/// <summary>
/// Extracts the word that precedes the first colon in a node's text, after tabs and line
/// breaks have been turned into spaces and bracketed sections removed.
/// </summary>
/// <param name="de">The node whose InnerText is processed.</param>
/// <returns>
/// The trimmed text before the colon; an empty string when the text contains no colon.
/// </returns>
private string extractTurkceWord(IDomObject de)
{
    // Tabs, line feeds and carriage returns each become a single space.
    string flattened = Regex.Replace(de.InnerText, @"\t|\n|\r", " ");

    // Drop every [...] section together with its content.
    string withoutBrackets = Regex.Replace(flattened, @"\[[^\]]*\]", "");

    // Take the run of non-colon characters that is immediately followed by a colon.
    Match beforeColon = Regex.Match(withoutBrackets, @"[^\:]*(?=\:)");

    return beforeColon.ToString().Trim();
}
/// <summary>
/// Returns the result of <c>SingleOrDefaultAlways</c> over the parent's child elements, or
/// null outright when the parent is a document node.
/// </summary>
/// <param name="parent">The node whose child elements are inspected.</param>
/// <returns>The single child element, or null.</returns>
private IDomObject OnlyChildOrNull(IDomObject parent)
{
    if (parent.NodeType != NodeType.DOCUMENT_NODE)
    {
        return parent.ChildElements.SingleOrDefaultAlways();
    }

    // Children of a document node never qualify.
    return null;
}
/// <summary>
/// Appends a script tag to the builder, carrying over the src attribute value of the given
/// element.
/// </summary>
/// <param name="scriptTags">The builder that accumulates the script tags.</param>
/// <param name="el">The element whose src attribute is copied.</param>
private void AddScript(StringBuilder scriptTags, IDomObject el)
{
    var sourceValue = CQ.Create(el).Attr("src");

    // The template's {src} placeholder is filled from the anonymous object's "src" property.
    var templateValues = new { src = "src=\"{0}\""._Format(sourceValue) };

    scriptTags.Append("<script {src}></script>".NamedFormat(templateValues));
}
/// <summary>
/// Deletes an element from the source by issuing an update that replaces the span from the
/// start of its open token to the end of its end token with an empty string.
/// </summary>
/// <param name="repo">The repository passed on to the update.</param>
/// <param name="context">The render context passed on to the update.</param>
/// <param name="domObject">The DOM object passed on to the update.</param>
/// <param name="element">The element whose source span is removed.</param>
internal virtual void DeleteDom(IRepository repo, RenderContext context, IDomObject domObject, Element element)
{
    var span = element.location;

    // Replacing the whole span with nothing removes the element from the source.
    var removal = new SourceUpdate
    {
        StartIndex = span.openTokenStartIndex,
        EndIndex = span.endTokenEndIndex,
        NewValue = string.Empty
    };

    UpdateDomObject(repo, context, domObject, removal);
}
/// <summary>
/// Placeholder for the :lang selector, which is not implemented. An earlier sketch (disabled)
/// treated it as an inherited, starts-with-or-hyphen attribute match on "lang".
/// </summary>
/// <param name="element">The element to test (unused).</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public override bool Matches(IDomObject element)
{
    throw new NotImplementedException(":lang is not currently implemented.");
}
/// <summary>
/// Returns the cleaned-up text of a node: InnerText is trimmed, HTML-decoded and passed
/// through <c>TrimText</c>. A lone dash is treated as "no value".
/// </summary>
/// <param name="obj">The node to read.</param>
/// <returns>
/// null when InnerText is null or the decoded text is exactly "-"; otherwise the cleaned text.
/// </returns>
public static string Text(this IDomObject obj)
{
    var raw = obj.InnerText;
    if (raw == null)
    {
        return null;
    }

    var decoded = HttpUtility.HtmlDecode(raw.Trim());

    // A single dash is mapped to null.
    if (decoded == "-")
    {
        return null;
    }

    return decoded.TrimText();
}
/// <summary>
/// Test whether the element is empty.
/// </summary>
///
/// <param name="element">
/// The element to test.
/// </param>
///
/// <returns>
/// true if the element has no children, or <c>IsEmpty</c> reports that its children are all
/// ignorable; false otherwise.
/// </returns>
public override bool Matches(IDomObject element)
{
    // Cheapest check first: an element with no children at all is trivially empty.
    if (!element.HasChildren)
    {
        return true;
    }

    // Children exist, so they must be inspected. The previous condition was inverted: it
    // returned false for every element with children, and only scanned childless elements.
    return IsEmpty(element);
}
/// <summary>
/// Determines whether the element is empty.
/// </summary>
///
/// <param name="element">
/// The element to test.
/// </param>
///
/// <returns>
/// true if the element has no children, or <c>IsEmpty</c> reports it as empty; false
/// otherwise.
/// </returns>
public override bool Matches(IDomObject element)
{
    // The cheap HasChildren test short-circuits the child scan for childless elements.
    return !element.HasChildren || IsEmpty(element);
}
/// <summary>
/// Removes an attribute from a tag after raising the removing-attribute notification, which
/// gives handlers the chance to cancel the removal.
/// </summary>
/// <param name="tag">The tag that owns the attribute.</param>
/// <param name="attribute">The attribute (name and value) to remove.</param>
private void RemoveAttribute(IDomObject tag, KeyValuePair<string, string> attribute)
{
    var args = new RemovingAttributeEventArgs { Attribute = attribute };
    OnRemovingAttribute(args);

    if (args.Cancel)
    {
        // Cancelled via the event args: leave the attribute in place.
        return;
    }

    tag.RemoveAttribute(attribute.Key);
}
/// <summary>
/// Lazily enumerates every descendant element of a node in document (depth-first, pre-order)
/// order: each child is followed by its own descendants before the next child is visited.
/// </summary>
/// <param name="element">The node whose descendants are enumerated; it is not itself returned.</param>
/// <returns>All descendant elements.</returns>
public static IEnumerable<IDomElement> GetDescendantElements(IDomObject element)
{
    // One open enumerator per level of the tree currently being walked.
    var levels = new Stack<IEnumerator<IDomElement>>();
    levels.Push(element.ChildElements.GetEnumerator());

    try
    {
        while (levels.Count > 0)
        {
            var currentLevel = levels.Peek();

            if (currentLevel.MoveNext())
            {
                var child = currentLevel.Current;
                yield return child;

                // Descend into the child before moving on to its next sibling.
                levels.Push(child.ChildElements.GetEnumerator());
            }
            else
            {
                // This level is exhausted: go back up.
                levels.Pop().Dispose();
            }
        }
    }
    finally
    {
        // Early termination by the consumer still releases every open enumerator.
        while (levels.Count > 0)
        {
            levels.Pop().Dispose();
        }
    }
}
/// <summary>
/// Removes an element from this collection's owner, provided the owner is its parent.
/// </summary>
///
/// <param name="item">
/// The item to remove.
/// </param>
///
/// <returns>
/// true if the item was a child of the owner and has been removed; false if its parent is
/// something else.
/// </returns>
public bool Remove(IDomObject item)
{
    bool isOwnChild = item.ParentNode == this.Owner;

    if (isOwnChild)
    {
        // The item knows its own position, so no search is needed.
        RemoveAt(item.Index);
    }

    return isOwnChild;
}
/// <summary>
/// Rewrites stylesheet references for bundling: the first stylesheet element is pointed at
/// the bundle, every later one is removed from the document.
/// </summary>
/// <param name="foundFirstCss">true when an earlier stylesheet element was already handled.</param>
/// <param name="element">The stylesheet element to update.</param>
/// <returns>Always true.</returns>
private bool UpdateCssPath(bool foundFirstCss, IDomObject element)
{
    if (foundFirstCss)
    {
        // The bundle is already referenced; this element is redundant.
        element.Remove();
    }
    else
    {
        element.SetAttribute("href", Globals.FullPath(_styleBundle.Path));
    }

    return true;
}
/// <summary>
/// Detaches an element from its parent. A connected, indexed element is removed from its
/// document's index first. Elements without a parent are left alone.
/// </summary>
/// <param name="element">The element to detach; must be a <c>DomObject</c>.</param>
/// <exception cref="InvalidCastException">
/// Thrown when <paramref name="element"/> has a parent but is not a <c>DomObject</c>.
/// </exception>
private void RemoveParent(IDomObject element)
{
    if (element.ParentNode == null)
    {
        return;
    }

    // A direct cast reports a wrong element type as InvalidCastException here, instead of
    // the NullReferenceException an unchecked "as" cast produced further down.
    DomObject item = (DomObject)element;

    // The index entry must go while the element can still reach its document.
    if (!element.IsDisconnected && element.IsIndexed)
    {
        item.Document.DocumentIndex.RemoveFromIndex((IDomIndexedNode)element);
    }

    item.ParentNode = null;
}
/// <summary>
/// Scrapes coin records from the table rows found inside a container node. Every row matched
/// by "table > tbody > tr" is read positionally: cell 0 holds the year (plus an optional mint
/// mark), cell 1 the details, cell 2 the mintage and cells 3-11 the grade columns.
/// </summary>
/// <param name="parentDiv">The node whose inner HTML contains the coin table.</param>
/// <returns>One <c>CbCoin</c> per table row.</returns>
private IList <CbCoin> ScrapeCoins(IDomObject parentDiv)
{
    var coins = new List <CbCoin>();

    // The container's inner HTML is assigned to a CQ so it can be queried with selectors.
    CQ html    = parentDiv.InnerHTML;
    var coinRows = html["table > tbody > tr"].ToList();

    foreach (var coinRow in coinRows)
    {
        CQ coinHtml = coinRow.InnerHTML;

        // Collect the row's elements; they are addressed by position below, so a row
        // with fewer than twelve entries makes the indexer throw.
        var tdCoinData = coinHtml.Elements.ToList();

        // The year text sits two levels below the first cell.
        var rawYear   = tdCoinData[0].FirstChild.FirstChild.NodeValue;
        // First four characters are the year; the character at index 5, when present,
        // is the mint mark.
        var year      = rawYear.Substring(0, 4);
        var mintMark  = rawYear.Length > 5 ? rawYear.Substring(5, 1) : "";
        var details   = tdCoinData[1].InnerText;
        var mintage   = tdCoinData[2].InnerText;

        // The grade columns are read but only consumed by the disabled GradeValues
        // initializer further down.
        var gradeG4   = tdCoinData[3].InnerText;
        var gradeVG8  = tdCoinData[4].InnerText;
        var gradeF12  = tdCoinData[5].InnerText;
        var gradeVF20 = tdCoinData[6].InnerText;
        var gradeEF40 = tdCoinData[7].InnerText;
        var gradeAU50 = tdCoinData[8].InnerText;
        var gradeMS60 = tdCoinData[9].InnerText;
        var gradeMS63 = tdCoinData[10].InnerText;
        var gradePr65 = tdCoinData[11].InnerText;

        // Build the coin record from the scraped cells.
        var coin = new CbCoin
        {
            Year     = int.Parse(year),
            MintMark = mintMark,
            Details  = details,
            Mintage  = GetMintage(mintage) //,
                       //GradeValues = new List<CbGradeValue>
                       //{
                       //    GetGradeValue(CbGrade.Good, gradeG4),
                       //    GetGradeValue(CbGrade.VeryGood, gradeVG8),
                       //    GetGradeValue(CbGrade.Fine, gradeF12),
                       //    GetGradeValue(CbGrade.VeryFine, gradeVF20),
                       //    GetGradeValue(CbGrade.ExtraFine, gradeEF40),
                       //    GetGradeValue(CbGrade.AU, gradeAU50),
                       //    GetGradeValue(CbGrade.MS60, gradeMS60),
                       //    GetGradeValue(CbGrade.MS63, gradeMS63),
                       //    GetGradeValue(CbGrade.Proof, gradePr65),
                       //}
        };

        coins.Add(coin);
    }

    return(coins);
}
/// <summary>
/// Removes a tag from the document after raising the removing-tag notification, which gives
/// handlers the chance to cancel the removal.
/// </summary>
/// <param name="tag">The tag to be removed.</param>
private void RemoveTag(IDomObject tag)
{
    var args = new RemovingTagEventArgs { Tag = tag };
    OnRemovingTag(args);

    if (args.Cancel)
    {
        // Cancelled via the event args: leave the tag in place.
        return;
    }

    tag.Remove();
}
/// <summary>
/// Applies a style class to an element: every attribute/value pair resolved by
/// <c>CssElementStyleResolver.GetAllStyles</c> is written through <c>PrepareAttribute</c>.
/// </summary>
/// <param name="domElement">The element that receives the attributes.</param>
/// <param name="clazz">The style class to apply.</param>
/// <returns>The same element, for chaining.</returns>
private static IDomObject ApplyStyles(IDomObject domElement, StyleClass clazz)
{
    // PrepareAttribute is used rather than a direct SetAttribute call.
    foreach (var resolved in CssElementStyleResolver.GetAllStyles(domElement, clazz))
    {
        PrepareAttribute(domElement, resolved);
    }

    return domElement;
}
/// <summary>
/// Queues the children of an element on the output stack, or the element itself when it has
/// no children, and then renders the stack to the writer.
/// </summary>
/// <param name="element">The element whose children are rendered.</param>
/// <param name="writer">The writer that receives the output.</param>
private void RenderChildrenInternal(IDomObject element, TextWriter writer)
{
    if (!element.HasChildren)
    {
        // Childless: the element itself is pushed instead.
        OutputStack.Push(new NodeStackElement(element, false, false));
    }
    else
    {
        ParseChildren(element);
    }

    RenderStack(writer);
}
/// <summary>
/// Collects every attribute that must be written to an element for a style class: the inline
/// "style" attribute first, followed by the equivalents found by
/// <c>CssStyleEquivalence.FindEquivalent</c>.
/// </summary>
/// <param name="domElement">The element the styles are resolved for.</param>
/// <param name="styleClass">The style class to translate.</param>
/// <returns>The attribute/value pairs to apply, "style" first.</returns>
public static IEnumerable<AttributeToCss> GetAllStyles(IDomObject domElement, StyleClass styleClass)
{
    var inlineStyle = new AttributeToCss
    {
        AttributeName = "style",
        CssValue = styleClass.ToString()
    };

    var attributeCssList = new List<AttributeToCss>();
    attributeCssList.Add(inlineStyle);
    attributeCssList.AddRange(CssStyleEquivalence.FindEquivalent(domElement, styleClass));

    return attributeCssList;
}
/// <summary>
/// Returns the "Include" attribute of a REFERENCE, COMPILE or EMBEDDEDRESOURCE node.
/// </summary>
/// <param name="node">The node to inspect.</param>
/// <returns>The "Include" attribute value, or null for any other node name.</returns>
private static string GetValueBasedOnNode(IDomObject node)
{
    switch (node.NodeName)
    {
        // All three item kinds keep their value in the same attribute.
        case "REFERENCE":
        case "COMPILE":
        case "EMBEDDEDRESOURCE":
            return node.Attributes["Include"];

        default:
            return null;
    }
}
/// <summary>
/// Collects every attribute that must be written to an element for a style class: the special
/// premailer attributes, the inline "style" attribute (only when the class declares at least
/// one attribute) and the equivalents found by <c>CssStyleEquivalence.FindEquivalent</c>.
/// </summary>
/// <param name="domElement">The element the styles are resolved for.</param>
/// <param name="styleClass">The style class to translate.</param>
/// <returns>The attribute/value pairs to apply, in the order described above.</returns>
public static IEnumerable<AttributeToCss> GetAllStyles(IDomObject domElement, StyleClass styleClass)
{
    var attributeCssList = new List<AttributeToCss>();

    AddSpecialPremailerAttributes(attributeCssList, styleClass);

    // The "style" attribute is emitted only when the class has declarations.
    bool hasDeclarations = styleClass.Attributes.Count > 0;
    if (hasDeclarations)
    {
        var inlineStyle = new AttributeToCss
        {
            AttributeName = "style",
            CssValue = styleClass.ToString()
        };
        attributeCssList.Add(inlineStyle);
    }

    attributeCssList.AddRange(CssStyleEquivalence.FindEquivalent(domElement, styleClass));

    return attributeCssList;
}
/// <summary>
/// Concatenates the values of an element's direct text-node children. Text nested inside
/// child elements is not included.
/// </summary>
/// <param name="element">The element to read.</param>
/// <returns>The combined text, or an empty string when there are no children.</returns>
public static string ElementText(this IDomObject element)
{
    var text = new StringBuilder();

    if (!element.HasChildren)
    {
        return text.ToString();
    }

    foreach (IDomObject child in element.ChildNodes)
    {
        // Only direct text nodes contribute.
        if (child.NodeType != NodeType.TEXT_NODE)
        {
            continue;
        }

        text.Append(child.NodeValue);
    }

    return text.ToString();
}
/// <summary>
/// Rewrites script references for bundling: the first script element is pointed at the
/// bundle, every later one has its src attribute removed.
/// </summary>
/// <param name="foundFirstScript">true when an earlier script element was already handled.</param>
/// <param name="element">The script element to update.</param>
/// <returns>Always true.</returns>
private bool UpdateJavascriptPath(bool foundFirstScript, IDomObject element)
{
    if (foundFirstScript)
    {
        // Later script tags only lose their src: per the original author, removing the
        // node from the DOM was considered too expensive.
        element.RemoveAttribute("src");
    }
    else
    {
        element.SetAttribute("src", Globals.FullPath(_scriptBundle.Path));
    }

    return true;
}
/// <summary>
/// Applies a style class to an element: every attribute/value pair resolved by
/// <c>CssElementStyleResolver.GetAllStyles</c> is written through <c>PrepareAttribute</c>.
/// A "style" attribute that is null or empty afterwards is removed.
/// </summary>
/// <param name="domElement">The element that receives the attributes.</param>
/// <param name="clazz">The style class to apply.</param>
/// <returns>The same element, for chaining.</returns>
private static IDomObject ApplyStyles(IDomObject domElement, StyleClass clazz)
{
    // PrepareAttribute is used rather than a direct SetAttribute call.
    foreach (var resolved in CssElementStyleResolver.GetAllStyles(domElement, clazz))
    {
        PrepareAttribute(domElement, resolved);
    }

    // Do not leave an empty style attribute behind.
    var inlineStyle = domElement.Attributes["style"];
    if (string.IsNullOrEmpty(inlineStyle))
    {
        domElement.RemoveAttribute("style");
    }

    return domElement;
}
/// <summary>
/// Tests the configured expression against one value of the element. The value is chosen by
/// <c>Mode</c>: an attribute, a CSS style property, or a raw data value, each looked up by
/// <c>Property</c>. A missing (null) value is matched as an empty string.
/// </summary>
/// <param name="element">The element to test.</param>
/// <returns>true if the expression matches the selected value, false otherwise.</returns>
/// <exception cref="NotImplementedException">Thrown for an unrecognized mode.</exception>
public override bool Matches(IDomObject element)
{
    string candidate;

    switch (Mode)
    {
        case Modes.Attr:
            candidate = element[Property];
            break;

        case Modes.Css:
            candidate = element.Style[Property];
            break;

        case Modes.Data:
            candidate = element.Cq().DataRaw(Property);
            break;

        default:
            throw new NotImplementedException();
    }

    return Expression.IsMatch(candidate ?? "");
}
/// <summary>
/// Writes one resolved attribute to an element. For the width and height attributes of IMG
/// elements a trailing "px" unit is stripped first, because the HTML attribute takes a bare
/// number: the tag must render as <c>width="206"</c>, not <c>width="206px"</c>
/// (see PreMailerTests.MoveCssInline_HasStyle_DoesNotBreakImageWidthAttribute()).
/// </summary>
/// <param name="domElement">The element that receives the attribute.</param>
/// <param name="attributeToCss">The attribute name and CSS value to write.</param>
private static void PrepareAttribute(IDomObject domElement, AttributeToCss attributeToCss)
{
    const string PixelSuffix = "px";

    string name = attributeToCss.AttributeName;
    string value = attributeToCss.CssValue;

    bool isImageDimension = domElement.NodeName == @"IMG" && (name == "width" || name == "height");

    // Ordinal comparison: "px" is a fixed token and must not depend on the current culture.
    // A null value is passed through untouched, as it is for every other attribute.
    if (isImageDimension
        && value != null
        && value.EndsWith(PixelSuffix, System.StringComparison.Ordinal))
    {
        // Only the trailing unit is removed; the rest of the value is left intact.
        value = value.Substring(0, value.Length - PixelSuffix.Length);
    }

    domElement.SetAttribute(name, value);
}
/// <summary>
/// Returns every child element that is the only one of its type (node name) under the parent.
/// Used when no specific type is requested.
/// </summary>
/// <param name="parent">The node whose child elements are examined.</param>
/// <returns>The child elements whose node name occurs exactly once.</returns>
private IEnumerable<IDomObject> OnlyChildOfAnyType(IDomObject parent)
{
    // Maps a node-name id to its single representative, or to null once a second
    // element of that type has been seen.
    var singleByType = new Dictionary<ushort, IDomElement>();

    foreach (var child in parent.ChildElements)
    {
        ushort typeId = child.NodeNameID;
        singleByType[typeId] = singleByType.ContainsKey(typeId) ? null : child;
    }

    // Null entries mark types that occurred more than once.
    return singleByType.Values.Where(candidate => candidate != null);
}
/// <summary>
/// Determines whether a node is visible: neither its own element nor any ancestor element may
/// be hidden according to <c>ElementIsItselfHidden</c>.
/// </summary>
/// <param name="element">The node to test; a non-element node is tested via its parent.</param>
/// <returns>false if the element or any ancestor element is hidden, true otherwise.</returns>
public static bool IsVisible(IDomObject element)
{
    // A non-element node (e.g. text) starts the walk at its container.
    IDomObject current = element is IDomElement ? element : element.ParentNode;

    // Climb until the chain ends or a non-element ancestor is reached.
    for (; current != null && current.NodeType == NodeType.ELEMENT_NODE; current = current.ParentNode)
    {
        if (ElementIsItselfHidden((IDomElement)current))
        {
            return false;
        }
    }

    return true;
}