/// <summary>
/// Recursively converts the child nodes of <paramref name="parentHtmlNode"/> into
/// <see cref="JsonizeNode"/> instances and appends the ones worth keeping to
/// <paramref name="parentJsonizeNode"/>.
/// </summary>
/// <remarks>
/// A child is kept when at least one of the following holds: its name does not start
/// with <c>#</c>, its (optionally trimmed) inner text passes the empty-text-node check,
/// it has attributes, or it has child nodes of its own. When nothing ends up in the
/// parent's child list, <c>Children</c> is reset to <c>null</c>.
/// </remarks>
/// <param name="parentJsonizeNode">The node that receives the converted children.</param>
/// <param name="parentHtmlNode">The HTML node whose child nodes are converted.</param>
private void GetChildren(JsonizeNode parentJsonizeNode, HtmlNode parentHtmlNode)
{
    // Create the list up front so the final Count check cannot hit a null
    // reference when the HTML node has no child nodes at all.
    if (parentJsonizeNode.Children == null)
    {
        parentJsonizeNode.Children = new List<JsonizeNode>();
    }

    foreach (HtmlNode htmlNode in parentHtmlNode.ChildNodes)
    {
        bool addToParent = false;
        JsonizeNode childJsonizeNode = new JsonizeNode();

        // Names beginning with '#' do not get a Tag.
        if (!htmlNode.Name.StartsWith("#", System.StringComparison.Ordinal))
        {
            childJsonizeNode.Tag = htmlNode.Name;
            addToParent = true;
        }

        string innerText = HtmlDecode(
            _textTrimHandling == TextTrimHandling.Trim
                ? htmlNode.InnerText.Trim()
                : htmlNode.InnerText);

        if (_emptyTextNodeHandling == EmptyTextNodeHandling.Include
            || !string.IsNullOrWhiteSpace(innerText))
        {
            // Text is only stored on nodes without children; nodes with children
            // get their text through the recursive call below.
            if (!htmlNode.HasChildNodes)
            {
                childJsonizeNode.Text = string.IsNullOrEmpty(innerText) ? null : innerText;
            }

            addToParent = true;
        }

        childJsonizeNode.Node = htmlNode.NodeType.ToString();

        if (htmlNode.HasAttributes)
        {
            if (childJsonizeNode.Attributes == null)
            {
                childJsonizeNode.Attributes = new System.Dynamic.ExpandoObject();
            }

            AddAttributes(htmlNode, childJsonizeNode);
            addToParent = true;
        }

        if (htmlNode.HasChildNodes)
        {
            GetChildren(childJsonizeNode, htmlNode);
            addToParent = true;
        }

        if (addToParent)
        {
            parentJsonizeNode.Children.Add(childJsonizeNode);
        }
    }

    // An empty list is replaced by null.
    if (parentJsonizeNode.Children.Count == 0)
    {
        parentJsonizeNode.Children = null;
    }
}
/// <summary>
/// Builds the <see cref="JsonizeNode"/> tree for the HTML document.
/// </summary>
/// <returns>
/// The root <see cref="JsonizeNode"/>; its <c>Node</c> is the document node's type name
/// and its children are filled in by <c>GetChildren</c>.
/// </returns>
public JsonizeNode ParseHtmlAsJsonizeNode()
{
    HtmlNode documentNode = _htmlDoc.DocumentNode;
    JsonizeNode root = new JsonizeNode
    {
        Node = documentNode.NodeType.ToString()
    };

    GetChildren(root, documentNode);

    return root;
}
/// <summary>
/// Builds the <see cref="JsonizeNode"/> tree for the HTML document and converts it
/// to a <see cref="JObject"/>.
/// </summary>
/// <returns>
/// The <see cref="JObject"/> produced by serializing the root node with the
/// configured null value handling.
/// </returns>
public JObject ParseHtmlAsJson()
{
    HtmlNode documentNode = _htmlDoc.DocumentNode;
    JsonizeNode root = new JsonizeNode
    {
        Node = documentNode.NodeType.ToString()
    };

    GetChildren(root, documentNode);

    // The configured null handling is cast to the Newtonsoft enum.
    JsonSerializer serializer = new JsonSerializer();
    serializer.NullValueHandling = (Newtonsoft.Json.NullValueHandling)_nullValueHandling;

    return JObject.FromObject(root, serializer);
}
/// <summary>
/// Copies every attribute of <paramref name="htmlNode"/> into the attribute bag of
/// <paramref name="childJsonizeNode"/>, keyed by attribute name.
/// </summary>
/// <remarks>
/// When class attribute handling is set to <c>Array</c>, the <c>class</c> attribute is
/// stored as a <see cref="List{T}"/> of class names instead of the raw string. The value
/// is split on any HTML whitespace character and empty tokens are dropped, so repeated
/// spaces, tabs or line breaks between class names do not produce empty entries.
/// </remarks>
/// <param name="htmlNode">The HTML node whose attributes are read.</param>
/// <param name="childJsonizeNode">
/// The node whose <c>Attributes</c> receives the values; it must already be initialized.
/// </param>
private void AddAttributes(HtmlNode htmlNode, JsonizeNode childJsonizeNode)
{
    IDictionary<string, object> attributeDict = childJsonizeNode.Attributes;
    bool classAsArray = _classAttributeHandling == ClassAttributeHandling.Array;

    // Separator set for class tokens: space, tab, line feed, carriage return, form feed.
    char[] classSeparators = { ' ', '\t', '\n', '\r', '\f' };

    foreach (HtmlAttribute attribute in htmlNode.Attributes)
    {
        if (classAsArray && attribute.Name.Equals("class"))
        {
            string rawValue = attribute.Value ?? string.Empty;
            string[] classes = rawValue.Split(
                classSeparators,
                System.StringSplitOptions.RemoveEmptyEntries);

            attributeDict["class"] = new List<string>(classes);
        }
        else
        {
            attributeDict[attribute.Name] = attribute.Value;
        }
    }
}
/// <summary>
/// Initializes the metadata for a converted document: the source URL, the domain
/// taken from that URL, and the converted document node.
/// </summary>
/// <param name="jsonizeNode">The converted document; stored in <c>DocumentJsonizeNode</c>.</param>
/// <param name="url">
/// The URL the document came from; stored in <c>Url</c>. The text between the scheme
/// separator (<c>://</c> or a leading <c>//</c>) and the next <c>/</c> is stored in
/// <c>Domain</c>. A URL without a scheme is treated as starting with the domain.
/// </param>
/// <exception cref="System.ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
public JsonizeMeta(JsonizeNode jsonizeNode, string url)
{
    if (url == null)
    {
        throw new System.ArgumentNullException("url");
    }

    Url = url;

    // Locate where the authority starts. "://" only counts as the scheme separator
    // when its first slash is also the first slash of the whole URL; otherwise it
    // belongs to a path or query string.
    int authorityStart = 0;
    int schemeSeparator = url.IndexOf("://", System.StringComparison.Ordinal);
    if (schemeSeparator >= 0 && url.IndexOf('/') == schemeSeparator + 1)
    {
        authorityStart = schemeSeparator + 3;
    }
    else if (url.StartsWith("//", System.StringComparison.Ordinal))
    {
        authorityStart = 2;
    }

    // The domain runs up to the next slash, or to the end of the string when the
    // URL has no path. This avoids indexing into a split result that may be too short.
    int authorityEnd = url.IndexOf('/', authorityStart);
    Domain = authorityEnd < 0
        ? url.Substring(authorityStart)
        : url.Substring(authorityStart, authorityEnd - authorityStart);

    DocumentJsonizeNode = jsonizeNode;
}