/// <summary>
/// Creates an enumerator bound to the given collection.
/// </summary>
/// <param name="collection">The collection this enumerator will walk</param>
internal MetasEnumerator(XhtmlTCollection<T> collection)
{
    _collection = collection;

    // When the backing list holds no items the cursor is set to zero up front.
    bool backingListIsEmpty = (_collection._List.Count == 0);
    if (backingListIsEmpty)
    {
        _index = 0;
    }
}
/// <summary>
/// Re-parents the tail of a node list. Every node from <paramref name="node_index"/> up to the
/// end of <paramref name="nodes"/> is appended to <paramref name="new_parent"/> and then taken
/// out of the list. Finally the new parent is flagged as explicitly terminated.
/// </summary>
/// <param name="nodes">The collection of nodes</param>
/// <param name="node_index">The index of the first node (in the above collection) to move</param>
/// <param name="new_parent">The node which will become the parent of the moved nodes</param>
private static void MoveNodesDown(ref XhtmlTCollection<XhtmlElement> nodes, int node_index, XhtmlElement new_parent)
{
    // Phase 1: attach every trailing node to its new parent, in list order.
    int position = node_index;
    while (position < nodes.Count)
    {
        new_parent.AppendChild(nodes[position]);
        nodes[position].SetParent(new_parent);
        position++;
    }

    // Phase 2: drop the moved nodes from the flat list. Removing repeatedly at the same
    // index works because each removal shifts the remaining tail down by one.
    for (int remaining = nodes.Count - node_index; remaining > 0; remaining--)
    {
        nodes.RemoveAt(node_index);
    }

    new_parent.IsExplicitlyTerminated = true;
}
/// <summary>
/// This will find the corresponding opening tag for the named one. This is identified as
/// the most recently read node with the same name (compared case-insensitively), which has
/// no child elements and has not been terminated yet.
/// </summary>
/// <param name="nodes">The collection of nodes</param>
/// <param name="name">The name of the tag</param>
/// <returns>The index of the opening tag, or -1 if it was not found</returns>
private static int FindTagOpenNodeIndex(XhtmlTCollection<XhtmlElement> nodes, string name)
{
    // Walk backwards so the most recently read candidate wins.
    for (int index = nodes.Count - 1; index >= 0; index--)
    {
        XhtmlElement candidate = nodes[index] as XhtmlElement;
        if (candidate == null)
        {
            continue;
        }

        // Tag names are identifiers, not linguistic text: compare them ordinally and
        // case-insensitively. Lower-casing with the current culture would break matching
        // under cultures with special casing rules (e.g. the Turkish dotless i).
        bool sameName = string.Equals(candidate.LocalName, name, System.StringComparison.OrdinalIgnoreCase);

        if (sameName && !candidate.HasChildElements && candidate.IsTerminated == false)
        {
            return index;
        }
    }

    return -1;
}
/// <summary>
/// Parses a string containing HTML and produces a tree of elements.
/// The input is first run through the script/style pre-processing and comment removal
/// helpers, then tokenized, and the token stream is walked once from start to end.
/// </summary>
/// <param name="html">The HTML to be parsed</param>
/// <param name="isRemoveEmptyElementText">The default mechanism will extract a pure DOM tree,
/// which will contain many text nodes containing just whitespace (carriage returns etc.)
/// However, with normal parsing, these are useless and only serve to complicate matters.
/// Therefore, this option exists to automatically remove those empty text nodes.</param>
/// <returns>A tree representing the elements</returns>
public static XhtmlTCollection<XhtmlElement> Parse(string html, bool isRemoveEmptyElementText)
{
    XhtmlTCollection<XhtmlElement> nodes = new XhtmlTCollection<XhtmlElement>();

    html = PreprocessScript(html, "script");
    html = PreprocessScript(html, "style");
    html = RemoveComments(html);
    html = RemoveSGMLComments(html);

    StringCollection tokens = GetTokens(html);

    int pos = 0;

    // The section whose open tag was read last and which is still "current";
    // it is handed to any text node that follows. Null when there is none.
    XhtmlSection current = null;

    while (pos < tokens.Count)
    {
        string token = tokens[pos];

        // A stray closing bracket is simply skipped.
        if (token == ">")
        {
            pos++;
            continue;
        }

        // Open tag: name, then attributes, then ">" or "/>".
        if (token == "<")
        {
            pos++;
            if (pos >= tokens.Count)
            {
                // Input ended right after "<"
                break;
            }

            string openName = tokens[pos];
            pos++;
            current = new XhtmlSection(openName);

            // Collect attributes until the tag is closed or the tokens run out.
            while (pos < tokens.Count && tokens[pos] != ">" && tokens[pos] != "/>")
            {
                string attrName = tokens[pos];
                pos++;

                bool hasNextToken = pos < tokens.Count;
                if (hasNextToken && tokens[pos] == "=")
                {
                    pos++;

                    // The value may be missing when the input is truncated after "=".
                    string rawValue = (pos < tokens.Count) ? tokens[pos] : null;
                    pos++;

                    XhtmlAttribute valued = new XhtmlAttribute(attrName, XhtmlEncoder.Decode(rawValue));
                    current.Attributes.Add(valued);
                }
                else if (hasNextToken)
                {
                    // Attribute without a value
                    XhtmlAttribute bare = new XhtmlAttribute(attrName, null);
                    current.Attributes.Add(bare);
                }
            }

            nodes.Add(current);

            if (pos < tokens.Count)
            {
                string tagEnd = tokens[pos];
                if (tagEnd == "/>")
                {
                    // Self-closing tag: mark it terminated and stop treating it as current.
                    current.IsTerminated = true;
                    pos++;
                    current = null;
                }
                else if (tagEnd == ">")
                {
                    pos++;
                }
            }

            continue;
        }

        // Close tag: fold everything read since the matching open tag into that tag.
        if (token == "</")
        {
            pos++;
            if (pos >= tokens.Count)
            {
                // Input ended right after "</"
                break;
            }

            string closeName = tokens[pos];
            pos++;

            int openerAt = FindTagOpenNodeIndex(nodes, closeName);
            if (openerAt != -1)
            {
                MoveNodesDown(ref nodes, openerAt + 1, (XhtmlElement)nodes[openerAt]);
            }
            // A close tag without a matching open tag is silently ignored.

            // Skip whatever remains of this tag, up to and including its ">".
            while (pos < tokens.Count && tokens[pos] != ">")
            {
                pos++;
            }
            if (pos < tokens.Count)
            {
                // The loop above can only stop early on ">", so consume it.
                pos++;
            }

            current = null;
            continue;
        }

        // Anything else is text content.
        string text = token;
        if (isRemoveEmptyElementText)
        {
            text = RemoveWhitespace(text);
        }
        text = DecodeScript(text);

        bool dropAsEmpty = isRemoveEmptyElementText && text.Length == 0;
        if (!dropAsEmpty)
        {
            // Text under a section flagged NoEscaping is kept without entity decoding.
            bool keepRaw = (current != null) && current.NoEscaping;
            if (!keepRaw)
            {
                text = XhtmlEncoder.Decode(text);
            }

            nodes.Add(new XhtmlText(current, text));
        }

        pos++;
    }

    return nodes;
}