/// <summary> Collect the children of a composite tag.
/// <para>An initial test is performed for an empty XML tag, in which case
/// the tag is set as its own end tag and no children are collected.</para>
/// <para>If it's not an empty XML tag, the lexer is repeatedly asked for
/// subsequent nodes until a matching end tag is found or a tag is encountered
/// for which <c>IsTagToBeEndedFor</c> reports that the current tag must end.
/// In the latter case the lexer is rewound to the start of that tag so that
/// a virtual end tag can be supplied later (presumably by <c>FinishTag</c>,
/// which is not shown here).
/// Each node found that is not the end tag is added to the list of children.
/// The end tag is special and not a child.</para>
/// <para>Child tags that have a scanner are recursed into, which provides the
/// nested structure of an HTML page.</para>
/// <para>This method operates in two possible modes, selected by the
/// <c>mUseJVMStack</c> field. It can recurse on the call stack (the "JVM"
/// naming is presumably inherited from a Java original), or it can use the
/// supplied <paramref name="stack"/> argument to nest scanning of child tags
/// within this single invocation ("fake recursion"). The former is left as
/// an option in the code, mostly to help subsequent modifiers visualize what
/// the internal nesting is doing.</para>
/// </summary>
/// <param name="tag">The tag this scanner is responsible for.
/// </param>
/// <param name="lexer">The source of subsequent nodes.
/// </param>
/// <param name="stack">The parse stack. May contain pending tags that enclose
/// this tag.
/// </param>
/// <returns> The resultant tag (may be unchanged).
/// </returns>
public override ITag Scan(ITag tag, Lexer lexer, NodeList stack)
{
    INode node;
    ITag next;
    System.String name;
    IScanner scanner;
    ITag ret;

    // 'ret' is the tag currently being filled with children; in fake
    // recursion mode it moves down into nested tags and back up again.
    ret = tag;
    if (ret.EmptyXmlTag)
    {
        // an empty XML tag acts as its own end tag
        ret.SetEndTag(ret);
    }
    else
        // the whole do/while loop is the body of this else branch
        do
        {
            node = lexer.NextNode(false);
            if (null != node)
            {
                if (node is ITag)
                {
                    next = (ITag) node;
                    name = next.TagName;
                    // check for normal end tag
                    if (next.IsEndTag() && name.Equals(ret.TagName))
                    {
                        ret.SetEndTag(next);
                        // null signals that the current tag is complete
                        node = null;
                    }
                    else if (IsTagToBeEndedFor(ret, next)) // check DTD
                    {
                        // backup one node. insert a virtual end tag later
                        lexer.Position = next.StartPosition;
                        // null signals that the current tag is complete
                        node = null;
                    }
                    else if (!next.IsEndTag())
                    {
                        // now recurse if there is a scanner for this type of tag
                        scanner = next.ThisScanner;
                        if (null != scanner)
                        {
                            if (mUseJVMStack)
                            {
                                // JVM stack recursion
                                node = scanner.Scan(next, lexer, stack);
                                AddChild(ret, node);
                            }
                            else
                            {
                                // fake recursion:
                                if (scanner == this)
                                {
                                    if (next.EmptyXmlTag)
                                    {
                                        // nothing to nest into: close the tag
                                        // immediately and add it as a child
                                        next.SetEndTag(next);
                                        FinishTag(next, lexer);
                                        AddChild(ret, next);
                                    }
                                    else
                                    {
                                        // descend: remember the enclosing tag on
                                        // the parse stack and make the nested
                                        // tag the one being filled
                                        stack.Add(ret);
                                        ret = next;
                                    }
                                }
                                else
                                {
                                    // normal recursion if switching scanners
                                    node = scanner.Scan(next, lexer, stack);
                                    AddChild(ret, node);
                                }
                            }
                        }
                        else
                            // no scanner for this tag: add it as a plain child
                            AddChild(ret, next);
                    }
                    else
                    {
                        if (!mUseJVMStack && !mLeaveEnds)
                        {
                            // Since all non-end tags are consumed by the
                            // previous clause, we're here because we have an
                            // end tag with no opening tag... this could be bad.
                            // There are two cases...
                            // 1) The tag hasn't been registered, in which case
                            //    we just add it as a simple child, like its
                            //    opening tag
                            // 2) There may be an opening tag further up the
                            //    parse stack that needs closing.
                            // So, we ask the factory for a node like this one
                            // (since end tags never have scanners) and see
                            // if its scanner is a composite tag scanner.
                            // If it is we walk up the parse stack looking for
                            // something that needs this end tag to finish it.
                            // If there is something, we close off all the tags
                            // walked over and continue on as if nothing
                            // happened.
                            System.Collections.ArrayList attributes = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
                            attributes.Add(new TagAttribute(name, null));
                            ITag opener = lexer.NodeFactory.CreateTagNode(lexer.Page, next.StartPosition, next.EndPosition, attributes);

                            scanner = opener.ThisScanner;
                            if ((null != scanner) && (scanner == this))
                            {
                                // uh-oh
                                // search the parse stack from the top (innermost
                                // tag) down for the first tag this end tag closes
                                int index = - 1;
                                for (int i = stack.Size() - 1; (- 1 == index) && (i >= 0); i--)
                                {
                                    // short circuit here... assume everything on the stack has this as its scanner
                                    // we'll need to stop if either of those conditions isn't met
                                    ITag boffo = (ITag) stack.ElementAt(i);
                                    if (name.Equals(boffo.TagName))
                                        index = i;
                                    else if (IsTagToBeEndedFor(boffo, next)) // check DTD
                                        index = i;
                                }
                                if (- 1 != index)
                                {
                                    // finish off the current one first
                                    FinishTag(ret, lexer);
                                    AddChild((ITag) stack.ElementAt(stack.Size() - 1), ret);
                                    // then finish every stacked tag above the match,
                                    // attaching each one to the tag just below it
                                    for (int i = stack.Size() - 1; i > index; i--)
                                    {
                                        ITag fred = (ITag) stack.Remove(i);
                                        FinishTag(fred, lexer);
                                        AddChild((ITag) stack.ElementAt(i - 1), fred);
                                    }
                                    // the matched tag becomes the current tag, and
                                    // null marks it as complete
                                    ret = (ITag) stack.Remove(index);
                                    node = null;
                                }
                                else
                                    AddChild(ret, next); // default behaviour
                            }
                            else
                                AddChild(ret, next); // default behaviour
                        }
                        else
                            AddChild(ret, next);
                    }
                }
                else
                {
                    // not a tag: add it as a child and let it run its semantic action
                    AddChild(ret, node);
                    node.DoSemanticAction();
                }
            }

            if (!mUseJVMStack)
            {
                // handle coming out of fake recursion
                if (null == node)
                {
                    // the current tag is complete (or the lexer returned null);
                    // if an enclosing tag is pending on the stack, resume it
                    int depth = stack.Size();
                    if (0 != depth)
                    {
                        // assigning a non-null node here keeps the loop running
                        node = stack.ElementAt(depth - 1);
                        if (node is ITag)
                        {
                            ITag precursor = (ITag) node;
                            scanner = precursor.ThisScanner;
                            if (scanner == this)
                            {
                                // pop the enclosing tag, attach the finished tag
                                // to it and continue collecting its children
                                stack.Remove(depth - 1);
                                FinishTag(ret, lexer);
                                AddChild(precursor, ret);
                                ret = precursor;
                            }
                            else
                                node = null; // normal recursion
                        }
                        else
                            node = null; // normal recursion
                    }
                }
            }
        }
        while (null != node);

    // runs for both the empty XML tag case and the scanned case
    FinishTag(ret, lexer);

    return (ret);
}
/// <summary> Gathers the text nodes produced by searching for the given text.
/// Each node returned by <c>SearchFor</c> is examined: text nodes are collected
/// directly, and composite tags are searched recursively through their own
/// <c>DigupStringNode</c>, with their results appended in order. Nodes of any
/// other kind are ignored.
/// </summary>
/// <param name="searchText">The text to search for.
/// </param>
/// <returns> An array of the text nodes found (empty when there are none).
/// </returns>
public virtual IText[] DigupStringNode(System.String searchText)
{
    NodeList matches = SearchFor(searchText);
    NodeList collected = new NodeList();

    for (int index = 0; index < matches.Size(); index++)
    {
        INode candidate = matches.ElementAt(index);

        // a text node is a result in its own right
        if (candidate is IText)
        {
            collected.Add(candidate);
            continue;
        }

        // anything that is neither text nor a composite tag is skipped
        if (!(candidate is CompositeTag))
        {
            continue;
        }

        // descend into the composite tag and append whatever it finds
        IText[] nested = ((CompositeTag) candidate).DigupStringNode(searchText);
        foreach (IText text in nested)
        {
            collected.Add(text);
        }
    }

    // copy the collected nodes into a strongly typed array
    IText[] result = new IText[collected.Size()];
    int slot = 0;
    while (slot < result.Length)
    {
        result[slot] = (IText) collected.ElementAt(slot);
        slot++;
    }
    return result;
}