/// <summary> Collect the children.
		/// <p>An initial test is performed for an empty XML tag, in which case
		/// the start tag and end tag of the returned tag are the same and it has
		/// no children.<p>
		/// If it's not an empty XML tag, the lexer is repeatedly asked for
		/// subsequent nodes until an end tag is found or a node is encountered
		/// that matches the tag ender set or end tag ender set.
		/// In the latter case, a virtual end tag is created.
		/// Each node found that is not the end tag is added to
		/// the list of children. The end tag is special and not a child.<p>
		/// Nodes that also have a CompositeTagScanner as their scanner are
		/// recursed into, which provides the nested structure of an HTML page.
		/// This method operates in two possible modes, depending on a private boolean.
		/// It can recurse on the JVM stack, which has caused some overflow problems
		/// in the past, or it can use the supplied stack argument to nest scanning
		/// of child tags within itself. The former is left as an option in the code,
		/// mostly to help subsequent modifiers visualize what the internal nesting
		/// is doing.
		/// </summary>
		/// <param name="tag">The tag this scanner is responsible for.
		/// </param>
		/// <param name="lexer">The source of subsequent nodes.
		/// </param>
		/// <param name="stack">The parse stack. May contain pending tags that enclose
		/// this tag.
		/// </param>
		/// <returns> The resultant tag (may be unchanged).
		/// </returns>
		public override ITag Scan(ITag tag, Lexer lexer, NodeList stack)
		{
			INode node;
			ITag next;
			System.String name;
			IScanner scanner;
			ITag ret;
			
			ret = tag;
			
			if (ret.EmptyXmlTag)
			{
				ret.SetEndTag(ret);
			}
			else
				do 
				{
					node = lexer.NextNode(false);
					if (null != node)
					{
						if (node is ITag)
						{
							next = (ITag) node;
							name = next.TagName;
							// check for normal end tag
							if (next.IsEndTag() && name.Equals(ret.TagName))
							{
								ret.SetEndTag(next);
								node = null;
							}
							else if (IsTagToBeEndedFor(ret, next))
								// check DTD
							{
								// backup one node. insert a virtual end tag later
								lexer.Position = next.StartPosition;
								node = null;
							}
							else if (!next.IsEndTag())
							{
								// now recurse if there is a scanner for this type of tag
								scanner = next.ThisScanner;
								if (null != scanner)
								{
									if (mUseJVMStack)
									{
										// JVM stack recursion
										node = scanner.Scan(next, lexer, stack);
										AddChild(ret, node);
									}
									else
									{
										// fake recursion:
										if (scanner == this)
										{
											if (next.EmptyXmlTag)
											{
												next.SetEndTag(next);
												FinishTag(next, lexer);
												AddChild(ret, next);
											}
											else
											{
												stack.Add(ret);
												ret = next;
											}
										}
										else
										{
											// normal recursion if switching scanners
											node = scanner.Scan(next, lexer, stack);
											AddChild(ret, node);
										}
									}
								}
								else
									AddChild(ret, next);
							}
							else
							{
								if (!mUseJVMStack && !mLeaveEnds)
								{
									// Since all non-end tags are consumed by the
									// previous clause, we're here because we have an
									// end tag with no opening tag... this could be bad.
									// There are two cases...
									// 1) The tag hasn't been registered, in which case
									// we just add it as a simple child, like it's
									// opening tag
									// 2) There may be an opening tag further up the
									// parse stack that needs closing.
									// So, we ask the factory for a node like this one
									// (since end tags never have scanners) and see
									// if it's scanner is a composite tag scanner.
									// If it is we walk up the parse stack looking for
									// something that needs this end tag to finish it.
									// If there is something, we close off all the tags
									// walked over and continue on as if nothing
									// happened.
									System.Collections.ArrayList attributes = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
									attributes.Add(new TagAttribute(name, null));
									ITag opener = lexer.NodeFactory.CreateTagNode(lexer.Page, next.StartPosition, next.EndPosition, attributes);
									
									scanner = opener.ThisScanner;
									if ((null != scanner) && (scanner == this))
									{
										// uh-oh
										int index = - 1;
										for (int i = stack.Size() - 1; (- 1 == index) && (i >= 0); i--)
										{
											// short circuit here... assume everything on the stack has this as it's scanner
											// we'll need to stop if either of those conditions isn't met
											ITag boffo = (ITag) stack.ElementAt(i);
											if (name.Equals(boffo.TagName))
												index = i;
											else if (IsTagToBeEndedFor(boffo, next))
												// check DTD
												index = i;
										}
										if (- 1 != index)
										{
											// finish off the current one first
											FinishTag(ret, lexer);
											AddChild((ITag) stack.ElementAt(stack.Size() - 1), ret);
											for (int i = stack.Size() - 1; i > index; i--)
											{
												ITag fred = (ITag) stack.Remove(i);
												FinishTag(fred, lexer);
												AddChild((ITag) stack.ElementAt(i - 1), fred);
											}
											ret = (ITag) stack.Remove(index);
											node = null;
										}
										else
											AddChild(ret, next); // default behaviour
									}
									else
										AddChild(ret, next); // default behaviour
								}
								else
									AddChild(ret, next);
							}
						}
						else
						{
							AddChild(ret, node);
							node.DoSemanticAction();
						}
					}
					
					if (!mUseJVMStack)
					{
						// handle coming out of fake recursion
						if (null == node)
						{
							int depth = stack.Size();
							if (0 != depth)
							{
								node = stack.ElementAt(depth - 1);
								if (node is ITag)
								{
									ITag precursor = (ITag) node;
									scanner = precursor.ThisScanner;
									if (scanner == this)
									{
										stack.Remove(depth - 1);
										FinishTag(ret, lexer);
										AddChild(precursor, ret);
										ret = precursor;
									}
									else
										node = null; // normal recursion
								}
								else
									node = null; // normal recursion
							}
						}
					}
				}
				while (null != node);
			
			FinishTag(ret, lexer);
			
			return (ret);
		}
Example #2
0
		/// <summary> Finds a text node, however embedded it might be, and returns
		/// it. The text node will retain links to its parents, so
		/// further navigation is possible.
		/// </summary>
		/// <param name="searchText">The text to search for.
		/// </param>
		/// <returns> The list of text nodes (recursively) found.
		/// </returns>
		public virtual IText[] DigupStringNode(System.String searchText)
		{
			NodeList nodeList = SearchFor(searchText);
			NodeList stringNodes = new NodeList();
			for (int i = 0; i < nodeList.Size(); i++)
			{
				INode node = nodeList.ElementAt(i);
				if (node is IText)
				{
					stringNodes.Add(node);
				}
				else
				{
					if (node is CompositeTag)
					{
						CompositeTag ctag = (CompositeTag) node;
						IText[] nodes = ctag.DigupStringNode(searchText);
						for (int j = 0; j < nodes.Length; j++)
							stringNodes.Add(nodes[j]);
					}
				}
			}
			IText[] stringNode = new IText[stringNodes.Size()];
			for (int i = 0; i < stringNode.Length; i++)
			{
				stringNode[i] = (IText) stringNodes.ElementAt(i);
			}
			return stringNode;
		}