Exemplo n.º 1
0
        /*
        Create a <style type="text/css"> element from the lexer's style rules.

        Nothing is done when lexer.styles is null and NiceBody(lexer, doc)
        returns true. Otherwise every entry of lexer.styles is appended to the
        lexer buffer as " tag.class {properties}\n"; the span written becomes
        the text child of an implicit style node, and that node is appended to
        the document head when doc.FindHead locates one.
        */
        private void CreateStyleElement(Lexer lexer, Node doc)
        {
            if (lexer.styles == null)
            {
                if (NiceBody(lexer, doc))
                {
                    return;
                }
            }

            Node styleNode = lexer.NewNode(Node.StartTag, null, 0, 0, "style");
            styleNode.Isimplicit = true;

            /* attach the type attribute */
            AttVal typeAttr = new AttVal(null, null, '"', "type", "text/css");
            typeAttr.Dict = AttributeTable.DefaultAttributeTable.FindAttribute(typeAttr);
            styleNode.Attributes = typeAttr;

            Node bodyNode = doc.FindBody(lexer.Options.tt);

            /* the text span starts here, before the body attributes are cleaned */
            lexer.txtstart = lexer.lexsize;

            if (bodyNode != null)
            {
                CleanBodyAttrs(lexer, bodyNode);
            }

            /* one line per rule: " tag.class {properties}\n" */
            Style rule = lexer.styles;
            while (rule != null)
            {
                lexer.AddCharToLexer(' ');
                lexer.AddStringLiteral(rule.Tag);
                lexer.AddCharToLexer('.');
                lexer.AddStringLiteral(rule.TagClass);
                lexer.AddCharToLexer(' ');
                lexer.AddCharToLexer('{');
                lexer.AddStringLiteral(rule.Properties);
                lexer.AddCharToLexer('}');
                lexer.AddCharToLexer('\n');

                rule = rule.Next;
            }

            lexer.txtend = lexer.lexsize;

            /* wrap everything written since txtstart in a text node */
            Node cssText = lexer.NewNode(Node.TextNode, lexer.lexbuf, lexer.txtstart, lexer.txtend);
            Node.InsertNodeAtEnd(styleNode, cssText);

            /*
            doc is the root node; the style element goes at the end of the
            head, if the document has one
            */
            Node headNode = doc.FindHead(lexer.Options.tt);
            if (headNode == null)
            {
                return;
            }

            Node.InsertNodeAtEnd(headNode, styleNode);
        }
		/*
		HTML is the top level element.

		Reads tokens (ignoring whitespace) into a new root node until the
		first token that must belong to the html element:
		  - tokens accepted by Node.InsertMisc are attached to the root;
		  - the first doctype is attached, later ones are reported and dropped;
		  - end tags are reported and dropped;
		  - anything else starts the html element. If the token is not itself
		    an <html> start tag it is pushed back and an html tag is inferred.
		The html element is then parsed with ParseHTML.Parse and the root is
		returned. If the token stream ends first, the root is returned as is.
		*/
		public static Node parseDocument(Lexer lexer)
		{
			TagTable tags = lexer.Options.tt;
			Node seenDoctype = null;

			Node root = lexer.NewNode();
			root.Type = Node.RootNode;

			Node token;
			while ((token = lexer.GetToken(Lexer.IgnoreWhitespace)) != null)
			{
				/* deal with comments etc. */
				if (Node.InsertMisc(root, token))
				{
					continue;
				}

				if (token.Type == Node.DocTypeTag)
				{
					if (seenDoctype != null)
					{
						Report.Warning(lexer, root, token, Report.DISCARDING_UNEXPECTED);
						continue;
					}

					Node.InsertNodeAtEnd(root, token);
					seenDoctype = token;
					continue;
				}

				if (token.Type == Node.EndTag)
				{
					Report.Warning(lexer, root, token, Report.DISCARDING_UNEXPECTED); //TODO?
					continue;
				}

				Node html;
				if (token.Type == Node.StartTag && token.Tag == tags.TagHtml)
				{
					html = token;
				}
				else
				{
					/* push the token back so it is parsed as html content */
					lexer.UngetToken();
					html = lexer.InferredTag("html");
				}

				Node.InsertNodeAtEnd(root, html);
				ParseHTML.Parse(lexer, html, (short) 0); // TODO?
				return root;
			}

			return root;
		}
		/*
		Parses an XML document from the lexer's token stream and returns the
		new root node. Sets lexer.Options.XmlTags to true as a side effect.

		Per token (whitespace ignored):
		  - end tags are reported as unexpected and dropped;
		  - tokens accepted by Node.InsertMisc are attached to the root;
		  - the first doctype is attached, later ones are reported and dropped;
		  - start tags are attached and their content parsed by parseXMLElement;
		  - any other token falls through without being attached or reported.

		After the stream is exhausted the doctype keywords are checked and,
		when lexer.Options.XmlPi is set, lexer.FixXmlPI is run on the root.
		*/
		public static Node parseXMLDocument(Lexer lexer)
		{
			Node root = lexer.NewNode();
			root.Type = Node.RootNode;
			Node seenDoctype = null;
			lexer.Options.XmlTags = true;

			Node token;
			while ((token = lexer.GetToken(Lexer.IgnoreWhitespace)) != null)
			{
				/* discard unexpected end tags */
				if (token.Type == Node.EndTag)
				{
					Report.Warning(lexer, null, token, Report.UNEXPECTED_ENDTAG);
					continue;
				}

				/* deal with comments etc. */
				if (Node.InsertMisc(root, token))
				{
					continue;
				}

				if (token.Type == Node.DocTypeTag)
				{
					if (seenDoctype != null)
					{
						Report.Warning(lexer, root, token, Report.DISCARDING_UNEXPECTED);
						// TODO
						continue;
					}

					Node.InsertNodeAtEnd(root, token);
					seenDoctype = token;
					// TODO
					continue;
				}

				/* only start tags open an element whose content gets parsed */
				if (token.Type != Node.StartTag)
				{
					continue;
				}

				Node.InsertNodeAtEnd(root, token);
				parseXMLElement(lexer, token, Lexer.IgnoreWhitespace);
			}

			if (seenDoctype != null)
			{
				if (!lexer.CheckDocTypeKeyWords(seenDoctype))
				{
					Report.Warning(lexer, seenDoctype, null, Report.DTYPE_NOT_UPPER_CASE);
				}
			}

			/* ensure presence of initial <?XML version="1.0"?> */
			if (lexer.Options.XmlPi)
			{
				lexer.FixXmlPI(root);
			}

			return root;
		}
Exemplo n.º 4
0
        /*
        This maps
        <p>hello<em> world</em>
        to
        <p>hello <em>world</em>

        Trims initial space, by moving it before the
        start tag, or if this element is the first in
        parent's content, then by discarding the space.

        lexer   - used only to allocate a new node when a preceding
                  text node has to be created
        element - the element whose content begins with text
        text    - the first text node inside element

        Nothing happens unless text is a non-empty text node whose first
        byte is a space. The space is moved in front of element only when
        element is inline, is not a field, and is not the first child of
        its parent; in every case the leading space is then dropped from
        text by advancing text.Start.
        */
        public static void TrimInitialSpace(Lexer lexer, Node element, Node text)
        {
            Node prev, node;

            // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated.
            //      31-Oct-00.
            // The emptiness test has to come before the array read: for an empty
            // text node Start may equal the array length, and reading
            // Textarray[Start] first would throw instead of simply skipping the node.
            if (text.Type == TextNode && (text.Start < text.End) && text.Textarray[text.Start] == (byte) ' ')
            {
                if (((element.Tag.Model & ContentModel.Inline) != 0) && !((element.Tag.Model & ContentModel.Field) != 0) && element.Parent.Content != element)
                {
                    prev = element.Prev;

                    if (prev != null && prev.Type == TextNode)
                    {
                        /* make sure the preceding text ends with exactly one space */
                        // NOTE(review): assumes prev.Textarray has a free slot at
                        // prev.End (and that prev.End > 0) - confirm against callers.
                        if (prev.Textarray[prev.End - 1] != (byte) ' ')
                        {
                            prev.Textarray[prev.End++] = (byte) ' ';
                        }

                        ++element.Start;
                    }
                    /* create new node */
                    else
                    {
                        node = lexer.NewNode();
                        // Local fix for bug 228486 (GLP).  This handles the case
                        // where we need to create a preceding text node but there are
                        // no "slots" in textarray that we can steal from the current
                        // element.  Therefore, we create a new textarray containing
                        // just the blank.  When Tidy is fixed, this should be removed.
                        if (element.Start >= element.End)
                        {
                            node.Start = 0;
                            node.End = 1;
                            node.Textarray = new byte[1];
                        }
                        else
                        {
                            /* steal the first slot of element's range for the blank */
                            node.Start = element.Start++;
                            node.End = element.Start;
                            node.Textarray = element.Textarray;
                        }
                        node.Textarray[node.Start] = (byte) ' ';

                        /* link the new node into the sibling list just before element */
                        node.Prev = prev;
                        if (prev != null)
                        {
                            prev.Next = node;
                        }
                        node.Next = element;
                        element.Prev = node;
                        node.Parent = element.Parent;
                    }
                }

                /* discard the space  in current node */
                ++text.Start;
            }
        }