Lexer for html parser (c) 1998-2000 (W3C) MIT, INRIA, Keio University See Tidy.cs for the copyright notice. Derived from HTML Tidy Release 4 Aug 2000
Given a file stream fp, it returns a sequence of tokens. GetToken(fp) gets the next token; UngetToken(fp) provides one level of undo. The tags include an attribute list: - linked list of attribute/value nodes - each node has 2 null-terminated strings - entities are replaced in attribute values. White space is compacted if not in preformatted mode. If not in preformatted mode, then leading white space is discarded and subsequent white space sequences are compacted to single space chars. If XmlTags is no, then tag names are folded to upper case and attribute names to lower case. Not yet done: - Doctype subset and marked sections
        /*
        Checks a node's attributes for duplicates and makes sure it carries a
        "type" attribute.

        When "type" is missing an error is reported and, if possible, a
        "text/javascript" type is added:
          - no "language" attribute           -> type is added
          - language is javascript* / jscript -> type is added
          - any other language (or a language
            attribute without a value)        -> node is left alone

        The language value is compared case-insensitively, so the common
        spelling language="JavaScript" is recognised as well.
        */
        public virtual void Check(Lexer lexer, Node node)
        {
            node.CheckUniqueAttributes(lexer);

            AttVal lang = node.GetAttrByName("language");
            AttVal type = node.GetAttrByName("type");
            if (type != null)
            {
                return;
            }

            Report.AttrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);

            if (lang == null)
            {
                node.AddAttribute("type", "text/javascript");
                return;
            }

            /* check for javascript */
            string str = lang.Val;
            if (str == null)
            {
                /* language attribute without a value: nothing to infer from */
                return;
            }

            /* only the first 10 characters matter, so "javascript1.2" etc. still match */
            if (str.Length > 10)
            {
                str = str.Substring(0, 10);
            }

            if (String.Equals(str, "javascript", StringComparison.OrdinalIgnoreCase) ||
                String.Equals(str, "jscript", StringComparison.OrdinalIgnoreCase))
            {
                node.AddAttribute("type", "text/javascript");
            }
        }
        /*
        Validates the value of a vertical alignment attribute.

          top, middle, bottom, baseline   -> accepted
          left, right                     -> accepted only when the node's tag
                                             has the IMG content model
          texttop, absmiddle, absbottom,
          textbottom                      -> proprietary: versions are narrowed
                                             and a warning is reported
          anything else                   -> bad attribute value
          no value at all                 -> missing attribute value
        */
        public virtual void Check(Lexer lexer, Node node, AttVal attval)
        {
            string alignment = attval.Val;

            if (alignment == null)
            {
                Report.AttrError(lexer, node, attval.Attribute, Report.MISSING_ATTR_VALUE);
                return;
            }

            switch (alignment)
            {
                case "top":
                case "middle":
                case "bottom":
                case "baseline":
                    /* all is fine */
                    break;

                case "left":
                case "right":
                    if (node.Tag == null || (node.Tag.Model & ContentModel.IMG) == 0)
                    {
                        Report.AttrError(lexer, node, alignment, Report.BAD_ATTRIBUTE_VALUE);
                    }
                    break;

                case "texttop":
                case "absmiddle":
                case "absbottom":
                case "textbottom":
                    lexer.Versions &= HtmlVersion.Proprietary;
                    Report.AttrError(lexer, node, alignment, Report.PROPRIETARY_ATTR_VALUE);
                    break;

                default:
                    Report.AttrError(lexer, node, alignment, Report.BAD_ATTRIBUTE_VALUE);
                    break;
            }
        }
        /*
        Checks a node's attributes for duplicates and then inspects the value
        of its first "align" attribute:

          left, right   -> versions narrowed to Html40Loose | Frames
          top, bottom   -> versions narrowed to From32
          anything else -> bad attribute value is reported

        A missing align attribute, or one without a value, is ignored.
        */
        public virtual void Check(Lexer lexer, Node node)
        {
            node.CheckUniqueAttributes(lexer);

            /* walk the attribute list until the first "align" entry (or the end) */
            AttVal current = node.Attributes;
            while (current != null && String.CompareOrdinal(current.Attribute, "align") != 0)
            {
                current = current.Next;
            }

            string alignment = current == null ? null : current.Val;
            if (alignment == null)
            {
                return;
            }

            switch (alignment)
            {
                case "left":
                case "right":
                    lexer.Versions &= HtmlVersion.Html40Loose | HtmlVersion.Frames;
                    break;

                case "top":
                case "bottom":
                    lexer.Versions &= HtmlVersion.From32;
                    break;

                default:
                    Report.AttrError(lexer, node, alignment, Report.BAD_ATTRIBUTE_VALUE);
                    break;
            }
        }
Beispiel #4
0
 /*
 Creates a message with the given text and level. The position is taken
 from lexer.Lines and lexer.Columns; the file name is left empty.
 */
 internal TidyMessage(Lexer lexer, string message, MessageLevel level)
 {
     _message = message;
     _level = level;

     _line = lexer.Lines;
     _column = lexer.Columns;
     _filename = String.Empty;
 }
        /*
        Validates the value of an "align" attribute.

        Elements whose tag has the IMG content model are handed over to the
        vertical alignment check. For every other element the value must be
        one of left, center, right or justify; a missing value and any other
        value are reported as errors.
        */
        public virtual void Check(Lexer lexer, Node node, AttVal attval)
        {
            /* IMG, OBJECT, APPLET and EMBED use align for vertical position */
            bool usesVerticalAlign = node.Tag != null && (node.Tag.Model & ContentModel.IMG) != 0;
            if (usesVerticalAlign)
            {
                AttrCheckImpl.CheckValign.Check(lexer, node, attval);
                return;
            }

            string alignment = attval.Val;
            if (alignment == null)
            {
                Report.AttrError(lexer, node, attval.Attribute, Report.MISSING_ATTR_VALUE);
                return;
            }

            switch (alignment)
            {
                case "left":
                case "center":
                case "right":
                case "justify":
                    /* valid horizontal alignment */
                    break;

                default:
                    Report.AttrError(lexer, node, alignment, Report.BAD_ATTRIBUTE_VALUE);
                    break;
            }
        }
Beispiel #6
0
        /*
        Moves presentation attributes from the body element into style rules
        that are appended to the lexer as string literals. Each attribute
        that is found is removed from the body.

        background="foo" ->  body { background-image: url(foo) }
        bgcolor="foo"    ->  body { background-color: foo }
        text="foo"       ->  body { color: foo }
        link="foo"       ->  :link { color: foo }
        vlink="foo"      ->  :visited { color: foo }
        alink="foo"      ->  :active { color: foo }
        */
        private void CleanBodyAttrs(Lexer lexer, Node body)
        {
            string backgroundImage = null;
            string backgroundColor = null;
            string textColor = null;

            /* collect the three attributes that end up in the single "body" rule */
            AttVal current = body.GetAttrByName("background");
            if (current != null)
            {
                backgroundImage = current.Val;
                current.Val = null;
                body.RemoveAttribute(current);
            }

            current = body.GetAttrByName("bgcolor");
            if (current != null)
            {
                backgroundColor = current.Val;
                current.Val = null;
                body.RemoveAttribute(current);
            }

            current = body.GetAttrByName("text");
            if (current != null)
            {
                textColor = current.Val;
                current.Val = null;
                body.RemoveAttribute(current);
            }

            bool needsBodyRule = backgroundImage != null || backgroundColor != null || textColor != null;
            if (needsBodyRule)
            {
                lexer.AddStringLiteral(" body {\n");

                if (backgroundImage != null)
                {
                    lexer.AddStringLiteral("  background-image: url(");
                    lexer.AddStringLiteral(backgroundImage);
                    lexer.AddStringLiteral(");\n");
                }

                if (backgroundColor != null)
                {
                    lexer.AddStringLiteral("  background-color: ");
                    lexer.AddStringLiteral(backgroundColor);
                    lexer.AddStringLiteral(";\n");
                }

                if (textColor != null)
                {
                    lexer.AddStringLiteral("  color: ");
                    lexer.AddStringLiteral(textColor);
                    lexer.AddStringLiteral(";\n");
                }

                lexer.AddStringLiteral(" }\n");
            }

            /* the link colours each get a rule of their own, in this order */
            string[] linkAttributes = { "link", "vlink", "alink" };
            string[] linkSelectors = { " :link", " :visited", " :active" };

            for (int i = 0; i < linkAttributes.Length; i++)
            {
                current = body.GetAttrByName(linkAttributes[i]);
                if (current == null)
                {
                    continue;
                }

                AddColorRule(lexer, linkSelectors[i], current.Val);
                body.RemoveAttribute(current);
            }
        }
Beispiel #7
0
        /*
        Applies all matching rules to a node.

        The rules are tried in a fixed order (Dir2Div, NestedList, Center2Div,
        MergeDivs, BlockStyle, InlineStyle, Font2Span). As soon as one of them
        reports a change, the loop restarts on the node stored in the mutable
        holder, which some rules update. The loop ends when no rule applies,
        when the current node is not an element, or when there is no current
        node any more.

        Returns the node held by the holder after the last rule ran (or the
        original node if no rule ran at all). The result can be null: for
        example NestedList stores node.Next in the holder, which is null for
        a last sibling.
        */
        private Node CleanNode(Lexer lexer, Node node)
        {
            Node next = node;
            var o = new MutableObject();

            /* the null test guards against a rule handing back "no next node" */
            while (node != null && node.IsElement)
            {
                o.Object = next;

                /* short-circuit: stop at the first rule that changed something */
                bool changed = Dir2Div(node) ||
                               NestedList(lexer, node, o) ||
                               Center2Div(lexer, node, o) ||
                               MergeDivs(node) ||
                               BlockStyle(node) ||
                               InlineStyle(lexer, node) ||
                               Font2Span(lexer, node, o);

                next = (Node) o.Object;

                if (!changed)
                {
                    break;
                }

                node = next;
            }

            return next;
        }
Beispiel #8
0
        /*
        Recursively cleans the children of a node and then the node itself.

        Each child is replaced by the node returned from its own clean-up
        before the walk moves on to the following sibling. That result can be
        null (CleanNode returns whatever the rules left in their holder, and
        NestedList stores node.Next there), in which case there is nothing
        left to walk and the loop stops instead of dereferencing null.

        Returns the result of CleanNode for the node itself.
        */
        private Node CreateStyleProperties(Lexer lexer, Node node)
        {
            for (Node child = node.Content; child != null; child = child.Next)
            {
                child = CreateStyleProperties(lexer, child);

                if (child == null)
                {
                    break;
                }
            }

            return CleanNode(lexer, node);
        }
Beispiel #9
0
        /*
        Looks up the class name registered for a tag/properties pair in the
        lexer's style list. If no entry matches, a new one with a generated
        class name is put at the front of the list.

        The comparison uses the static String.Equals so that entries whose
        tag or properties are null (Style2Rule passes an attribute value that
        may be null) neither throw nor get registered twice.

        Returns the class name of the matching or newly created entry.
        */
        private string FindStyle(Lexer lexer, string tag, string properties)
        {
            for (Style style = lexer.Styles; style != null; style = style.Next)
            {
                if (String.Equals(style.Tag, tag) && String.Equals(style.Properties, properties))
                {
                    return style.TagClass;
                }
            }

            var created = new Style(tag, GenSymClass(), properties, lexer.Styles);
            lexer.Styles = created;
            return created.TagClass;
        }
Beispiel #10
0
        /*
        Prints the document once per slide.

        The slide count and slide content are taken from the document body,
        a blend transition effect is added to the root, and the tree is then
        printed for slide 1.._count into an in-memory stream. An IOException
        while printing a slide is written to the debug output together with
        the slide's file name and does not stop the remaining slides.

        If the document has no body there is nothing to split, so the method
        returns without touching any state.
        */
        public virtual void CreateSlides(Lexer lexer, Node root)
        {
            Out output = new OutImpl();

            Node body = root.FindBody(lexer.Options.TagTable);
            if (body == null)
            {
                /* NiceBody guards the same lookup, so a missing body is expected to be possible */
                return;
            }

            _count = CountSlides(body);
            _slidecontent = body.Content;
            AddTransitionEffect(lexer, root, EFFECT_BLEND, 3.0);

            for (_slide = 1; _slide <= _count; ++_slide)
            {
                string buf = "slide" + _slide + ".html";
                output.State = StreamIn.FSM_ASCII;
                output.Encoding = _options.CharEncoding;

                try
                {
                    /* one stream per slide, released as soon as the slide is printed */
                    using (var stream = new MemoryStream())
                    {
                        output.Output = stream;
                        PrintTree(output, 0, 0, lexer, root);
                        FlushLine(output, 0);
                    }
                }
                catch (IOException e)
                {
                    Debug.WriteLine(buf + e);
                }
            }
        }
Beispiel #11
0
        /*
        Pretty-prints a node and, recursively, its content to fout.

        fout   - output the text is written to
        mode   - bit flags; PREFORMATTED, NOWRAP and CDATA are OR-ed in below
                 for the content of pre, nobr, style and script elements
        indent - current indentation, passed on to the flush/print helpers
        lexer  - handed through to PrintTag, PrintSlide and the recursion
        node   - node to print; null is ignored

        The node type is dispatched first (text, comment, root, doctype,
        processing instruction, CDATA, section, ASP, JSTE, PHP). Everything
        else is an element: empty / start-end tags are printed on their own,
        containers are split into pre-like elements, style/script, inline
        elements and all other tags.
        */
        public virtual void PrintTree(Out fout, int mode, int indent, Lexer lexer, Node node)
        {
            Node content;
            TagCollection tt = _options.TagTable;

            if (node == null)
                return;

            if (node.Type == Node.TEXT_NODE)
            {
                PrintText(fout, mode, indent, node.Textarray, node.Start, node.End);
            }
            else if (node.Type == Node.COMMENT_TAG)
            {
                PrintComment(fout, indent, node);
            }
            else if (node.Type == Node.ROOT_NODE)
            {
                /* the root prints nothing itself, only its children */
                for (content = node.Content; content != null; content = content.Next)
                {
                    PrintTree(fout, mode, indent, lexer, content);
                }
            }
            else if (node.Type == Node.DOC_TYPE_TAG)
            {
                PrintDocType(fout, indent, node);
            }
            else if (node.Type == Node.PROC_INS_TAG)
            {
                PrintPi(fout, indent, node);
            }
            else if (node.Type == Node.CDATA_TAG)
            {
                PrintCdata(fout, indent, node);
            }
            else if (node.Type == Node.SECTION_TAG)
            {
                PrintSection(fout, indent, node);
            }
            else if (node.Type == Node.ASP_TAG)
            {
                PrintAsp(fout, indent, node);
            }
            else if (node.Type == Node.JSTE_TAG)
            {
                PrintJste(fout, indent, node);
            }
            else if (node.Type == Node.PHP_TAG)
            {
                PrintPhp(fout, indent, node);
            }
            /* NOTE(review): node.Tag is dereferenced here without a null test, while the
               container branch below does test node.Tag != null - confirm Tag is always set here */
            else if ((node.Tag.Model & ContentModel.EMPTY) != 0 || node.Type == Node.START_END_TAG)
            {
                /* empty element or start-end tag: no content to recurse into */
                if ((node.Tag.Model & ContentModel.INLINE) == 0)
                {
                    CondFlushLine(fout, indent);
                }

                if (node.Tag == tt.TagBr && node.Prev != null && node.Prev.Tag != tt.TagBr && _options.BreakBeforeBr)
                {
                    FlushLine(fout, indent);
                }

                /* with MakeClean a <wbr> is written as a single space instead of a tag */
                if (_options.MakeClean && node.Tag == tt.TagWbr)
                {
                    PrintString(" ");
                }
                else
                {
                    PrintTag(lexer, fout, mode, indent, node);
                }

                if (node.Tag == tt.TagParam || node.Tag == tt.TagArea)
                {
                    CondFlushLine(fout, indent);
                }
                else if (node.Tag == tt.TagBr || node.Tag == tt.TagHr)
                {
                    FlushLine(fout, indent);
                }
            }
            else
            {
                /* some kind of container element */
                if (node.Tag != null && node.Tag.Parser == ParserImpl.ParsePre)
                {
                    /* element parsed by ParsePre: printed at indent 0, content in PREFORMATTED | NOWRAP mode */
                    CondFlushLine(fout, indent);

                    indent = 0;
                    CondFlushLine(fout, indent);
                    PrintTag(lexer, fout, mode, indent, node);
                    FlushLine(fout, indent);

                    for (content = node.Content; content != null; content = content.Next)
                    {
                        PrintTree(fout, (mode | PREFORMATTED | NOWRAP), indent, lexer, content);
                    }

                    CondFlushLine(fout, indent);
                    PrintEndTag(node);
                    FlushLine(fout, indent);

                    if (_options.IndentContent == false && node.Next != null)
                    {
                        FlushLine(fout, indent);
                    }
                }
                else if (node.Tag == tt.TagStyle || node.Tag == tt.TagScript)
                {
                    /* style / script: same layout as above, content additionally flagged as CDATA */
                    CondFlushLine(fout, indent);

                    indent = 0;
                    CondFlushLine(fout, indent);
                    PrintTag(lexer, fout, mode, indent, node);
                    FlushLine(fout, indent);

                    for (content = node.Content; content != null; content = content.Next)
                    {
                        PrintTree(fout, (mode | PREFORMATTED | NOWRAP | CDATA), indent, lexer, content);
                    }

                    CondFlushLine(fout, indent);
                    PrintEndTag(node);
                    FlushLine(fout, indent);

                    if (_options.IndentContent == false && node.Next != null)
                    {
                        FlushLine(fout, indent);
                    }
                }
                else if ((node.Tag.Model & ContentModel.INLINE) != 0)
                {
                    if (_options.MakeClean)
                    {
                        /* discards <font> and </font> tags */
                        if (node.Tag == tt.TagFont)
                        {
                            for (content = node.Content; content != null; content = content.Next)
                            {
                                PrintTree(fout, mode, indent, lexer, content);
                            }
                            return;
                        }

                        /* replace <nobr>...</nobr> by &nbsp; or &#160; etc. */
                        if (node.Tag == tt.TagNobr)
                        {
                            /* the tags themselves are dropped; the content is printed with NOWRAP set */
                            for (content = node.Content; content != null; content = content.Next)
                            {
                                PrintTree(fout, (mode | NOWRAP), indent, lexer, content);
                            }
                            return;
                        }
                    }

                    /* otherwise a normal inline element */

                    PrintTag(lexer, fout, mode, indent, node);

                    /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */

                    if (ShouldIndent(node))
                    {
                        CondFlushLine(fout, indent);
                        indent += _options.Spaces;

                        for (content = node.Content; content != null; content = content.Next)
                        {
                            PrintTree(fout, mode, indent, lexer, content);
                        }

                        CondFlushLine(fout, indent);
                        indent -= _options.Spaces;
                        CondFlushLine(fout, indent);
                    }
                    else
                    {
                        for (content = node.Content; content != null; content = content.Next)
                        {
                            PrintTree(fout, mode, indent, lexer, content);
                        }
                    }

                    PrintEndTag(node);
                }
                else
                {
                    /* other tags */
                    CondFlushLine(fout, indent);

                    if (_options.SmartIndent && node.Prev != null)
                    {
                        FlushLine(fout, indent);
                    }

                    /* the start tag is left out only when HideEndTags is set and the tag's model has OMIT_ST */
                    if (_options.HideEndTags == false ||
                        !(node.Tag != null && ((node.Tag.Model & ContentModel.OMIT_ST) != 0)))
                    {
                        PrintTag(lexer, fout, mode, indent, node);

                        if (ShouldIndent(node))
                        {
                            CondFlushLine(fout, indent);
                        }
                        else if ((node.Tag.Model & ContentModel.HTML) != 0 || node.Tag == tt.TagNoframes ||
                                 ((node.Tag.Model & ContentModel.HEAD) != 0 && node.Tag != tt.TagTitle))
                        {
                            FlushLine(fout, indent);
                        }
                    }

                    if (node.Tag == tt.TagBody && _options.BurstSlides)
                    {
                        /* body content is printed by PrintSlide when BurstSlides is set */
                        PrintSlide(fout, mode, (_options.IndentContent ? indent + _options.Spaces : indent), lexer);
                    }
                    else
                    {
                        /* last remembers the previously printed child for the check below */
                        Node last = null;

                        for (content = node.Content; content != null; content = content.Next)
                        {
                            /* kludge for naked text before block level tag */
                            if (last != null && !_options.IndentContent && last.Type == Node.TEXT_NODE &&
                                content.Tag != null && (content.Tag.Model & ContentModel.BLOCK) != 0)
                            {
                                FlushLine(fout, indent);
                                FlushLine(fout, indent);
                            }

                            PrintTree(fout, mode, (ShouldIndent(node) ? indent + _options.Spaces : indent), lexer,
                                      content);

                            last = content;
                        }
                    }

                    /* don't flush line for td and th */
                    if (ShouldIndent(node) ||
                        (((node.Tag.Model & ContentModel.HTML) != 0 || node.Tag == tt.TagNoframes ||
                          ((node.Tag.Model & ContentModel.HEAD) != 0 && node.Tag != tt.TagTitle)) &&
                         _options.HideEndTags == false))
                    {
                        CondFlushLine(fout, (_options.IndentContent ? indent + _options.Spaces : indent));

                        /* the end tag is left out only when HideEndTags is set and the tag's model has OPT */
                        if (_options.HideEndTags == false || (node.Tag.Model & ContentModel.OPT) == 0)
                        {
                            PrintEndTag(node);
                            FlushLine(fout, indent);
                        }
                    }
                    else
                    {
                        if (_options.HideEndTags == false || (node.Tag.Model & ContentModel.OPT) == 0)
                        {
                            PrintEndTag(node);
                        }

                        FlushLine(fout, indent);
                    }

                    /* extra flush after block, list, definition list and table elements that have a following sibling */
                    if (_options.IndentContent == false && node.Next != null && _options.HideEndTags == false &&
                        (node.Tag.Model &
                         (ContentModel.BLOCK | ContentModel.LIST | ContentModel.DEFLIST | ContentModel.TABLE)) != 0)
                    {
                        FlushLine(fout, indent);
                    }
                }
            }
        }
Beispiel #12
0
        /*
        Replaces a node's style attribute by a class attribute.

        The class name comes from the style dictionary (FindStyle), which
        either finds an existing entry for the element name and style value
        or generates a new class.

        If the node already has a class attribute, the class name is appended
        to it after a space and the style attribute is removed; otherwise the
        style attribute itself is renamed to "class". Nodes without a style
        attribute are left untouched.
        */
        private void Style2Rule(Lexer lexer, Node node)
        {
            AttVal inlineStyle = node.GetAttrByName("style");
            if (inlineStyle == null)
            {
                return;
            }

            string ruleClass = FindStyle(lexer, node.Element, inlineStyle.Val);
            AttVal existingClass = node.GetAttrByName("class");

            if (existingClass == null)
            {
                /* reuse style attribute for class attribute */
                inlineStyle.Attribute = "class";
                inlineStyle.Val = ruleClass;
                return;
            }

            /* keep the classes already present and add the new one at the end */
            existingClass.Val = existingClass.Val + " " + ruleClass;
            node.RemoveAttribute(inlineStyle);
        }
Beispiel #13
0
        /*
        Writes the tag of a node into the line buffer: '<', a '/' for end
        tags, the case-folded element name, the attributes, " /" for empty or
        start-end tags when XML output or a voyager lexer is in effect, and
        '>'.

        Afterwards, unless the node is a start-end tag or the mode is
        PREFORMATTED, the line is wrapped if it has reached the wrap length
        and a possible wrap position after the tag is recorded.
        */
        private void PrintTag(Lexer lexer, Out fout, int mode, int indent, Node node)
        {
            TagCollection tags = _options.TagTable;

            AddC('<', _linelen++);
            if (node.Type == Node.END_TAG)
            {
                AddC('/', _linelen++);
            }

            foreach (char ch in node.Element)
            {
                AddC(Lexer.FoldCase(ch, _options.UpperCaseTags, _options.XmlTags), _linelen++);
            }

            PrintAttrs(fout, indent, node, node.Attributes);

            bool xmlSyntax = _options.XmlOut || (lexer != null && lexer.Isvoyager);
            if (xmlSyntax &&
                (node.Type == Node.START_END_TAG || (node.Tag.Model & ContentModel.EMPTY) != 0))
            {
                AddC(' ', _linelen++); /* compatibility hack */
                AddC('/', _linelen++);
            }

            AddC('>', _linelen++);

            bool skipWrapping = node.Type == Node.START_END_TAG || (mode & PREFORMATTED) != 0;
            if (skipWrapping)
            {
                return;
            }

            if (indent + _linelen >= _options.WrapLen)
            {
                WrapLine(fout, indent);
            }

            /* still too long after wrapping: flush and leave */
            if (indent + _linelen >= _options.WrapLen)
            {
                CondFlushLine(fout, indent);
                return;
            }

            /*
                wrap after start tag if is <br/> or if it's not
                inline or it is an empty tag followed by </a>
                */
            if (AfterSpace(node) &&
                (mode & NOWRAP) == 0 &&
                ((node.Tag.Model & ContentModel.INLINE) == 0 ||
                 node.Tag == tags.TagBr ||
                 ((node.Tag.Model & ContentModel.EMPTY) != 0 && node.Next == null &&
                  node.Parent.Tag == tags.TagA)))
            {
                _wraphere = _linelen;
            }
        }
Beispiel #14
0
        /*
        Symptom: <ul><li><ul>...</ul></li></ul>
        Action: discard outer list

        Applies only when node is a ul/ol whose single child has, as its first
        child, a list of the same kind. The inner list takes the place of the
        outer one; pnode receives the outer list's former next sibling.
        Returns true when the tree was changed, false otherwise.
        */
        private bool NestedList(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagUl && node.Tag != _tt.TagOl)
            {
                return false;
            }

            /* the outer list must have exactly one child */
            Node onlyItem = node.Content;
            if (onlyItem == null || onlyItem.Next != null)
            {
                return false;
            }

            /* ... whose first child is a list with the same tag as the outer one */
            Node innerList = onlyItem.Content;
            if (innerList == null || innerList.Tag != node.Tag)
            {
                return false;
            }

            pnode.Object = node.Next;

            /* move inner list node into position of outer node */
            innerList.Prev = node.Prev;
            innerList.Next = node.Next;
            innerList.Parent = node.Parent;
            FixNodeLinks(innerList);

            /* get rid of outer ul and its li */
            onlyItem.Content = null;
            node.Content = null;
            node.Next = null;

            /*
            If prev node was a list the chances are this node
            should be appended to that list. Word has no way of
            recognizing nested lists and just uses indents
            */
            Node toClean = node;
            Node previous = innerList.Prev;

            if (previous != null)
            {
                /* from here on the inner list is the node that gets cleaned */
                toClean = innerList;

                if (previous.Tag == _tt.TagUl || previous.Tag == _tt.TagOl)
                {
                    /* unlink the inner list from the sibling chain */
                    previous.Next = innerList.Next;
                    if (previous.Next != null)
                    {
                        previous.Next.Prev = previous;
                    }

                    /* and hang it below the last child of the previous list */
                    Node lastItem = previous.Last; /* <li> */

                    innerList.Parent = lastItem;
                    innerList.Next = null;
                    innerList.Prev = lastItem.Last;
                    FixNodeLinks(innerList);
                }
            }

            CleanNode(lexer, toClean);
            return true;
        }
Beispiel #15
0
        /*
        Tells whether the document body is free of presentation attributes
        (background, bgcolor, text, link, vlink, alink).

        When one of them is present the USING_BODY flag is set on
        lexer.BadLayout and false is returned. A document without a body
        counts as nice.
        */
        private bool NiceBody(Lexer lexer, Node doc)
        {
            Node body = doc.FindBody(lexer.Options.TagTable);
            if (body == null)
            {
                return true;
            }

            string[] presentationAttributes = { "background", "bgcolor", "text", "link", "vlink", "alink" };

            foreach (string attributeName in presentationAttributes)
            {
                if (body.GetAttrByName(attributeName) != null)
                {
                    lexer.BadLayout |= Report.USING_BODY;
                    return false;
                }
            }

            return true;
        }
Beispiel #16
0
        /*
        Folds the only child of a table cell or an inline element such as em
        into its parent when that child is purely presentational.

        node must not be a font element and its tag must have the INLINE or
        ROW content model. If its single child is
          - <b> or <i> (and LogicalEmphasis is on), or
          - <font>,
        the child's styles are merged into node, the matching style property
        (or the font's attributes) is added, and the child is stripped.
        Returns true when that happened, false otherwise.
        */
        private bool InlineStyle(Lexer lexer, Node node)
        {
            if (node.Tag == _tt.TagFont || (node.Tag.Model & (ContentModel.INLINE | ContentModel.ROW)) == 0)
            {
                return false;
            }

            /* check child exists and has no peers */
            Node onlyChild = node.Content;
            if (onlyChild == null || onlyChild.Next != null)
            {
                return false;
            }

            /* <b> and <i> map to a single style property each */
            string emphasisProperty = null;
            if (onlyChild.Tag == _tt.TagB)
            {
                emphasisProperty = "font-weight: bold";
            }
            else if (onlyChild.Tag == _tt.TagI)
            {
                emphasisProperty = "font-style: italic";
            }

            if (emphasisProperty != null && lexer.Options.LogicalEmphasis)
            {
                MergeStyles(node, onlyChild);
                AddStyleProperty(node, emphasisProperty);
                StripOnlyChild(node);
                return true;
            }

            if (onlyChild.Tag == _tt.TagFont)
            {
                MergeStyles(node, onlyChild);
                AddFontStyles(node, onlyChild.Attributes);
                StripOnlyChild(node);
                return true;
            }

            return false;
        }
Beispiel #17
0
        /*
        This is a major clean up to strip out all the extra stuff you get
        when you save as web page from Word 2000. It doesn't yet know what
        to do with VML tags, but these will appear as errors unless you
        declare them as new tags, such as o:p which needs to be declared
        as inline.
        */
        /// <summary>
        /// Walks a chain of sibling nodes, recursing into their content, and removes
        /// or rewrites the markup Word 2000 emits when saving as a web page.
        /// </summary>
        /// <param name="lexer">Lexer used to create inferred nodes; also passed to the helpers.</param>
        /// <param name="node">First sibling to process; a null value ends the walk immediately.</param>
        public virtual void CleanWord2000(Lexer lexer, Node node)
        {
            /* the <ul> or <pre> currently collecting a run of bulleted / code paragraphs */
            Node list = null;

            while (node != null)
            {
                /* discard Word's style verbiage */
                if (node.Tag == _tt.TagStyle || node.Tag == _tt.TagMeta || node.Type == Node.COMMENT_TAG)
                {
                    node = Node.DiscardElement(node);
                    continue;
                }

                /* strip out all span tags Word scatters so liberally! */
                if (node.Tag == _tt.TagSpan)
                {
                    node = StripSpan(lexer, node);
                    continue;
                }

                /* an <html> element without xmlns:o is not a Word 2000 document: stop */
                if (node.Tag == _tt.TagHtml && node.GetAttrByName("xmlns:o") == null)
                {
                    return;
                }

                /* drop <link rel="File-List"> */
                if (node.Tag == _tt.TagLink)
                {
                    AttVal attr = node.GetAttrByName("rel");

                    if (attr != null && attr.Val != null && attr.Val.Equals("File-List"))
                    {
                        node = Node.DiscardElement(node);
                        continue;
                    }
                }

                /* discard empty paragraphs */
                if (node.Content == null && node.Tag == _tt.TagP)
                {
                    node = Node.DiscardElement(node);
                    continue;
                }

                if (node.Tag == _tt.TagP)
                {
                    AttVal attr = node.GetAttrByName("class");

                    /* map sequence of <p class="MsoListBullet"> to <ul>...</ul> */
                    if (attr != null && attr.Val != null && attr.Val.Equals("MsoListBullet"))
                    {
                        Node.CoerceNode(lexer, node, _tt.TagLi);

                        if (list == null || list.Tag != _tt.TagUl)
                        {
                            list = lexer.InferredTag("ul");
                            Node.InsertNodeBeforeElement(node, list);
                        }

                        PurgeAttributes(node);

                        if (node.Content != null)
                        {
                            CleanWord2000(lexer, node.Content);
                        }

                        /* remove node and append to contents of list */
                        Node.RemoveNode(node);
                        Node.InsertNodeAtEnd(list, node);

                        /*
                        Resume with the sibling that follows the list. Restarting the
                        loop lets the while condition catch the end of the chain (the
                        moved paragraph may have been the last sibling) and gives the
                        following sibling the full set of checks, so a run of bullet
                        paragraphs lands in the same <ul>.
                        */
                        node = list.Next;
                        continue;
                    }
                    else if (attr != null && attr.Val != null && attr.Val.Equals("Code"))
                    {
                        /* map sequence of <p class="Code"> to <pre>...</pre> */
                        Node br = lexer.NewLineNode();
                        NormalizeSpaces(node);

                        if (list == null || list.Tag != _tt.TagPre)
                        {
                            list = lexer.InferredTag("pre");
                            Node.InsertNodeBeforeElement(node, list);
                        }

                        /* remove node and append to contents of list */
                        Node.RemoveNode(node);
                        Node.InsertNodeAtEnd(list, node);
                        StripSpan(lexer, node);
                        Node.InsertNodeAtEnd(list, br);

                        /* same reasoning as for the bullet case above */
                        node = list.Next;
                        continue;
                    }
                    else
                    {
                        list = null;
                    }
                }
                else
                {
                    list = null;
                }

                /* strip out style and class attributes */
                if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                {
                    PurgeAttributes(node);
                }

                if (node.Content != null)
                {
                    CleanWord2000(lexer, node.Content);
                }

                node = node.Next;
            }
        }
Beispiel #18
0
        /*
        Replace font elements by span elements, deleting
        the font element's attributes and replacing them
        by a single style attribute.
        */
        /// <summary>
        /// Turns a FONT element into a SPAN that carries only a style attribute.
        /// </summary>
        /// <param name="lexer">Lexer whose options decide whether FONT tags are dropped.</param>
        /// <param name="node">Candidate element; anything other than FONT is left alone.</param>
        /// <param name="pnode">Passed on to DiscardContainer when FONT tags are dropped.</param>
        /// <returns>true only when the node was rewritten as a SPAN.</returns>
        private bool Font2Span(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagFont)
            {
                return false;
            }

            if (lexer.Options.DropFontTags)
            {
                DiscardContainer(node, pnode);
                return false;
            }

            /* a FONT that is the only child of its parent is left as it is */
            bool onlyChild = node.Parent.Content == node && node.Next == null;
            if (onlyChild)
            {
                return false;
            }

            AddFontStyles(node, node.Attributes);

            /* keep the style attribute (the last one, if several) and drop every other attribute */
            AttVal keptStyle = null;
            AttVal current = node.Attributes;

            while (current != null)
            {
                AttVal following = current.Next;

                if (current.Attribute.Equals("style"))
                {
                    current.Next = null;
                    keptStyle = current;
                }

                current = following;
            }

            node.Attributes = keptStyle;
            node.Tag = _tt.TagSpan;
            node.Element = "span";

            return true;
        }
Beispiel #19
0
        /* ignore unknown attributes for proprietary elements */
        /// <summary>
        /// Validates this attribute against its dictionary entry, narrowing the
        /// lexer's candidate HTML versions or reporting an error as appropriate.
        /// </summary>
        /// <param name="lexer">Lexer whose options and version mask are consulted and updated.</param>
        /// <param name="node">Element that carries this attribute.</param>
        /// <returns>The dictionary entry for this attribute, or null when it has none.</returns>
        public virtual Attribute CheckAttribute(Lexer lexer, Node node)
        {
            TagCollection tags = lexer.Options.TagTable;

            /* the duplicate check only runs when neither Asp nor Php is set */
            if (Asp == null && Php == null)
            {
                CheckUniqueAttribute(lexer, node);
            }

            Attribute entry = Dict;

            if (entry == null)
            {
                /* unknown attribute: reported unless XML tags are on, the element has no tag, */
                /* _asp is set, or the element's tag is marked proprietary */
                if (!lexer.Options.XmlTags && node.Tag != null && _asp == null &&
                    (node.Tag.Versions & HtmlVersion.Proprietary) == HtmlVersion.Unknown)
                {
                    Report.AttrError(lexer, node, Attribute, Report.UNKNOWN_ATTRIBUTE);
                }

                return entry;
            }

            bool titleOnAnchorOrLink = entry == AttributeTable.AttrTitle &&
                                       (node.Tag == tags.TagA || node.Tag == tags.TagLink);

            if (titleOnAnchorOrLink)
            {
                /* title is vers 2.0 for A and LINK otherwise vers 4.0 */
                lexer.Versions &= HtmlVersion.All;
            }
            else if ((entry.Versions & HtmlVersion.Xml) != 0)
            {
                if (!(lexer.Options.XmlTags || lexer.Options.XmlOut))
                {
                    Report.AttrError(lexer, node, Attribute, Report.XML_ATTRIBUTE_VALUE);
                }
            }
            else
            {
                lexer.Versions &= entry.Versions;
            }

            if (entry.AttrCheck != null)
            {
                entry.AttrCheck.Check(lexer, node, this);
            }

            return entry;
        }
Beispiel #20
0
        /// <summary>
        /// Removes every section node from a sibling chain and, recursively,
        /// from the content of the nodes that remain.
        /// </summary>
        /// <param name="lexer">Lexer passed through to PruneSection.</param>
        /// <param name="node">First sibling to examine; may be null.</param>
        public virtual void DropSections(Lexer lexer, Node node)
        {
            while (node != null)
            {
                if (node.Type != Node.SECTION_TAG)
                {
                    /* ordinary node: descend, then move on to the next sibling */
                    if (node.Content != null)
                    {
                        DropSections(lexer, node.Content);
                    }

                    node = node.Next;
                    continue;
                }

                /* an "if" section is pruned up to its matching endif; any other section is just discarded */
                bool opensConditional = (Lexer.GetString(node.Textarray, node.Start, 2)).Equals("if");

                node = opensConditional ? PruneSection(lexer, node) : Node.DiscardElement(node);
            }
        }
Beispiel #21
0
        /*
        the same attribute name can't be used
        more than once in each element
        */
        /// <summary>
        /// Reports REPEATED_ATTRIBUTE when a later attribute in the list has the
        /// same name (ordinal comparison) as this one.
        /// </summary>
        /// <param name="lexer">Lexer passed to the error report.</param>
        /// <param name="node">Element that owns the attribute list.</param>
        public virtual void CheckUniqueAttribute(Lexer lexer, Node node)
        {
            string name = this.Attribute;
            bool repeated = false;

            /* an attribute without a name cannot match anything */
            if (name != null)
            {
                AttVal other = Next;

                while (other != null && !repeated)
                {
                    /* later attributes with Asp or Php set are not counted as repeats */
                    if (other.Attribute != null && other.Asp == null && other.Php == null &&
                        String.CompareOrdinal(name, other.Attribute) == 0)
                    {
                        repeated = true;
                    }
                    else
                    {
                        other = other.Next;
                    }
                }
            }

            if (repeated)
            {
                Report.AttrError(lexer, node, Attribute, Report.REPEATED_ATTRIBUTE);
            }
        }
Beispiel #22
0
        /* node is <![if ...]> prune up to <![endif]> */
        /// <summary>
        /// Discards a conditional section: the opening "if" section node, every
        /// following sibling, and the matching "endif" section node. Nested "if"
        /// sections are pruned recursively.
        /// </summary>
        /// <param name="lexer">Lexer passed through to nested calls.</param>
        /// <param name="node">The section node that opens the section.</param>
        /// <returns>
        /// The node returned by discarding the matching "endif", or null when the
        /// sibling chain ends before an "endif" is found.
        /// </returns>
        public virtual Node PruneSection(Lexer lexer, Node node)
        {
            /* discard the opening section node itself */
            node = Node.DiscardElement(node);

            while (node != null)
            {
                if (node.Type == Node.SECTION_TAG)
                {
                    if ((Lexer.GetString(node.Textarray, node.Start, 2)).Equals("if"))
                    {
                        /*
                        Nested section. The node handed back comes after the inner
                        endif and has not been looked at yet: it may be this section's
                        own endif (or null), so it must be inspected on the next pass
                        rather than discarded blindly.
                        */
                        node = PruneSection(lexer, node);
                        continue;
                    }

                    if ((Lexer.GetString(node.Textarray, node.Start, 5)).Equals("endif"))
                    {
                        return Node.DiscardElement(node);
                    }
                }

                /* anything else inside the section is dropped */
                node = Node.DiscardElement(node);
            }

            return null;
        }
Beispiel #23
0
        /*
        Add meta element for page transition effect, this works on IE but not NS
        */
        /// <summary>
        /// Inserts a <c>meta http-equiv="Page-Enter"</c> element at the start of
        /// the document head to request a page transition effect.
        /// </summary>
        /// <param name="lexer">Lexer supplying the tag table and the inferred meta node.</param>
        /// <param name="root">Node whose head element is looked up.</param>
        /// <param name="effect">Values 0 to 23 select revealTrans with that transition number; any other value selects blendTrans.</param>
        /// <param name="duration">Duration written into the filter string.</param>
        public virtual void AddTransitionEffect(Lexer lexer, Node root, short effect, double duration)
        {
            Node head = root.FindHead(lexer.Options.TagTable);

            /* without a head there is nowhere to put the meta element */
            if (head == null)
            {
                return;
            }

            /*
            Format with the invariant culture: under a culture that uses a decimal
            comma, 2.5 would be written as "2,5" and clash with the comma that
            separates the filter arguments.
            */
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string seconds = duration.ToString(invariant);
            string transition;

            if (0 <= effect && effect <= 23)
            {
                transition = "revealTrans(Duration=" + seconds + ",Transition=" + effect.ToString(invariant) + ")";
            }
            else
            {
                transition = "blendTrans(Duration=" + seconds + ")";
            }

            Node meta = lexer.InferredTag("meta");
            meta.AddAttribute("http-equiv", "Page-Enter");
            meta.AddAttribute("content", transition);
            Node.InsertNodeAtStart(head, meta);
        }
Beispiel #24
0
        /// <summary>
        /// Applies Style2Rule to every node of the subtree, children before their parent.
        /// </summary>
        /// <param name="lexer">Lexer passed through to Style2Rule.</param>
        /// <param name="node">Root of the subtree to process.</param>
        private void DefineStyleRules(Lexer lexer, Node node)
        {
            /* descend first; an empty content list simply skips the loop */
            Node descendant = node.Content;

            while (descendant != null)
            {
                DefineStyleRules(lexer, descendant);
                descendant = descendant.Next;
            }

            Style2Rule(lexer, node);
        }
Beispiel #25
0
        /*
        Called from printTree to print the content of a slide from
        the node slidecontent. On return slidecontent points to the
        node starting the next slide or null. The variables slide
        and count are used to customise the navigation bar.
        */
        /// <summary>
        /// Prints one slide starting at the current slide content node: an opening
        /// div with an onclick handler, the navigation bar, a horizontal rule and
        /// the heading (when the slide starts with an h2), then every following
        /// sibling up to the next h2, and finally an epilogue with a second rule
        /// and navigation bar. The slide content field is left on the next h2 or null.
        /// </summary>
        /// <param name="fout">Output passed to the flush and print helpers.</param>
        /// <param name="mode">Print mode passed through to PrintTree.</param>
        /// <param name="indent">Indentation used for the slide's own lines.</param>
        /// <param name="lexer">Lexer passed through to PrintTree.</param>
        public virtual void PrintSlide(Out fout, int mode, int indent, Lexer lexer)
        {
            TagCollection tt = _options.TagTable;

            /* insert div for onclick handler; after the last slide the link goes back to slide 1 */
            string s = "<div onclick=\"document.location='slide" + (_slide < _count ? _slide + 1 : 1).ToString() +
                       ".html'\">";
            PrintString(s);
            CondFlushLine(fout, indent);

            /* a leading h2 gets the navbar and a rule printed before it */
            if (_slidecontent.Tag == tt.TagH2)
            {
                PrintNavBar(fout, indent);

                /* now print an hr after the navbar */
                PrintSlideRule(fout, indent);

                /* print the h2 element */
                PrintTree(fout, mode, (_options.IndentContent ? indent + _options.Spaces : indent), lexer, _slidecontent);

                _slidecontent = _slidecontent.Next;
            }

            /* now continue until we reach the next h2 */

            Node last = null;
            Node content = _slidecontent;

            for (; content != null; content = content.Next)
            {
                if (content.Tag == tt.TagH2)
                {
                    break;
                }

                /* kludge for naked text before block level tag */
                if (last != null && !_options.IndentContent && last.Type == Node.TEXT_NODE && content.Tag != null &&
                    (content.Tag.Model & ContentModel.BLOCK) != 0)
                {
                    FlushLine(fout, indent);
                    FlushLine(fout, indent);
                }

                PrintTree(fout, mode, (_options.IndentContent ? indent + _options.Spaces : indent), lexer, content);

                last = content;
            }

            _slidecontent = content;

            /* now print epilog */

            CondFlushLine(fout, indent);

            /* close the empty br element in XML output, exactly as is done for the hr below */
            PrintString(_options.XmlOut ? "<br clear=\"all\" />" : "<br clear=\"all\">");
            CondFlushLine(fout, indent);

            PrintSlideRule(fout, indent);

            PrintNavBar(fout, indent);

            /* end tag for div */
            PrintString("</div>");
            CondFlushLine(fout, indent);
        }

        /* Emits an hr tag (case-folded, self-closed for XML output), then flushes the line when content is indented. */
        private void PrintSlideRule(Out fout, int indent)
        {
            AddC('<', _linelen++);

            AddC(Lexer.FoldCase('h', _options.UpperCaseTags, _options.XmlTags), _linelen++);
            AddC(Lexer.FoldCase('r', _options.UpperCaseTags, _options.XmlTags), _linelen++);

            if (_options.XmlOut)
            {
                PrintString(" />");
            }
            else
            {
                AddC('>', _linelen++);
            }

            if (_options.IndentContent)
            {
                CondFlushLine(fout, indent);
            }
        }
Beispiel #26
0
 /* Appends "selector { color: value }" plus a newline to the lexer; does nothing when color is null. */
 private void AddColorRule(Lexer lexer, string selector, string color)
 {
     if (color == null)
     {
         return;
     }

     lexer.AddStringLiteral(selector);
     lexer.AddStringLiteral(" { color: ");
     lexer.AddStringLiteral(color);
     lexer.AddStringLiteral(" }\n");
 }
Beispiel #27
0
        /// <summary>
        /// Prints a node and its descendants, dispatching on the node type to the
        /// matching print helper and choosing line breaks and indentation for elements.
        /// </summary>
        /// <param name="fout">Output passed to the flush and print helpers.</param>
        /// <param name="mode">Print mode passed through to the text and tag printers.</param>
        /// <param name="indent">Indentation for this node.</param>
        /// <param name="lexer">Lexer passed through to PrintTag.</param>
        /// <param name="node">Node to print; null is ignored.</param>
        public virtual void PrintXmlTree(Out fout, int mode, int indent, Lexer lexer, Node node)
        {
            TagCollection tags = _options.TagTable;

            if (node == null)
            {
                return;
            }

            if (node.Type == Node.TEXT_NODE)
            {
                PrintText(fout, mode, indent, node.Textarray, node.Start, node.End);
                return;
            }

            if (node.Type == Node.COMMENT_TAG)
            {
                CondFlushLine(fout, indent);
                PrintComment(fout, 0, node);
                CondFlushLine(fout, 0);
                return;
            }

            if (node.Type == Node.ROOT_NODE)
            {
                /* the root prints nothing itself, only its children */
                Node top = node.Content;

                while (top != null)
                {
                    PrintXmlTree(fout, mode, indent, lexer, top);
                    top = top.Next;
                }

                return;
            }

            if (node.Type == Node.DOC_TYPE_TAG)
            {
                PrintDocType(fout, indent, node);
                return;
            }

            if (node.Type == Node.PROC_INS_TAG)
            {
                PrintPi(fout, indent, node);
                return;
            }

            if (node.Type == Node.SECTION_TAG)
            {
                PrintSection(fout, indent, node);
                return;
            }

            if (node.Type == Node.ASP_TAG)
            {
                PrintAsp(fout, indent, node);
                return;
            }

            if (node.Type == Node.JSTE_TAG)
            {
                PrintJste(fout, indent, node);
                return;
            }

            if (node.Type == Node.PHP_TAG)
            {
                PrintPhp(fout, indent, node);
                return;
            }

            if ((node.Tag.Model & ContentModel.EMPTY) != 0 || node.Type == Node.START_END_TAG)
            {
                /* empty element: tag on its own line, plus a blank line when a sibling follows */
                CondFlushLine(fout, indent);
                PrintTag(lexer, fout, mode, indent, node);
                FlushLine(fout, indent);

                if (node.Next != null)
                {
                    FlushLine(fout, indent);
                }

                return;
            }

            /* some kind of container element: "mixed" means it has at least one text child */
            Node probe = node.Content;

            while (probe != null && probe.Type != Node.TEXT_NODE)
            {
                probe = probe.Next;
            }

            bool mixed = probe != null;
            int childIndent;

            CondFlushLine(fout, indent);

            if (ParserImpl.XmlPreserveWhiteSpace(node, tags))
            {
                /* whitespace is preserved: no indentation, and no line break after the start tag */
                indent = 0;
                childIndent = 0;
                mixed = false;
            }
            else
            {
                /* mixed content stays at the parent's indent so text is not pushed around */
                childIndent = mixed ? indent : indent + _options.Spaces;
            }

            PrintTag(lexer, fout, mode, indent, node);

            if (!mixed)
            {
                FlushLine(fout, indent);
            }

            for (Node child = node.Content; child != null; child = child.Next)
            {
                PrintXmlTree(fout, mode, childIndent, lexer, child);
            }

            if (!mixed)
            {
                CondFlushLine(fout, childIndent);
            }

            PrintEndTag(node);
            CondFlushLine(fout, indent);

            if (node.Next != null)
            {
                FlushLine(fout, indent);
            }
        }
Beispiel #28
0
        /*
        Symptom: <center>
        Action: replace <center> by <div style="text-align: center">
        */
        /// <summary>
        /// Handles a CENTER element. When font tags are being dropped, the element
        /// is discarded and an inferred BR is linked in after its former content
        /// (or in its place when it was empty). Otherwise it is renamed to DIV and
        /// given a "text-align: center" style property.
        /// </summary>
        /// <param name="lexer">Lexer supplying the options and the inferred BR node.</param>
        /// <param name="node">Candidate element; anything other than CENTER is left alone.</param>
        /// <param name="pnode">Passed on to DiscardContainer.</param>
        /// <returns>true when the node was a CENTER element, false otherwise.</returns>
        private bool Center2Div(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagCenter)
            {
                return false;
            }

            if (!lexer.Options.DropFontTags)
            {
                node.Tag = _tt.TagDiv;
                node.Element = "div";
                AddStyleProperty(node, "text-align: center");
                return true;
            }

            if (node.Content != null)
            {
                /* remember the last child and the parent before the container goes away */
                Node tail = node.Last;
                Node owner = node.Parent;

                DiscardContainer(node, pnode);

                Node lineBreak = lexer.InferredTag("br");
                Node follower = tail.Next;

                /* link the br in directly after the former last child */
                if (follower != null)
                {
                    follower.Prev = lineBreak;
                }

                lineBreak.Next = follower;
                tail.Next = lineBreak;
                lineBreak.Prev = tail;

                if (owner.Last == tail)
                {
                    owner.Last = lineBreak;
                }

                lineBreak.Parent = owner;
            }
            else
            {
                /* empty CENTER: the br takes over the element's own position */
                Node before = node.Prev;
                Node after = node.Next;
                Node owner = node.Parent;

                DiscardContainer(node, pnode);

                Node lineBreak = lexer.InferredTag("br");
                lineBreak.Next = after;
                lineBreak.Prev = before;
                lineBreak.Parent = owner;

                if (after == null)
                {
                    owner.Last = lineBreak;
                }
                else
                {
                    after.Prev = lineBreak;
                }

                if (before == null)
                {
                    owner.Content = lineBreak;
                }
                else
                {
                    before.Next = lineBreak;
                }
            }

            return true;
        }
Beispiel #29
0
        /* Word2000 uses span excessively, so we strip span out */
        /// <summary>
        /// Replaces an element by its (already cleaned) children: the children are
        /// moved out to sit where the element was, and the element is then discarded.
        /// </summary>
        /// <param name="lexer">Lexer passed through to CleanWord2000.</param>
        /// <param name="span">Element to splice out.</param>
        /// <returns>The sibling that followed the element before it was discarded, possibly null.</returns>
        public virtual Node StripSpan(Lexer lexer, Node span)
        {
            /* clean the children before they are moved */
            CleanWord2000(lexer, span.Content);

            Node pending = span.Content;

            /* anchor is the node after which the next child gets inserted */
            Node anchor = span.Prev;

            if (anchor == null && pending != null)
            {
                /* nothing precedes the element: its first child goes in front of it and becomes the anchor */
                Node first = pending;
                pending = pending.Next;
                Node.RemoveNode(first);
                Node.InsertNodeBeforeElement(span, first);
                anchor = first;
            }

            while (pending != null)
            {
                Node moved = pending;
                pending = pending.Next;
                Node.RemoveNode(moved);
                Node.InsertNodeAfterElement(anchor, moved);
                anchor = moved;
            }

            if (span.Next == null)
            {
                span.Parent.Last = anchor;
            }

            Node following = span.Next;

            /* detach the content pointer before the element itself is discarded */
            span.Content = null;
            Node.DiscardElement(span);

            return following;
        }
Beispiel #30
0
        /// <summary>
        /// Runs CreateStyleProperties over the document and then, depending on the
        /// MakeClean option, defines style rules and creates the style element.
        /// </summary>
        /// <param name="lexer">Lexer supplying the options; passed to every step.</param>
        /// <param name="doc">Document node to clean.</param>
        public virtual void CleanTree(Lexer lexer, Node doc)
        {
            Node cleaned = CreateStyleProperties(lexer, doc);

            /* NOTE(review): the rule and style-element steps run only when MakeClean is OFF - confirm this is intended */
            if (lexer.Options.MakeClean)
            {
                return;
            }

            DefineStyleRules(lexer, cleaned);
            CreateStyleElement(lexer, cleaned);
        }