Lexer for html parser (c) 1998-2000 (W3C) MIT, INRIA, Keio University See Tidy.cs for the copyright notice. Derived from HTML Tidy Release 4 Aug 2000
Given a file stream fp, it returns a sequence of tokens. GetToken(fp) gets the next token; UngetToken(fp) provides one level of undo. The tags include an attribute list: a linked list of attribute/value nodes, where each node has two null-terminated strings. Entities are replaced in attribute values. White space is compacted if not in preformatted mode: leading white space is discarded and subsequent white-space sequences are compacted to single space characters. If XmlTags is no, then tag names are folded to upper case and attribute names to lower case. Not yet done: doctype subset and marked sections.
コード例 #1
0
        /// <summary>
        /// Checks the attributes of <paramref name="node"/>: verifies attribute uniqueness,
        /// reports a missing <c>type</c> attribute and, when the script language can be
        /// identified as JavaScript (or no <c>language</c> attribute is present), adds
        /// <c>type="text/javascript"</c>.
        /// </summary>
        /// <param name="lexer">Lexer used for error reporting.</param>
        /// <param name="node">Element whose attributes are checked.</param>
        public virtual void Check(Lexer lexer, Node node)
        {
            node.CheckUniqueAttributes(lexer);

            AttVal lang = node.GetAttrByName("language");
            AttVal type = node.GetAttrByName("type");
            if (type != null)
            {
                return;
            }

            Report.AttrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);

            /* no language attribute at all: default to javascript */
            if (lang == null)
            {
                node.AddAttribute("type", "text/javascript");
                return;
            }

            /* check for javascript */
            string str = lang.Val;
            if (str == null)
            {
                /* language attribute given without a value: nothing can be inferred */
                return;
            }

            /* only the first 10 characters are compared, so "javascript1.2" etc. also match */
            if (str.Length > 10)
            {
                str = str.Substring(0, 10);
            }

            if ((String.CompareOrdinal(str, "javascript") == 0) || (String.CompareOrdinal(str, "jscript") == 0))
            {
                node.AddAttribute("type", "text/javascript");
            }
        }
コード例 #2
0
        /// <summary>
        /// Validates a vertical-alignment attribute value and reports missing,
        /// proprietary or invalid values through <see cref="Report"/>.
        /// </summary>
        /// <param name="lexer">Lexer used for reporting; its version mask is narrowed for proprietary values.</param>
        /// <param name="node">Element that carries the attribute.</param>
        /// <param name="attval">The attribute being checked.</param>
        public virtual void Check(Lexer lexer, Node node, AttVal attval)
        {
            string val = attval.Val;

            /* string switch uses ordinal, case-sensitive matching */
            switch (val)
            {
                case null:
                    Report.AttrError(lexer, node, attval.Attribute, Report.MISSING_ATTR_VALUE);
                    break;

                case "top":
                case "middle":
                case "bottom":
                case "baseline":
                    /* all is fine */
                    break;

                case "left":
                case "right":
                {
                    /* horizontal values are only accepted on elements whose model has the IMG bit */
                    bool hasImgModel = node.Tag != null && (node.Tag.Model & ContentModel.IMG) != 0;
                    if (!hasImgModel)
                    {
                        Report.AttrError(lexer, node, val, Report.BAD_ATTRIBUTE_VALUE);
                    }
                    break;
                }

                case "texttop":
                case "absmiddle":
                case "absbottom":
                case "textbottom":
                    lexer.Versions &= HtmlVersion.Proprietary;
                    Report.AttrError(lexer, node, val, Report.PROPRIETARY_ATTR_VALUE);
                    break;

                default:
                    Report.AttrError(lexer, node, val, Report.BAD_ATTRIBUTE_VALUE);
                    break;
            }
        }
コード例 #3
0
        /// <summary>
        /// Checks attribute uniqueness on <paramref name="node"/> and then inspects the value
        /// of its first <c>align</c> attribute, narrowing the lexer's version mask for
        /// recognised values and reporting unrecognised ones.
        /// </summary>
        /// <param name="lexer">Lexer used for reporting and version tracking.</param>
        /// <param name="node">Element whose attributes are checked.</param>
        public virtual void Check(Lexer lexer, Node node)
        {
            node.CheckUniqueAttributes(lexer);

            /* locate the first attribute named "align" (ordinal comparison) */
            string alignment = null;
            AttVal current = node.Attributes;
            while (current != null)
            {
                if (string.Equals(current.Attribute, "align", StringComparison.Ordinal))
                {
                    alignment = current.Val;
                    break;
                }

                current = current.Next;
            }

            switch (alignment)
            {
                case null:
                    /* no align attribute, or one without a value: nothing to check */
                    break;

                case "left":
                case "right":
                    lexer.Versions &= HtmlVersion.Html40Loose | HtmlVersion.Frames;
                    break;

                case "top":
                case "bottom":
                    lexer.Versions &= HtmlVersion.From32;
                    break;

                default:
                    Report.AttrError(lexer, node, alignment, Report.BAD_ATTRIBUTE_VALUE);
                    break;
            }
        }
コード例 #4
0
ファイル: TidyMessage.cs プロジェクト: r1pper/TidyNetPortable
 /// <summary>
 /// Creates a message that records the lexer's current line and column
 /// together with the message text and its level. The filename is left empty.
 /// </summary>
 /// <param name="lexer">Lexer whose <c>Lines</c> and <c>Columns</c> are captured.</param>
 /// <param name="message">Message text.</param>
 /// <param name="level">Level of the message.</param>
 internal TidyMessage(Lexer lexer, string message, MessageLevel level)
 {
     // values supplied directly by the caller
     _message = message;
     _level = level;

     // position information taken from the lexer
     _filename = string.Empty;
     _line = lexer.Lines;
     _column = lexer.Columns;
 }
コード例 #5
0
        /// <summary>
        /// Validates an <c>align</c> attribute value. Elements whose content model has the
        /// IMG bit are delegated to the vertical-alignment checker; for all others only
        /// <c>left</c>, <c>center</c>, <c>right</c> and <c>justify</c> are accepted.
        /// </summary>
        /// <param name="lexer">Lexer used for error reporting.</param>
        /// <param name="node">Element that carries the attribute.</param>
        /// <param name="attval">The attribute being checked.</param>
        public virtual void Check(Lexer lexer, Node node, AttVal attval)
        {
            /* IMG, OBJECT, APPLET and EMBED use align for vertical position */
            bool usesVerticalAlign = node.Tag != null && (node.Tag.Model & ContentModel.IMG) != 0;
            if (usesVerticalAlign)
            {
                AttrCheckImpl.CheckValign.Check(lexer, node, attval);
                return;
            }

            string val = attval.Val;

            /* string switch uses ordinal, case-sensitive matching */
            switch (val)
            {
                case null:
                    Report.AttrError(lexer, node, attval.Attribute, Report.MISSING_ATTR_VALUE);
                    break;

                case "left":
                case "center":
                case "right":
                case "justify":
                    /* valid horizontal alignment */
                    break;

                default:
                    Report.AttrError(lexer, node, attval.Val, Report.BAD_ATTRIBUTE_VALUE);
                    break;
            }
        }
コード例 #6
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Moves presentational attributes from the body element into style rules
        appended to the lexer's text buffer:

        background="foo" ->  body { background-image: url(foo) }
        bgcolor="foo"    ->  body { background-color: foo }
        text="foo"       ->  body { color: foo }
        link="foo"       ->  :link { color: foo }
        vlink="foo"      ->  :visited { color: foo }
        alink="foo"      ->  :active { color: foo }

        Each attribute that is found is removed from the body node.
        */
        private void CleanBodyAttrs(Lexer lexer, Node body)
        {
            string backgroundUrl = null;
            string backgroundColor = null;
            string textColor = null;

            /* collect (and detach) the three attributes that feed the "body" rule */
            AttVal found = body.GetAttrByName("background");
            if (found != null)
            {
                backgroundUrl = found.Val;
                found.Val = null;
                body.RemoveAttribute(found);
            }

            found = body.GetAttrByName("bgcolor");
            if (found != null)
            {
                backgroundColor = found.Val;
                found.Val = null;
                body.RemoveAttribute(found);
            }

            found = body.GetAttrByName("text");
            if (found != null)
            {
                textColor = found.Val;
                found.Val = null;
                body.RemoveAttribute(found);
            }

            /* emit the "body" rule only when at least one property has a value */
            bool nothingForBody = backgroundUrl == null && backgroundColor == null && textColor == null;
            if (!nothingForBody)
            {
                lexer.AddStringLiteral(" body {\n");

                if (backgroundUrl != null)
                {
                    lexer.AddStringLiteral("  background-image: url(");
                    lexer.AddStringLiteral(backgroundUrl);
                    lexer.AddStringLiteral(");\n");
                }

                if (backgroundColor != null)
                {
                    lexer.AddStringLiteral("  background-color: ");
                    lexer.AddStringLiteral(backgroundColor);
                    lexer.AddStringLiteral(";\n");
                }

                if (textColor != null)
                {
                    lexer.AddStringLiteral("  color: ");
                    lexer.AddStringLiteral(textColor);
                    lexer.AddStringLiteral(";\n");
                }

                lexer.AddStringLiteral(" }\n");
            }

            /* link colours each become their own pseudo-class rule */
            AttVal linkAttr = body.GetAttrByName("link");
            if (linkAttr != null)
            {
                AddColorRule(lexer, " :link", linkAttr.Val);
                body.RemoveAttribute(linkAttr);
            }

            AttVal visitedAttr = body.GetAttrByName("vlink");
            if (visitedAttr != null)
            {
                AddColorRule(lexer, " :visited", visitedAttr.Val);
                body.RemoveAttribute(visitedAttr);
            }

            AttVal activeAttr = body.GetAttrByName("alink");
            if (activeAttr != null)
            {
                AddColorRule(lexer, " :active", activeAttr.Val);
                body.RemoveAttribute(activeAttr);
            }
        }
コード例 #7
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Applies all matching rules to a node.

        The rules are tried in a fixed order; as soon as one of them reports a
        change, the loop restarts on the node held in the MutableObject (rules
        that receive the holder may replace it). The loop ends when no rule
        applies, when the current node is not an element, or when a rule leaves
        no node to continue with (null).

        Returns the node the caller should continue from; may be null.
        */
        private Node CleanNode(Lexer lexer, Node node)
        {
            Node next;
            var o = new MutableObject();

            /* node can become null here: e.g. NestedList stores node.Next in the holder */
            for (next = node; node != null && node.IsElement; node = next)
            {
                o.Object = next;

                bool b = Dir2Div(node);
                next = (Node) o.Object;
                if (b)
                {
                    continue;
                }

                b = NestedList(lexer, node, o);
                next = (Node) o.Object;
                if (b)
                {
                    continue;
                }

                b = Center2Div(lexer, node, o);
                next = (Node) o.Object;
                if (b)
                {
                    continue;
                }

                b = MergeDivs(node);
                next = (Node) o.Object;
                if (b)
                {
                    continue;
                }

                b = BlockStyle(node);
                next = (Node) o.Object;
                if (b)
                {
                    continue;
                }

                b = InlineStyle(lexer, node);
                next = (Node) o.Object;
                if (b)
                {
                    continue;
                }

                b = Font2Span(lexer, node, o);
                next = (Node) o.Object;
                if (b)
                {
                    continue;
                }

                /* no rule applied: done with this node */
                break;
            }

            return next;
        }
コード例 #8
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Recursively cleans the children of a node (depth first) and then the
        node itself. Returns the result of CleanNode for this node, i.e. the
        node the caller should continue iterating from (may be null).
        */
        private Node CreateStyleProperties(Lexer lexer, Node node)
        {
            if (node.Content != null)
            {
                Node child;
                for (child = node.Content; child != null; child = child.Next)
                {
                    /* cleaning may replace the child; continue from whatever it returns */
                    child = CreateStyleProperties(lexer, child);

                    /* a null result means there is nothing left to visit; stop before child.Next */
                    if (child == null)
                    {
                        break;
                    }
                }
            }

            return CleanNode(lexer, node);
        }
コード例 #9
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Looks up the class name registered in the lexer's style list for the
        given tag/properties pair. If no entry matches, a new Style with a
        freshly generated class name is pushed onto the front of the list.

        Comparisons are ordinal and null-safe, so a null tag or properties
        string (e.g. a style attribute without a value) does not throw.
        */
        private string FindStyle(Lexer lexer, string tag, string properties)
        {
            Style style;

            for (style = lexer.Styles; style != null; style = style.Next)
            {
                /* static string.Equals tolerates null on either side */
                if (string.Equals(style.Tag, tag) && string.Equals(style.Properties, properties))
                {
                    return style.TagClass;
                }
            }

            style = new Style(tag, GenSymClass(), properties, lexer.Styles);
            lexer.Styles = style;
            return style.TagClass;
        }
コード例 #10
0
ファイル: PPrint.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Splits the document body into slides and pretty-prints the whole tree
        once per slide into a fresh in-memory stream.

        Does nothing when the document has no body element.
        NOTE(review): each slide is written to a MemoryStream that is not stored
        anywhere visible in this method; the "slideN.html" name is only used in
        the debug message. Confirm whether the output is meant to be persisted.
        */
        public virtual void CreateSlides(Lexer lexer, Node root)
        {
            Out output = new OutImpl();

            Node body = root.FindBody(lexer.Options.TagTable);
            if (body == null)
            {
                /* no body element: there is no slide content to burst */
                return;
            }

            _count = CountSlides(body);
            _slidecontent = body.Content;
            AddTransitionEffect(lexer, root, EFFECT_BLEND, 3.0);

            for (_slide = 1; _slide <= _count; ++_slide)
            {
                string buf = "slide" + _slide + ".html";
                output.State = StreamIn.FSM_ASCII;
                output.Encoding = _options.CharEncoding;

                try
                {
                    output.Output = new MemoryStream();
                    PrintTree(output, 0, 0, lexer, root);
                    FlushLine(output, 0);
                }
                catch (IOException e)
                {
                    Debug.WriteLine(buf + e);
                }
            }
        }
コード例 #11
0
ファイル: PPrint.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Pretty-prints the subtree rooted at node to fout.

        fout   - output sink passed through to the line-flushing helpers
        mode   - bit flags (PREFORMATTED, NOWRAP, CDATA are OR-ed in below)
        indent - current indentation in columns
        lexer  - passed through to PrintTag and recursive calls
        node   - node to print; null is ignored

        Non-element nodes are dispatched on node.Type to a dedicated printer.
        Elements are handled in this order: empty / start-end tags, elements
        parsed by ParsePre, style and script, inline elements, and finally
        all other (block-like) containers.

        NOTE(review): several branches read node.Tag.Model without a null
        check while others test node.Tag != null first; confirm whether an
        element node can reach this method with a null Tag.
        */
        public virtual void PrintTree(Out fout, int mode, int indent, Lexer lexer, Node node)
        {
            Node content;
            TagCollection tt = _options.TagTable;

            if (node == null)
                return;

            if (node.Type == Node.TEXT_NODE)
            {
                PrintText(fout, mode, indent, node.Textarray, node.Start, node.End);
            }
            else if (node.Type == Node.COMMENT_TAG)
            {
                PrintComment(fout, indent, node);
            }
            else if (node.Type == Node.ROOT_NODE)
            {
                /* the root has no markup of its own: just print its children */
                for (content = node.Content; content != null; content = content.Next)
                {
                    PrintTree(fout, mode, indent, lexer, content);
                }
            }
            else if (node.Type == Node.DOC_TYPE_TAG)
            {
                PrintDocType(fout, indent, node);
            }
            else if (node.Type == Node.PROC_INS_TAG)
            {
                PrintPi(fout, indent, node);
            }
            else if (node.Type == Node.CDATA_TAG)
            {
                PrintCdata(fout, indent, node);
            }
            else if (node.Type == Node.SECTION_TAG)
            {
                PrintSection(fout, indent, node);
            }
            else if (node.Type == Node.ASP_TAG)
            {
                PrintAsp(fout, indent, node);
            }
            else if (node.Type == Node.JSTE_TAG)
            {
                PrintJste(fout, indent, node);
            }
            else if (node.Type == Node.PHP_TAG)
            {
                PrintPhp(fout, indent, node);
            }
            else if ((node.Tag.Model & ContentModel.EMPTY) != 0 || node.Type == Node.START_END_TAG)
            {
                /* empty element or <tag/>: no content and no end tag to print */
                if ((node.Tag.Model & ContentModel.INLINE) == 0)
                {
                    CondFlushLine(fout, indent);
                }

                /* optional line break before the first <br> of a run */
                if (node.Tag == tt.TagBr && node.Prev != null && node.Prev.Tag != tt.TagBr && _options.BreakBeforeBr)
                {
                    FlushLine(fout, indent);
                }

                /* when cleaning, <wbr> is replaced by a single space */
                if (_options.MakeClean && node.Tag == tt.TagWbr)
                {
                    PrintString(" ");
                }
                else
                {
                    PrintTag(lexer, fout, mode, indent, node);
                }

                if (node.Tag == tt.TagParam || node.Tag == tt.TagArea)
                {
                    CondFlushLine(fout, indent);
                }
                else if (node.Tag == tt.TagBr || node.Tag == tt.TagHr)
                {
                    FlushLine(fout, indent);
                }
            }
            else
            {
                /* some kind of container element */
                if (node.Tag != null && node.Tag.Parser == ParserImpl.ParsePre)
                {
                    CondFlushLine(fout, indent);

                    /* preformatted content is printed without indentation or wrapping */
                    indent = 0;
                    CondFlushLine(fout, indent);
                    PrintTag(lexer, fout, mode, indent, node);
                    FlushLine(fout, indent);

                    for (content = node.Content; content != null; content = content.Next)
                    {
                        PrintTree(fout, (mode | PREFORMATTED | NOWRAP), indent, lexer, content);
                    }

                    CondFlushLine(fout, indent);
                    PrintEndTag(node);
                    FlushLine(fout, indent);

                    if (_options.IndentContent == false && node.Next != null)
                    {
                        FlushLine(fout, indent);
                    }
                }
                else if (node.Tag == tt.TagStyle || node.Tag == tt.TagScript)
                {
                    CondFlushLine(fout, indent);

                    /* style/script content: same as preformatted, plus the CDATA flag */
                    indent = 0;
                    CondFlushLine(fout, indent);
                    PrintTag(lexer, fout, mode, indent, node);
                    FlushLine(fout, indent);

                    for (content = node.Content; content != null; content = content.Next)
                    {
                        PrintTree(fout, (mode | PREFORMATTED | NOWRAP | CDATA), indent, lexer, content);
                    }

                    CondFlushLine(fout, indent);
                    PrintEndTag(node);
                    FlushLine(fout, indent);

                    if (_options.IndentContent == false && node.Next != null)
                    {
                        FlushLine(fout, indent);
                    }
                }
                else if ((node.Tag.Model & ContentModel.INLINE) != 0)
                {
                    if (_options.MakeClean)
                    {
                        /* discards <font> and </font> tags */
                        if (node.Tag == tt.TagFont)
                        {
                            for (content = node.Content; content != null; content = content.Next)
                            {
                                PrintTree(fout, mode, indent, lexer, content);
                            }
                            return;
                        }

                        /* replace <nobr>...</nobr> by &nbsp; or &#160; etc. */
                        if (node.Tag == tt.TagNobr)
                        {
                            for (content = node.Content; content != null; content = content.Next)
                            {
                                PrintTree(fout, (mode | NOWRAP), indent, lexer, content);
                            }
                            return;
                        }
                    }

                    /* otherwise a normal inline element */

                    PrintTag(lexer, fout, mode, indent, node);

                    /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */

                    if (ShouldIndent(node))
                    {
                        CondFlushLine(fout, indent);
                        indent += _options.Spaces;

                        for (content = node.Content; content != null; content = content.Next)
                        {
                            PrintTree(fout, mode, indent, lexer, content);
                        }

                        CondFlushLine(fout, indent);
                        indent -= _options.Spaces;
                        CondFlushLine(fout, indent);
                    }
                    else
                    {
                        for (content = node.Content; content != null; content = content.Next)
                        {
                            PrintTree(fout, mode, indent, lexer, content);
                        }
                    }

                    PrintEndTag(node);
                }
                else
                {
                    /* other tags */
                    CondFlushLine(fout, indent);

                    if (_options.SmartIndent && node.Prev != null)
                    {
                        FlushLine(fout, indent);
                    }

                    /* the start tag is skipped only when HideEndTags is set and the model has OMIT_ST */
                    if (_options.HideEndTags == false ||
                        !(node.Tag != null && ((node.Tag.Model & ContentModel.OMIT_ST) != 0)))
                    {
                        PrintTag(lexer, fout, mode, indent, node);

                        if (ShouldIndent(node))
                        {
                            CondFlushLine(fout, indent);
                        }
                        else if ((node.Tag.Model & ContentModel.HTML) != 0 || node.Tag == tt.TagNoframes ||
                                 ((node.Tag.Model & ContentModel.HEAD) != 0 && node.Tag != tt.TagTitle))
                        {
                            FlushLine(fout, indent);
                        }
                    }

                    /* when bursting slides, the body content is printed by PrintSlide instead */
                    if (node.Tag == tt.TagBody && _options.BurstSlides)
                    {
                        PrintSlide(fout, mode, (_options.IndentContent ? indent + _options.Spaces : indent), lexer);
                    }
                    else
                    {
                        Node last = null;

                        for (content = node.Content; content != null; content = content.Next)
                        {
                            /* kludge for naked text before block level tag */
                            if (last != null && !_options.IndentContent && last.Type == Node.TEXT_NODE &&
                                content.Tag != null && (content.Tag.Model & ContentModel.BLOCK) != 0)
                            {
                                FlushLine(fout, indent);
                                FlushLine(fout, indent);
                            }

                            PrintTree(fout, mode, (ShouldIndent(node) ? indent + _options.Spaces : indent), lexer,
                                      content);

                            last = content;
                        }
                    }

                    /* don't flush line for td and th */
                    if (ShouldIndent(node) ||
                        (((node.Tag.Model & ContentModel.HTML) != 0 || node.Tag == tt.TagNoframes ||
                          ((node.Tag.Model & ContentModel.HEAD) != 0 && node.Tag != tt.TagTitle)) &&
                         _options.HideEndTags == false))
                    {
                        CondFlushLine(fout, (_options.IndentContent ? indent + _options.Spaces : indent));

                        /* the end tag is skipped only when HideEndTags is set and the model has OPT */
                        if (_options.HideEndTags == false || (node.Tag.Model & ContentModel.OPT) == 0)
                        {
                            PrintEndTag(node);
                            FlushLine(fout, indent);
                        }
                    }
                    else
                    {
                        if (_options.HideEndTags == false || (node.Tag.Model & ContentModel.OPT) == 0)
                        {
                            PrintEndTag(node);
                        }

                        FlushLine(fout, indent);
                    }

                    /* extra blank line after block, list, definition-list and table elements */
                    if (_options.IndentContent == false && node.Next != null && _options.HideEndTags == false &&
                        (node.Tag.Model &
                         (ContentModel.BLOCK | ContentModel.LIST | ContentModel.DEFLIST | ContentModel.TABLE)) != 0)
                    {
                        FlushLine(fout, indent);
                    }
                }
            }
        }
コード例 #12
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Replaces a node's style attribute with a class attribute.

        The class name is obtained from FindStyle, which looks the
        element/style pair up in the lexer's style list and creates a new
        entry when none exists. If the node already has a class attribute,
        the class name is appended to it after a space and the style
        attribute is removed; otherwise the style attribute itself is
        renamed to "class" and given the class name as its value.

        Does nothing when the node has no style attribute.
        */
        private void Style2Rule(Lexer lexer, Node node)
        {
            AttVal inlineStyle = node.GetAttrByName("style");

            if (inlineStyle != null)
            {
                string generatedClass = FindStyle(lexer, node.Element, inlineStyle.Val);
                AttVal existingClass = node.GetAttrByName("class");

                if (existingClass == null)
                {
                    /* reuse style attribute for class attribute */
                    inlineStyle.Attribute = "class";
                    inlineStyle.Val = generatedClass;
                }
                else
                {
                    /* there already is a class attribute: append the class name after a space */
                    existingClass.Val = existingClass.Val + " " + generatedClass;
                    node.RemoveAttribute(inlineStyle);
                }
            }
        }
コード例 #13
0
ファイル: PPrint.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Emits the start (or end) tag of a node into the line buffer:
        "<", an optional "/", the case-folded element name, the attributes,
        an optional " /" for empty elements in XML/voyager output, and ">".
        Afterwards, unless the node is a start-end tag or PREFORMATTED mode
        is set, it updates the wrapping state for the current line.
        */
        private void PrintTag(Lexer lexer, Out fout, int mode, int indent, Node node)
        {
            TagCollection tags = _options.TagTable;

            AddC('<', _linelen++);

            if (node.Type == Node.END_TAG)
            {
                AddC('/', _linelen++);
            }

            foreach (char ch in node.Element)
            {
                AddC(Lexer.FoldCase(ch, _options.UpperCaseTags, _options.XmlTags), _linelen++);
            }

            PrintAttrs(fout, indent, node, node.Attributes);

            bool xmlStyleOutput = _options.XmlOut || (lexer != null && lexer.Isvoyager);
            if (xmlStyleOutput &&
                (node.Type == Node.START_END_TAG || (node.Tag.Model & ContentModel.EMPTY) != 0))
            {
                AddC(' ', _linelen++); /* compatibility hack */
                AddC('/', _linelen++);
            }

            AddC('>', _linelen++);

            /* no wrap bookkeeping for <tag/> nodes or preformatted output */
            if (node.Type == Node.START_END_TAG || (mode & PREFORMATTED) != 0)
            {
                return;
            }

            if (indent + _linelen >= _options.WrapLen)
            {
                WrapLine(fout, indent);
            }

            /* still too long after wrapping: flush and stop */
            if (indent + _linelen >= _options.WrapLen)
            {
                CondFlushLine(fout, indent);
                return;
            }

            /*
                wrap after start tag if is <br/> or if it's not
                inline or it is an empty tag followed by </a>
                */
            if (!AfterSpace(node) || (mode & NOWRAP) != 0)
            {
                return;
            }

            bool mayWrapHere = (node.Tag.Model & ContentModel.INLINE) == 0 ||
                               node.Tag == tags.TagBr ||
                               ((node.Tag.Model & ContentModel.EMPTY) != 0 && node.Next == null &&
                                node.Parent.Tag == tags.TagA);
            if (mayWrapHere)
            {
                _wraphere = _linelen;
            }
        }
コード例 #14
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Symptom: <ul><li><ul>...</ul></li></ul>
        Action: discard outer list

        Applies only when node is a UL or OL whose single child has, as its
        first child, a list of the same tag. The inner list is moved into the
        position of the outer one, and pnode.Object is set to the outer
        node's original next sibling. Returns true when the tree was changed.
        */
        private bool NestedList(Lexer lexer, Node node, MutableObject pnode)
        {
            bool isList = node.Tag == _tt.TagUl || node.Tag == _tt.TagOl;
            if (!isList)
            {
                return false;
            }

            /* the outer list must have exactly one child */
            Node onlyItem = node.Content;
            if (onlyItem == null || onlyItem.Next != null)
            {
                return false;
            }

            /* whose first child must be a list of the same kind */
            Node inner = onlyItem.Content;
            if (inner == null || inner.Tag != node.Tag)
            {
                return false;
            }

            pnode.Object = node.Next;

            /* move inner list node into position of outer node */
            inner.Prev = node.Prev;
            inner.Next = node.Next;
            inner.Parent = node.Parent;
            FixNodeLinks(inner);

            /* get rid of outer ul and its li */
            onlyItem.Content = null;
            node.Content = null;
            node.Next = null;

            /*
            If prev node was a list the chances are this node
            should be appended to that list. Word has no way of
            recognizing nested lists and just uses indents
            */

            /* without a previous sibling, the (now detached) outer node is the one cleaned */
            Node cleanTarget = node;
            Node previous = inner.Prev;

            if (previous != null)
            {
                cleanTarget = inner;

                if (previous.Tag == _tt.TagUl || previous.Tag == _tt.TagOl)
                {
                    /* unlink the inner list from the sibling chain */
                    previous.Next = inner.Next;

                    if (previous.Next != null)
                    {
                        previous.Next.Prev = previous;
                    }

                    Node lastItem = previous.Last; /* <li> */

                    /* and append it to the last item of the previous list */
                    inner.Parent = lastItem;
                    inner.Next = null;
                    inner.Prev = lastItem.Last;
                    FixNodeLinks(inner);
                }
            }

            CleanNode(lexer, cleanTarget);
            return true;
        }
コード例 #15
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Returns false (and flags USING_BODY in lexer.BadLayout) when the
        document's body element carries any of the presentational attributes
        background, bgcolor, text, link, vlink or alink. Returns true when
        there is no body or none of these attributes is present.
        */
        private bool NiceBody(Lexer lexer, Node doc)
        {
            Node body = doc.FindBody(lexer.Options.TagTable);

            if (body == null)
            {
                return true;
            }

            /* checked in this order; the first attribute found ends the search */
            string[] presentationalAttrs = { "background", "bgcolor", "text", "link", "vlink", "alink" };

            foreach (string attrName in presentationalAttrs)
            {
                if (body.GetAttrByName(attrName) != null)
                {
                    lexer.BadLayout |= Report.USING_BODY;
                    return false;
                }
            }

            return true;
        }
コード例 #16
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Handles a b, i or font element that is the only child of a table
        cell or of an inline element such as em: the child's styles are
        merged into the parent as style properties and the child is stripped.

        b and i are only converted when the LogicalEmphasis option is set.
        Returns true when the node was changed. Nodes without a tag
        definition (node.Tag == null) are left untouched.
        */
        private bool InlineStyle(Lexer lexer, Node node)
        {
            /* elements without a tag definition have no content model to test */
            if (node.Tag == null)
            {
                return false;
            }

            if (node.Tag == _tt.TagFont || (node.Tag.Model & (ContentModel.INLINE | ContentModel.ROW)) == 0)
            {
                return false;
            }

            Node child = node.Content;

            /* the rule needs exactly one child: check child exists and has no peers */
            if (child == null || child.Next != null)
            {
                return false;
            }

            if (child.Tag == _tt.TagB && lexer.Options.LogicalEmphasis)
            {
                MergeStyles(node, child);
                AddStyleProperty(node, "font-weight: bold");
                StripOnlyChild(node);
                return true;
            }

            if (child.Tag == _tt.TagI && lexer.Options.LogicalEmphasis)
            {
                MergeStyles(node, child);
                AddStyleProperty(node, "font-style: italic");
                StripOnlyChild(node);
                return true;
            }

            if (child.Tag == _tt.TagFont)
            {
                MergeStyles(node, child);
                AddFontStyles(node, child.Attributes);
                StripOnlyChild(node);
                return true;
            }

            return false;
        }
コード例 #17
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        This is a major clean up to strip out all the extra stuff you get
        when you save as web page from Word 2000. It walks node and its
        following siblings, recursing into their content:

        - style and meta elements and comments are discarded
        - span elements are replaced by their content (StripSpan)
        - link elements with rel="File-List" are discarded
        - paragraphs without content are discarded
        - a run of <p class="MsoListBullet"> becomes <ul><li>...</li></ul>
        - a run of <p class="Code"> becomes <pre>...</pre>
        - remaining start tags are passed to PurgeAttributes

        The walk stops at an html element that has no xmlns:o attribute,
        which is taken to mean the document did not come from Word 2000.

        It doesn't yet know what to do with VML tags, but these will appear
        as errors unless you declare them as new tags, such as o:p which
        needs to be declared as inline.
        */
        public virtual void CleanWord2000(Lexer lexer, Node node)
        {
            /* the <ul> or <pre> currently being built from a run of p's */
            Node list = null;

            while (node != null)
            {
                /* discard Word's style verbiage */
                if (node.Tag == _tt.TagStyle || node.Tag == _tt.TagMeta || node.Type == Node.COMMENT_TAG)
                {
                    node = Node.DiscardElement(node);
                    continue;
                }

                /* strip out all span tags Word scatters so liberally! */
                if (node.Tag == _tt.TagSpan)
                {
                    node = StripSpan(lexer, node);
                    continue;
                }

                /* only carry on past <html> when it's a Word 2000 document */
                if (node.Tag == _tt.TagHtml && node.GetAttrByName("xmlns:o") == null)
                {
                    return;
                }

                if (node.Tag == _tt.TagLink)
                {
                    AttVal attr = node.GetAttrByName("rel");

                    if (attr != null && attr.Val != null && attr.Val.Equals("File-List"))
                    {
                        node = Node.DiscardElement(node);
                        continue;
                    }
                }

                /* discard empty paragraphs */
                if (node.Content == null && node.Tag == _tt.TagP)
                {
                    node = Node.DiscardElement(node);
                    continue;
                }

                if (node.Tag == _tt.TagP)
                {
                    AttVal attr = node.GetAttrByName("class");
                    string cssClass = attr != null ? attr.Val : null;

                    /* map sequence of <p class="MsoListBullet"> to <ul>...</ul> */
                    if (cssClass == "MsoListBullet")
                    {
                        Node.CoerceNode(lexer, node, _tt.TagLi);

                        if (list == null || list.Tag != _tt.TagUl)
                        {
                            list = lexer.InferredTag("ul");
                            Node.InsertNodeBeforeElement(node, list);
                        }

                        PurgeAttributes(node);

                        if (node.Content != null)
                        {
                            CleanWord2000(lexer, node.Content);
                        }

                        /* remove node and append to contents of list */
                        Node.RemoveNode(node);
                        Node.InsertNodeAtEnd(list, node);

                        /*
                        Resume from the top of the loop with the sibling that
                        follows the list. It is null when the paragraph was
                        the last sibling, and when it is the next bullet
                        paragraph it has to reach the class test above in
                        order to join this list.
                        */
                        node = list.Next;
                        continue;
                    }

                    /* map sequence of <p class="Code"> to <pre>...</pre> */
                    if (cssClass == "Code")
                    {
                        Node br = lexer.NewLineNode();
                        NormalizeSpaces(node);

                        if (list == null || list.Tag != _tt.TagPre)
                        {
                            list = lexer.InferredTag("pre");
                            Node.InsertNodeBeforeElement(node, list);
                        }

                        /* remove node and append to contents of list */
                        Node.RemoveNode(node);
                        Node.InsertNodeAtEnd(list, node);

                        /* replace the paragraph by its content, then end the line */
                        StripSpan(lexer, node);
                        Node.InsertNodeAtEnd(list, br);

                        /* as above: may be null, may be the next Code paragraph */
                        node = list.Next;
                        continue;
                    }

                    /* an ordinary paragraph ends the current run */
                    list = null;
                }
                else
                {
                    list = null;
                }

                /* strip out style and class attributes */
                if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                {
                    PurgeAttributes(node);
                }

                if (node.Content != null)
                {
                    CleanWord2000(lexer, node.Content);
                }

                node = node.Next;
            }
        }
コード例 #18
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Replace a font element by a span element: the font attributes are
        handed to AddFontStyles, every attribute other than style is then
        dropped, and the element is renamed to span.

        Returns true when node was turned into a span. Returns false when
        node is not a font element, when it was passed to DiscardContainer
        because the DropFontTags option is set, or when it is the only child
        of its parent and is therefore left alone.
        */
        private bool Font2Span(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagFont)
            {
                return false;
            }

            if (lexer.Options.DropFontTags)
            {
                DiscardContainer(node, pnode);
                return false;
            }

            /* a FONT that is the sole child of its parent stays as it is */
            bool isOnlyChild = node.Parent.Content == node && node.Next == null;
            if (isOnlyChild)
            {
                return false;
            }

            AddFontStyles(node, node.Attributes);

            /* keep the style attribute and unlink it from the rest */
            AttVal kept = null;
            AttVal following = null;

            for (AttVal current = node.Attributes; current != null; current = following)
            {
                following = current.Next;

                if (current.Attribute.Equals("style"))
                {
                    current.Next = null;
                    kept = current;
                }
            }

            node.Attributes = kept;

            node.Tag = _tt.TagSpan;
            node.Element = "span";

            return true;
        }
コード例 #19
0
ファイル: AttVal.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Check this attribute against the element it appears on and return
        its dictionary entry (Dict), which is null for an unknown attribute.

        A known attribute narrows lexer.Versions, or is reported when it is
        an XML-only attribute and neither XmlTags nor XmlOut is set, and then
        has its AttrCheck run if there is one. An unknown attribute is
        reported unless XmlTags is set, the element has no tag entry, _asp is
        set, or the element is proprietary.
        */
        public virtual Attribute CheckAttribute(Lexer lexer, Node node)
        {
            TagCollection tt = lexer.Options.TagTable;

            /* the duplicate check is skipped when Asp or Php is set */
            if (Asp == null && Php == null)
            {
                CheckUniqueAttribute(lexer, node);
            }

            Attribute entry = Dict;

            if (entry == null)
            {
                /* ignore unknown attributes for proprietary elements */
                bool reportable = !lexer.Options.XmlTags && node.Tag != null && _asp == null;

                if (reportable && (node.Tag.Versions & HtmlVersion.Proprietary) == HtmlVersion.Unknown)
                {
                    Report.AttrError(lexer, node, Attribute, Report.UNKNOWN_ATTRIBUTE);
                }

                return entry;
            }

            /* title is vers 2.0 for A and LINK otherwise vers 4.0 */
            bool titleOnAnchorOrLink = entry == AttributeTable.AttrTitle &&
                                       (node.Tag == tt.TagA || node.Tag == tt.TagLink);

            if (titleOnAnchorOrLink)
            {
                lexer.Versions &= HtmlVersion.All;
            }
            else if ((entry.Versions & HtmlVersion.Xml) == 0)
            {
                lexer.Versions &= entry.Versions;
            }
            else if (!lexer.Options.XmlTags && !lexer.Options.XmlOut)
            {
                Report.AttrError(lexer, node, Attribute, Report.XML_ATTRIBUTE_VALUE);
            }

            if (entry.AttrCheck != null)
            {
                entry.AttrCheck.Check(lexer, node, this);
            }

            return entry;
        }
コード例 #20
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Remove every section node (Node.SECTION_TAG) found among node, its
        following siblings and their content. A section whose text starts
        with "if" is handed to PruneSection; any other section is discarded
        on its own.
        */
        public virtual void DropSections(Lexer lexer, Node node)
        {
            while (node != null)
            {
                if (node.Type != Node.SECTION_TAG)
                {
                    /* ordinary node: descend, then move on to the next sibling */
                    if (node.Content != null)
                    {
                        DropSections(lexer, node.Content);
                    }

                    node = node.Next;
                    continue;
                }

                bool opensConditional = Lexer.GetString(node.Textarray, node.Start, 2).Equals("if");

                /* either call yields the node at which the walk resumes */
                node = opensConditional ? PruneSection(lexer, node) : Node.DiscardElement(node);
            }
        }
コード例 #21
0
ファイル: AttVal.cs プロジェクト: r1pper/TidyNetPortable
        /*
        The same attribute name can't be used more than once in each
        element: report REPEATED_ATTRIBUTE when an attribute later in the
        list carries the same name as this one. Later attributes that have
        Asp or Php set are not compared.
        */
        public virtual void CheckUniqueAttribute(Lexer lexer, Node node)
        {
            bool repeated = false;

            /* one match is enough, the error is reported once either way */
            for (AttVal other = Next; other != null && !repeated; other = other.Next)
            {
                if (Attribute == null || other.Attribute == null)
                {
                    continue;
                }

                if (other.Asp != null || other.Php != null)
                {
                    continue;
                }

                repeated = String.CompareOrdinal(Attribute, other.Attribute) == 0;
            }

            if (repeated)
            {
                Report.AttrError(lexer, node, Attribute, Report.REPEATED_ATTRIBUTE);
            }
        }
コード例 #22
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        node is <![if ...]>: discard it and every following sibling up to and
        including the matching <![endif]>. Nested <![if ...]> sections are
        pruned recursively. Returns the node that follows the matching
        <![endif]>, or null when the siblings run out first.
        */
        public virtual Node PruneSection(Lexer lexer, Node node)
        {
            /* discard the opening <![if ...]>; DiscardElement returns the next node */
            node = Node.DiscardElement(node);

            while (node != null)
            {
                if (node.Type == Node.SECTION_TAG)
                {
                    if ((Lexer.GetString(node.Textarray, node.Start, 2)).Equals("if"))
                    {
                        /*
                        The nested call returns the node after the inner
                        <![endif]>. That node has to be examined here rather
                        than discarded unseen: it may be this section's own
                        <![endif]>, another nested <![if ...]>, or null.
                        */
                        node = PruneSection(lexer, node);
                        continue;
                    }

                    if ((Lexer.GetString(node.Textarray, node.Start, 5)).Equals("endif"))
                    {
                        /* matching terminator: drop it and hand back what follows */
                        return Node.DiscardElement(node);
                    }
                }

                /* anything else inside the section is discarded */
                node = Node.DiscardElement(node);
            }

            return null;
        }
コード例 #23
0
ファイル: PPrint.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Add meta element for page transition effect, this works on IE but not NS.

        A <meta http-equiv="Page-Enter"> element is inserted at the start of
        the head found under root; nothing happens when there is no head.
        Effects 0 to 23 produce a revealTrans filter with that transition
        number, any other value produces a blendTrans filter.
        */
        public virtual void AddTransitionEffect(Lexer lexer, Node root, short effect, double duration)
        {
            Node head = root.FindHead(lexer.Options.TagTable);

            if (head == null)
            {
                return;
            }

            /*
            Format the numbers with the invariant culture. The current
            culture may use a decimal comma, which would turn a duration
            of 2.5 into "2,5" and corrupt the comma separated argument list.
            */
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string seconds = duration.ToString(invariant);
            string transition;

            if (0 <= effect && effect <= 23)
            {
                transition = "revealTrans(Duration=" + seconds + ",Transition=" + effect.ToString(invariant) + ")";
            }
            else
            {
                transition = "blendTrans(Duration=" + seconds + ")";
            }

            Node meta = lexer.InferredTag("meta");
            meta.AddAttribute("http-equiv", "Page-Enter");
            meta.AddAttribute("content", transition);
            Node.InsertNodeAtStart(head, meta);
        }
コード例 #24
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Apply Style2Rule to every node of the tree rooted at node, visiting
        the content of a node before the node itself.
        */
        private void DefineStyleRules(Lexer lexer, Node node)
        {
            /* a null Content just means there is nothing to descend into */
            Node current = node.Content;

            while (current != null)
            {
                DefineStyleRules(lexer, current);
                current = current.Next;
            }

            Style2Rule(lexer, node);
        }
コード例 #25
0
ファイル: PPrint.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Called from PrintTree to print the content of one slide, starting at
        the node held in _slidecontent.

        The slide is wrapped in a div whose onclick handler loads the next
        slide, or slide 1 once _slide has reached _count. When the first node
        is an h2, the navigation bar and an hr are printed ahead of it. Nodes
        are then printed up to, but not including, the next h2, followed by a
        <br clear="all">, another hr and the navigation bar.

        On return _slidecontent refers to the node starting the next slide,
        or is null.
        */
        public virtual void PrintSlide(Out fout, int mode, int indent, Lexer lexer)
        {
            TagCollection tt = _options.TagTable;

            /* indentation used for everything printed through PrintTree */
            int childIndent = _options.IndentContent ? indent + _options.Spaces : indent;

            /* insert div for onclick handler */
            string nextSlide = (_slide < _count ? _slide + 1 : 1).ToString();
            PrintString("<div onclick=\"document.location='slide" + nextSlide + ".html'\">");
            CondFlushLine(fout, indent);

            /* a leading h2 is printed first, below the navbar and an hr */
            if (_slidecontent.Tag == tt.TagH2)
            {
                PrintNavBar(fout, indent);

                /* the hr is written one character at a time so its case can be folded */
                AddC('<', _linelen++);
                AddC(Lexer.FoldCase('h', _options.UpperCaseTags, _options.XmlTags), _linelen++);
                AddC(Lexer.FoldCase('r', _options.UpperCaseTags, _options.XmlTags), _linelen++);

                if (!_options.XmlOut)
                {
                    AddC('>', _linelen++);
                }
                else
                {
                    PrintString(" />");
                }

                if (_options.IndentContent)
                {
                    CondFlushLine(fout, indent);
                }

                /* print the h2 element */
                PrintTree(fout, mode, childIndent, lexer, _slidecontent);

                _slidecontent = _slidecontent.Next;
            }

            /* now continue until we reach the next h2 */
            Node previous = null;
            Node current = _slidecontent;

            while (current != null && current.Tag != tt.TagH2)
            {
                /* kludge for naked text before block level tag */
                bool textBeforeBlock = previous != null && !_options.IndentContent &&
                                       previous.Type == Node.TEXT_NODE && current.Tag != null &&
                                       (current.Tag.Model & ContentModel.BLOCK) != 0;

                if (textBeforeBlock)
                {
                    FlushLine(fout, indent);
                    FlushLine(fout, indent);
                }

                PrintTree(fout, mode, childIndent, lexer, current);

                previous = current;
                current = current.Next;
            }

            /* either the h2 that opens the next slide or null */
            _slidecontent = current;

            /* now print epilog */
            CondFlushLine(fout, indent);

            PrintString("<br clear=\"all\">");
            CondFlushLine(fout, indent);

            AddC('<', _linelen++);
            AddC(Lexer.FoldCase('h', _options.UpperCaseTags, _options.XmlTags), _linelen++);
            AddC(Lexer.FoldCase('r', _options.UpperCaseTags, _options.XmlTags), _linelen++);

            if (!_options.XmlOut)
            {
                AddC('>', _linelen++);
            }
            else
            {
                PrintString(" />");
            }

            if (_options.IndentContent)
            {
                CondFlushLine(fout, indent);
            }

            PrintNavBar(fout, indent);

            /* end tag for div */
            PrintString("</div>");
            CondFlushLine(fout, indent);
        }
コード例 #26
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
 /*
 Append the text "selector { color: value }" followed by a newline to the
 lexer through AddStringLiteral. Nothing is added when color is null.
 */
 private void AddColorRule(Lexer lexer, string selector, string color)
 {
     if (color == null)
     {
         return;
     }

     lexer.AddStringLiteral(selector);
     lexer.AddStringLiteral(" { color: ");
     lexer.AddStringLiteral(color);
     lexer.AddStringLiteral(" }\n");
 }
コード例 #27
0
ファイル: PPrint.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Print node, dispatching on its type. Text, comments, doctype,
        processing instructions, sections and ASP/JSTE/PHP nodes go to their
        dedicated print routines; the root node just prints its content.
        Elements that are empty or self-closing are printed as a single tag
        on a line of their own. Any other element is treated as a container:
        its start tag, content (recursively) and end tag are printed, with
        the content indented by _options.Spaces unless the element has text
        among its children or ParserImpl.XmlPreserveWhiteSpace says its
        white space must be kept. A null node prints nothing.
        */
        public virtual void PrintXmlTree(Out fout, int mode, int indent, Lexer lexer, Node node)
        {
            TagCollection tt = _options.TagTable;

            if (node == null)
            {
                return;
            }

            if (node.Type == Node.TEXT_NODE)
            {
                PrintText(fout, mode, indent, node.Textarray, node.Start, node.End);
                return;
            }

            if (node.Type == Node.COMMENT_TAG)
            {
                CondFlushLine(fout, indent);
                PrintComment(fout, 0, node);
                CondFlushLine(fout, 0);
                return;
            }

            if (node.Type == Node.ROOT_NODE)
            {
                for (Node top = node.Content; top != null; top = top.Next)
                {
                    PrintXmlTree(fout, mode, indent, lexer, top);
                }

                return;
            }

            if (node.Type == Node.DOC_TYPE_TAG)
            {
                PrintDocType(fout, indent, node);
                return;
            }

            if (node.Type == Node.PROC_INS_TAG)
            {
                PrintPi(fout, indent, node);
                return;
            }

            if (node.Type == Node.SECTION_TAG)
            {
                PrintSection(fout, indent, node);
                return;
            }

            if (node.Type == Node.ASP_TAG)
            {
                PrintAsp(fout, indent, node);
                return;
            }

            if (node.Type == Node.JSTE_TAG)
            {
                PrintJste(fout, indent, node);
                return;
            }

            if (node.Type == Node.PHP_TAG)
            {
                PrintPhp(fout, indent, node);
                return;
            }

            if ((node.Tag.Model & ContentModel.EMPTY) != 0 || node.Type == Node.START_END_TAG)
            {
                /* element without content: a single tag on its own line */
                CondFlushLine(fout, indent);
                PrintTag(lexer, fout, mode, indent, node);
                FlushLine(fout, indent);

                if (node.Next != null)
                {
                    FlushLine(fout, indent);
                }

                return;
            }

            /* some kind of container element */

            /* does it have text among its direct children? */
            bool hasText = false;
            Node child = node.Content;

            while (child != null && !hasText)
            {
                hasText = child.Type == Node.TEXT_NODE;
                child = child.Next;
            }

            CondFlushLine(fout, indent);

            int childIndent;

            if (ParserImpl.XmlPreserveWhiteSpace(node, tt))
            {
                /* no indentation and no extra line breaks inside this element */
                indent = 0;
                childIndent = 0;
                hasText = false;
            }
            else
            {
                childIndent = hasText ? indent : indent + _options.Spaces;
            }

            PrintTag(lexer, fout, mode, indent, node);

            if (!hasText)
            {
                FlushLine(fout, indent);
            }

            for (child = node.Content; child != null; child = child.Next)
            {
                PrintXmlTree(fout, mode, childIndent, lexer, child);
            }

            if (!hasText)
            {
                CondFlushLine(fout, childIndent);
            }

            PrintEndTag(node);
            CondFlushLine(fout, indent);

            if (node.Next != null)
            {
                FlushLine(fout, indent);
            }
        }
コード例 #28
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Symptom: <center>
        Action: replace <center> by <div style="text-align: center">

        When the DropFontTags option is set the center element is instead
        handed to DiscardContainer and an inferred <br> is linked in after
        the element's former last child, or in the element's old position
        when it had no content.

        Returns true when node was a center element, false otherwise.
        */
        private bool Center2Div(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagCenter)
            {
                return false;
            }

            if (!lexer.Options.DropFontTags)
            {
                node.Tag = _tt.TagDiv;
                node.Element = "div";
                AddStyleProperty(node, "text-align: center");
                return true;
            }

            /* neighbours are captured before the container is discarded */
            Node owner = node.Parent;
            Node br;

            if (node.Content != null)
            {
                Node tail = node.Last;

                DiscardContainer(node, pnode);

                br = lexer.InferredTag("br");

                /* link the br in directly behind the former last child */
                Node after = tail.Next;

                br.Next = after;
                br.Prev = tail;
                br.Parent = owner;

                if (after != null)
                {
                    after.Prev = br;
                }

                tail.Next = br;

                if (owner.Last == tail)
                {
                    owner.Last = br;
                }
            }
            else
            {
                Node before = node.Prev;
                Node after = node.Next;

                DiscardContainer(node, pnode);

                br = lexer.InferredTag("br");

                /* the br takes the position the empty element occupied */
                br.Next = after;
                br.Prev = before;
                br.Parent = owner;

                if (after == null)
                {
                    owner.Last = br;
                }
                else
                {
                    after.Prev = br;
                }

                if (before == null)
                {
                    owner.Content = br;
                }
                else
                {
                    before.Next = br;
                }
            }

            return true;
        }
コード例 #29
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Word2000 uses span excessively, so we strip span out.

        The content of span is first run through CleanWord2000 and then
        spliced into the parent in place of the span, keeping its order.
        The emptied span is discarded. Returns the node that followed span.
        */
        public virtual Node StripSpan(Lexer lexer, Node span)
        {
            CleanWord2000(lexer, span.Content);

            /*
            anchor is the node after which the next child is inserted; while
            it is still null (span has no previous sibling) the child is
            inserted in front of span instead
            */
            Node anchor = span.Prev;
            Node child = span.Content;

            while (child != null)
            {
                /* remember the successor before child is unlinked */
                Node upcoming = child.Next;

                Node.RemoveNode(child);

                if (anchor == null)
                {
                    Node.InsertNodeBeforeElement(span, child);
                }
                else
                {
                    Node.InsertNodeAfterElement(anchor, child);
                }

                anchor = child;
                child = upcoming;
            }

            /* span was the last child, so the parent now ends with anchor */
            if (span.Next == null)
            {
                span.Parent.Last = anchor;
            }

            Node following = span.Next;

            span.Content = null;
            Node.DiscardElement(span);

            return following;
        }
コード例 #30
0
ファイル: Clean.cs プロジェクト: r1pper/TidyNetPortable
        /*
        Run CreateStyleProperties over doc and, unless the MakeClean option
        is set, follow up with DefineStyleRules and CreateStyleElement on
        the node it returned.
        */
        public virtual void CleanTree(Lexer lexer, Node doc)
        {
            Node processed = CreateStyleProperties(lexer, doc);

            if (lexer.Options.MakeClean)
            {
                return;
            }

            DefineStyleRules(lexer, processed);
            CreateStyleElement(lexer, processed);
        }