Exemplo n.º 1
0
        public void AppendSelfClosingTagSuffix_VaryingHtmlVersion_AppendsCorrectOutput(HtmlVersion htmlVersion, string expectedOutput)
        {
            // Arrange: each case gets its own template instance.
            DummyTemplate sut = new DummyTemplate();

            // Act: request the self-closing suffix for the HTML version under test.
            string actualSuffix = sut.AppendSelfClosingTagSuffix(htmlVersion);

            // Assert: the suffix must equal the expectation supplied by the test case.
            Assert.That(actualSuffix, Is.EqualTo(expectedOutput));
        }
Exemplo n.º 2
0
 /// <summary>
 /// Initializes the attribute from its name, version flags and checker.
 /// The nowrap and literal flags both start out as <c>false</c>.
 /// </summary>
 /// <param name="name">Value stored as the attribute name.</param>
 /// <param name="versions">Value stored as the attribute's HTML version flags.</param>
 /// <param name="attrCheck">Checker stored with the attribute.</param>
 public Attribute(string name, HtmlVersion versions, IAttrCheck attrCheck)
 {
     // Values taken directly from the arguments.
     this._name = name;

     // Flags that always start cleared.
     this._nowrap = false;
     this._literal = false;

     this._versions = versions;
     this._attrCheck = attrCheck;
 }
Exemplo n.º 3
0
 /// <summary>
 /// Initializes the entry by copying each constructor argument into the
 /// property of the same name.
 /// </summary>
 /// <param name="name">Value assigned to <c>Name</c>.</param>
 /// <param name="versions">Value assigned to <c>Versions</c>.</param>
 /// <param name="model">Value assigned to <c>Model</c>.</param>
 /// <param name="parser">Value assigned to <c>Parser</c>.</param>
 /// <param name="checkAttribs">Value assigned to <c>CheckAttribs</c>.</param>
 public Dict(string name, HtmlVersion versions, int model, IParser parser, ICheckAttribs checkAttribs)
 {
     this.Name = name;
     this.Versions = versions;
     this.Model = model;
     this.Parser = parser;
     this.CheckAttribs = checkAttribs;
 }
Exemplo n.º 4
0
 /// <summary>
 /// Initializes the attribute from its name, version flags and checker.
 /// <c>Nowrap</c> and <c>Literal</c> are both set to <c>false</c>.
 /// </summary>
 /// <param name="name">Value assigned to <c>Name</c>.</param>
 /// <param name="versions">Value assigned to <c>Versions</c>.</param>
 /// <param name="attrCheck">Value assigned to <c>AttrCheck</c>.</param>
 public Attribute(string name, HtmlVersion versions, IAttrCheck attrCheck)
 {
     this.Name = name;

     // Both flags default to off for a newly created attribute.
     this.Nowrap = false;
     this.Literal = false;

     this.Versions = versions;
     this.AttrCheck = attrCheck;
 }
Exemplo n.º 5
0
 /// <summary>
 /// Records the HTML version and view data for this render pass, clears
 /// <c>HasCodeBlockRepeated</c>, and then delegates to <c>CoreRender</c>.
 /// </summary>
 /// <param name="writer">Destination passed on to <c>CoreRender</c>; checked by the null guard.</param>
 /// <param name="htmlVersion">HTML version stored for this render pass.</param>
 /// <param name="viewData">Value assigned to <c>ViewData</c>.</param>
 public void Render(TextWriter writer, HtmlVersion htmlVersion, IDictionary <string, object> viewData)
 {
     // Pass the actual parameter name to the guard. The previous literal,
     // "textWriter", matches no parameter of this method.
     // NOTE(review): assumes the second argument is the reported argument name - confirm against Invariant.
     Invariant.ArgumentNotNull(writer, "writer");

     _htmlVersion         = htmlVersion;
     ViewData             = viewData;
     HasCodeBlockRepeated = false;

     CoreRender(writer);
 }
Exemplo n.º 6
0
        public void RenderAttributeNameValuePair_BooleanAttribute_WritesCorrectAttributes(string name, string value, HtmlVersion htmlVersion, string expectedOutput)
        {
            // Arrange: a template configured for the HTML version under test.
            DummyTemplate sut = new DummyTemplate();
            sut.SetHtmlVersion(htmlVersion);

            // Act: render the name/value pair using a double quote as the quote character.
            string rendered = sut.RenderAttributeNameValuePair(name, value, '\"');

            // Assert
            Assert.That(rendered, Is.EqualTo(expectedOutput));
        }
Exemplo n.º 7
0
 /// <summary>
 /// Initializes the entry; every argument is stored in the property that
 /// shares its name.
 /// </summary>
 /// <param name="name">Value assigned to <c>Name</c>.</param>
 /// <param name="versions">Value assigned to <c>Versions</c>.</param>
 /// <param name="model">Value assigned to <c>Model</c>.</param>
 /// <param name="parser">Value assigned to <c>Parser</c>.</param>
 /// <param name="checkAttribs">Value assigned to <c>CheckAttribs</c>.</param>
 public Dict(string name, HtmlVersion versions, int model, IParser parser, ICheckAttribs checkAttribs)
 {
     this.Name         = name;
     this.Versions     = versions;
     this.Model        = model;
     this.Parser       = parser;
     this.CheckAttribs = checkAttribs;
 }
Exemplo n.º 8
0
 /// <summary>
 /// Initializes the entry by storing each argument in its backing field.
 /// </summary>
 /// <param name="name">Value stored as the entry name.</param>
 /// <param name="versions">Value stored as the entry's HTML version flags.</param>
 /// <param name="model">Value stored as the entry's model.</param>
 /// <param name="parser">Parser stored with the entry.</param>
 /// <param name="checkAttribs">Attribute checker stored with the entry.</param>
 public Dict(string name, HtmlVersion versions, int model, IParser parser, ICheckAttribs checkAttribs)
 {
     this._name = name;
     this._versions = versions;
     this._model = model;
     this._parser = parser;
     this._checkAttribs = checkAttribs;
 }
Exemplo n.º 9
0
        public void Html5Support(HtmlVersion htmlVersion)
        {
            // The document is constructed but not used further in this method;
            // the constructor call is kept because it is part of what the method runs.
            Document doc = new Document(MyDir + "Document.doc");

            // Configure save options with the HTML version under test.
            HtmlSaveOptions saveOptions = new HtmlSaveOptions
            {
                HtmlVersion = htmlVersion
            };
        }
Exemplo n.º 10
0
 /// <summary>
 /// Initializes the attribute with the given name, version flags and checker;
 /// <c>Nowrap</c> and <c>Literal</c> are initialized to <c>false</c>.
 /// </summary>
 /// <param name="name">Value assigned to <c>Name</c>.</param>
 /// <param name="versions">Value assigned to <c>Versions</c>.</param>
 /// <param name="attrCheck">Value assigned to <c>AttrCheck</c>.</param>
 public Attribute(string name, HtmlVersion versions, IAttrCheck attrCheck)
 {
     this.Name      = name;
     this.Nowrap    = false;   // off by default
     this.Literal   = false;   // off by default
     this.Versions  = versions;
     this.AttrCheck = attrCheck;
 }
Exemplo n.º 11
0
 /// <summary>
 /// Initializes the attribute by storing the name, version flags and checker
 /// in their backing fields; the nowrap and literal flags start as <c>false</c>.
 /// </summary>
 /// <param name="name">Value stored as the attribute name.</param>
 /// <param name="versions">Value stored as the attribute's HTML version flags.</param>
 /// <param name="attrCheck">Checker stored with the attribute.</param>
 public Attribute(string name, HtmlVersion versions, IAttrCheck attrCheck)
 {
     this._name      = name;
     this._nowrap    = false;   // cleared on construction
     this._literal   = false;   // cleared on construction
     this._versions  = versions;
     this._attrCheck = attrCheck;
 }
Exemplo n.º 12
0
 /// <summary>
 /// Initializes the entry; each argument is copied into the matching
 /// underscore-prefixed field.
 /// </summary>
 /// <param name="name">Value stored as the entry name.</param>
 /// <param name="versions">Value stored as the entry's HTML version flags.</param>
 /// <param name="model">Value stored as the entry's model.</param>
 /// <param name="parser">Parser stored with the entry.</param>
 /// <param name="checkAttribs">Attribute checker stored with the entry.</param>
 public Dict(string name, HtmlVersion versions, int model, IParser parser, ICheckAttribs checkAttribs)
 {
     this._name         = name;
     this._versions     = versions;
     this._model        = model;
     this._parser       = parser;
     this._checkAttribs = checkAttribs;
 }
Exemplo n.º 13
0
 /// <summary>
 /// Stores the HTML version and view data for this render pass, resets
 /// <c>HasCodeBlockRepeated</c> to <c>false</c>, and then calls <c>CoreRender</c>.
 /// </summary>
 /// <param name="writer">Destination handed to <c>CoreRender</c>; checked by the null guard.</param>
 /// <param name="htmlVersion">HTML version stored for this render pass.</param>
 /// <param name="viewData">Value assigned to <c>ViewData</c>.</param>
 public void Render(TextWriter writer, HtmlVersion htmlVersion, IDictionary<string, object> viewData)
 {
     // The guard is given the real parameter name. The old literal,
     // "textWriter", does not correspond to any parameter of this method.
     // NOTE(review): assumes the second argument is the reported argument name - confirm against Invariant.
     Invariant.ArgumentNotNull(writer, "writer");

     _htmlVersion = htmlVersion;
     ViewData = viewData;
     HasCodeBlockRepeated = false;

     CoreRender(writer);
 }
Exemplo n.º 14
0
 /// <summary>
 /// Returns the strict DOCTYPE declaration for the given HTML version.
 /// </summary>
 /// <param name="htmlVersion">Version selecting which declaration to return.</param>
 /// <returns>
 /// The HTML 4.01 strict declaration for <c>HtmlVersion.Html4</c>;
 /// the XHTML 1.0 Strict declaration for every other value.
 /// </returns>
 private static string GetStrictDocType(HtmlVersion htmlVersion)
 {
     if (htmlVersion == HtmlVersion.Html4)
     {
         return @"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.01//EN"" ""http://www.w3.org/TR/html4/strict.dtd"">";
     }

     // Every other version uses the XHTML declaration.
     return @"<!DOCTYPE html PUBLIC ""-//W3C//DTD XHTML 1.0 Strict//EN"" ""http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"">";
 }
Exemplo n.º 15
0
 /// <summary>
 /// Returns the transitional DOCTYPE declaration for the given HTML version.
 /// </summary>
 /// <param name="htmlVersion">Version selecting which declaration to return.</param>
 /// <returns>
 /// The HTML 4.01 Transitional declaration for <c>HtmlVersion.Html4</c>;
 /// the XHTML 1.0 Transitional declaration for every other value.
 /// </returns>
 private static string GetTransitionalDocType(HtmlVersion htmlVersion)
 {
     return htmlVersion == HtmlVersion.Html4
         ? @"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.01 Transitional//EN"" ""http://www.w3.org/TR/html4/loose.dtd"">"
         : @"<!DOCTYPE html PUBLIC ""-//W3C//DTD XHTML 1.0 Transitional//EN"" ""http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"">";
 }
Exemplo n.º 16
0
        /// <summary>
        /// Picks the strict DOCTYPE declaration matching the given HTML version.
        /// </summary>
        /// <param name="htmlVersion">Version selecting which declaration to return.</param>
        /// <returns>
        /// The HTML 4.01 strict declaration when the version is <c>HtmlVersion.Html4</c>,
        /// otherwise the XHTML 1.0 Strict declaration.
        /// </returns>
        private static string GetStrictDocType(HtmlVersion htmlVersion)
        {
            return htmlVersion == HtmlVersion.Html4
                ? @"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.01//EN"" ""http://www.w3.org/TR/html4/strict.dtd"">"
                : @"<!DOCTYPE html PUBLIC ""-//W3C//DTD XHTML 1.0 Strict//EN"" ""http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"">";
        }
Exemplo n.º 17
0
        /// <summary>
        /// Picks the transitional DOCTYPE declaration matching the given HTML version.
        /// </summary>
        /// <param name="htmlVersion">Version selecting which declaration to return.</param>
        /// <returns>
        /// The HTML 4.01 Transitional declaration when the version is <c>HtmlVersion.Html4</c>,
        /// otherwise the XHTML 1.0 Transitional declaration.
        /// </returns>
        private static string GetTransitionalDocType(HtmlVersion htmlVersion)
        {
            if (htmlVersion == HtmlVersion.Html4)
            {
                return @"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.01 Transitional//EN"" ""http://www.w3.org/TR/html4/loose.dtd"">";
            }

            // Anything that is not HTML 4 gets the XHTML declaration.
            return @"<!DOCTYPE html PUBLIC ""-//W3C//DTD XHTML 1.0 Transitional//EN"" ""http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"">";
        }
Exemplo n.º 18
0
        public void Html5Support(HtmlVersion htmlVersion)
        {
            // Load the source document from the test data directory.
            Document doc = new Document(MyDir + "Document.doc");

            // Configure the save options with the HTML version under test.
            HtmlSaveOptions saveOptions = new HtmlSaveOptions();
            saveOptions.HtmlVersion = htmlVersion;

            // Write the result into the artifacts directory.
            string outputPath = ArtifactsDir + "HtmlSaveOptions.Html5Support.html";
            doc.Save(outputPath, saveOptions);
        }
Exemplo n.º 19
0
        public void Walk_AutoSelfClosingTag_AppendsCorrectTag(HtmlVersion htmlVersion, string expectedFormat)
        {
            // Arrange: a tag node built from the single HAML line "br".
            var brLine = new HamlLine("br");
            var brNode = new HamlNodeTag(brLine);

            // Act: set the HTML version under test, then walk the node.
            _hamlOptions.HtmlVersion = htmlVersion;
            _tagWalker.Walk(brNode);

            // Assert: the built output must contain the expected tag text.
            var builtOutput = _classBuilderMock.Build("");
            Assert.That(builtOutput, Is.StringContaining(expectedFormat));
        }
Exemplo n.º 20
0
        /// <summary>
        /// Builds the XML declaration for an XML doctype request.
        /// </summary>
        /// <param name="docTypeParts">
        /// The doctype parts; when a second element is present it is used as the encoding.
        /// </param>
        /// <param name="htmlVersion">Output version; only <c>HtmlVersion.XHtml</c> yields a declaration.</param>
        /// <returns>
        /// An empty string for any version other than XHtml; otherwise the XML declaration,
        /// with the encoding defaulting to "utf-8" when fewer than two parts are given.
        /// </returns>
        private static string GetXmlDocType(IList <string> docTypeParts, HtmlVersion htmlVersion)
        {
            if (htmlVersion == HtmlVersion.XHtml)
            {
                string encoding;
                if (docTypeParts.Count >= 2)
                {
                    // The encoding was supplied as the second part.
                    encoding = docTypeParts[1];
                }
                else
                {
                    encoding = "utf-8";
                }

                return "<?xml version='1.0' encoding='" + encoding + "' ?>";
            }

            // Other versions get no XML declaration.
            return "";
        }
Exemplo n.º 21
0
 /// <summary>
 /// Initializes the options with the default list of auto-closing tag names
 /// and sets the HTML version to XHtml.
 /// </summary>
 public HamlOptions()
 {
     // Populate the list first, then publish it through the property.
     var tagNames = new List<string>();
     tagNames.Add("area");
     tagNames.Add("base");
     tagNames.Add("br");
     tagNames.Add("col");
     tagNames.Add("hr");
     tagNames.Add("img");
     tagNames.Add("input");
     tagNames.Add("link");
     tagNames.Add("meta");
     tagNames.Add("param");
     AutoClosingTags = tagNames;

     HtmlVersion = CodeDom.HtmlVersion.XHtml;
 }
Exemplo n.º 22
0
        /// <summary>
        /// Maps a doctype specification and the target HTML version to the
        /// declaration text to emit.
        /// </summary>
        /// <param name="docType">
        /// Space-separated specification. The first word, compared case-insensitively,
        /// selects the kind (STRICT, FRAMESET, 5, 1.1, BASIC, MOBILE, RDFA, XML).
        /// The split parts are also passed to the XML helper.
        /// </param>
        /// <param name="htmlVersion">Target HTML version.</param>
        /// <returns>
        /// The HTML5 doctype whenever the version is Html5 and the keyword is not XML;
        /// otherwise the declaration for the keyword, with the transitional doctype
        /// as the fallback for unrecognized or empty keywords.
        /// </returns>
        public static string GetDocType(string docType, HtmlVersion htmlVersion)
        {
            // string.Split always returns at least one element (an empty input gives a
            // single empty string), so index 0 is safe and no empty-array check is needed.
            // An empty keyword ends up in the default (transitional) case below.
            var docTypeParts = docType.Split(' ');

            // Upper-case once, with the invariant culture: a culture-sensitive ToUpper()
            // can map 'i' to a dotted capital I (e.g. tr-TR), so "strict" would no longer
            // match the "STRICT" label.
            string keyword = docTypeParts[0].ToUpperInvariant();

            if (htmlVersion == HtmlVersion.Html5 && keyword != "XML")
            {
                return(GetHtml5DocType());
            }

            switch (keyword)
            {
            case "STRICT": return(GetStrictDocType(htmlVersion));

            case "FRAMESET": return(GetFramesetDocType(htmlVersion));

            case "5": return(GetHtml5DocType());

            case "1.1": return(GetXHtmlDocType11());

            case "BASIC": return(GetBasicDocType());

            case "MOBILE": return(GetMobileDocType());

            case "RDFA": return(GetRdfaDocType());

            case "XML": return(GetXmlDocType(docTypeParts, htmlVersion));

            default: return(GetTransitionalDocType(htmlVersion));
            }
        }
Exemplo n.º 23
0
        /// <summary>
        /// Resolves a doctype specification and the target HTML version to the
        /// declaration text to emit.
        /// </summary>
        /// <param name="docType">
        /// Space-separated specification. Its first word, compared case-insensitively,
        /// selects the kind (STRICT, FRAMESET, 5, 1.1, BASIC, MOBILE, RDFA, XML).
        /// The split parts are also passed to the XML helper.
        /// </param>
        /// <param name="htmlVersion">Target HTML version.</param>
        /// <returns>
        /// The HTML5 doctype whenever the version is Html5 and the keyword is not XML;
        /// otherwise the declaration for the keyword, falling back to the transitional
        /// doctype for unrecognized or empty keywords.
        /// </returns>
        public static string GetDocType(string docType, HtmlVersion htmlVersion)
        {
            // Split never returns an empty array (an empty string yields one empty
            // element), so the first element always exists; an empty keyword is handled
            // by the default case of the switch.
            var docTypeParts = docType.Split(' ');

            // Normalize the keyword once using the invariant culture so matching does
            // not depend on the current culture (Turkish casing would upper-case 'i'
            // to a dotted capital I and miss "STRICT").
            string keyword = docTypeParts[0].ToUpperInvariant();

            if (htmlVersion == HtmlVersion.Html5 && keyword != "XML")
            {
                return GetHtml5DocType();
            }

            switch (keyword)
            {
                case "STRICT": return GetStrictDocType(htmlVersion);
                case "FRAMESET": return GetFramesetDocType(htmlVersion);
                case "5": return GetHtml5DocType();
                case "1.1": return GetXHtmlDocType11();
                case "BASIC": return GetBasicDocType();
                case "MOBILE": return GetMobileDocType();
                case "RDFA": return GetRdfaDocType();
                case "XML": return GetXmlDocType(docTypeParts, htmlVersion);
                default: return GetTransitionalDocType(htmlVersion);
            }
        }
Exemplo n.º 24
0
 /// <summary>
 /// Initializes the lexer for the given input stream and options. The position
 /// starts at line 1, column 1, the state at <c>LEX_CONTENT</c>, all counters
 /// and flags are cleared, and the inline stack and node list start empty.
 /// </summary>
 /// <param name="input">Value assigned to <c>Input</c>.</param>
 /// <param name="options">Value assigned to <c>Options</c>.</param>
 public Lexer(StreamIn input, TidyOptions options)
 {
     // Input source, position and lexer state.
     this.Input = input;
     this.Lines = 1;
     this.Columns = 1;
     this.State = LEX_CONTENT;

     // Problem counters.
     this.BadAccess = 0;
     this.BadLayout = 0;
     this.BadChars = 0;
     this.BadForm = 0;

     // Scanning flags.
     this.Waswhite = false;
     this.Pushed = false;
     this.Insertspace = false;
     this.Exiled = false;
     this.Isvoyager = false;

     // Version tracking: every version is possible until narrowed; doctype not yet known.
     this.Versions = HtmlVersion.Everything;
     this.Doctype = HtmlVersion.Unknown;
     this.BadDoctype = false;

     // Text span, current token and lexer buffer.
     this.Txtstart = 0;
     this.Txtend = 0;
     this.Token = null;
     this.Lexbuf = null;
     this.Lexlength = 0;
     this.Lexsize = 0;

     // Inline-element bookkeeping.
     this.Inode = null;
     this.Insert = -1;
     this.Istack = new Stack<InlineStack>();
     this.Istackbase = 0;

     // Remaining state.
     this.Styles = null;
     this.Options = options;
     this.SeenBodyEndTag = 0;
     this._nodeList = new List<Node>();
 }
Exemplo n.º 25
0
        public void AppendSelfClosingTagSuffix_VaryingHtmlVersion_AppendsCorrectOutput(HtmlVersion htmlVersion, string expectedOutput)
        {
            // Arrange
            DummyTemplate dummy = new DummyTemplate();

            // Act: the suffix produced for the supplied HTML version.
            string suffix = dummy.AppendSelfClosingTagSuffix(htmlVersion);

            // Assert
            Assert.That(suffix, Is.EqualTo(expectedOutput));
        }
Exemplo n.º 26
0
 /// <summary>
 /// Sets the HTML version on the base template and returns the result of the
 /// base class's parameterless <c>AppendSelfClosingTagSuffix</c>.
 /// </summary>
 /// <param name="htmlVersion">Version applied before the suffix is requested.</param>
 /// <returns>The value returned by the base implementation.</returns>
 public string AppendSelfClosingTagSuffix(HtmlVersion htmlVersion)
 {
     // The version must be applied before the suffix is requested.
     base.SetHtmlVersion(htmlVersion);

     string suffix = base.AppendSelfClosingTagSuffix();
     return suffix;
 }
Exemplo n.º 27
0
        public void RenderAttributeNameValuePair_BooleanAttribute_WritesCorrectAttributes(string name, string value, HtmlVersion htmlVersion, string expectedOutput)
        {
            // Arrange: the template is switched to the HTML version under test.
            DummyTemplate dummy = new DummyTemplate();
            dummy.SetHtmlVersion(htmlVersion);

            // Act: render the pair with a double quote as the quote character.
            string actualOutput = dummy.RenderAttributeNameValuePair(name, value, '\"');

            // Assert
            Assert.That(actualOutput, Is.EqualTo(expectedOutput));
        }
Exemplo n.º 28
0
 /// <summary>
 /// Applies the given HTML version to the base template, then returns what the
 /// base class's parameterless <c>AppendSelfClosingTagSuffix</c> produces.
 /// </summary>
 /// <param name="htmlVersion">Version applied before the suffix is requested.</param>
 /// <returns>The value returned by the base implementation.</returns>
 public string AppendSelfClosingTagSuffix(HtmlVersion htmlVersion)
 {
     // Set the version first; the base call below takes no version argument.
     base.SetHtmlVersion(htmlVersion);

     string selfClosingSuffix = base.AppendSelfClosingTagSuffix();
     return selfClosingSuffix;
 }
Exemplo n.º 29
0
        /*
        modes for GetToken()

        MixedContent   -- for elements which don't accept PCDATA
        Preformatted       -- white space preserved as is
        IgnoreMarkup       -- for CDATA elements such as script, style
        */
        public virtual Node GetToken(short mode)
        {
            int c;
            int badcomment = 0;
            var isempty = new MutableBoolean();

            if (Pushed)
            {
                /* duplicate inlines in preference to pushed text nodes when appropriate */
                if (Token.Type != Node.TEXT_NODE || (Insert == - 1 && Inode == null))
                {
                    Pushed = false;
                    return Token;
                }
            }

            /* at start of block elements, unclosed inline
            elements are inserted into the token stream */

            if (Insert != - 1 || Inode != null)
            {
                return InsertedToken();
            }

            Lines = Input.CursorLine;
            Columns = Input.CursorColumn;
            Waswhite = false;

            Txtstart = Lexsize;
            Txtend = Lexsize;

            while (true)
            {
                c = Input.ReadChar();
                if (c == StreamIn.END_OF_STREAM)
                {
                    break;
                }

                if (Insertspace && mode != IGNORE_WHITESPACE)
                {
                    AddCharToLexer(' ');
                    Waswhite = true;
                    Insertspace = false;
                }

                /* treat \r\n as \n and \r as \n */

                if (c == '\r')
                {
                    c = Input.ReadChar();

                    if (c != '\n')
                    {
                        Input.UngetChar(c);
                    }

                    c = '\n';
                }

                AddCharToLexer(c);

                short map;
                switch (State)
                {
                    case LEX_CONTENT:
                        map = Map((char) c);

                        /*
                        Discard white space if appropriate. Its cheaper
                        to do this here rather than in parser methods
                        for elements that don't have mixed content.
                        */
                        if (((map & WHITE) != 0) && (mode == IGNORE_WHITESPACE) && Lexsize == Txtstart + 1)
                        {
                            --Lexsize;
                            Waswhite = false;
                            Lines = Input.CursorLine;
                            Columns = Input.CursorColumn;
                            continue;
                        }

                        if (c == '<')
                        {
                            State = LEX_GT;
                            continue;
                        }

                        if ((map & WHITE) != 0)
                        {
                            /* was previous char white? */
                            if (Waswhite)
                            {
                                if (mode != PREFORMATTED && mode != IGNORE_MARKUP)
                                {
                                    --Lexsize;
                                    Lines = Input.CursorLine;
                                    Columns = Input.CursorColumn;
                                }
                            }
                                /* prev char wasn't white */
                            else
                            {
                                Waswhite = true;

                                if (mode != PREFORMATTED && mode != IGNORE_MARKUP && c != ' ')
                                {
                                    ChangeChar((byte) ' ');
                                }
                            }

                            continue;
                        }
                        if (c == '&' && mode != IGNORE_MARKUP)
                        {
                            ParseEntity(mode);
                        }

                        /* this is needed to avoid trimming trailing whitespace */
                        if (mode == IGNORE_WHITESPACE)
                            mode = MIXED_CONTENT;

                        Waswhite = false;
                        continue;

                    case LEX_GT:
                        if (c == '/')
                        {
                            c = Input.ReadChar();
                            if (c == StreamIn.END_OF_STREAM)
                            {
                                Input.UngetChar(c);
                                continue;
                            }

                            AddCharToLexer(c);
                            map = Map((char) c);

                            if ((map & LETTER) != 0)
                            {
                                Lexsize -= 3;
                                Txtend = Lexsize;
                                Input.UngetChar(c);
                                State = LEX_ENDTAG;
                                Lexbuf[Lexsize] = (byte) '\x0000'; /* debug */
                                Input.CursorColumn -= 2;

                                /* if some text before the </ return it now */
                                if (Txtend > Txtstart)
                                {
                                    /* trim space char before end tag */
                                    if (mode == IGNORE_WHITESPACE && Lexbuf[Lexsize - 1] == (byte) ' ')
                                    {
                                        Lexsize -= 1;
                                        Txtend = Lexsize;
                                    }

                                    Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                    return Token;
                                }

                                continue; /* no text so keep going */
                            }

                            /* otherwise treat as CDATA */
                            Waswhite = false;
                            State = LEX_CONTENT;
                            continue;
                        }

                        if (mode == IGNORE_MARKUP)
                        {
                            /* otherwise treat as CDATA */
                            Waswhite = false;
                            State = LEX_CONTENT;
                            continue;
                        }

                        /*
                        look out for comments, doctype or marked sections
                        this isn't quite right, but its getting there ...
                        */
                        if (c == '!')
                        {
                            c = Input.ReadChar();
                            if (c == '-')
                            {
                                c = Input.ReadChar();
                                if (c == '-')
                                {
                                    State = LEX_COMMENT; /* comment */
                                    Lexsize -= 2;
                                    Txtend = Lexsize;

                                    /* if some text before < return it now */
                                    if (Txtend > Txtstart)
                                    {
                                        Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                        return Token;
                                    }

                                    Txtstart = Lexsize;
                                    continue;
                                }

                                Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
                            }
                            else if (c == 'd' || c == 'D')
                            {
                                State = LEX_DOCTYPE; /* doctype */
                                Lexsize -= 2;
                                Txtend = Lexsize;
                                mode = IGNORE_WHITESPACE;

                                /* skip until white space or '>' */

                                for (;;)
                                {
                                    c = Input.ReadChar();

                                    if (c == StreamIn.END_OF_STREAM || c == '>')
                                    {
                                        Input.UngetChar(c);
                                        break;
                                    }

                                    map = Map((char) c);
                                    if ((map & WHITE) == 0)
                                    {
                                        continue;
                                    }

                                    /* and skip to end of whitespace */

                                    for (;;)
                                    {
                                        c = Input.ReadChar();

                                        if (c == StreamIn.END_OF_STREAM || c == '>')
                                        {
                                            Input.UngetChar(c);
                                            break;
                                        }

                                        map = Map((char) c);

                                        if ((map & WHITE) != 0)
                                        {
                                            continue;
                                        }

                                        Input.UngetChar(c);
                                        break;
                                    }

                                    break;
                                }

                                /* if some text before < return it now */
                                if (Txtend > Txtstart)
                                {
                                    Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                    return Token;
                                }

                                Txtstart = Lexsize;
                                continue;
                            }
                            else if (c == '[')
                            {
                                /* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
                                Lexsize -= 2;
                                State = LEX_SECTION;
                                Txtend = Lexsize;

                                /* if some text before < return it now */
                                if (Txtend > Txtstart)
                                {
                                    Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                    return Token;
                                }

                                Txtstart = Lexsize;
                                continue;
                            }

                            /* otherwise swallow chars up to and including next '>' */
                            while (true)
                            {
                                c = Input.ReadChar();
                                if (c == '>')
                                {
                                    break;
                                }
                                if (c == - 1)
                                {
                                    Input.UngetChar(c);
                                    break;
                                }
                            }

                            Lexsize -= 2;
                            Lexbuf[Lexsize] = (byte) '\x0000';
                            State = LEX_CONTENT;
                            continue;
                        }

                        /*
                        processing instructions
                        */

                        if (c == '?')
                        {
                            Lexsize -= 2;
                            State = LEX_PROCINSTR;
                            Txtend = Lexsize;

                            /* if some text before < return it now */
                            if (Txtend > Txtstart)
                            {
                                Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                return Token;
                            }

                            Txtstart = Lexsize;
                            continue;
                        }

                        /* Microsoft ASP's e.g. <% ... server-code ... %> */
                        if (c == '%')
                        {
                            Lexsize -= 2;
                            State = LEX_ASP;
                            Txtend = Lexsize;

                            /* if some text before < return it now */
                            if (Txtend > Txtstart)
                            {
                                Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                return Token;
                            }

                            Txtstart = Lexsize;
                            continue;
                        }

                        /* Netscapes JSTE e.g. <# ... server-code ... #> */
                        if (c == '#')
                        {
                            Lexsize -= 2;
                            State = LEX_JSTE;
                            Txtend = Lexsize;

                            /* if some text before < return it now */
                            if (Txtend > Txtstart)
                            {
                                Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                return Token;
                            }

                            Txtstart = Lexsize;
                            continue;
                        }

                        map = Map((char) c);

                        /* check for start tag */
                        if ((map & LETTER) != 0)
                        {
                            Input.UngetChar(c); /* push back letter */
                            Lexsize -= 2; /* discard "<" + letter */
                            Txtend = Lexsize;
                            State = LEX_STARTTAG; /* ready to read tag name */

                            /* if some text before < return it now */
                            if (Txtend > Txtstart)
                            {
                                Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                return Token;
                            }

                            continue; /* no text so keep going */
                        }

                        /* otherwise treat as CDATA */
                        State = LEX_CONTENT;
                        Waswhite = false;
                        continue;

                    case LEX_ENDTAG:
                        Txtstart = Lexsize - 1;
                        Input.CursorColumn += 2;
                        c = ParseTagName();
                        Token = NewNode(Node.END_TAG, Lexbuf, Txtstart, Txtend,
                                        GetString(Lexbuf, Txtstart, Txtend - Txtstart));
                        Lexsize = Txtstart;
                        Txtend = Txtstart;

                        /* skip to '>' */
                        while (c != '>')
                        {
                            c = Input.ReadChar();
                            if (c == StreamIn.END_OF_STREAM)
                            {
                                break;
                            }
                        }

                        if (c == StreamIn.END_OF_STREAM)
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        State = LEX_CONTENT;
                        Waswhite = false;
                        return Token; /* the endtag token */

                    case LEX_STARTTAG:
                        Txtstart = Lexsize - 1; /* set txtstart to first letter */
                        c = ParseTagName();
                        isempty.Val = false;
                        AttVal attributes = null;
                        Token = NewNode((isempty.Val ? Node.START_END_TAG : Node.START_TAG), Lexbuf, Txtstart, Txtend,
                                        GetString(Lexbuf, Txtstart, Txtend - Txtstart));

                        /* parse attributes, consuming closing ">" */
                        if (c != '>')
                        {
                            if (c == '/')
                            {
                                Input.UngetChar(c);
                            }

                            attributes = ParseAttrs(isempty);
                        }

                        if (isempty.Val)
                        {
                            Token.Type = Node.START_END_TAG;
                        }

                        Token.Attributes = attributes;
                        Lexsize = Txtstart;
                        Txtend = Txtstart;

                        /* swallow newline following start tag */
                        /* special check needed for CRLF sequence */
                        /* this doesn't apply to empty elements */

                        if (ExpectsContent(Token) || Token.Tag == Options.TagTable.TagBr)
                        {
                            c = Input.ReadChar();
                            if (c == '\r')
                            {
                                c = Input.ReadChar();

                                if (c != '\n')
                                {
                                    Input.UngetChar(c);
                                }
                            }
                            else if (c != '\n' && c != '\f')
                            {
                                Input.UngetChar(c);
                            }

                            Waswhite = true; /* to swallow leading whitespace */
                        }
                        else
                        {
                            Waswhite = false;
                        }

                        State = LEX_CONTENT;

                        if (Token.Tag == null)
                        {
                            Report.Error(this, null, Token, Report.UNKNOWN_ELEMENT);
                        }
                        else if (!Options.XmlTags)
                        {
                            Versions &= Token.Tag.Versions;

                            if ((Token.Tag.Versions & HtmlVersion.Proprietary) != 0)
                            {
                                if (!Options.MakeClean &&
                                    (Token.Tag == Options.TagTable.TagNobr || Token.Tag == Options.TagTable.TagWbr))
                                {
                                    Report.Warning(this, null, Token, Report.PROPRIETARY_ELEMENT);
                                }
                            }

                            if (Token.Tag.CheckAttribs != null)
                            {
                                Token.CheckUniqueAttributes(this);
                                Token.Tag.CheckAttribs.Check(this, Token);
                            }
                            else
                            {
                                Token.CheckAttributes(this);
                            }
                        }
                        return Token; /* return start tag */

                    case LEX_COMMENT:
                        if (c != '-')
                        {
                            continue;
                        }

                        c = Input.ReadChar();
                        AddCharToLexer(c);
                        if (c != '-')
                        {
                            continue;
                        }

                        while (true)
                        {
                            c = Input.ReadChar();

                            if (c == '>')
                            {
                                if (badcomment != 0)
                                {
                                    Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
                                }

                                Txtend = Lexsize - 2; // AQ 8Jul2000
                                Lexbuf[Lexsize] = (byte) '\x0000';
                                State = LEX_CONTENT;
                                Waswhite = false;
                                Token = NewNode(Node.COMMENT_TAG, Lexbuf, Txtstart, Txtend);

                                /* now look for a line break */

                                c = Input.ReadChar();

                                if (c == '\r')
                                {
                                    c = Input.ReadChar();

                                    if (c != '\n')
                                    {
                                        Token.Linebreak = true;
                                    }
                                }

                                if (c == '\n')
                                {
                                    Token.Linebreak = true;
                                }
                                else
                                {
                                    Input.UngetChar(c);
                                }

                                return Token;
                            }

                            /* note position of first such error in the comment */
                            if (badcomment == 0)
                            {
                                Lines = Input.CursorLine;
                                Columns = Input.CursorColumn - 3;
                            }

                            badcomment++;
                            if (Options.FixComments)
                            {
                                Lexbuf[Lexsize - 2] = (byte) '=';
                            }

                            AddCharToLexer(c);

                            /* if '-' then look for '>' to end the comment */
                            if (c != '-')
                            {
                                break;
                            }
                        }

                        /* otherwise continue to look for --> */
                        Lexbuf[Lexsize - 2] = (byte) '=';
                        continue;

                    case LEX_DOCTYPE:
                        map = Map((char) c);

                        if ((map & WHITE) != 0)
                        {
                            if (Waswhite)
                            {
                                Lexsize -= 1;
                            }

                            Waswhite = true;
                        }
                        else
                        {
                            Waswhite = false;
                        }

                        if (c != '>')
                        {
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.DOC_TYPE_TAG, Lexbuf, Txtstart, Txtend);
                        /* make a note of the version named by the doctype */
                        Doctype = FindGivenVersion(Token);
                        return Token;

                    case LEX_PROCINSTR:

                        if (Lexsize - Txtstart == 3)
                        {
                            if ((GetString(Lexbuf, Txtstart, 3)).Equals("php"))
                            {
                                State = LEX_PHP;
                                continue;
                            }
                        }

                        if (Options.XmlPIs)
                        {
                            /* insist on ?> as terminator */
                            if (c != '?')
                            {
                                continue;
                            }

                            /* now look for '>' */
                            c = Input.ReadChar();

                            if (c == StreamIn.END_OF_STREAM)
                            {
                                Report.Warning(this, null, null, Report.UNEXPECTED_END_OF_FILE);
                                Input.UngetChar(c);
                                continue;
                            }

                            AddCharToLexer(c);
                        }

                        if (c != '>')
                        {
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.PROC_INS_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;

                    case LEX_ASP:
                        if (c != '%')
                        {
                            continue;
                        }

                        /* now look for '>' */
                        c = Input.ReadChar();

                        if (c != '>')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.ASP_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;

                    case LEX_JSTE:
                        if (c != '#')
                        {
                            continue;
                        }

                        /* now look for '>' */
                        c = Input.ReadChar();
                        if (c != '>')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.JSTE_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;

                    case LEX_PHP:
                        if (c != '?')
                        {
                            continue;
                        }

                        /* now look for '>' */
                        c = Input.ReadChar();
                        if (c != '>')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.PHP_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;

                    case LEX_SECTION:
                        if (c == '[')
                        {
                            if (Lexsize == (Txtstart + 6) && (GetString(Lexbuf, Txtstart, 6)).Equals("CDATA["))
                            {
                                State = LEX_CDATA;
                                Lexsize -= 6;
                                continue;
                            }
                        }

                        if (c != ']')
                        {
                            continue;
                        }

                        /* now look for '>' */
                        c = Input.ReadChar();
                        if (c != '>')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.SECTION_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;

                    case LEX_CDATA:
                        if (c != ']')
                        {
                            continue;
                        }

                        /* now look for ']' */
                        c = Input.ReadChar();
                        if (c != ']')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        /* now look for '>' */
                        c = Input.ReadChar();
                        if (c != '>')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.CDATA_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;
                }
            }

            if (State == LEX_CONTENT)
            {
                /* text string */
                Txtend = Lexsize;
                if (Txtend > Txtstart)
                {
                    Input.UngetChar(c);
                    if (Lexbuf[Lexsize - 1] == (byte) ' ')
                    {
                        Lexsize -= 1;
                        Txtend = Lexsize;
                    }

                    Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                    return Token;
                }
            }
            else if (State == LEX_COMMENT)
            {
                /* comment */
                if (c == StreamIn.END_OF_STREAM)
                {
                    Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
                }

                Txtend = Lexsize;
                Lexbuf[Lexsize] = (byte) '\x0000';
                State = LEX_CONTENT;
                Waswhite = false;
                Token = NewNode(Node.COMMENT_TAG, Lexbuf, Txtstart, Txtend);
                return Token;
            }

            return null;
        }
Exemplo n.º 30
0
		/// <summary>
		/// Fixes up the document's DOCTYPE: depending on <c>Options.DocType</c> the existing
		/// doctype node is kept, discarded, or (re)written with a public identifier.
		/// </summary>
		/// <param name="root">Root node of the document; its doctype node is looked up via
		/// <c>FindDocType</c> and a new doctype node is inserted as first child when needed.</param>
		/// <returns>
		/// <c>true</c> when the doctype was kept, discarded or rewritten;
		/// <c>false</c> when, in Auto mode, the declared doctype is <c>HtmlVersion.Unknown</c>
		/// or no version could be guessed.
		/// </returns>
		public virtual bool FixDocType(Node root)
		{
			Node doctype;
			HtmlVersion guessed = HtmlVersion.Html40Strict;
			int i;

			if (this.badDoctype)
			{
				Report.Warning(this, null, null, Report.MALFORMED_DOCTYPE);
			}

			/* nothing to fix for XML output */
			if (Options.XmlOut)
			{
				return true;
			}

			doctype = root.FindDocType();

			if (Options.DocType == TidyNet.DocType.Omit)
			{
				if (doctype != null)
				{
					Node.DiscardElement(doctype);
				}
				return true;
			}

			if (Options.DocType == TidyNet.DocType.Strict)
			{
				/* guard against a missing doctype, as the Omit branch does */
				if (doctype != null)
				{
					Node.DiscardElement(doctype);
				}
				doctype = null;
				guessed = HtmlVersion.Html40Strict;
			}
			else if (Options.DocType == TidyNet.DocType.Loose)
			{
				/* guard against a missing doctype, as the Omit branch does */
				if (doctype != null)
				{
					Node.DiscardElement(doctype);
				}
				doctype = null;
				guessed = HtmlVersion.Html40Loose;
			}
			else if (Options.DocType == TidyNet.DocType.Auto)
			{
				if (doctype != null)
				{
					switch (this.doctype)
					{
					case HtmlVersion.Unknown:
						return false;

					case HtmlVersion.Html20:
					case HtmlVersion.Html32:
					case HtmlVersion.Html40Strict:
					case HtmlVersion.Html40Loose:
					case HtmlVersion.Frames:
						/* keep the declared doctype while it is still among the possible versions */
						if ((this.versions & this.doctype) != 0)
						{
							return true;
						}
						break; /* to replace old version by new */
					}

					/* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */
				}

				/* choose new doctype */
				guessed = GetHtmlVersion();
			}

			if (guessed == HtmlVersion.Unknown)
			{
				return false;
			}

			/* for XML use the Voyager system identifier */
			if (this.Options.XmlOut || this.Options.XmlTags || this.isvoyager)
			{
				if (doctype != null)
				{
					Node.DiscardElement(doctype);
				}

				for (i = 0; i < W3CVersion.Length; ++i)
				{
					if (guessed == W3CVersion[i].Version)
					{
						FixHtmlNameSpace(root, W3CVersion[i].Profile);
						break;
					}
				}

				return true;
			}

			/* no doctype node yet: create one and link it in as the first child of root */
			if (doctype == null)
			{
				doctype = NewNode(Node.DocTypeTag, this.lexbuf, 0, 0);
				doctype.Next = root.Content;
				doctype.Parent = root;
				doctype.Prev = null;
				root.Content = doctype;
			}

			/* the doctype text is appended at the current end of the lexer buffer */
			this.txtstart = this.lexsize;
			this.txtend = this.lexsize;

			/* use the appropriate public identifier */
			AddStringLiteral("html PUBLIC ");

			if (Options.DocType == TidyNet.DocType.User && Options.DocTypeStr != null)
			{
				AddStringLiteral(Options.DocTypeStr);
			}
			else if (guessed == HtmlVersion.Html20)
			{
				AddStringLiteral("\"-//IETF//DTD HTML 2.0//EN\"");
			}
			else
			{
				AddStringLiteral("\"-//W3C//DTD ");

				for (i = 0; i < W3CVersion.Length; ++i)
				{
					if (guessed == W3CVersion[i].Version)
					{
						AddStringLiteral(W3CVersion[i].Name);
						break;
					}
				}

				AddStringLiteral("//EN\"");
			}

			this.txtend = this.lexsize;

			doctype.Start = this.txtstart;
			doctype.End = this.txtend;

			return true;
		}
Exemplo n.º 31
0
		/// <summary>
		/// Writes an XHTML 1.0 DOCTYPE (public identifier plus system identifier) for the
		/// document, choosing the flavor from <c>Options.DocType</c> and, in Auto mode,
		/// from the versions still possible for the content.
		/// </summary>
		/// <param name="root">Root node of the document; a doctype node is inserted as its
		/// first child when none exists.</param>
		/// <returns>
		/// <c>true</c> when <c>Options.DocType</c> is Omit (any existing doctype is discarded);
		/// <c>false</c> on every other path, after the doctype has been written.
		/// NOTE(review): how callers interpret this value is not visible here.
		/// </returns>
		public virtual bool SetXhtmlDocType(Node root)
		{
			string fpi = " ";
			string sysid = "";
			string xhtmlNamespace = XHTML_NAMESPACE;
			Node doctype;

			doctype = root.FindDocType();

			if (Options.DocType == TidyNet.DocType.Omit)
			{
				if (doctype != null)
				{
					Node.DiscardElement(doctype);
				}
				return true;
			}

			if (Options.DocType == TidyNet.DocType.Auto)
			{
				/* see what flavor of XHTML this document matches */
				if ((this.versions & HtmlVersion.Html40Strict) != 0)
				{
					/* use XHTML strict */
					fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
					sysid = voyager_strict;
				}
				else if ((this.versions & HtmlVersion.Loose) != 0)
				{
					fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
					sysid = voyager_loose;
				}
				else if ((this.versions & HtmlVersion.Frames) != 0)
				{
					/* use XHTML frames */
					fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";
					sysid = voyager_frameset;
				}
				else
				{
					/* lets assume XHTML transitional */
					fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
					sysid = voyager_loose;
				}
			}
			else if (Options.DocType == TidyNet.DocType.Strict)
			{
				fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
				sysid = voyager_strict;
			}
			else if (Options.DocType == TidyNet.DocType.Loose)
			{
				fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
				sysid = voyager_loose;
			}

			FixHtmlNameSpace(root, xhtmlNamespace);

			/* no doctype node yet: create one and link it in as the first child of root */
			if (doctype == null)
			{
				doctype = NewNode(Node.DocTypeTag, this.lexbuf, 0, 0);
				doctype.Next = root.Content;
				doctype.Parent = root;
				doctype.Prev = null;
				root.Content = doctype;
			}

			/* a user-supplied doctype string replaces the public identifier */
			if (Options.DocType == TidyNet.DocType.User && Options.DocTypeStr != null)
			{
				fpi = Options.DocTypeStr;
				sysid = "";
			}

			/* the doctype text is appended at the current end of the lexer buffer */
			this.txtstart = this.lexsize;
			this.txtend = this.lexsize;

			/* add public identifier */
			AddStringLiteral("html PUBLIC ");

			/* check if the fpi is quoted or not; the length test keeps an empty
			   user-supplied string from indexing past the end */
			if (fpi.Length > 0 && fpi[0] == '"')
			{
				AddStringLiteral(fpi);
			}
			else
			{
				AddStringLiteral("\"");
				AddStringLiteral(fpi);
				AddStringLiteral("\"");
			}

			/* put the system identifier on its own line, indented only if it fits the wrap length */
			if (sysid.Length + 6 >= this.Options.WrapLen)
			{
				AddStringLiteral("\n\"");
			}
			else
			{
				AddStringLiteral("\n    \"");
			}

			/* add system identifier */
			AddStringLiteral(sysid);
			AddStringLiteral("\"");

			this.txtend = this.lexsize;

			doctype.Start = this.txtstart;
			doctype.End = this.txtend;

			return false;
		}
Exemplo n.º 32
0
			/// <summary>
			/// Initializes the <c>Name</c>, <c>VoyagerName</c>, <c>Profile</c> and
			/// <c>Version</c> members from the given arguments.
			/// </summary>
			/// <param name="name">Value stored in <c>Name</c>.</param>
			/// <param name="voyagerName">Value stored in <c>VoyagerName</c>.</param>
			/// <param name="profile">Value stored in <c>Profile</c>.</param>
			/// <param name="version">Value stored in <c>Version</c>.</param>
			public W3CVersionInfo(string name, string voyagerName, string profile, HtmlVersion version)
			{
				this.Name = name;
				this.VoyagerName = voyagerName;
				this.Profile = profile;
				this.Version = version;
			}
Exemplo n.º 33
0
 /// <summary>
 /// Stores the given HTML version in the <c>_htmlVersion</c> field.
 /// </summary>
 /// <param name="htmlVersion">HTML version to store.</param>
 public void SetHtmlVersion(HtmlVersion htmlVersion)
 {
     this._htmlVersion = htmlVersion;
 }
Exemplo n.º 34
0
 /// <summary>
 /// Asserts that <c>DocTypeFactory.GetDocType</c> returns <paramref name="expectedDocType"/>
 /// for the given doctype id and HTML version.
 /// </summary>
 public void Walk_ReturnsCorrectDocType(string docTypeId, HtmlVersion htmlVersion, string expectedDocType)
 {
     Assert.That(
         DocTypeFactory.GetDocType(docTypeId, htmlVersion),
         Is.EqualTo(expectedDocType));
 }
Exemplo n.º 35
0
        /// <summary>
        /// Builds the XML declaration for XHTML output.
        /// </summary>
        /// <param name="docTypeParts">Doctype parts; when there are at least two items,
        /// the second one is used as the encoding, otherwise "utf-8" is used.</param>
        /// <param name="htmlVersion">Target HTML version; any value other than
        /// <c>HtmlVersion.XHtml</c> yields an empty string.</param>
        /// <returns>The XML declaration, or an empty string for non-XHTML versions.</returns>
        private static string GetXmlDocType(IList<string> docTypeParts, HtmlVersion htmlVersion)
        {
            if (htmlVersion == HtmlVersion.XHtml)
            {
                string encoding;
                if (docTypeParts.Count >= 2)
                {
                    encoding = docTypeParts[1];
                }
                else
                {
                    encoding = "utf-8";
                }

                return "<?xml version='1.0' encoding='" + encoding + "' ?>";
            }

            return "";
        }
Exemplo n.º 36
0
 /// <summary>
 /// Stores <paramref name="model"/> in <c>Model</c> and then renders through the base
 /// <c>Render(writer, htmlVersion)</c> overload.
 /// </summary>
 /// <param name="writer">Writer passed on to the base render call.</param>
 /// <param name="model">Model assigned before rendering starts.</param>
 /// <param name="htmlVersion">HTML version passed on to the base render call.</param>
 public void Render(TextWriter writer, T model, HtmlVersion htmlVersion)
 {
     this.Model = model;
     base.Render(writer, htmlVersion);
 }
Exemplo n.º 37
0
		/// <summary>
		/// Creates a lexer over the given input with all lexer state reset to its initial values.
		/// </summary>
		/// <param name="input">Input stream stored in the <c>input</c> field.</param>
		/// <param name="options">Options stored in <c>Options</c>.</param>
		public Lexer(StreamIn input, TidyOptions options)
		{
			/* collaborators handed in by the caller */
			this.input = input;
			Options = options;

			/* position and scanning state: line 1, column 1, content state */
			lines = 1;
			columns = 1;
			state = LEX_CONTENT;
			waswhite = false;
			pushed = false;
			insertspace = false;
			exiled = false;

			/* bad* counters and flags start cleared */
			badAccess = 0;
			badLayout = 0;
			badChars = 0;
			badForm = 0;
			badDoctype = false;

			/* version tracking: every version possible, no doctype seen */
			isvoyager = false;
			versions = HtmlVersion.Everything;
			doctype = HtmlVersion.Unknown;

			/* lexer buffer and current token */
			lexbuf = null;
			lexlength = 0;
			lexsize = 0;
			txtstart = 0;
			txtend = 0;
			token = null;

			/* inline stack bookkeeping */
			inode = null;
			insert = -1;
			istack = new Stack();
			istackbase = 0;

			/* remaining members */
			styles = null;
			seenBodyEndTag = 0;
			nodeList = new ArrayList();
		}
Exemplo n.º 38
0
 /// <summary>
 /// Renders with the current <c>ViewData</c>, or with a new empty dictionary when
 /// <c>ViewData</c> is null.
 /// </summary>
 /// <param name="writer">Writer passed on to the three-argument overload.</param>
 /// <param name="htmlVersion">HTML version passed on to the three-argument overload.</param>
 public void Render(TextWriter writer, HtmlVersion htmlVersion)
 {
     var viewData = ViewData ?? new Dictionary<string, object>();
     Render(writer, htmlVersion, viewData);
 }
Exemplo n.º 39
0
 /// <summary>
 /// Stores the given HTML version in the <c>_htmlVersion</c> field.
 /// </summary>
 /// <param name="htmlVersion">HTML version to store.</param>
 public void SetHtmlVersion(HtmlVersion htmlVersion)
 {
     this._htmlVersion = htmlVersion;
 }
Exemplo n.º 40
0
		/*
		modes for GetToken()
		
		MixedContent   -- for elements which don't accept PCDATA
		Preformatted       -- white space preserved as is
		IgnoreMarkup       -- for CDATA elements such as script, style
		*/
		public virtual Node GetToken(short mode)
		{
			short map;
			int c = 0;
			int lastc;
			int badcomment = 0;
			MutableBoolean isempty = new MutableBoolean();
			AttVal attributes;
			
			if (pushed)
			{
				/* duplicate inlines in preference to pushed text nodes when appropriate */
				if (token.Type != Node.TextNode || (insert == - 1 && inode == null))
				{
					pushed = false;
					return token;
				}
			}
			
			/* at start of block elements, unclosed inline
			elements are inserted into the token stream */
			
			if (insert != - 1 || inode != null)
			{
				return InsertedToken();
			}
			
			lines = input.curline;
			columns = input.curcol;
			waswhite = false;
			
			txtstart = lexsize;
			txtend = lexsize;
			
			while (true)
			{
				c = input.ReadChar();
				if (c == StreamIn.EndOfStream)
				{
					break;
				}

				if (insertspace && mode != IgnoreWhitespace)
				{
					AddCharToLexer(' ');
					waswhite = true;
					insertspace = false;
				}
				
				/* treat \r\n as \n and \r as \n */
				
				if (c == '\r')
				{
					c = input.ReadChar();
					
					if (c != '\n')
					{
						input.UngetChar(c);
					}
					
					c = '\n';
				}
				
				AddCharToLexer(c);
				
				switch (state)
				{
				case LEX_CONTENT: 
					map = MAP((char) c);
						
					/*
						Discard white space if appropriate. Its cheaper
						to do this here rather than in parser methods
						for elements that don't have mixed content.
						*/
					if (((map & WHITE) != 0) && (mode == IgnoreWhitespace) && lexsize == txtstart + 1)
					{
						--lexsize;
						waswhite = false;
						lines = input.curline;
						columns = input.curcol;
						continue;
					}
						
					if (c == '<')
					{
						state = LEX_GT;
						continue;
					}
						
					if ((map & WHITE) != 0)
					{
						/* was previous char white? */
						if (waswhite)
						{
							if (mode != Preformatted && mode != IgnoreMarkup)
							{
								--lexsize;
								lines = input.curline;
								columns = input.curcol;
							}
						}
							/* prev char wasn't white */
						else
						{
							waswhite = true;
							lastc = c;
								
							if (mode != Preformatted && mode != IgnoreMarkup && c != ' ')
							{
								ChangeChar((byte) ' ');
							}
						}
							
						continue;
					}
					else if (c == '&' && mode != IgnoreMarkup)
					{
						ParseEntity(mode);
					}
						
					/* this is needed to avoid trimming trailing whitespace */
					if (mode == IgnoreWhitespace)
						mode = MixedContent;
						
					waswhite = false;
					continue;
					
					
				case LEX_GT: 
					if (c == '/')
					{
						c = input.ReadChar();
						if (c == StreamIn.EndOfStream)
						{
							input.UngetChar(c);
							continue;
						}
							
						AddCharToLexer(c);
						map = MAP((char) c);
							
						if ((map & LETTER) != 0)
						{
							lexsize -= 3;
							txtend = lexsize;
							input.UngetChar(c);
							state = LEX_ENDTAG;
							lexbuf[lexsize] = (byte) '\x0000'; /* debug */
							input.curcol -= 2;
								
							/* if some text before the </ return it now */
							if (txtend > txtstart)
							{
								/* trim space char before end tag */
								if (mode == IgnoreWhitespace && lexbuf[lexsize - 1] == (byte) ' ')
								{
									lexsize -= 1;
									txtend = lexsize;
								}
									
								token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
								return token;
							}
								
							continue; /* no text so keep going */
						}
							
						/* otherwise treat as CDATA */
						waswhite = false;
						state = LEX_CONTENT;
						continue;
					}
						
					if (mode == IgnoreMarkup)
					{
						/* otherwise treat as CDATA */
						waswhite = false;
						state = LEX_CONTENT;
						continue;
					}
						
					/*
						look out for comments, doctype or marked sections
						this isn't quite right, but its getting there ...
						*/
					if (c == '!')
					{
						c = input.ReadChar();
						if (c == '-')
						{
							c = input.ReadChar();
							if (c == '-')
							{
								state = LEX_COMMENT; /* comment */
								lexsize -= 2;
								txtend = lexsize;
									
								/* if some text before < return it now */
								if (txtend > txtstart)
								{
									token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
									return token;
								}
									
								txtstart = lexsize;
								continue;
							}
								
							Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
						}
						else if (c == 'd' || c == 'D')
						{
							state = LEX_DOCTYPE; /* doctype */
							lexsize -= 2;
							txtend = lexsize;
							mode = IgnoreWhitespace;
								
							/* skip until white space or '>' */
								
							for (; ; )
							{
								c = input.ReadChar();
									
								if (c == StreamIn.EndOfStream || c == '>')
								{
									input.UngetChar(c);
									break;
								}
									
								map = MAP((char) c);
								if ((map & WHITE) == 0)
								{
									continue;
								}
									
								/* and skip to end of whitespace */
									
								for (; ; )
								{
									c = input.ReadChar();
										
									if (c == StreamIn.EndOfStream || c == '>')
									{
										input.UngetChar(c);
										break;
									}
										
									map = MAP((char) c);
										
									if ((map & WHITE) != 0)
									{
										continue;
									}
										
									input.UngetChar(c);
									break;
								}
									
								break;
							}
								
							/* if some text before < return it now */
							if (txtend > txtstart)
							{
								token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
								return token;
							}
								
							txtstart = lexsize;
							continue;
						}
						else if (c == '[')
						{
							/* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
							lexsize -= 2;
							state = LEX_SECTION;
							txtend = lexsize;
								
							/* if some text before < return it now */
							if (txtend > txtstart)
							{
								token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
								return token;
							}
								
							txtstart = lexsize;
							continue;
						}
							
						/* otherwise swallow chars up to and including next '>' */
						while (true)
						{
							c = input.ReadChar();
							if (c == '>')
							{
								break;
							}
							if (c == - 1)
							{
								input.UngetChar(c);
								break;
							}
						}
							
						lexsize -= 2;
						lexbuf[lexsize] = (byte) '\x0000';
						state = LEX_CONTENT;
						continue;
					}
						
					/*
						processing instructions
						*/
						
					if (c == '?')
					{
						lexsize -= 2;
						state = LEX_PROCINSTR;
						txtend = lexsize;
							
						/* if some text before < return it now */
						if (txtend > txtstart)
						{
							token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
							return token;
						}
							
						txtstart = lexsize;
						continue;
					}
						
					/* Microsoft ASP's e.g. <% ... server-code ... %> */
					if (c == '%')
					{
						lexsize -= 2;
						state = LEX_ASP;
						txtend = lexsize;
							
						/* if some text before < return it now */
						if (txtend > txtstart)
						{
							token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
							return token;
						}
							
						txtstart = lexsize;
						continue;
					}
						
					/* Netscapes JSTE e.g. <# ... server-code ... #> */
					if (c == '#')
					{
						lexsize -= 2;
						state = LEX_JSTE;
						txtend = lexsize;
							
						/* if some text before < return it now */
						if (txtend > txtstart)
						{
							token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
							return token;
						}
							
						txtstart = lexsize;
						continue;
					}
						
					map = MAP((char) c);
						
					/* check for start tag */
					if ((map & LETTER) != 0)
					{
						input.UngetChar(c); /* push back letter */
						lexsize -= 2; /* discard "<" + letter */
						txtend = lexsize;
						state = LEX_STARTTAG; /* ready to read tag name */
							
						/* if some text before < return it now */
						if (txtend > txtstart)
						{
							token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
							return token;
						}
							
						continue; /* no text so keep going */
					}
						
					/* otherwise treat as CDATA */
					state = LEX_CONTENT;
					waswhite = false;
					continue;
					
					
				case LEX_ENDTAG: 
					txtstart = lexsize - 1;
					input.curcol += 2;
					c = ParseTagName();
					token = NewNode(Node.EndTag, lexbuf, txtstart, txtend, GetString(lexbuf, txtstart, txtend - txtstart));
					lexsize = txtstart;
					txtend = txtstart;
						
					/* skip to '>' */
					while (c != '>')
					{
						c = input.ReadChar();
						if (c == StreamIn.EndOfStream)
						{
							break;
						}
					}
						
					if (c == StreamIn.EndOfStream)
					{
						input.UngetChar(c);
						continue;
					}
						
					state = LEX_CONTENT;
					waswhite = false;
					return token; /* the endtag token */
					
					
				case LEX_STARTTAG: 
					txtstart = lexsize - 1; /* set txtstart to first letter */
					c = ParseTagName();
					isempty.Val = false;
					attributes = null;
					token = NewNode((isempty.Val ? Node.StartEndTag : Node.StartTag), lexbuf, txtstart, txtend, GetString(lexbuf, txtstart, txtend - txtstart));
						
					/* parse attributes, consuming closing ">" */
					if (c != '>')
					{
						if (c == '/')
						{
							input.UngetChar(c);
						}
							
						attributes = ParseAttrs(isempty);
					}
						
					if (isempty.Val)
					{
						token.Type = Node.StartEndTag;
					}
						
					token.Attributes = attributes;
					lexsize = txtstart;
					txtend = txtstart;
						
					/* swallow newline following start tag */
					/* special check needed for CRLF sequence */
					/* this doesn't apply to empty elements */
						
					if (ExpectsContent(token) || token.Tag == Options.tt.TagBr)
					{
						c = input.ReadChar();
						if (c == '\r')
						{
							c = input.ReadChar();
								
							if (c != '\n')
							{
								input.UngetChar(c);
							}
						}
						else if (c != '\n' && c != '\f')
						{
							input.UngetChar(c);
						}
							
						waswhite = true; /* to swallow leading whitespace */
					}
					else
					{
						waswhite = false;
					}
						
					state = LEX_CONTENT;
						
					if (token.Tag == null)
					{
						Report.Error(this, null, token, Report.UNKNOWN_ELEMENT);
					}
					else if (!Options.XmlTags)
					{
						versions &= token.Tag.Versions;
							
						if ((token.Tag.Versions & HtmlVersion.Proprietary) != 0)
						{
							if (!Options.MakeClean && (token.Tag == Options.tt.TagNobr || token.Tag == Options.tt.TagWbr))
							{
								Report.Warning(this, null, token, Report.PROPRIETARY_ELEMENT);
							}
						}
							
						if (token.Tag.CheckAttribs != null)
						{
							token.CheckUniqueAttributes(this);
							token.Tag.CheckAttribs.Check(this, this.token);
						}
						else
						{
							token.CheckAttributes(this);
						}
					}
					return token; /* return start tag */

				case LEX_COMMENT: 
					if (c != '-')
					{
						continue;
					}
						
					c = input.ReadChar();
					AddCharToLexer(c);
					if (c != '-')
					{
						continue;
					}
						
					while (true)
					{
						c = input.ReadChar();
							
						if (c == '>')
						{
							if (badcomment != 0)
							{
								Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
							}
								
							txtend = lexsize - 2; // AQ 8Jul2000
							lexbuf[lexsize] = (byte) '\x0000';
							state = LEX_CONTENT;
							waswhite = false;
							token = NewNode(Node.CommentTag, lexbuf, txtstart, txtend);
								
							/* now look for a line break */
								
							c = input.ReadChar();
								
							if (c == '\r')
							{
								c = input.ReadChar();
									
								if (c != '\n')
								{
									token.Linebreak = true;
								}
							}
								
							if (c == '\n')
							{
								token.Linebreak = true;
							}
							else
							{
								input.UngetChar(c);
							}
								
							return token;
						}
							
						/* note position of first such error in the comment */
						if (badcomment == 0)
						{
							lines = input.curline;
							columns = input.curcol - 3;
						}
							
						badcomment++;
						if (Options.FixComments)
						{
							lexbuf[lexsize - 2] = (byte) '=';
						}
							
						AddCharToLexer(c);
							
						/* if '-' then look for '>' to end the comment */
						if (c != '-')
						{
							break;
						}
					}
						
					/* otherwise continue to look for --> */
					lexbuf[lexsize - 2] = (byte) '=';
					continue;
					
					
				case LEX_DOCTYPE: 
					map = MAP((char) c);
						
					if ((map & WHITE) != 0)
					{
						if (waswhite)
						{
							lexsize -= 1;
						}
							
						waswhite = true;
					}
					else
					{
						waswhite = false;
					}
						
					if (c != '>')
					{
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.DocTypeTag, lexbuf, txtstart, txtend);
					/* make a note of the version named by the doctype */
					doctype = FindGivenVersion(token);
					return token;
					
					
				case LEX_PROCINSTR: 
						
					if (lexsize - txtstart == 3)
					{
						if ((GetString(lexbuf, txtstart, 3)).Equals("php"))
						{
							state = LEX_PHP;
							continue;
						}
					}
						
					if (Options.XmlPIs)
					{
						/* insist on ?> as terminator */
						if (c != '?')
						{
							continue;
						}
							
						/* now look for '>' */
						c = input.ReadChar();
							
						if (c == StreamIn.EndOfStream)
						{
							Report.Warning(this, null, null, Report.UNEXPECTED_END_OF_FILE);
							input.UngetChar(c);
							continue;
						}
							
						AddCharToLexer(c);
					}
						
					if (c != '>')
					{
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.ProcInsTag, lexbuf, txtstart, txtend);
					return token;
					
					
				case LEX_ASP: 
					if (c != '%')
					{
						continue;
					}
						
					/* now look for '>' */
					c = input.ReadChar();

					if (c != '>')
					{
						input.UngetChar(c);
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.AspTag, lexbuf, txtstart, txtend);
					return this.token;

				case LEX_JSTE: 
					if (c != '#')
					{
						continue;
					}
	
					/* now look for '>' */
					c = input.ReadChar();
					if (c != '>')
					{
						input.UngetChar(c);
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.JsteTag, lexbuf, txtstart, txtend);
					return token;

				case LEX_PHP: 
					if (c != '?')
					{
						continue;
					}
						
					/* now look for '>' */
					c = input.ReadChar();
					if (c != '>')
					{
						input.UngetChar(c);
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.PhpTag, lexbuf, txtstart, txtend);
					return token;
					
					
				case LEX_SECTION: 
					if (c == '[')
					{
						if (lexsize == (txtstart + 6) && (GetString(lexbuf, txtstart, 6)).Equals("CDATA["))
						{
							state = LEX_CDATA;
							lexsize -= 6;
							continue;
						}
					}
						
					if (c != ']')
					{
						continue;
					}

					/* now look for '>' */
					c = input.ReadChar();
					if (c != '>')
					{
						input.UngetChar(c);
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.SectionTag, lexbuf, txtstart, txtend);
					return token;

				case LEX_CDATA: 
					if (c != ']')
					{
						continue;
					}

					/* now look for ']' */
					c = input.ReadChar();
					if (c != ']')
					{
						input.UngetChar(c);
						continue;
					}
						
					/* now look for '>' */
					c = input.ReadChar();
					if (c != '>')
					{
						input.UngetChar(c);
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.CDATATag, lexbuf, txtstart, txtend);
					return token;
				}
			}
			
			if (state == LEX_CONTENT)
			{
				/* text string */
				txtend = lexsize;
				if (txtend > txtstart)
				{
					input.UngetChar(c);
					if (lexbuf[lexsize - 1] == (byte) ' ')
					{
						lexsize -= 1;
						txtend = lexsize;
					}
					
					token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
					return token;
				}
			}
			else if (state == LEX_COMMENT)
			{
				/* comment */
				if (c == StreamIn.EndOfStream)
				{
					Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
				}
				
				txtend = lexsize;
				lexbuf[lexsize] = (byte) '\x0000';
				state = LEX_CONTENT;
				waswhite = false;
				token = NewNode(Node.CommentTag, lexbuf, txtstart, txtend);
				return token;
			}
			
			return null;
		}
Exemplo n.º 41
0
        /// <summary>
        /// Checks that <c>DocTypeFactory.GetDocType</c> yields the expected doctype
        /// for the supplied doctype id and HTML version.
        /// </summary>
        /// <param name="docTypeId">Doctype identifier handed to the factory.</param>
        /// <param name="htmlVersion">HTML version handed to the factory.</param>
        /// <param name="expectedDocType">Value the factory result is compared against.</param>
        public void Walk_ReturnsCorrectDocType(string docTypeId, HtmlVersion htmlVersion, string expectedDocType)
        {
            // Act and assert in one step: the factory result is fed straight into the constraint.
            Assert.That(
                DocTypeFactory.GetDocType(docTypeId, htmlVersion),
                Is.EqualTo(expectedDocType));
        }
Exemplo n.º 42
0
 /// <summary>
 /// Convenience overload: forwards to the three-argument <c>Render</c>, supplying
 /// the current <c>ViewData</c>, or a fresh empty dictionary when <c>ViewData</c> is null.
 /// </summary>
 /// <param name="writer">Writer passed through to the three-argument overload.</param>
 /// <param name="htmlVersion">HTML version passed through to the three-argument overload.</param>
 public void Render(TextWriter writer, HtmlVersion htmlVersion)
 {
     // Resolve the view data first so the forwarding call below stays simple;
     // a new dictionary is only allocated when no ViewData is currently set.
     var effectiveViewData = ViewData ?? new Dictionary<string, object>();

     Render(writer, htmlVersion, effectiveViewData);
 }