/// <summary>
/// Attribute check for SCRIPT elements. Reports duplicated attributes, and when
/// the required "type" attribute is missing, reports MISSING_ATTRIBUTE and then
/// infers a "type" from the legacy "language" attribute (or defaults to
/// "text/javascript" when no "language" attribute is present).
/// </summary>
/// <param name="lexer">Lexer used for error/warning reporting.</param>
/// <param name="node">The SCRIPT node whose attributes are checked (may be mutated).</param>
public virtual void Check(Lexer lexer, Node node)
{
    node.CheckUniqueAttributes(lexer);

    AttVal lang = node.GetAttrByName("language");
    AttVal type = node.GetAttrByName("type");

    if (type == null)
    {
        Report.AttrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);

        /* check for javascript */
        if (lang != null)
        {
            /* guard: the attribute may be present without a value (Val == null) */
            string str = lang.Val ?? String.Empty;
            if (str.Length > 10)
            {
                /* trim versioned values such as "javascript1.2" down to the base name */
                str = str.Substring(0, 10);
            }

            /* HTML "language" values are case-insensitive, e.g. "JavaScript", "JScript" */
            if (str.Equals("javascript", StringComparison.OrdinalIgnoreCase) ||
                str.Equals("jscript", StringComparison.OrdinalIgnoreCase))
            {
                node.AddAttribute("type", "text/javascript");
            }
        }
        else
        {
            /* no language hint at all: assume the HTML default scripting language */
            node.AddAttribute("type", "text/javascript");
        }
    }
}
/// <summary>
/// Attribute check for CAPTION elements. Reports duplicated attributes, then
/// validates the "align" attribute: "left"/"right" restrict the document to
/// HTML 4.0 transitional/frameset, "top"/"bottom" restrict it to HTML 3.2+,
/// and anything else is reported as a bad attribute value.
/// </summary>
/// <param name="lexer">Lexer whose Versions mask is narrowed and which receives error reports.</param>
/// <param name="node">The CAPTION node whose attributes are checked.</param>
public virtual void Check(Lexer lexer, Node node)
{
    string val = null;

    node.CheckUniqueAttributes(lexer);

    /* locate the "align" attribute, if any (names are stored as parsed, compared ordinally) */
    for (AttVal attval = node.Attributes; attval != null; attval = attval.Next)
    {
        if (String.CompareOrdinal(attval.Attribute, "align") == 0)
        {
            val = attval.Val;
            break;
        }
    }

    if (val != null)
    {
        /* HTML attribute values are case-insensitive: "LEFT", "Top", ... are valid */
        if (String.Equals(val, "left", StringComparison.OrdinalIgnoreCase) ||
            String.Equals(val, "right", StringComparison.OrdinalIgnoreCase))
        {
            /* left/right captions exist only in HTML 4.0 loose/frameset */
            lexer.Versions &= HtmlVersion.Html40Loose | HtmlVersion.Frames;
        }
        else if (String.Equals(val, "top", StringComparison.OrdinalIgnoreCase) ||
                 String.Equals(val, "bottom", StringComparison.OrdinalIgnoreCase))
        {
            /* top/bottom captions are allowed from HTML 3.2 onward */
            lexer.Versions &= HtmlVersion.From32;
        }
        else
        {
            Report.AttrError(lexer, node, val, Report.BAD_ATTRIBUTE_VALUE);
        }
    }
}
/*
 modes for GetToken()

 MixedContent -- for elements which don't accept PCDATA
 Preformatted -- white space preserved as is
 IgnoreMarkup -- for CDATA elements such as script, style
*/
/*
 Reads characters from Input and returns the next lexical token: a text node,
 start/end tag, comment, doctype, processing instruction, ASP/JSTE/PHP section,
 marked section or CDATA section. Drives a state machine over this.State,
 accumulating raw bytes in Lexbuf between Txtstart and Txtend and materializing
 nodes with NewNode(). Returns null only when the stream is exhausted with no
 pending text or comment. Previously pushed-back tokens (Pushed) and pending
 inserted inline tokens (Insert/Inode) are replayed before any new input is read.
*/
public virtual Node GetToken(short mode)
{
    int c;
    int badcomment = 0;
    var isempty = new MutableBoolean();

    if (Pushed)
    {
        /* duplicate inlines in preference to pushed text nodes when appropriate */
        if (Token.Type != Node.TEXT_NODE || (Insert == - 1 && Inode == null))
        {
            Pushed = false;
            return Token;
        }
    }

    /* at start of block elements, unclosed inline elements are inserted into the token stream */
    if (Insert != - 1 || Inode != null)
    {
        return InsertedToken();
    }

    /* remember where this token starts, and begin a fresh text span in Lexbuf */
    Lines = Input.CursorLine;
    Columns = Input.CursorColumn;
    Waswhite = false;
    Txtstart = Lexsize;
    Txtend = Lexsize;

    while (true)
    {
        c = Input.ReadChar();
        if (c == StreamIn.END_OF_STREAM)
        {
            break;
        }

        /* a pending space (set elsewhere) is flushed unless whitespace is being discarded */
        if (Insertspace && mode != IGNORE_WHITESPACE)
        {
            AddCharToLexer(' ');
            Waswhite = true;
            Insertspace = false;
        }

        /* treat \r\n as \n and \r as \n */
        if (c == '\r')
        {
            c = Input.ReadChar();
            if (c != '\n')
            {
                Input.UngetChar(c);
            }
            c = '\n';
        }

        AddCharToLexer(c);
        short map;

        switch (State)
        {
            case LEX_CONTENT:
                /* scanning ordinary character data between tags */
                map = Map((char) c);

                /*
                 Discard white space if appropriate. Its cheaper to do this
                 here rather than in parser methods for elements that don't
                 have mixed content.
                */
                if (((map & WHITE) != 0) && (mode == IGNORE_WHITESPACE) && Lexsize == Txtstart + 1)
                {
                    --Lexsize;
                    Waswhite = false;
                    Lines = Input.CursorLine;
                    Columns = Input.CursorColumn;
                    continue;
                }

                if (c == '<')
                {
                    State = LEX_GT;
                    continue;
                }

                if ((map & WHITE) != 0)
                {
                    /* was previous char white? */
                    if (Waswhite)
                    {
                        /* collapse runs of whitespace unless preformatted/raw */
                        if (mode != PREFORMATTED && mode != IGNORE_MARKUP)
                        {
                            --Lexsize;
                            Lines = Input.CursorLine;
                            Columns = Input.CursorColumn;
                        }
                    }
                    /* prev char wasn't white */
                    else
                    {
                        Waswhite = true;
                        /* normalize tabs/newlines etc. to a single space */
                        if (mode != PREFORMATTED && mode != IGNORE_MARKUP && c != ' ')
                        {
                            ChangeChar((byte) ' ');
                        }
                    }
                    continue;
                }

                /* expand entities except inside raw CDATA content */
                if (c == '&' && mode != IGNORE_MARKUP)
                {
                    ParseEntity(mode);
                }

                /* this is needed to avoid trimming trailing whitespace */
                if (mode == IGNORE_WHITESPACE)
                    mode = MIXED_CONTENT;

                Waswhite = false;
                continue;

            case LEX_GT:
                /* just saw '<'; decide what kind of markup follows */
                if (c == '/')
                {
                    c = Input.ReadChar();
                    if (c == StreamIn.END_OF_STREAM)
                    {
                        Input.UngetChar(c);
                        continue;
                    }
                    AddCharToLexer(c);
                    map = Map((char) c);

                    if ((map & LETTER) != 0)
                    {
                        /* "</x": drop "</" + letter from the buffer and switch to end-tag parsing */
                        Lexsize -= 3;
                        Txtend = Lexsize;
                        Input.UngetChar(c);
                        State = LEX_ENDTAG;
                        Lexbuf[Lexsize] = (byte) '\x0000'; /* debug */
                        Input.CursorColumn -= 2;

                        /* if some text before the </ return it now */
                        if (Txtend > Txtstart)
                        {
                            /* trim space char before end tag */
                            if (mode == IGNORE_WHITESPACE && Lexbuf[Lexsize - 1] == (byte) ' ')
                            {
                                Lexsize -= 1;
                                Txtend = Lexsize;
                            }
                            Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                            return Token;
                        }

                        continue; /* no text so keep going */
                    }

                    /* otherwise treat as CDATA */
                    Waswhite = false;
                    State = LEX_CONTENT;
                    continue;
                }

                if (mode == IGNORE_MARKUP)
                {
                    /* otherwise treat as CDATA */
                    Waswhite = false;
                    State = LEX_CONTENT;
                    continue;
                }

                /*
                 look out for comments, doctype or marked sections
                 this isn't quite right, but its getting there ...
                */
                if (c == '!')
                {
                    c = Input.ReadChar();

                    if (c == '-')
                    {
                        c = Input.ReadChar();
                        if (c == '-')
                        {
                            State = LEX_COMMENT; /* comment */
                            Lexsize -= 2;
                            Txtend = Lexsize;

                            /* if some text before < return it now */
                            if (Txtend > Txtstart)
                            {
                                Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                return Token;
                            }

                            Txtstart = Lexsize;
                            continue;
                        }

                        /* "<!-" but not "<!--": not a valid comment opener */
                        Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
                    }
                    else if (c == 'd' || c == 'D')
                    {
                        State = LEX_DOCTYPE; /* doctype */
                        Lexsize -= 2;
                        Txtend = Lexsize;
                        mode = IGNORE_WHITESPACE;

                        /* skip until white space or '>' */
                        for (;;)
                        {
                            c = Input.ReadChar();
                            if (c == StreamIn.END_OF_STREAM || c == '>')
                            {
                                Input.UngetChar(c);
                                break;
                            }
                            map = Map((char) c);
                            if ((map & WHITE) == 0)
                            {
                                continue;
                            }

                            /* and skip to end of whitespace */
                            for (;;)
                            {
                                c = Input.ReadChar();
                                if (c == StreamIn.END_OF_STREAM || c == '>')
                                {
                                    Input.UngetChar(c);
                                    break;
                                }
                                map = Map((char) c);
                                if ((map & WHITE) != 0)
                                {
                                    continue;
                                }
                                Input.UngetChar(c);
                                break;
                            }
                            break;
                        }

                        /* if some text before < return it now */
                        if (Txtend > Txtstart)
                        {
                            Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                            return Token;
                        }

                        Txtstart = Lexsize;
                        continue;
                    }
                    else if (c == '[')
                    {
                        /* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
                        Lexsize -= 2;
                        State = LEX_SECTION;
                        Txtend = Lexsize;

                        /* if some text before < return it now */
                        if (Txtend > Txtstart)
                        {
                            Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                            return Token;
                        }

                        Txtstart = Lexsize;
                        continue;
                    }

                    /* otherwise swallow chars up to and including next '>' */
                    while (true)
                    {
                        c = Input.ReadChar();
                        if (c == '>')
                        {
                            break;
                        }
                        if (c == - 1)
                        {
                            Input.UngetChar(c);
                            break;
                        }
                    }

                    Lexsize -= 2;
                    Lexbuf[Lexsize] = (byte) '\x0000';
                    State = LEX_CONTENT;
                    continue;
                }

                /* processing instructions */
                if (c == '?')
                {
                    Lexsize -= 2;
                    State = LEX_PROCINSTR;
                    Txtend = Lexsize;

                    /* if some text before < return it now */
                    if (Txtend > Txtstart)
                    {
                        Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                        return Token;
                    }

                    Txtstart = Lexsize;
                    continue;
                }

                /* Microsoft ASP's e.g. <% ... server-code ... %> */
                if (c == '%')
                {
                    Lexsize -= 2;
                    State = LEX_ASP;
                    Txtend = Lexsize;

                    /* if some text before < return it now */
                    if (Txtend > Txtstart)
                    {
                        Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                        return Token;
                    }

                    Txtstart = Lexsize;
                    continue;
                }

                /* Netscapes JSTE e.g. <# ... server-code ... #> */
                if (c == '#')
                {
                    Lexsize -= 2;
                    State = LEX_JSTE;
                    Txtend = Lexsize;

                    /* if some text before < return it now */
                    if (Txtend > Txtstart)
                    {
                        Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                        return Token;
                    }

                    Txtstart = Lexsize;
                    continue;
                }

                map = Map((char) c);

                /* check for start tag */
                if ((map & LETTER) != 0)
                {
                    Input.UngetChar(c); /* push back letter */
                    Lexsize -= 2; /* discard "<" + letter */
                    Txtend = Lexsize;
                    State = LEX_STARTTAG; /* ready to read tag name */

                    /* if some text before < return it now */
                    if (Txtend > Txtstart)
                    {
                        Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                        return Token;
                    }

                    continue; /* no text so keep going */
                }

                /* otherwise treat as CDATA */
                State = LEX_CONTENT;
                Waswhite = false;
                continue;

            case LEX_ENDTAG:
                /* parsing "</name ...>" */
                Txtstart = Lexsize - 1;
                Input.CursorColumn += 2;
                c = ParseTagName();
                Token = NewNode(Node.END_TAG, Lexbuf, Txtstart, Txtend, GetString(Lexbuf, Txtstart, Txtend - Txtstart));
                Lexsize = Txtstart;
                Txtend = Txtstart;

                /* skip to '>' */
                while (c != '>')
                {
                    c = Input.ReadChar();
                    if (c == StreamIn.END_OF_STREAM)
                    {
                        break;
                    }
                }

                if (c == StreamIn.END_OF_STREAM)
                {
                    Input.UngetChar(c);
                    continue;
                }

                State = LEX_CONTENT;
                Waswhite = false;
                return Token; /* the endtag token */

            case LEX_STARTTAG:
                Txtstart = Lexsize - 1; /* set txtstart to first letter */
                c = ParseTagName();
                isempty.Val = false;
                AttVal attributes = null;
                Token = NewNode((isempty.Val ? Node.START_END_TAG : Node.START_TAG), Lexbuf, Txtstart, Txtend, GetString(Lexbuf, Txtstart, Txtend - Txtstart));

                /* parse attributes, consuming closing ">" */
                if (c != '>')
                {
                    if (c == '/')
                    {
                        Input.UngetChar(c);
                    }
                    attributes = ParseAttrs(isempty);
                }

                /* ParseAttrs may have seen "/>" and flagged an empty (self-closing) element */
                if (isempty.Val)
                {
                    Token.Type = Node.START_END_TAG;
                }
                Token.Attributes = attributes;
                Lexsize = Txtstart;
                Txtend = Txtstart;

                /* swallow newline following start tag */
                /* special check needed for CRLF sequence */
                /* this doesn't apply to empty elements */
                if (ExpectsContent(Token) || Token.Tag == Options.TagTable.TagBr)
                {
                    c = Input.ReadChar();
                    if (c == '\r')
                    {
                        c = Input.ReadChar();
                        if (c != '\n')
                        {
                            Input.UngetChar(c);
                        }
                    }
                    else if (c != '\n' && c != '\f')
                    {
                        Input.UngetChar(c);
                    }
                    Waswhite = true; /* to swallow leading whitespace */
                }
                else
                {
                    Waswhite = false;
                }

                State = LEX_CONTENT;

                if (Token.Tag == null)
                {
                    Report.Error(this, null, Token, Report.UNKNOWN_ELEMENT);
                }
                else if (!Options.XmlTags)
                {
                    /* narrow the set of HTML versions the document can claim */
                    Versions &= Token.Tag.Versions;
                    if ((Token.Tag.Versions & HtmlVersion.Proprietary) != 0)
                    {
                        if (!Options.MakeClean && (Token.Tag == Options.TagTable.TagNobr || Token.Tag == Options.TagTable.TagWbr))
                        {
                            Report.Warning(this, null, Token, Report.PROPRIETARY_ELEMENT);
                        }
                    }

                    /* run the tag-specific attribute checker if one is registered */
                    if (Token.Tag.CheckAttribs != null)
                    {
                        Token.CheckUniqueAttributes(this);
                        Token.Tag.CheckAttribs.Check(this, Token);
                    }
                    else
                    {
                        Token.CheckAttributes(this);
                    }
                }

                return Token; /* return start tag */

            case LEX_COMMENT:
                /* inside "<!--"; seeks the terminating "-->" */
                if (c != '-')
                {
                    continue;
                }

                c = Input.ReadChar();
                AddCharToLexer(c);
                if (c != '-')
                {
                    continue;
                }

                while (true)
                {
                    c = Input.ReadChar();
                    if (c == '>')
                    {
                        if (badcomment != 0)
                        {
                            Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
                        }
                        Txtend = Lexsize - 2; // AQ 8Jul2000
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.COMMENT_TAG, Lexbuf, Txtstart, Txtend);

                        /* now look for a line break */
                        c = Input.ReadChar();
                        if (c == '\r')
                        {
                            c = Input.ReadChar();
                            if (c != '\n')
                            {
                                Token.Linebreak = true;
                            }
                        }
                        if (c == '\n')
                        {
                            Token.Linebreak = true;
                        }
                        else
                        {
                            Input.UngetChar(c);
                        }

                        return Token;
                    }

                    /* note position of first such error in the comment */
                    if (badcomment == 0)
                    {
                        Lines = Input.CursorLine;
                        Columns = Input.CursorColumn - 3;
                    }
                    badcomment++;

                    if (Options.FixComments)
                    {
                        Lexbuf[Lexsize - 2] = (byte) '=';
                    }
                    AddCharToLexer(c);

                    /* if '-' then look for '>' to end the comment */
                    if (c != '-')
                    {
                        break;
                    }
                }

                /* otherwise continue to look for --> */
                Lexbuf[Lexsize - 2] = (byte) '=';
                continue;

            case LEX_DOCTYPE:
                /* inside "<!doctype"; collapse whitespace and stop at '>' */
                map = Map((char) c);
                if ((map & WHITE) != 0)
                {
                    if (Waswhite)
                    {
                        Lexsize -= 1;
                    }
                    Waswhite = true;
                }
                else
                {
                    Waswhite = false;
                }

                if (c != '>')
                {
                    continue;
                }

                Lexsize -= 1;
                Txtend = Lexsize;
                Lexbuf[Lexsize] = (byte) '\x0000';
                State = LEX_CONTENT;
                Waswhite = false;
                Token = NewNode(Node.DOC_TYPE_TAG, Lexbuf, Txtstart, Txtend);

                /* make a note of the version named by the doctype */
                Doctype = FindGivenVersion(Token);
                return Token;

            case LEX_PROCINSTR:
                /* inside "<?"; first three chars "php" reroute to the PHP state */
                if (Lexsize - Txtstart == 3)
                {
                    if ((GetString(Lexbuf, Txtstart, 3)).Equals("php"))
                    {
                        State = LEX_PHP;
                        continue;
                    }
                }

                if (Options.XmlPIs)
                {
                    /* insist on ?> as terminator */
                    if (c != '?')
                    {
                        continue;
                    }

                    /* now look for '>' */
                    c = Input.ReadChar();
                    if (c == StreamIn.END_OF_STREAM)
                    {
                        Report.Warning(this, null, null, Report.UNEXPECTED_END_OF_FILE);
                        Input.UngetChar(c);
                        continue;
                    }
                    AddCharToLexer(c);
                }

                if (c != '>')
                {
                    continue;
                }

                Lexsize -= 1;
                Txtend = Lexsize;
                Lexbuf[Lexsize] = (byte) '\x0000';
                State = LEX_CONTENT;
                Waswhite = false;
                Token = NewNode(Node.PROC_INS_TAG, Lexbuf, Txtstart, Txtend);
                return Token;

            case LEX_ASP:
                /* inside "<%"; terminated by "%>" */
                if (c != '%')
                {
                    continue;
                }

                /* now look for '>' */
                c = Input.ReadChar();
                if (c != '>')
                {
                    Input.UngetChar(c);
                    continue;
                }

                Lexsize -= 1;
                Txtend = Lexsize;
                Lexbuf[Lexsize] = (byte) '\x0000';
                State = LEX_CONTENT;
                Waswhite = false;
                Token = NewNode(Node.ASP_TAG, Lexbuf, Txtstart, Txtend);
                return Token;

            case LEX_JSTE:
                /* inside "<#"; terminated by "#>" */
                if (c != '#')
                {
                    continue;
                }

                /* now look for '>' */
                c = Input.ReadChar();
                if (c != '>')
                {
                    Input.UngetChar(c);
                    continue;
                }

                Lexsize -= 1;
                Txtend = Lexsize;
                Lexbuf[Lexsize] = (byte) '\x0000';
                State = LEX_CONTENT;
                Waswhite = false;
                Token = NewNode(Node.JSTE_TAG, Lexbuf, Txtstart, Txtend);
                return Token;

            case LEX_PHP:
                /* inside "<?php"; terminated by "?>" */
                if (c != '?')
                {
                    continue;
                }

                /* now look for '>' */
                c = Input.ReadChar();
                if (c != '>')
                {
                    Input.UngetChar(c);
                    continue;
                }

                Lexsize -= 1;
                Txtend = Lexsize;
                Lexbuf[Lexsize] = (byte) '\x0000';
                State = LEX_CONTENT;
                Waswhite = false;
                Token = NewNode(Node.PHP_TAG, Lexbuf, Txtstart, Txtend);
                return Token;

            case LEX_SECTION:
                /* inside "<![...": either a marked section or the start of CDATA */
                if (c == '[')
                {
                    if (Lexsize == (Txtstart + 6) && (GetString(Lexbuf, Txtstart, 6)).Equals("CDATA["))
                    {
                        State = LEX_CDATA;
                        Lexsize -= 6;
                        continue;
                    }
                }

                if (c != ']')
                {
                    continue;
                }

                /* now look for '>' */
                c = Input.ReadChar();
                if (c != '>')
                {
                    Input.UngetChar(c);
                    continue;
                }

                Lexsize -= 1;
                Txtend = Lexsize;
                Lexbuf[Lexsize] = (byte) '\x0000';
                State = LEX_CONTENT;
                Waswhite = false;
                Token = NewNode(Node.SECTION_TAG, Lexbuf, Txtstart, Txtend);
                return Token;

            case LEX_CDATA:
                /* inside "<![CDATA["; terminated by "]]>" */
                if (c != ']')
                {
                    continue;
                }

                /* now look for ']' */
                c = Input.ReadChar();
                if (c != ']')
                {
                    Input.UngetChar(c);
                    continue;
                }

                /* now look for '>' */
                c = Input.ReadChar();
                if (c != '>')
                {
                    Input.UngetChar(c);
                    continue;
                }

                Lexsize -= 1;
                Txtend = Lexsize;
                Lexbuf[Lexsize] = (byte) '\x0000';
                State = LEX_CONTENT;
                Waswhite = false;
                Token = NewNode(Node.CDATA_TAG, Lexbuf, Txtstart, Txtend);
                return Token;
        }
    }

    /* end of stream: flush whatever the current state has accumulated */
    if (State == LEX_CONTENT)
    {
        /* text string */
        Txtend = Lexsize;
        if (Txtend > Txtstart)
        {
            Input.UngetChar(c);

            /* drop a single trailing space */
            if (Lexbuf[Lexsize - 1] == (byte) ' ')
            {
                Lexsize -= 1;
                Txtend = Lexsize;
            }
            Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
            return Token;
        }
    }
    else if (State == LEX_COMMENT)
    {
        /* comment */
        if (c == StreamIn.END_OF_STREAM)
        {
            /* comment never terminated before end of input */
            Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
        }

        Txtend = Lexsize;
        Lexbuf[Lexsize] = (byte) '\x0000';
        State = LEX_CONTENT;
        Waswhite = false;
        Token = NewNode(Node.COMMENT_TAG, Lexbuf, Txtstart, Txtend);
        return Token;
    }

    return null;
}