/// <summary>
/// Attribute check for an element that must carry both "alt" and "href"
/// (image-map area): verifies attribute uniqueness, scans all attributes,
/// and reports each required attribute that is absent.  A missing "alt"
/// also raises the MISSING_LINK_ALT accessibility flag.
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    bool altSeen = false;
    bool hrefSeen = false;

    node.CheckUniqueAttributes(lexer);

    for (AttVal av = node.Attributes; av != null; av = av.Next)
    {
        Attribute found = av.CheckAttribute(lexer, node);

        if (found == AttributeTable.AttrAlt)
        {
            altSeen = true;
        }
        else if (found == AttributeTable.AttrHref)
        {
            hrefSeen = true;
        }
    }

    if (!altSeen)
    {
        lexer.badAccess |= Report.MISSING_LINK_ALT;
        Report.AttrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE);
    }

    if (!hrefSeen)
    {
        Report.AttrError(lexer, node, "href", Report.MISSING_ATTRIBUTE);
    }
}
/// <summary>
/// Attribute check for a script element: the "type" attribute is required.
/// When it is missing, reports the omission and repairs the node by adding
/// type="text/javascript" — either because the legacy "language" attribute
/// names a JavaScript dialect, or (with no "language" at all) as the default.
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    node.CheckUniqueAttributes(lexer);

    AttVal lang = node.GetAttrByName("language");
    AttVal type = node.GetAttrByName("type");

    if (type == null)
    {
        Report.AttrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);

        /* check for javascript */
        if (lang != null)
        {
            string str = lang.Val;
            // only the first 10 chars matter ("javascript" is 10 long)
            if (str.Length > 10)
            {
                str = str.Substring(0, 10);
            }
            // Bug fix: HTML attribute values are case-insensitive, so
            // "JavaScript" / "JScript" must match too.  The previous
            // culture-sensitive, case-sensitive compare missed them.
            if ((String.Compare(str, "javascript", true) == 0) || (String.Compare(str, "jscript", true) == 0))
            {
                node.AddAttribute("type", "text/javascript");
            }
        }
        else
        {
            node.AddAttribute("type", "text/javascript");
        }
    }
}
/// <summary>
/// Attribute check for an element with an "align" attribute: narrows the
/// set of HTML versions the document can claim based on the align value
/// ("left"/"right" need HTML 4 loose or frameset; "top"/"bottom" are
/// allowed from HTML 3.2 on) and reports any other value as bad.
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    AttVal attval;
    string val = null;

    node.CheckUniqueAttributes(lexer);

    for (attval = node.Attributes; attval != null; attval = attval.Next)
    {
        if (String.Compare(attval.Attribute, "align") == 0)
        {
            val = attval.Val;
            break;
        }
    }

    if (val != null)
    {
        // Bug fix: align values are case-insensitive in HTML, so use the
        // ignore-case compare; previously "LEFT" etc. was wrongly reported
        // as a bad attribute value.
        if (String.Compare(val, "left", true) == 0 || String.Compare(val, "right", true) == 0)
        {
            lexer.versions &= HtmlVersion.Html40Loose | HtmlVersion.Frames;
        }
        else if (String.Compare(val, "top", true) == 0 || String.Compare(val, "bottom", true) == 0)
        {
            lexer.versions &= HtmlVersion.From32;
        }
        else
        {
            Report.AttrError(lexer, node, val, Report.BAD_ATTRIBUTE_VALUE);
        }
    }
}
/// <summary>
/// Attribute check for a style element: "type" is required.  When absent,
/// reports the omission and repairs the node with type="text/css".
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    AttVal typeAttr = node.GetAttrByName("type");

    node.CheckUniqueAttributes(lexer);

    if (typeAttr != null)
    {
        return;
    }

    Report.AttrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
    node.AddAttribute("type", "text/css");
}
/// <summary>
/// Attribute check that rules out HTML 4 Strict when the element carries a
/// "width" or "height" presentation attribute.
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    node.CheckUniqueAttributes(lexer);

    bool hasDimension = node.GetAttrByName("width") != null || node.GetAttrByName("height") != null;

    /* HTML4 strict doesn't allow mixed content for elements with %block; as their content model */
    if (hasDimension)
    {
        lexer.versions &= ~ HtmlVersion.Html40Strict;
    }
}
/// <summary>
/// Attribute check that scans every attribute; an "xmlns" declaration
/// marks the document as XHTML ("voyager") on the lexer.
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    node.CheckUniqueAttributes(lexer);

    AttVal av = node.Attributes;
    while (av != null)
    {
        if (av.CheckAttribute(lexer, node) == AttributeTable.AttrXmlns)
        {
            lexer.isvoyager = true;
        }
        av = av.Next;
    }
}
/// <summary>
/// Attribute check for a link element: when rel="stylesheet", the "type"
/// attribute is required; if missing it is reported and repaired with
/// type="text/css".  Other rel values need no type.
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    AttVal relAttr = node.GetAttrByName("rel");

    node.CheckUniqueAttributes(lexer);

    /* only stylesheet links are required to declare a content type */
    if (relAttr == null || relAttr.Val == null || !relAttr.Val.Equals("stylesheet"))
    {
        return;
    }

    if (node.GetAttrByName("type") == null)
    {
        Report.AttrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
        node.AddAttribute("type", "text/css");
    }
}
/// <summary>
/// Attribute check for a table element: warns when the accessibility
/// "summary" attribute is missing (except for HTML 2.0 / 3.2 doctypes,
/// which predate it) and, when XML output is requested, gives a bare
/// "border" attribute the explicit value "1".
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    bool summarySeen = false;

    node.CheckUniqueAttributes(lexer);

    for (AttVal av = node.Attributes; av != null; av = av.Next)
    {
        if (av.CheckAttribute(lexer, node) == AttributeTable.AttrSummary)
        {
            summarySeen = true;
        }
    }

    /* suppress warning for missing summary for HTML 2.0 and HTML 3.2 */
    if (!summarySeen && lexer.doctype != HtmlVersion.Html20 && lexer.doctype != HtmlVersion.Html32)
    {
        lexer.badAccess |= Report.MISSING_SUMMARY;
        Report.AttrError(lexer, node, "summary", Report.MISSING_ATTRIBUTE);
    }

    /* convert <table border> to <table border="1"> */
    if (lexer.Options.XmlOut)
    {
        AttVal border = node.GetAttrByName("border");
        if (border != null && border.Val == null)
        {
            border.Val = "1";
        }
    }
}
/// <summary>
/// Generic attribute check: verifies attribute uniqueness, then lets the
/// lexer repair/normalize the element's id attribute.
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    node.CheckUniqueAttributes(lexer);

    lexer.FixId(node);
}
/*
 * GetToken: the lexer's main tokenizer loop.
 *
 * Reads characters from 'input' and drives a state machine
 * (state = LEX_CONTENT / LEX_GT / LEX_ENDTAG / LEX_STARTTAG / LEX_COMMENT /
 * LEX_DOCTYPE / LEX_PROCINSTR / LEX_ASP / LEX_JSTE / LEX_PHP /
 * LEX_SECTION / LEX_CDATA), accumulating raw bytes in lexbuf between
 * txtstart and txtend.  Each call returns one Node (text, start tag,
 * end tag, comment, doctype, processing instruction, ASP/JSTE/PHP,
 * marked section, or CDATA section) or null at end of stream.
 *
 * Before reading new input it first returns any pushed-back token and any
 * pending inserted inline token.  \r\n and bare \r are normalized to \n.
 * In LEX_CONTENT, runs of whitespace are collapsed (except in
 * Preformatted / IgnoreMarkup modes), and leading whitespace is dropped
 * entirely in IgnoreWhitespace mode.  Start tags trigger the per-tag
 * attribute checks and narrow 'versions'; a newline directly after a
 * content-expecting start tag (or <br>) is swallowed.
 *
 * NOTE(review): the local 'lastc' is assigned in the whitespace branch but
 * never read afterwards -- it appears to be dead state; confirm before
 * removing.
 */
/* modes for GetToken() MixedContent -- for elements which don't accept PCDATA Preformatted -- white space preserved as is IgnoreMarkup -- for CDATA elements such as script, style */ public virtual Node GetToken(short mode) { short map; int c = 0; int lastc; int badcomment = 0; MutableBoolean isempty = new MutableBoolean(); AttVal attributes; if (pushed) { /* duplicate inlines in preference to pushed text nodes when appropriate */ if (token.Type != Node.TextNode || (insert == - 1 && inode == null)) { pushed = false; return token; } } /* at start of block elements, unclosed inline elements are inserted into the token stream */ if (insert != - 1 || inode != null) { return InsertedToken(); } lines = input.curline; columns = input.curcol; waswhite = false; txtstart = lexsize; txtend = lexsize; while (true) { c = input.ReadChar(); if (c == StreamIn.EndOfStream) { break; } if (insertspace && mode != IgnoreWhitespace) { AddCharToLexer(' '); waswhite = true; insertspace = false; } /* treat \r\n as \n and \r as \n */ if (c == '\r') { c = input.ReadChar(); if (c != '\n') { input.UngetChar(c); } c = '\n'; } AddCharToLexer(c); switch (state) { case LEX_CONTENT: map = MAP((char) c); /* Discard white space if appropriate. Its cheaper to do this here rather than in parser methods for elements that don't have mixed content. */ if (((map & WHITE) != 0) && (mode == IgnoreWhitespace) && lexsize == txtstart + 1) { --lexsize; waswhite = false; lines = input.curline; columns = input.curcol; continue; } if (c == '<') { state = LEX_GT; continue; } if ((map & WHITE) != 0) { /* was previous char white? 
*/ if (waswhite) { if (mode != Preformatted && mode != IgnoreMarkup) { --lexsize; lines = input.curline; columns = input.curcol; } } /* prev char wasn't white */ else { waswhite = true; lastc = c; if (mode != Preformatted && mode != IgnoreMarkup && c != ' ') { ChangeChar((byte) ' '); } } continue; } else if (c == '&' && mode != IgnoreMarkup) { ParseEntity(mode); } /* this is needed to avoid trimming trailing whitespace */ if (mode == IgnoreWhitespace) mode = MixedContent; waswhite = false; continue; case LEX_GT: if (c == '/') { c = input.ReadChar(); if (c == StreamIn.EndOfStream) { input.UngetChar(c); continue; } AddCharToLexer(c); map = MAP((char) c); if ((map & LETTER) != 0) { lexsize -= 3; txtend = lexsize; input.UngetChar(c); state = LEX_ENDTAG; lexbuf[lexsize] = (byte) '\x0000'; /* debug */ input.curcol -= 2; /* if some text before the </ return it now */ if (txtend > txtstart) { /* trim space char before end tag */ if (mode == IgnoreWhitespace && lexbuf[lexsize - 1] == (byte) ' ') { lexsize -= 1; txtend = lexsize; } token = NewNode(Node.TextNode, lexbuf, txtstart, txtend); return token; } continue; /* no text so keep going */ } /* otherwise treat as CDATA */ waswhite = false; state = LEX_CONTENT; continue; } if (mode == IgnoreMarkup) { /* otherwise treat as CDATA */ waswhite = false; state = LEX_CONTENT; continue; } /* look out for comments, doctype or marked sections this isn't quite right, but its getting there ... 
*/ if (c == '!') { c = input.ReadChar(); if (c == '-') { c = input.ReadChar(); if (c == '-') { state = LEX_COMMENT; /* comment */ lexsize -= 2; txtend = lexsize; /* if some text before < return it now */ if (txtend > txtstart) { token = NewNode(Node.TextNode, lexbuf, txtstart, txtend); return token; } txtstart = lexsize; continue; } Report.Warning(this, null, null, Report.MALFORMED_COMMENT); } else if (c == 'd' || c == 'D') { state = LEX_DOCTYPE; /* doctype */ lexsize -= 2; txtend = lexsize; mode = IgnoreWhitespace; /* skip until white space or '>' */ for (; ; ) { c = input.ReadChar(); if (c == StreamIn.EndOfStream || c == '>') { input.UngetChar(c); break; } map = MAP((char) c); if ((map & WHITE) == 0) { continue; } /* and skip to end of whitespace */ for (; ; ) { c = input.ReadChar(); if (c == StreamIn.EndOfStream || c == '>') { input.UngetChar(c); break; } map = MAP((char) c); if ((map & WHITE) != 0) { continue; } input.UngetChar(c); break; } break; } /* if some text before < return it now */ if (txtend > txtstart) { token = NewNode(Node.TextNode, lexbuf, txtstart, txtend); return token; } txtstart = lexsize; continue; } else if (c == '[') { /* Word 2000 embeds <![if ...]> ... <![endif]> sequences */ lexsize -= 2; state = LEX_SECTION; txtend = lexsize; /* if some text before < return it now */ if (txtend > txtstart) { token = NewNode(Node.TextNode, lexbuf, txtstart, txtend); return token; } txtstart = lexsize; continue; } /* otherwise swallow chars up to and including next '>' */ while (true) { c = input.ReadChar(); if (c == '>') { break; } if (c == - 1) { input.UngetChar(c); break; } } lexsize -= 2; lexbuf[lexsize] = (byte) '\x0000'; state = LEX_CONTENT; continue; } /* processing instructions */ if (c == '?') { lexsize -= 2; state = LEX_PROCINSTR; txtend = lexsize; /* if some text before < return it now */ if (txtend > txtstart) { token = NewNode(Node.TextNode, lexbuf, txtstart, txtend); return token; } txtstart = lexsize; continue; } /* Microsoft ASP's e.g. 
<% ... server-code ... %> */ if (c == '%') { lexsize -= 2; state = LEX_ASP; txtend = lexsize; /* if some text before < return it now */ if (txtend > txtstart) { token = NewNode(Node.TextNode, lexbuf, txtstart, txtend); return token; } txtstart = lexsize; continue; } /* Netscapes JSTE e.g. <# ... server-code ... #> */ if (c == '#') { lexsize -= 2; state = LEX_JSTE; txtend = lexsize; /* if some text before < return it now */ if (txtend > txtstart) { token = NewNode(Node.TextNode, lexbuf, txtstart, txtend); return token; } txtstart = lexsize; continue; } map = MAP((char) c); /* check for start tag */ if ((map & LETTER) != 0) { input.UngetChar(c); /* push back letter */ lexsize -= 2; /* discard "<" + letter */ txtend = lexsize; state = LEX_STARTTAG; /* ready to read tag name */ /* if some text before < return it now */ if (txtend > txtstart) { token = NewNode(Node.TextNode, lexbuf, txtstart, txtend); return token; } continue; /* no text so keep going */ } /* otherwise treat as CDATA */ state = LEX_CONTENT; waswhite = false; continue; case LEX_ENDTAG: txtstart = lexsize - 1; input.curcol += 2; c = ParseTagName(); token = NewNode(Node.EndTag, lexbuf, txtstart, txtend, GetString(lexbuf, txtstart, txtend - txtstart)); lexsize = txtstart; txtend = txtstart; /* skip to '>' */ while (c != '>') { c = input.ReadChar(); if (c == StreamIn.EndOfStream) { break; } } if (c == StreamIn.EndOfStream) { input.UngetChar(c); continue; } state = LEX_CONTENT; waswhite = false; return token; /* the endtag token */ case LEX_STARTTAG: txtstart = lexsize - 1; /* set txtstart to first letter */ c = ParseTagName(); isempty.Val = false; attributes = null; token = NewNode((isempty.Val ? 
Node.StartEndTag : Node.StartTag), lexbuf, txtstart, txtend, GetString(lexbuf, txtstart, txtend - txtstart)); /* parse attributes, consuming closing ">" */ if (c != '>') { if (c == '/') { input.UngetChar(c); } attributes = ParseAttrs(isempty); } if (isempty.Val) { token.Type = Node.StartEndTag; } token.Attributes = attributes; lexsize = txtstart; txtend = txtstart; /* swallow newline following start tag */ /* special check needed for CRLF sequence */ /* this doesn't apply to empty elements */ if (ExpectsContent(token) || token.Tag == Options.tt.TagBr) { c = input.ReadChar(); if (c == '\r') { c = input.ReadChar(); if (c != '\n') { input.UngetChar(c); } } else if (c != '\n' && c != '\f') { input.UngetChar(c); } waswhite = true; /* to swallow leading whitespace */ } else { waswhite = false; } state = LEX_CONTENT; if (token.Tag == null) { Report.Error(this, null, token, Report.UNKNOWN_ELEMENT); } else if (!Options.XmlTags) { versions &= token.Tag.Versions; if ((token.Tag.Versions & HtmlVersion.Proprietary) != 0) { if (!Options.MakeClean && (token.Tag == Options.tt.TagNobr || token.Tag == Options.tt.TagWbr)) { Report.Warning(this, null, token, Report.PROPRIETARY_ELEMENT); } } if (token.Tag.CheckAttribs != null) { token.CheckUniqueAttributes(this); token.Tag.CheckAttribs.Check(this, this.token); } else { token.CheckAttributes(this); } } return token; /* return start tag */ case LEX_COMMENT: if (c != '-') { continue; } c = input.ReadChar(); AddCharToLexer(c); if (c != '-') { continue; } while (true) { c = input.ReadChar(); if (c == '>') { if (badcomment != 0) { Report.Warning(this, null, null, Report.MALFORMED_COMMENT); } txtend = lexsize - 2; // AQ 8Jul2000 lexbuf[lexsize] = (byte) '\x0000'; state = LEX_CONTENT; waswhite = false; token = NewNode(Node.CommentTag, lexbuf, txtstart, txtend); /* now look for a line break */ c = input.ReadChar(); if (c == '\r') { c = input.ReadChar(); if (c != '\n') { token.Linebreak = true; } } if (c == '\n') { token.Linebreak = true; } else 
{ input.UngetChar(c); } return token; } /* note position of first such error in the comment */ if (badcomment == 0) { lines = input.curline; columns = input.curcol - 3; } badcomment++; if (Options.FixComments) { lexbuf[lexsize - 2] = (byte) '='; } AddCharToLexer(c); /* if '-' then look for '>' to end the comment */ if (c != '-') { break; } } /* otherwise continue to look for --> */ lexbuf[lexsize - 2] = (byte) '='; continue; case LEX_DOCTYPE: map = MAP((char) c); if ((map & WHITE) != 0) { if (waswhite) { lexsize -= 1; } waswhite = true; } else { waswhite = false; } if (c != '>') { continue; } lexsize -= 1; txtend = lexsize; lexbuf[lexsize] = (byte) '\x0000'; state = LEX_CONTENT; waswhite = false; token = NewNode(Node.DocTypeTag, lexbuf, txtstart, txtend); /* make a note of the version named by the doctype */ doctype = FindGivenVersion(token); return token; case LEX_PROCINSTR: if (lexsize - txtstart == 3) { if ((GetString(lexbuf, txtstart, 3)).Equals("php")) { state = LEX_PHP; continue; } } if (Options.XmlPIs) { /* insist on ?> as terminator */ if (c != '?') { continue; } /* now look for '>' */ c = input.ReadChar(); if (c == StreamIn.EndOfStream) { Report.Warning(this, null, null, Report.UNEXPECTED_END_OF_FILE); input.UngetChar(c); continue; } AddCharToLexer(c); } if (c != '>') { continue; } lexsize -= 1; txtend = lexsize; lexbuf[lexsize] = (byte) '\x0000'; state = LEX_CONTENT; waswhite = false; token = NewNode(Node.ProcInsTag, lexbuf, txtstart, txtend); return token; case LEX_ASP: if (c != '%') { continue; } /* now look for '>' */ c = input.ReadChar(); if (c != '>') { input.UngetChar(c); continue; } lexsize -= 1; txtend = lexsize; lexbuf[lexsize] = (byte) '\x0000'; state = LEX_CONTENT; waswhite = false; token = NewNode(Node.AspTag, lexbuf, txtstart, txtend); return this.token; case LEX_JSTE: if (c != '#') { continue; } /* now look for '>' */ c = input.ReadChar(); if (c != '>') { input.UngetChar(c); continue; } lexsize -= 1; txtend = lexsize; lexbuf[lexsize] = 
(byte) '\x0000'; state = LEX_CONTENT; waswhite = false; token = NewNode(Node.JsteTag, lexbuf, txtstart, txtend); return token; case LEX_PHP: if (c != '?') { continue; } /* now look for '>' */ c = input.ReadChar(); if (c != '>') { input.UngetChar(c); continue; } lexsize -= 1; txtend = lexsize; lexbuf[lexsize] = (byte) '\x0000'; state = LEX_CONTENT; waswhite = false; token = NewNode(Node.PhpTag, lexbuf, txtstart, txtend); return token; case LEX_SECTION: if (c == '[') { if (lexsize == (txtstart + 6) && (GetString(lexbuf, txtstart, 6)).Equals("CDATA[")) { state = LEX_CDATA; lexsize -= 6; continue; } } if (c != ']') { continue; } /* now look for '>' */ c = input.ReadChar(); if (c != '>') { input.UngetChar(c); continue; } lexsize -= 1; txtend = lexsize; lexbuf[lexsize] = (byte) '\x0000'; state = LEX_CONTENT; waswhite = false; token = NewNode(Node.SectionTag, lexbuf, txtstart, txtend); return token; case LEX_CDATA: if (c != ']') { continue; } /* now look for ']' */ c = input.ReadChar(); if (c != ']') { input.UngetChar(c); continue; } /* now look for '>' */ c = input.ReadChar(); if (c != '>') { input.UngetChar(c); continue; } lexsize -= 1; txtend = lexsize; lexbuf[lexsize] = (byte) '\x0000'; state = LEX_CONTENT; waswhite = false; token = NewNode(Node.CDATATag, lexbuf, txtstart, txtend); return token; } } if (state == LEX_CONTENT) { /* text string */ txtend = lexsize; if (txtend > txtstart) { input.UngetChar(c); if (lexbuf[lexsize - 1] == (byte) ' ') { lexsize -= 1; txtend = lexsize; } token = NewNode(Node.TextNode, lexbuf, txtstart, txtend); return token; } } else if (state == LEX_COMMENT) { /* comment */ if (c == StreamIn.EndOfStream) { Report.Warning(this, null, null, Report.MALFORMED_COMMENT); } txtend = lexsize; lexbuf[lexsize] = (byte) '\x0000'; state = LEX_CONTENT; waswhite = false; token = NewNode(Node.CommentTag, lexbuf, txtstart, txtend); return token; } return null; }
/// <summary>
/// Default attribute check for elements needing no element-specific
/// validation: enforce attribute uniqueness and normalize the id.
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    // uniqueness first, then id repair (same order as the other checkers)
    node.CheckUniqueAttributes(lexer);
    lexer.FixId(node);
}
/// <summary>
/// Attribute check for an img element.  Flags a missing "alt" (adding the
/// configured replacement text if one is set), requires "src" unless the
/// element is data-bound via "datafld", warns when "ismap" is used without
/// a corresponding "usemap", and notes that "width"/"height" rule out
/// HTML 2.0.
/// </summary>
public virtual void Check(Lexer lexer, Node node)
{
    bool altSeen = false;
    bool srcSeen = false;
    bool useMapSeen = false;
    bool isMapSeen = false;
    bool dataFldSeen = false;

    node.CheckUniqueAttributes(lexer);

    for (AttVal av = node.Attributes; av != null; av = av.Next)
    {
        Attribute found = av.CheckAttribute(lexer, node);

        if (found == AttributeTable.AttrAlt)
        {
            altSeen = true;
        }
        else if (found == AttributeTable.AttrSrc)
        {
            srcSeen = true;
        }
        else if (found == AttributeTable.AttrUsemap)
        {
            useMapSeen = true;
        }
        else if (found == AttributeTable.AttrIsmap)
        {
            isMapSeen = true;
        }
        else if (found == AttributeTable.AttrDatafld)
        {
            dataFldSeen = true;
        }
        else if (found == AttributeTable.AttrWidth || found == AttributeTable.AttrHeight)
        {
            // explicit dimensions were introduced after HTML 2.0
            lexer.versions &= ~ HtmlVersion.Html20;
        }
    }

    if (!altSeen)
    {
        lexer.badAccess |= Report.MISSING_IMAGE_ALT;
        Report.AttrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE);

        if (lexer.Options.AltText != null)
        {
            node.AddAttribute("alt", lexer.Options.AltText);
        }
    }

    if (!srcSeen && !dataFldSeen)
    {
        Report.AttrError(lexer, node, "src", Report.MISSING_ATTRIBUTE);
    }

    if (isMapSeen && !useMapSeen)
    {
        Report.AttrError(lexer, node, "ismap", Report.MISSING_IMAGEMAP);
    }
}