/// <summary>
/// Adds informational messages to <paramref name="lexer"/> describing the DOCTYPE
/// that was supplied (only when a doctype node exists) and the HTML version name
/// reported by the lexer.
/// </summary>
/// <param name="lexer">Lexer whose <c>Messages</c> collection receives the messages.</param>
/// <param name="doctype">The DOCTYPE node, or <c>null</c> when there is none.</param>
public static void ReportVersion(Lexer lexer, Node doctype)
{
    string vers = lexer.HtmlVersionName();

    if (doctype != null)
    {
        var docTypeStr = new StringBuilder();
        var cc = new MutableInteger();

        // Number of double quotes seen so far. Only the text between the first
        // and the second quote is collected.
        int state = 0;

        for (int i = doctype.Start; i < doctype.End; ++i)
        {
            // Bytes are unsigned in C#, so this value is always in 0..255.
            int c = doctype.Textarray[i];

            // Look for a UTF-8 multibyte character. The high bit marks a lead
            // byte; a "c < 0" test can never succeed on an unsigned byte.
            if (c > 0x7F)
            {
                i += PPrint.GetUtf8(doctype.Textarray, i, cc);
                c = cc.Val;
            }

            if (c == '"')
            {
                ++state;
            }
            else if (state == 1)
            {
                if (c > 0xFFFF && c <= 0x10FFFF)
                {
                    // Supplementary-plane code points need a surrogate pair;
                    // a plain (char) cast would silently truncate them.
                    docTypeStr.Append(char.ConvertFromUtf32(c));
                }
                else
                {
                    docTypeStr.Append((char)c);
                }
            }
        }

        lexer.Messages.Add(new TidyMessage(lexer, String.Format(GetMessage("doctype_given"), docTypeStr), MessageLevel.Info));
    }

    lexer.Messages.Add(new TidyMessage(lexer, String.Format(GetMessage("report_version"), (vers ?? "HTML proprietary")), MessageLevel.Info));
}
/// <summary>
/// Decodes the UTF-8 sequence that begins at <c>str[start]</c>.
/// </summary>
/// <remarks>
/// The lead byte selects the sequence length (two to six bytes). Any lead byte
/// that matches none of the multi-byte patterns is passed through unchanged as
/// a one-byte value. Successor bytes are not validated: only their low six bits
/// are used.
/// </remarks>
/// <param name="str">Buffer holding the encoded text.</param>
/// <param name="start">Index of the first byte of the sequence.</param>
/// <param name="ch">Receives the decoded value in <c>Val</c>.</param>
/// <returns>One less than the number of bytes the sequence occupies.</returns>
public static int GetUtf8(byte[] str, int start, MutableInteger ch)
{
    int lead = str[start] & 0xFF; // Convert to unsigned.
    int value;
    int trailing;

    if ((lead & 0xE0) == 0xC0)
    {
        /* 110X XXXX: two bytes */
        value = lead & 0x1F;
        trailing = 1;
    }
    else if ((lead & 0xF0) == 0xE0)
    {
        /* 1110 XXXX: three bytes */
        value = lead & 0x0F;
        trailing = 2;
    }
    else if ((lead & 0xF8) == 0xF0)
    {
        /* 1111 0XXX: four bytes */
        value = lead & 0x07;
        trailing = 3;
    }
    else if ((lead & 0xFC) == 0xF8)
    {
        /* 1111 10XX: five bytes */
        value = lead & 0x03;
        trailing = 4;
    }
    else if ((lead & 0xFE) == 0xFC)
    {
        /* 1111 110X: six bytes */
        value = lead & 0x01;
        trailing = 5;
    }
    else
    {
        /* anything else is handed back as a single byte */
        ch.Val = lead;
        return 0;
    }

    /* successor bytes should have the form 10XX XXXX; fold in six bits each */
    for (int offset = 1; offset <= trailing; ++offset)
    {
        int next = str[start + offset] & 0xFF; // Convert to unsigned.
        value = (value << 6) | (next & 0x3F);
    }

    ch.Val = value;
    return trailing;
}
/// <summary>
/// Sends the bytes <c>textarray[start..end)</c> to the line buffer one character
/// at a time. Multi-byte UTF-8 sequences are decoded first, so PrintChar
/// receives the decoded value rather than the raw bytes.
/// </summary>
/// <param name="fout">Output passed to WrapLine and FlushLine.</param>
/// <param name="mode">Print mode flags forwarded to PrintChar.</param>
/// <param name="indent">Current indent, used for the wrap check and when flushing.</param>
/// <param name="textarray">Buffer holding the UTF-8 text.</param>
/// <param name="start">Index of the first byte to print.</param>
/// <param name="end">Index just past the last byte to print.</param>
private void PrintText(Out fout, int mode, int indent, byte[] textarray, int start, int end)
{
    var decoded = new MutableInteger();
    int pos = start;

    while (pos < end)
    {
        // Wrap before adding more text once the line has reached the limit.
        if (indent + _linelen >= _options.WrapLen)
        {
            WrapLine(fout, indent);
        }

        int ch = textarray[pos] & 0xFF;

        /* look for UTF-8 multibyte character */
        if (ch > 0x7F)
        {
            pos += GetUtf8(textarray, pos, decoded);
            ch = decoded.Val;
        }

        ++pos;

        if (ch == '\n')
        {
            // A newline in the text ends the current output line.
            FlushLine(fout, indent);
        }
        else
        {
            PrintChar(ch, mode);
        }
    }
}
/// <summary>
/// Prints <c>=</c>, the opening delimiter, the attribute value and the closing
/// delimiter into the line buffer.
/// </summary>
/// <remarks>
/// Occurrences of the delimiter inside the value are always written as an
/// entity (<c>&amp;quot;</c> or <c>&amp;#39;</c>). The other quote character is
/// written as an entity only when the QuoteMarks option is set.
/// </remarks>
/// <param name="fout">Output used when the line has to be wrapped or flushed.</param>
/// <param name="indent">Current indent, used for the wrap checks.</param>
/// <param name="val">Attribute value; when <c>null</c> only <c>=</c> and the two delimiters are written.</param>
/// <param name="delim">Delimiter character; <c>0</c> selects a double quote.</param>
/// <param name="wrappable">Whether the value may be wrapped at spaces.</param>
private void PrintAttrValue(Out fout, int indent, string val, int delim, bool wrappable)
{
    var ci = new MutableInteger();
    bool wasinstring = false;
    int mode = (wrappable ? (NORMAL | ATTRIBVALUE) : (PREFORMATTED | ATTRIBVALUE));
    byte[] valueChars = null;

    if (val != null)
    {
        valueChars = Lexer.GetBytes(val);
    }

    /* look for ASP, Tango or PHP instructions for computed attribute value */
    if (valueChars != null && valueChars.Length >= 5 && valueChars[0] == '<')
    {
        // Compare the bytes directly; the first byte is already known to be '<'.
        bool isPhp = valueChars[1] == '?' && valueChars[2] == 'p'
                     && valueChars[3] == 'h' && valueChars[4] == 'p';

        if (valueChars[1] == '%' || valueChars[1] == '@' || isPhp)
        {
            mode |= CDATA;
        }
    }

    if (delim == 0)
    {
        delim = '"';
    }

    AddC('=', _linelen++);

    /* don't wrap after "=" for xml documents */
    if (!_options.XmlOut)
    {
        if (indent + _linelen < _options.WrapLen)
        {
            _wraphere = _linelen;
        }

        if (indent + _linelen >= _options.WrapLen)
        {
            WrapLine(fout, indent);
        }

        if (indent + _linelen < _options.WrapLen)
        {
            _wraphere = _linelen;
        }
        else
        {
            CondFlushLine(fout, indent);
        }
    }

    AddC(delim, _linelen++);

    if (val != null)
    {
        _inString = false;

        int i = 0;
        while (valueChars != null && i < valueChars.Length)
        {
            int c = (valueChars[i]) & 0xFF;

            // Remember the last space as a possible wrap point, together with
            // the string state at that point.
            if (wrappable && c == ' ' && indent + _linelen < _options.WrapLen)
            {
                _wraphere = _linelen;
                wasinstring = _inString;
            }

            if (wrappable && _wraphere > 0 && indent + _linelen >= _options.WrapLen)
            {
                WrapAttrVal(fout, indent, wasinstring);
            }

            if (c == delim)
            {
                // The delimiter itself can never appear literally inside the
                // value, so it is always written as an entity.
                string entity = (c == '"' ? "&quot;" : "&#39;");

                for (int j = 0; j < entity.Length; j++)
                {
                    AddC(entity[j], _linelen++);
                }

                ++i;
                continue;
            }

            if (c == '"')
            {
                if (_options.QuoteMarks)
                {
                    foreach (char e in "&quot;")
                    {
                        AddC(e, _linelen++);
                    }
                }
                else
                {
                    AddC('"', _linelen++);
                }

                // A double quote inside a single-quoted value toggles the string state.
                if (delim == '\'')
                {
                    _inString = !_inString;
                }

                ++i;
                continue;
            }

            if (c == '\'')
            {
                if (_options.QuoteMarks)
                {
                    foreach (char e in "&#39;")
                    {
                        AddC(e, _linelen++);
                    }
                }
                else
                {
                    AddC('\'', _linelen++);
                }

                // A single quote inside a double-quoted value toggles the string state.
                if (delim == '"')
                {
                    _inString = !_inString;
                }

                ++i;
                continue;
            }

            /* look for UTF-8 multibyte character */
            if (c > 0x7F)
            {
                i += GetUtf8(valueChars, i, ci);
                c = ci.Val;
            }

            ++i;

            if (c == '\n')
            {
                FlushLine(fout, indent);
                continue;
            }

            PrintChar(c, mode);
        }
    }

    _inString = false;
    AddC(delim, _linelen++);
}
/// <summary>
/// Maps non-breaking spaces (U+00A0) to regular spaces in every text node of
/// <paramref name="node"/>, its following siblings and all of their descendants.
/// </summary>
/// <remarks>
/// The text is rewritten in place. A non-breaking space takes two bytes in
/// UTF-8 and a regular space takes one, so the rewritten text can be shorter
/// than the original. The node's <c>End</c> is therefore moved back to the
/// write position so that stale bytes past the compacted text no longer belong
/// to the node.
/// </remarks>
/// <param name="node">First node to process; may be <c>null</c>.</param>
private static void NormalizeSpaces(Node node)
{
    while (node != null)
    {
        if (node.Content != null)
        {
            NormalizeSpaces(node.Content);
        }

        if (node.Type == Node.TEXT_NODE)
        {
            var c = new MutableInteger();

            // Write cursor. Assumes PutUtf8 returns the index just past the
            // bytes it wrote, as the assignment below implies - TODO confirm.
            int p = node.Start;

            for (int i = node.Start; i < node.End; ++i)
            {
                c.Val = node.Textarray[i];

                /* look for UTF-8 multibyte character */
                if (c.Val > 0x7F)
                {
                    i += PPrint.GetUtf8(node.Textarray, i, c);
                }

                if (c.Val == 160)
                {
                    c.Val = ' ';
                }

                p = PPrint.PutUtf8(node.Textarray, p, c.Val);
            }

            // Drop the bytes left over after the text was compacted.
            node.End = p;
        }

        node = node.Next;
    }
}
/// <summary>
/// Reads the value that follows an attribute name. Values start with "=" or
/// " = " etc. The ">" at the end of the start tag is not consumed.
/// </summary>
/// <param name="name">Attribute name; used for the URL and script attribute lookups.</param>
/// <param name="foldCase">When true, characters that Map flags as UPPERCASE are shifted by 'a' - 'A'.</param>
/// <param name="isempty">Set to true when an unquoted value is followed by "/>" and the attribute is not a URL attribute.</param>
/// <param name="pdelim">
/// Receives the delimiter: the quote character that was used, '"' when the value
/// was unquoted, or the result of ParseServerInstruction when the value starts with '<'.
/// </param>
/// <returns>
/// <c>null</c> when no '=' follows the name, when a server instruction produced no
/// text, or when an unquoted value is empty; otherwise the value text taken from
/// the lexer buffer.
/// </returns>
public virtual string ParseValue(string name, bool foldCase, MutableBoolean isempty, MutableInteger pdelim)
{
    int len;
    int start;
    short map;
    bool seenGt = false;
    bool munge = true;
    int c;
    int delim, quotewarning;
    string val;

    delim = 0;
    pdelim.Val = '"';

    /*
     Henry Zrepa reports that some folk are using the embed element with
     script attributes where newlines are significant and must be preserved
    */
    if (Options.LiteralAttribs) munge = false;

    /* skip white space before the '=' */
    for (;;)
    {
        c = Input.ReadChar();

        if (c == StreamIn.END_OF_STREAM)
        {
            Input.UngetChar(c);
            break;
        }

        map = Map((char) c);

        if ((map & WHITE) == 0)
        {
            break;
        }
    }

    /*
     c should be '=' if there is a value; other legal possibilities are
     white space, '/' and '>'
    */
    if (c != '=')
    {
        /* no value: put the character back for the caller */
        Input.UngetChar(c);
        return null;
    }

    /* skip white space after '=' */
    for (;;)
    {
        c = Input.ReadChar();

        if (c == StreamIn.END_OF_STREAM)
        {
            Input.UngetChar(c);
            break;
        }

        map = Map((char) c);

        if ((map & WHITE) == 0) break;
    }

    /* check for quote marks */
    if (c == '"' || c == '\'') delim = c;
    else if (c == '<')
    {
        /*
         The value starts with '<': hand over to ParseServerInstruction, then
         rewind Lexsize so the text it added is returned but not kept.
        */
        start = Lexsize;
        AddCharToLexer(c);
        pdelim.Val = ParseServerInstruction();
        len = Lexsize - start;
        Lexsize = start;
        return (len > 0 ? GetString(Lexbuf, start, len) : null);
    }
    else
    {
        /* first character of an unquoted value: re-read it in the loop below */
        Input.UngetChar(c);
    }

    /* and read the value string; check for quote mark if needed */
    quotewarning = 0;
    start = Lexsize;
    c = '\x0000';

    for (;;)
    {
        /* previous character, used to collapse runs of white space */
        int lastc = c;
        c = Input.ReadChar();

        if (c == StreamIn.END_OF_STREAM)
        {
            Report.AttrError(this, Token, null, Report.UNEXPECTED_END_OF_FILE);
            Input.UngetChar(c);
            break;
        }

        /* unquoted value */
        if (delim == (char) 0)
        {
            if (c == '>')
            {
                Input.UngetChar(c);
                break;
            }

            if (c == '"' || c == '\'')
            {
                Report.AttrError(this, Token, null, Report.UNEXPECTED_QUOTEMARK);
                break;
            }

            if (c == '<')
            {
                /* in.UngetChar(c); */
                /* NOTE(review): UNEXPECTED_GT is reported for a '<' character; confirm this is the intended code */
                Report.AttrError(this, Token, null, Report.UNEXPECTED_GT);
                /* break; */
            }

            /*
             For cases like <br clear=all/> need to avoid treating /> as
             part of the attribute value, however care is needed to avoid
             so treating <a href=http://www.acme.com/> in this way, which
             would map the <a> tag to <a href="http://www.acme.com"/>
            */
            if (c == '/')
            {
                /* peek ahead in case of /> */
                c = Input.ReadChar();

                if (c == '>' && !AttributeTable.DefaultAttributeTable.IsUrl(name))
                {
                    isempty.Val = true;
                    Input.UngetChar(c);
                    break;
                }

                /* unget peeked char */
                Input.UngetChar(c);
                c = '/';
            }
        }
        /* delim is '\'' or '"' */
        else
        {
            if (c == delim)
            {
                break;
            }

            /* treat CRLF, CR and LF as single line break */
            if (c == '\r')
            {
                c = Input.ReadChar();
                if (c != '\n')
                {
                    Input.UngetChar(c);
                }

                c = '\n';
            }

            /* count the characters that hint at a missing closing quote */
            if (c == '\n' || c == '<' || c == '>') ++quotewarning;

            if (c == '>') seenGt = true;
        }

        if (c == '&')
        {
            AddCharToLexer(c);
            ParseEntity(0);
            continue;
        }

        /*
         kludge for JavaScript attribute values
         with line continuations in string literals
        */
        if (c == '\\')
        {
            c = Input.ReadChar();

            if (c != '\n')
            {
                Input.UngetChar(c);
                c = '\\';
            }
        }

        map = Map((char) c);

        if ((map & WHITE) != 0)
        {
            /* white space ends an unquoted value */
            if (delim == (char) 0) break;

            if (munge)
            {
                /* replace white space with a single space and skip repeats */
                c = ' ';

                if (lastc == ' ') continue;
            }
        }
        else if (foldCase && (map & UPPERCASE) != 0) c += ('a' - 'A');

        AddCharToLexer(c);
    }

    if (quotewarning > 10 && seenGt && munge)
    {
        /*
         there is almost certainly a missing trailing quote mark
         as we have seen too many newlines, < or > characters.

         an exception is made for Javascript attributes and the
         javascript URL scheme which may legitimately include < and >
        */
        /* NOTE(review): GetString is asked for 11 bytes even when fewer were collected; confirm this is safe */
        if (!AttributeTable.DefaultAttributeTable.IsScript(name) && !(AttributeTable.DefaultAttributeTable.IsUrl(name) && (GetString(Lexbuf, start, 11)).Equals("javascript:"))) Report.Error(this, null, null, Report.SUSPECTED_MISSING_QUOTE);
    }

    /* take the collected text and rewind the lexer buffer */
    len = Lexsize - start;
    Lexsize = start;

    /* a quoted value may be empty; an empty unquoted value yields null */
    if (len > 0 || delim != 0)
    {
        val = GetString(Lexbuf, start, len);
    }
    else
    {
        val = null;
    }

    /* note delimiter if given */
    pdelim.Val = delim != 0 ? delim : '"';

    return val;
}
/// <summary>
/// Parses the attributes of the current tag into a linked list of
/// <c>AttVal</c> entries. Each new entry is linked in front of the entries
/// parsed before it, so the returned head is the attribute parsed last.
/// </summary>
/// <remarks>The pre-existing comment on this method notes that it swallows the closing '>'.</remarks>
/// <param name="isempty">Forwarded to ParseAttribute and ParseValue.</param>
/// <returns>The head of the attribute list, or <c>null</c> when nothing was parsed.</returns>
public virtual AttVal ParseAttrs(MutableBoolean isempty)
{
    var quote = new MutableInteger();
    var aspHolder = new MutableObject();
    var phpHolder = new MutableObject();
    AttVal head = null;

    while (!EndOfInput())
    {
        string attrName = ParseAttribute(isempty, aspHolder, phpHolder);

        if (attrName != null)
        {
            string attrValue = ParseValue(attrName, false, isempty, quote);

            if (!IsValidAttrName(attrName))
            {
                // The attribute is dropped; only the error is recorded.
                Report.AttrError(this, Token, attrValue, Report.BAD_ATTRIBUTE_VALUE);
                continue;
            }

            var entry = new AttVal(head, null, null, null, quote.Val, attrName, attrValue);
            entry.Dict = AttributeTable.DefaultAttributeTable.FindAttribute(entry);
            head = entry;
            continue;
        }

        /* no attribute name: check if attributes are created by ASP markup */
        if (aspHolder.Object != null)
        {
            head = new AttVal(head, null, (Node) aspHolder.Object, null, '\x0000', null, null);
            continue;
        }

        /* ... or by PHP markup */
        if (phpHolder.Object != null)
        {
            head = new AttVal(head, null, null, (Node) phpHolder.Object, '\x0000', null, null);
            continue;
        }

        // Nothing more to read for this tag.
        break;
    }

    return head;
}
/// <summary>
/// Adds informational messages to <paramref name="lexer"/> describing the DOCTYPE
/// that was supplied (only when a doctype node exists) and the HTML version name
/// reported by the lexer.
/// </summary>
/// <param name="lexer">Lexer whose <c>Messages</c> collection receives the messages.</param>
/// <param name="doctype">The DOCTYPE node, or <c>null</c> when there is none.</param>
public static void ReportVersion(Lexer lexer, Node doctype)
{
    string vers = lexer.HtmlVersionName();

    if (doctype != null)
    {
        var docTypeStr = new StringBuilder();
        var cc = new MutableInteger();

        // Number of double quotes seen so far. Only the text between the first
        // and the second quote is collected.
        int state = 0;

        for (int i = doctype.Start; i < doctype.End; ++i)
        {
            // Bytes are unsigned in C#, so this value is always in 0..255.
            int c = doctype.Textarray[i];

            // Look for a UTF-8 multibyte character. The high bit marks a lead
            // byte; a "c < 0" test can never succeed on an unsigned byte.
            if (c > 0x7F)
            {
                i += PPrint.GetUtf8(doctype.Textarray, i, cc);
                c = cc.Val;
            }

            if (c == '"')
            {
                ++state;
            }
            else if (state == 1)
            {
                if (c > 0xFFFF && c <= 0x10FFFF)
                {
                    // Supplementary-plane code points need a surrogate pair;
                    // a plain (char) cast would silently truncate them.
                    docTypeStr.Append(char.ConvertFromUtf32(c));
                }
                else
                {
                    docTypeStr.Append((char)c);
                }
            }
        }

        lexer.Messages.Add(new TidyMessage(lexer, String.Format(GetMessage("doctype_given"), docTypeStr), MessageLevel.Info));
    }

    lexer.Messages.Add(new TidyMessage(lexer, String.Format(GetMessage("report_version"), (vers ?? "HTML proprietary")), MessageLevel.Info));
}