/// <summary>
/// Parses the body of an HTML declaration, starting on the first character after <c>&lt;!</c>.
/// Every consumed character is copied to <paramref name="builder"/>.
/// </summary>
/// <param name="text">The slice being read; it is advanced as characters are consumed.</param>
/// <param name="builder">Receives the raw text of the declaration.</param>
/// <returns><c>true</c> when a terminating <c>&gt;</c> was found; <c>false</c> otherwise.</returns>
private static bool TryParseHtmlTagDeclaration(ref StringSlice text, StringBuilder builder)
{
    // Regexp: "\\![A-Z]+\\s+[^>\\x00]*>"
    var current = text.CurrentChar;

    // The declaration name needs at least one uppercase letter.
    if (!current.IsAlphaUpper())
    {
        return false;
    }

    do
    {
        builder.Append(current);
        current = text.NextChar();
    }
    while (current.IsAlphaUpper());

    // The name must be followed by whitespace.
    if (!current.IsWhitespace())
    {
        return false;
    }

    // Copy everything up to the closing '>'; a '\0' before it means the input ended.
    do
    {
        builder.Append(current);
        current = text.NextChar();
        if (current == '\0')
        {
            return false;
        }
    }
    while (current != '>');

    text.NextChar();
    builder.Append('>');
    return true;
}
/// <summary>
/// Parses a CDATA section body (<c>[CDATA[ ... ]]&gt;</c>) and copies it to <paramref name="builder"/>.
/// </summary>
/// <param name="text">The slice being read; it is advanced as characters are consumed.</param>
/// <param name="builder">Receives the raw text of the section.</param>
/// <returns><c>true</c> when the <c>]]&gt;</c> terminator was found; <c>false</c> otherwise.</returns>
private static bool TryParseHtmlTagCData(ref StringSlice text, ref ValueStringBuilder builder)
{
    if (!text.Match("[CDATA["))
    {
        return false;
    }

    builder.Append("[CDATA[");

    // Presumably leaves the slice on the last '[' of the marker, so that the
    // first NextChar() below yields the first content character - TODO confirm.
    text.Start += 6;

    var previous = '\0';
    while (true)
    {
        var current = text.NextChar();
        if (current == '\0')
        {
            return false;
        }

        builder.Append(current);

        // "]]" immediately followed by '>' terminates the section. The '>' is
        // only peeked, so a run such as "]]]>" is still recognised.
        var isTerminator = current == ']' && previous == ']' && text.PeekChar() == '>';
        if (isTerminator)
        {
            text.SkipChar();
            text.SkipChar();
            builder.Append('>');
            return true;
        }

        previous = current;
    }
}
/// <summary>
/// Parses an HTML closing tag, starting on the <c>/</c> that follows <c>&lt;</c>.
/// Every accepted character is copied to <paramref name="builder"/>.
/// </summary>
/// <param name="text">The slice being read; it is advanced as characters are consumed.</param>
/// <param name="builder">Receives the raw text of the tag.</param>
/// <returns><c>true</c> when a well-formed closing tag ending in <c>&gt;</c> was read.</returns>
internal static bool TryParseHtmlCloseTag(ref StringSlice text, StringBuilder builder)
{
    // </[A-Za-z][A-Za-z0-9]+\s*>
    // NOTE(review): only the plain space character is accepted before '>',
    // although the pattern above says \s* - confirm whether that is intended.
    builder.Append('/');

    var current = text.NextChar();
    if (!current.IsAlpha())
    {
        return false;
    }
    builder.Append(current);

    // Once a space has been seen, only further spaces may precede '>'.
    var inTrailingSpaces = false;
    for (current = text.NextChar(); current != '>'; current = text.NextChar())
    {
        if (current == ' ')
        {
            inTrailingSpaces = true;
        }
        else if (inTrailingSpaces || !(current.IsAlphaNumeric() || current == '-'))
        {
            return false;
        }

        builder.Append(current);
    }

    text.NextChar();
    builder.Append('>');
    return true;
}
/// <summary>
/// Tries to parse an inline HTML construct starting at <c>&lt;</c> and dispatches to the
/// parser matching the character(s) that follow it.
/// </summary>
/// <param name="text">The slice being read; it is advanced as characters are consumed.</param>
/// <param name="builder">Receives the raw text of the construct. Must not be null.</param>
/// <returns><c>true</c> when a complete construct was parsed; <c>false</c> otherwise.</returns>
public static bool TryParseHtmlTag(ref StringSlice text, StringBuilder builder)
{
    if (builder is null)
    {
        ThrowHelper.ArgumentNullException(nameof(builder));
    }

    if (text.CurrentChar != '<')
    {
        return false;
    }

    var marker = text.NextChar();
    builder.Append('<');

    if (marker == '/')
    {
        return TryParseHtmlCloseTag(ref text, builder);
    }

    if (marker == '?')
    {
        return TryParseHtmlTagProcessingInstruction(ref text, builder);
    }

    if (marker != '!')
    {
        return TryParseHtmlTagOpenTag(ref text, builder);
    }

    // "<!" introduces a comment, a CDATA section or a declaration.
    builder.Append('!');
    switch (text.NextChar())
    {
        case '-':
            return TryParseHtmlTagHtmlComment(ref text, builder);
        case '[':
            return TryParseHtmlTagCData(ref text, builder);
        default:
            return TryParseHtmlTagDeclaration(ref text, builder);
    }
}
/// <summary>
/// Parses an HTML comment, starting on the first <c>-</c> that follows <c>&lt;!</c>.
/// Every accepted character is copied to <paramref name="builder"/>.
/// </summary>
/// <param name="text">The slice being read; it is advanced as characters are consumed.</param>
/// <param name="builder">Receives the raw text of the comment.</param>
/// <returns><c>true</c> when the comment is closed by <c>--&gt;</c>; <c>false</c> otherwise.</returns>
private static bool TryParseHtmlTagHtmlComment(ref StringSlice text, StringBuilder builder)
{
    // The opening marker needs a second '-'.
    if (text.NextChar() != '-')
    {
        return false;
    }
    builder.Append("--");

    // A comment body may not start with '>'.
    // NOTE(review): a body starting with "->" is not rejected here - confirm
    // against the CommonMark version this parser targets.
    if (text.PeekChar() == '>')
    {
        return false;
    }

    var trailingHyphens = 0;
    while (true)
    {
        var current = text.NextChar();
        if (current == '\0')
        {
            return false;
        }

        // After "--" the only valid continuation is the closing '>'.
        if (trailingHyphens == 2)
        {
            if (current != '>')
            {
                return false;
            }

            builder.Append('>');
            text.NextChar();
            return true;
        }

        if (current == '-')
        {
            trailingHyphens++;
        }
        else
        {
            trailingHyphens = 0;
        }

        builder.Append(current);
    }
}
/// <summary>
/// Parses a CDATA section body (<c>[CDATA[ ... ]]&gt;</c>), starting on the <c>[</c> that
/// follows <c>&lt;!</c>, and copies it to <paramref name="builder"/>.
/// </summary>
/// <param name="text">The slice being read; it is advanced as characters are consumed.</param>
/// <param name="builder">Receives the raw text of the section.</param>
/// <returns><c>true</c> when the <c>]]&gt;</c> terminator was found; <c>false</c> otherwise.</returns>
private static bool TryParseHtmlTagCData(ref StringSlice text, StringBuilder builder)
{
    builder.Append('[');

    var c = text.NextChar();
    if (c == 'C' &&
        text.NextChar() == 'D' &&
        text.NextChar() == 'A' &&
        text.NextChar() == 'T' &&
        text.NextChar() == 'A' &&
        (c = text.NextChar()) == '[')
    {
        builder.Append("CDATA[");

        while (true)
        {
            var pc = c;
            c = text.NextChar();
            if (c == '\0')
            {
                return false;
            }

            builder.Append(c);

            // The section ends at the first "]]>". The '>' is only peeked, not
            // consumed, so that a longer run of brackets such as "]]]>" keeps
            // its "previous char was ']'" state and is still terminated
            // correctly (content "]" followed by "]]>").
            if (c == ']' && pc == ']' && text.PeekChar() == '>')
            {
                text.NextChar(); // move onto '>'
                text.NextChar(); // move past '>'
                builder.Append('>');
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Tries to parse an inline HTML construct starting at <c>&lt;</c> and dispatches to the
/// parser matching the character(s) that follow it.
/// </summary>
/// <param name="text">The slice being read; it is advanced as characters are consumed.</param>
/// <param name="builder">Receives the raw text of the construct.</param>
/// <returns><c>true</c> when a complete construct was parsed; <c>false</c> otherwise.</returns>
private static bool TryParseHtmlTag(ref StringSlice text, ref ValueStringBuilder builder)
{
    if (text.CurrentChar != '<')
    {
        return false;
    }

    var marker = text.NextChar();
    builder.Append('<');

    if (marker == '/')
    {
        return TryParseHtmlCloseTag(ref text, ref builder);
    }

    if (marker == '?')
    {
        return TryParseHtmlTagProcessingInstruction(ref text, ref builder);
    }

    if (marker != '!')
    {
        return TryParseHtmlTagOpenTag(ref text, ref builder);
    }

    // "<!" introduces a comment, a CDATA section or a declaration.
    builder.Append('!');
    switch (text.NextChar())
    {
        case '-':
            return TryParseHtmlTagHtmlComment(ref text, ref builder);
        case '[':
            return TryParseHtmlTagCData(ref text, ref builder);
        default:
            return TryParseHtmlTagDeclaration(ref text, ref builder);
    }
}
/// <summary>
/// Parses a processing instruction, starting on the <c>?</c> that follows <c>&lt;</c>,
/// and copies it to <paramref name="builder"/>.
/// </summary>
/// <param name="text">The slice being read; it is advanced as characters are consumed.</param>
/// <param name="builder">Receives the raw text of the instruction.</param>
/// <returns><c>true</c> when the closing <c>?&gt;</c> was found; <c>false</c> otherwise.</returns>
private static bool TryParseHtmlTagProcessingInstruction(ref StringSlice text, StringBuilder builder)
{
    builder.Append('?');

    // True when the character read in the previous iteration was '?'. The
    // opening '?' itself does not count, so "<?>" is not a complete instruction.
    var afterQuestionMark = false;
    while (true)
    {
        var current = text.NextChar();
        if (current == '\0')
        {
            return false;
        }

        if (afterQuestionMark && current == '>')
        {
            builder.Append('>');
            text.NextChar();
            return true;
        }

        afterQuestionMark = current == '?';
        builder.Append(current);
    }
}
/// <summary>
/// Tries to parse the <c>(destination "title")</c> part of an inline link.
/// </summary>
/// <param name="text">The slice being read, positioned on the opening parenthesis.</param>
/// <param name="link">The parsed link destination.</param>
/// <param name="title">The parsed title; an empty string when the link is valid but has no title.</param>
/// <param name="linkSpan">Source positions of the destination, or <c>SourceSpan.Empty</c>.</param>
/// <param name="titleSpan">Source positions of the title, or <c>SourceSpan.Empty</c>.</param>
/// <returns><c>true</c> when a closing parenthesis was reached and consumed.</returns>
public static bool TryParseInlineLink(ref StringSlice text, out string link, out string title, out SourceSpan linkSpan, out SourceSpan titleSpan)
{
    // 1. An inline link consists of a link text followed immediately by a left parenthesis (,
    // 2. optional whitespace,  TODO: specs: is it whitespace or multiple whitespaces?
    // 3. an optional link destination,
    // 4. an optional link title separated from the link destination by whitespace,
    // 5. optional whitespace,  TODO: specs: is it whitespace or multiple whitespaces?
    // 6. and a right parenthesis )
    link = null;
    title = null;
    linkSpan = SourceSpan.Empty;
    titleSpan = SourceSpan.Empty;

    // 1. The left parenthesis is mandatory.
    if (text.CurrentChar != '(')
    {
        return false;
    }

    text.NextChar();
    text.TrimStart();

    // 3. Link destination.
    var destinationStart = text.Start;
    if (!TryParseUrl(ref text, out link))
    {
        return false;
    }

    linkSpan.Start = destinationStart;
    linkSpan.End = text.Start - 1;
    if (linkSpan.End < linkSpan.Start)
    {
        linkSpan = SourceSpan.Empty;
    }

    text.TrimStart(out int skippedSpaces);

    var closed = text.CurrentChar == ')';

    // 4. A title is only attempted when whitespace separates it from the destination.
    if (!closed && skippedSpaces > 0)
    {
        var titleStart = text.Start;
        if (TryParseTitle(ref text, out title))
        {
            titleSpan.Start = titleStart;
            titleSpan.End = text.Start - 1;
            if (titleSpan.End < titleSpan.Start)
            {
                titleSpan = SourceSpan.Empty;
            }

            text.TrimStart();
            closed = text.CurrentChar == ')';
        }
    }

    if (!closed)
    {
        return false;
    }

    // Skip ')'
    text.NextChar();
    if (title == null)
    {
        title = String.Empty;
    }

    return true;
}
/// <summary>
/// Tries to parse an autolink of the form <c>&lt;scheme:...&gt;</c> or <c>&lt;user@domain&gt;</c>.
/// </summary>
/// <param name="text">The slice being read, positioned on the opening <c>&lt;</c>.</param>
/// <param name="link">The text between the angle brackets when parsing succeeds; otherwise <c>null</c>.</param>
/// <param name="isEmail"><c>true</c> once the text has been recognised as an email address
/// (it is set as soon as the <c>@</c> is reached, even if the domain later fails to parse).</param>
/// <returns><c>true</c> when a complete autolink, including the closing <c>&gt;</c>, was consumed.</returns>
public static bool TryParseAutolink(ref StringSlice text, out string link, out bool isEmail)
{
    link = null;
    isEmail = false;

    var c = text.CurrentChar;
    if (c != '<')
    {
        return false;
    }

    // An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
    // followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
    // If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).
    // A URI that would end with a full stop (.) is treated instead as ending immediately before the full stop.

    // a scheme is any sequence of 2-32 characters
    // beginning with an ASCII letter
    // and followed by any combination of ASCII letters, digits, or the symbols plus ("+"), period ("."), or hyphen ("-").

    // An email address, for these purposes, is anything that matches the non-normative regex from the HTML5 spec:
    // /^
    // [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
    // @
    // [a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
    // (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/

    c = text.NextChar();

    // -1: scan email
    //  0: scan uri or email
    // +1: scan uri
    int state = 0;

    if (!c.IsAlpha())
    {
        // Not a letter, so this cannot be a scheme; it may still start an email user name.
        if (c.IsDigit() || CharHelper.IsEmailUsernameSpecialChar(c))
        {
            state = -1;
        }
        else
        {
            return false;
        }
    }

    var builder = StringBuilderCache.Local();

    // ****************************
    // 1. Scan scheme or user email
    // ****************************
    builder.Append(c);
    while (true)
    {
        c = text.NextChar();

        // Chars valid for both scheme and email
        var isSpecialChar = c == '+' || c == '.' || c == '-';
        var isValidChar = c.IsAlphaNumeric() || isSpecialChar;

        if (state <= 0 && CharHelper.IsEmailUsernameSpecialChar(c))
        {
            isValidChar = true;
            // If this is not a special char valid also for url scheme, then we have an email
            if (!isSpecialChar)
            {
                state = -1;
            }
        }

        if (isValidChar)
        {
            builder.Append(c);
        }
        else if (c == ':')
        {
            // At this point the builder holds exactly the scheme. A scheme is
            // 2 to 32 characters long; the limit is checked here (and not while
            // scanning) because the same characters may still turn out to be
            // an email user name, which has no such limit.
            if (state < 0 || builder.Length < 2 || builder.Length > 32)
            {
                builder.Length = 0;
                return false;
            }
            state = 1;
            break;
        }
        else if (c == '@')
        {
            if (state > 0)
            {
                builder.Length = 0;
                return false;
            }
            state = -1;
            break;
        }
        else
        {
            builder.Length = 0;
            return false;
        }
    }

    // append ':' or '@'
    builder.Append(c);

    if (state < 0)
    {
        isEmail = true;

        // scan an email domain
        // [a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
        // (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/

        // Length of the label currently being read (reset after each '.').
        int labelLength = 0;
        // True when the last character read was '-'; a label may contain
        // hyphens but must not end with one.
        bool lastWasHyphen = false;

        while (true)
        {
            c = text.NextChar();

            if (c == '>')
            {
                // The final label must be non-empty and must not end with '-'.
                if (labelLength == 0 || lastWasHyphen)
                {
                    break;
                }

                text.NextChar();
                link = builder.ToString();
                builder.Length = 0;
                return true;
            }

            if (c.IsAlphaNumeric())
            {
                lastWasHyphen = false;
                labelLength++;
            }
            else if (c == '-' && labelLength > 0)
            {
                // A hyphen is only allowed after the first character of a label.
                lastWasHyphen = true;
                labelLength++;
            }
            else if (c == '.')
            {
                // A dot closes a label: that label must be non-empty (no leading
                // or doubled dots) and must not end with '-'.
                if (labelLength == 0 || lastWasHyphen)
                {
                    break;
                }
                labelLength = 0;
            }
            else
            {
                break;
            }

            // Each label is at most 63 characters long.
            if (labelLength > 63)
            {
                break;
            }

            builder.Append(c);
        }
    }
    else
    {
        // scan an uri
        // An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
        // followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
        // If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).
        while (true)
        {
            c = text.NextChar();
            if (c == '\0')
            {
                break;
            }

            if (c == '>')
            {
                text.NextChar();
                link = builder.ToString();
                builder.Length = 0;
                return true;
            }

            // Only printable ASCII other than '<' is accepted inside the URI.
            if (c > ' ' && c < 127 && c != '<')
            {
                builder.Append(c);
            }
            else
            {
                break;
            }
        }
    }

    // Parsing failed: reset the shared builder before returning.
    builder.Length = 0;
    return false;
}
/// <summary>
/// Parses an HTML open tag (tag name, attributes, optional <c>/</c>, closing <c>&gt;</c>),
/// starting on the first character of the tag name. Every accepted character is copied to
/// <paramref name="builder"/>.
/// </summary>
/// <param name="text">The slice being read; it is advanced as characters are consumed.</param>
/// <param name="builder">Receives the raw text of the tag.</param>
/// <returns><c>true</c> when the tag was closed by <c>&gt;</c> or <c>/&gt;</c>; <c>false</c> otherwise.</returns>
internal static bool TryParseHtmlTagOpenTag(ref StringSlice text, StringBuilder builder)
{
    var current = text.CurrentChar;

    // Tag name: a letter followed by letters, digits or '-'.
    if (!current.IsAlpha())
    {
        return false;
    }

    do
    {
        builder.Append(current);
        current = text.NextChar();
    }
    while (current.IsAlphaNumeric() || current == '-');

    // True right after an attribute name, i.e. when an '=' may follow.
    var attributeNamePending = false;
    while (true)
    {
        // Skip (and copy) any whitespace, remembering whether there was some.
        var sawWhitespace = false;
        for (; current.IsWhitespace(); current = text.NextChar())
        {
            builder.Append(current);
            sawWhitespace = true;
        }

        if (current == '\0')
        {
            return false;
        }

        if (current == '>')
        {
            text.NextChar();
            builder.Append('>');
            return true;
        }

        if (current == '/')
        {
            // Self-closing tag: '/' must be followed immediately by '>'.
            builder.Append('/');
            if (text.NextChar() != '>')
            {
                return false;
            }

            text.NextChar();
            builder.Append('>');
            return true;
        }

        if (current == '=')
        {
            // A value is only valid directly after an attribute name.
            if (!attributeNamePending)
            {
                return false;
            }
            builder.Append('=');

            // Skip any spaces after '='.
            current = text.NextChar();
            while (current.IsWhitespace())
            {
                builder.Append(current);
                current = text.NextChar();
            }

            if (current == '\'' || current == '"')
            {
                // Quoted value: copy everything up to the matching quote.
                var quote = current;
                builder.Append(quote);

                current = text.NextChar();
                while (current != quote)
                {
                    if (current == '\0')
                    {
                        return false;
                    }

                    builder.Append(current);
                    current = text.NextChar();
                }

                builder.Append(quote);
                current = text.NextChar();
            }
            else
            {
                // Unquoted value: read until a space or a special html character.
                var valueLength = 0;
                while (true)
                {
                    if (current == '\0')
                    {
                        return false;
                    }

                    var endsValue = current == ' ' || current == '\n' || current == '"' || current == '\''
                                    || current == '=' || current == '<' || current == '>' || current == '`';
                    if (endsValue)
                    {
                        break;
                    }

                    valueLength++;
                    builder.Append(current);
                    current = text.NextChar();
                }

                // We need at least one char after '='.
                if (valueLength == 0)
                {
                    return false;
                }
            }

            attributeNamePending = false;
            continue;
        }

        // Anything else must be an attribute name, separated by whitespace
        // from whatever came before it.
        if (!sawWhitespace)
        {
            return false;
        }

        if (!current.IsAlpha() && current != '_' && current != ':')
        {
            return false;
        }

        do
        {
            builder.Append(current);
            current = text.NextChar();
        }
        while (current.IsAlphaNumeric() || current == '_' || current == ':' || current == '.' || current == '-');

        attributeNamePending = true;
    }
}