/// <summary>
/// Tries to parse an HTML open tag body (tag name, optional attributes, then <c>&gt;</c> or <c>/&gt;</c>),
/// copying every accepted character into <paramref name="builder"/>.
/// </summary>
/// <param name="text">
/// The slice to read from. Its current character must be the first letter of the tag name
/// (presumably the caller has already consumed the leading <c>&lt;</c> - confirm against the caller).
/// The slice is advanced while parsing, also when the method fails; on success it is positioned
/// just after the closing <c>&gt;</c>.
/// </param>
/// <param name="builder">Receives the consumed characters. It is not cleared on failure.</param>
/// <returns><c>true</c> if a complete open tag was consumed; otherwise <c>false</c>.</returns>
internal static bool TryParseHtmlTagOpenTag(ref StringSlice text, StringBuilder builder)
{
    var c = text.CurrentChar;

    // Parse the tag name: one letter followed by letters, digits or '-'
    if (!c.IsAlpha())
    {
        return false;
    }
    builder.Append(c);

    while (true)
    {
        c = text.NextChar();
        if (c.IsAlphaNumeric() || c == '-')
        {
            builder.Append(c);
        }
        else
        {
            break;
        }
    }

    // True only immediately after an attribute name, i.e. when a '=' is allowed to follow
    bool hasAttribute = false;
    while (true)
    {
        // Skip (and copy) any whitespace; an attribute name is only legal after at least one
        var hasWhitespaces = false;
        while (c.IsWhitespace())
        {
            builder.Append(c);
            c = text.NextChar();
            hasWhitespaces = true;
        }

        switch (c)
        {
            case '\0':
                // End of input before the tag was closed
                return false;

            case '>':
                text.NextChar();
                builder.Append(c);
                return true;

            case '/':
                // Self-closing tag: '/' must be immediately followed by '>'
                builder.Append('/');
                c = text.NextChar();
                if (c != '>')
                {
                    return false;
                }
                text.NextChar();
                builder.Append('>');
                return true;

            case '=':
                // A value specification is only valid right after an attribute name
                if (!hasAttribute)
                {
                    return false;
                }

                builder.Append('=');

                // Skip any whitespace after '='
                c = text.NextChar();
                while (c.IsWhitespace())
                {
                    builder.Append(c);
                    c = text.NextChar();
                }

                if (c == '\'' || c == '\"')
                {
                    // Quoted value: everything up to the matching quote character
                    builder.Append(c);
                    char openingStringChar = c;
                    while (true)
                    {
                        c = text.NextChar();
                        if (c == '\0')
                        {
                            return false;
                        }
                        if (c != openingStringChar)
                        {
                            builder.Append(c);
                        }
                        else
                        {
                            break;
                        }
                    }
                    builder.Append(c);
                    c = text.NextChar();
                }
                else
                {
                    // Unquoted value: runs until whitespace or a special html character.
                    // Any whitespace accepted as an attribute separator above must also end the
                    // value; previously only ' ' and '\n' did, so e.g. a tab after an unquoted
                    // value was swallowed into the value and the tag was then rejected.
                    int matchCount = 0;
                    while (true)
                    {
                        if (c == '\0')
                        {
                            return false;
                        }
                        if (c == ' ' || c == '\n' || c.IsWhitespace() ||
                            c == '"' || c == '\'' || c == '=' || c == '<' || c == '>' || c == '`')
                        {
                            break;
                        }
                        matchCount++;
                        builder.Append(c);
                        c = text.NextChar();
                    }

                    // We need at least one char after '='
                    if (matchCount == 0)
                    {
                        return false;
                    }
                }

                hasAttribute = false;
                continue;

            default:
                // Anything else must be an attribute name, which requires preceding whitespace
                if (!hasWhitespaces)
                {
                    return false;
                }

                // Attribute name: starts with a letter, '_' or ':'
                if (!(c.IsAlpha() || c == '_' || c == ':'))
                {
                    return false;
                }
                builder.Append(c);

                // ...followed by letters, digits, '_', ':', '.' or '-'
                while (true)
                {
                    c = text.NextChar();
                    if (c.IsAlphaNumeric() || c == '_' || c == ':' || c == '.' || c == '-')
                    {
                        builder.Append(c);
                    }
                    else
                    {
                        break;
                    }
                }

                hasAttribute = true;
                break;
        }
    }
}
/// <summary>
/// By-value overload: forwards to the <c>ref StringSlice</c> overload of <c>TryParseHtmlTag</c>,
/// passing a reference to this method's own <paramref name="text"/> parameter.
/// </summary>
/// <param name="text">The slice to parse (presumably a value type, so the caller's slice is left untouched - confirm).</param>
/// <param name="htmlTag">Assigned by the forwarded call.</param>
/// <returns>The result of the forwarded call.</returns>
public static bool TryParseHtmlTag(StringSlice text, out string htmlTag)
    => TryParseHtmlTag(ref text, out htmlTag);
/// <summary>
/// Unescapes a string: removes backslashes placed before escapable symbol characters and
/// replaces HTML entities (named and numeric) with their decoded text.
/// </summary>
/// <param name="text">The string to unescape. The input instance is never modified.</param>
/// <param name="removeBackSlash">
/// If <c>true</c>, backslash escapes are processed in addition to entities; if <c>false</c>, the
/// <c>SearchAmp</c> set is searched instead and backslashes are left as they are.
/// </param>
/// <returns>
/// <see cref="string.Empty"/> when <paramref name="text"/> is null or empty; <paramref name="text"/>
/// itself when it contains none of the searched characters; otherwise the unescaped string.
/// </returns>
public static string Unescape(string text, bool removeBackSlash = true)
{
    // Credits: code from CommonMark.NET
    // Copyright (c) 2014, Kārlis Gaņģis All rights reserved.
    // See license for details: https://github.com/Knagis/CommonMark.NET/blob/master/LICENSE.md
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // searchPos: where the next scan starts; lastPos: start of the text not yet copied to sb
    int searchPos = 0;
    int lastPos = 0;
    char[] search = removeBackSlash ? SearchBackAndAmp : SearchAmp;
    StringBuilder sb = null;

    while ((searchPos = text.IndexOfAny(search, searchPos)) != -1)
    {
        // The builder is only acquired once a candidate character has been found
        if (sb == null)
        {
            sb = StringBuilderCache.Local();
            sb.Length = 0;
        }

        char c = text[searchPos];
        if (removeBackSlash && c == '\\')
        {
            searchPos++;

            // A trailing backslash escapes nothing and is kept verbatim
            if (text.Length == searchPos)
            {
                break;
            }

            c = text[searchPos];
            if (c.IsEscapableSymbol())
            {
                // Copy everything up to, but excluding, the backslash
                sb.Append(text, lastPos, searchPos - lastPos - 1);
                lastPos = searchPos;

                // Step over the escaped character so that it is taken literally. Without this,
                // an escaped '\' or '&' was found again by the next scan and wrongly treated
                // as a new escape / entity start (e.g. "\\*" produced "*" instead of "\*").
                searchPos++;
            }
        }
        else if (c == '&')
        {
            var match = ScanEntity(
                new StringSlice(text, searchPos, text.Length - 1),
                out int numericEntity,
                out int entityNameStart,
                out int entityNameLength);

            if (match == 0)
            {
                // Not an entity: keep the '&' and continue after it
                searchPos++;
            }
            else
            {
                searchPos += match;

                if (entityNameLength > 0)
                {
                    // Named entity: only replaced when the name is known, otherwise left verbatim
                    var namedEntity = new StringSlice(text, entityNameStart, entityNameStart + entityNameLength - 1);
                    var decoded = EntityHelper.DecodeEntity(namedEntity.ToString());
                    if (decoded != null)
                    {
                        sb.Append(text, lastPos, searchPos - match - lastPos);
                        sb.Append(decoded);
                        lastPos = searchPos;
                    }
                }
                else if (numericEntity >= 0)
                {
                    // Numeric entity: always replaced
                    sb.Append(text, lastPos, searchPos - match - lastPos);

                    if (numericEntity == 0)
                    {
                        sb.Append('\0'.EscapeInsecure());
                    }
                    else
                    {
                        var decoded = EntityHelper.DecodeEntity(numericEntity);
                        if (decoded != null)
                        {
                            sb.Append(decoded);
                        }
                        else
                        {
                            // Undecodable code point: emit the Unicode replacement character
                            sb.Append('\uFFFD');
                        }
                    }

                    lastPos = searchPos;
                }
            }
        }
    }

    if (sb == null)
    {
        return text;
    }

    // Copy the remaining tail and hand the builder back empty
    sb.Append(text, lastPos, text.Length - lastPos);
    var result = sb.ToString();
    sb.Length = 0;
    return result;
}
/// <summary>
/// Tries to parse the parenthesised part of an inline link: <c>(</c>, optional whitespace,
/// an optional destination, an optional title separated from the destination by whitespace,
/// optional whitespace and a closing <c>)</c>.
/// </summary>
/// <param name="text">
/// The slice to read from; its current character must be <c>(</c>. It is advanced while parsing
/// (also on failure) and, on success, positioned just after the closing <c>)</c>.
/// </param>
/// <param name="link">The value produced by <c>TryParseUrl</c>; <c>null</c> if no <c>(</c> was found.</param>
/// <param name="title">
/// The value produced by <c>TryParseTitle</c>. On success it is never <c>null</c>
/// (an absent title becomes <see cref="string.Empty"/>).
/// </param>
/// <param name="linkSpan">Start/end positions of the destination, or <c>SourceSpan.Empty</c> when it has no characters.</param>
/// <param name="titleSpan">Start/end positions of the title, or <c>SourceSpan.Empty</c> when absent or without characters.</param>
/// <returns><c>true</c> if a complete inline link tail was parsed; otherwise <c>false</c>.</returns>
public static bool TryParseInlineLink(ref StringSlice text, out string link, out string title, out SourceSpan linkSpan, out SourceSpan titleSpan)
{
    // 1. An inline link consists of a link text followed immediately by a left parenthesis (,
    // 2. optional whitespace,  TODO: specs: is it whitespace or multiple whitespaces?
    // 3. an optional link destination,
    // 4. an optional link title separated from the link destination by whitespace,
    // 5. optional whitespace,  TODO: specs: is it whitespace or multiple whitespaces?
    // 6. and a right parenthesis )
    link = null;
    title = null;
    linkSpan = SourceSpan.Empty;
    titleSpan = SourceSpan.Empty;

    // 1. The opening parenthesis is mandatory
    if (text.CurrentChar != '(')
    {
        return false;
    }
    text.NextChar();

    // 2-3. Optional whitespace, then the destination
    text.TrimStart();
    var pos = text.Start;
    if (!TryParseUrl(ref text, out link))
    {
        return false;
    }

    linkSpan.Start = pos;
    linkSpan.End = text.Start - 1;
    if (linkSpan.End < linkSpan.Start)
    {
        // Nothing was consumed for the destination
        linkSpan = SourceSpan.Empty;
    }

    text.TrimStart(out int spaceCount);

    if (text.CurrentChar != ')')
    {
        // 4. A title is only allowed when separated from the destination by whitespace
        if (spaceCount <= 0)
        {
            return false;
        }

        pos = text.Start;
        if (!TryParseTitle(ref text, out title))
        {
            return false;
        }

        titleSpan.Start = pos;
        titleSpan.End = text.Start - 1;
        if (titleSpan.End < titleSpan.Start)
        {
            titleSpan = SourceSpan.Empty;
        }

        // 5-6. Optional whitespace, then the closing parenthesis
        text.TrimStart();
        if (text.CurrentChar != ')')
        {
            return false;
        }
    }

    // Skip ')'
    text.NextChar();
    title = title ?? string.Empty;
    return true;
}
/// <summary>
/// By-value overload: forwards to the <c>ref StringSlice</c> overload of <c>TryParseInlineLink</c>,
/// passing a reference to this method's own <paramref name="text"/> parameter.
/// </summary>
/// <param name="text">The slice to parse (presumably a value type, so the caller's slice is left untouched - confirm).</param>
/// <param name="link">Assigned by the forwarded call.</param>
/// <param name="title">Assigned by the forwarded call.</param>
/// <param name="linkSpan">Assigned by the forwarded call.</param>
/// <param name="titleSpan">Assigned by the forwarded call.</param>
/// <returns>The result of the forwarded call.</returns>
public static bool TryParseInlineLink(StringSlice text, out string link, out string title, out SourceSpan linkSpan, out SourceSpan titleSpan)
    => TryParseInlineLink(ref text, out link, out title, out linkSpan, out titleSpan);
/// <summary>
/// By-value overload without source spans: forwards to the <c>ref StringSlice</c> overload of
/// <c>TryParseInlineLink</c> and discards the two span results.
/// </summary>
/// <param name="text">The slice to parse (presumably a value type, so the caller's slice is left untouched - confirm).</param>
/// <param name="link">Assigned by the forwarded call.</param>
/// <param name="title">Assigned by the forwarded call.</param>
/// <returns>The result of the forwarded call.</returns>
public static bool TryParseInlineLink(StringSlice text, out string link, out string title)
    => TryParseInlineLink(ref text, out link, out title, out _, out _);
/// <summary>
/// By-value overload: forwards to the <c>ref StringSlice</c> overload of <c>TryParseAutolink</c>,
/// passing a reference to this method's own <paramref name="text"/> parameter.
/// </summary>
/// <param name="text">The slice to parse (presumably a value type, so the caller's slice is left untouched - confirm).</param>
/// <param name="link">Assigned by the forwarded call.</param>
/// <param name="isEmail">Assigned by the forwarded call.</param>
/// <returns>The result of the forwarded call.</returns>
public static bool TryParseAutolink(StringSlice text, out string link, out bool isEmail)
    => TryParseAutolink(ref text, out link, out isEmail);
/// <summary>
/// Tries to parse an autolink of the form <c>&lt;scheme:rest&gt;</c> (absolute URI) or
/// <c>&lt;user@domain&gt;</c> (email address).
/// </summary>
/// <param name="text">
/// The slice to read from; its current character must be <c>&lt;</c>. It is advanced while
/// parsing (also on failure) and, on success, positioned just after the closing <c>&gt;</c>.
/// </param>
/// <param name="link">On success, the text between <c>&lt;</c> and <c>&gt;</c>; otherwise <c>null</c>.</param>
/// <param name="isEmail">
/// <c>true</c> once the input has been identified as an email address. Note that it may be
/// <c>true</c> even when the method returns <c>false</c> (invalid domain part).
/// </param>
/// <returns><c>true</c> if a complete autolink was parsed; otherwise <c>false</c>.</returns>
public static bool TryParseAutolink(ref StringSlice text, out string link, out bool isEmail)
{
    link = null;
    isEmail = false;

    var c = text.CurrentChar;
    if (c != '<')
    {
        return false;
    }

    // An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
    // followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
    // If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).

    // a scheme is any sequence of 2-32 characters
    // beginning with an ASCII letter
    // and followed by any combination of ASCII letters, digits, or the symbols plus ("+"), period ("."), or hyphen ("-").

    // An email address, for these purposes, is anything that matches the non-normative regex from the HTML5 spec:
    // /^
    // [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
    // @
    // [a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
    // (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/

    c = text.NextChar();

    // -1: scan email
    //  0: scan uri or email
    // +1: scan uri
    int state = 0;

    if (!c.IsAlpha())
    {
        // A scheme must start with a letter, so only an email user name remains possible
        if (c.IsDigit() || CharHelper.IsEmailUsernameSpecialChar(c))
        {
            state = -1;
        }
        else
        {
            return false;
        }
    }

    var builder = StringBuilderCache.Local();

    // ****************************
    // 1. Scan scheme or user email
    // ****************************
    builder.Append(c);
    while (true)
    {
        c = text.NextChar();

        // Chars valid for both scheme and email
        var isSpecialChar = c == '+' || c == '.' || c == '-';
        var isValidChar = c.IsAlphaNumeric() || isSpecialChar;

        // state is 0 or -1 for the whole loop (it only becomes +1 right before leaving it),
        // so email user-name characters are always acceptable here.
        if (CharHelper.IsEmailUsernameSpecialChar(c))
        {
            isValidChar = true;
            // If this is not a special char valid also for url scheme, then we have an email
            if (!isSpecialChar)
            {
                state = -1;
            }
        }

        if (isValidChar)
        {
            builder.Append(c);
        }
        else if (c == ':')
        {
            // A scheme is 2-32 characters long. The length can only be judged here: until the
            // ':' is seen the text may still be an (arbitrarily long) email user name. The former
            // upper-bound test was guarded by a state that is never reached inside this loop, and
            // the lower bound wrongly rejected two-character schemes.
            if (state < 0 || builder.Length < 2 || builder.Length > 32)
            {
                builder.Length = 0;
                return false;
            }
            state = 1;
            break;
        }
        else if (c == '@')
        {
            state = -1;
            break;
        }
        else
        {
            builder.Length = 0;
            return false;
        }
    }

    // append ':' or '@'
    builder.Append(c);

    if (state < 0)
    {
        isEmail = true;

        // scan an email domain
        // [a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
        // (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
        int domainCharCount = 0; // characters in the current label
        char pc = '\0';          // previously appended domain character
        while (true)
        {
            c = text.NextChar();
            if (c == '>')
            {
                // The last label must be non-empty and must not end with '-'. Testing the
                // previous character (instead of a sticky flag) keeps hosts such as
                // "my-host" valid: the flag used to stay set after any '-' in the label.
                if (domainCharCount == 0 || pc == '-')
                {
                    break;
                }

                text.NextChar();
                link = builder.ToString();
                builder.Length = 0;
                return true;
            }

            if (c.IsAlphaNumeric() || (c == '-' && domainCharCount > 0))
            {
                // A label is at most 63 characters and cannot start with '-'
                domainCharCount++;
                if (domainCharCount > 63)
                {
                    break;
                }
            }
            else if (c == '.')
            {
                // A '.' must close a non-empty label that does not end with '-'
                // (this also rejects a leading '.' and "..")
                if (domainCharCount == 0 || pc == '-')
                {
                    break;
                }
                domainCharCount = 0;
            }
            else
            {
                break;
            }

            builder.Append(c);
            pc = c;
        }
    }
    else
    {
        // scan an uri
        // An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
        // followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
        // If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).
        while (true)
        {
            c = text.NextChar();
            if (c == '\0')
            {
                break;
            }

            if (c == '>')
            {
                text.NextChar();
                link = builder.ToString();
                builder.Length = 0;
                return true;
            }

            if (c <= 127)
            {
                // Reject ASCII whitespace/control characters and '<' ('>' was handled above)
                if (c > ' ' && c != '<')
                {
                    builder.Append(c);
                }
                else
                {
                    break;
                }
            }
            else if (!c.IsSpaceOrPunctuation())
            {
                builder.Append(c);
            }
            else
            {
                break;
            }
        }
    }

    // Failure: hand the shared builder back empty
    builder.Length = 0;
    return false;
}