/// <summary>
/// Scans a link destination (the URL of a link or reference) beginning at <paramref name="pos"/>
/// and reports how many characters it occupies.
/// After leading whitespace is skipped, two forms are recognized:
/// an angle-bracketed form (<c>&lt;...&gt;</c>) that may not contain a space, a newline or an
/// unescaped <c>&lt;</c>; and a bare form that stops at the first space or control character
/// or at an unmatched <c>)</c>, and that tolerates one non-nested pair of parentheses.
/// A bracket or parenthesis preceded by an unescaped backslash is treated as an ordinary character.
/// </summary>
/// <param name="s">The text being scanned.</param>
/// <param name="pos">Index of the first character to examine.</param>
/// <param name="sourceLength">Exclusive end of the region to scan; the last character examined is at <c>sourceLength - 1</c>.</param>
/// <returns>
/// The number of characters matched, counted from <paramref name="pos"/> (for the bracketed form
/// this includes the closing <c>&gt;</c>). Returns 0 when fewer than two characters are available,
/// when the bracketed form is not terminated, or when the bare form ends inside parentheses.
/// </returns>
public static int scan_link_url(string s, int pos, int sourceLength)
{
    /* Original re2c rules, kept for reference:
     *
     *!re2c
     *   [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
     *   [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
     *   .? { return 0; }
     */

    // Nothing to scan unless at least two characters are available.
    if (pos + 1 >= sourceLength)
    {
        return 0;
    }

    int last = sourceLength - 1;
    int index = pos;
    char current = s[index];

    // True when the previous character was a backslash that itself was not escaped.
    bool escaped = false;

    // Skip leading whitespace; the skipped characters still count towards the result
    // because every length below is measured from 'pos'.
    ScannerCharacterMatcher.MatchWhitespaces(s, ref current, ref index, last);

    if (current == '<')
    {
        // A lone '<' at the very end can never be closed.
        if (index == last)
        {
            return 0;
        }

        for (current = s[++index]; index <= last; current = s[++index])
        {
            // Spaces and newlines are rejected even when preceded by a backslash.
            if (current == ' ' || current == '\n')
            {
                return 0;
            }

            if (!escaped)
            {
                if (current == '<')
                {
                    return 0;
                }

                if (current == '>')
                {
                    return index - pos + 1;
                }
            }

            // Ran out of input before the closing '>' was found.
            if (index == last)
            {
                return 0;
            }

            escaped = !escaped && current == '\\';
        }

        return 0;
    }

    // Bare form: only a single level of parentheses is tracked.
    bool insideParens = false;

    for (; index <= last; current = s[++index])
    {
        if (!escaped)
        {
            if (current == '(')
            {
                // Nested parentheses are not accepted.
                if (insideParens)
                {
                    return 0;
                }

                insideParens = true;
            }
            else if (current == ')')
            {
                // An unmatched ')' terminates the URL and is not part of it.
                if (!insideParens)
                {
                    return index - pos;
                }

                insideParens = false;
            }
        }

        // Space or any control character ends the URL (the character itself is excluded).
        if (current <= 0x20)
        {
            return insideParens ? 0 : index - pos;
        }

        // Reached the final character: it belongs to the URL.
        if (index == last)
        {
            return insideParens ? 0 : index - pos + 1;
        }

        escaped = !escaped && current == '\\';
    }

    return 0;
}
/// <summary>
/// Scans an HTML open tag body starting at <paramref name="pos"/>: a tag name, then any number of
/// whitespace-separated attributes (each optionally followed by <c>=</c> and a quoted or unquoted
/// value), then either <c>&gt;</c> or <c>/&gt;</c>.
/// </summary>
/// <param name="s">The text being scanned.</param>
/// <param name="pos">
/// Index of the first character of the tag name.
/// NOTE(review): presumably the caller has already consumed the opening '&lt;' - confirm against callers.
/// </param>
/// <param name="sourceLength">Exclusive end of the region to scan; the last character examined is at <c>sourceLength - 1</c>.</param>
/// <returns>
/// The number of characters from <paramref name="pos"/> up to and including the closing <c>&gt;</c>,
/// or 0 if the text at <paramref name="pos"/> is not a complete, well-formed open tag.
/// </returns>
private static int _scanHtmlTagOpenTag(string s, int pos, int sourceLength)
{
    var lastPosition = sourceLength - 1;

    // the minimum length valid tag is "a>"
    if (lastPosition < pos + 1)
    {
        return 0;
    }

    // index of the character currently being examined
    var currentPosition = pos;

    // the character at currentPosition; kept in sync by the matcher helpers through 'ref'
    char currentChar = s[currentPosition];

    // true after an attribute name has been read and until its value (if any) has been read;
    // an '=' is only accepted while this is set
    bool hadAttribute = false;

    // the tag name must start with an ASCII letter
    if (!ScannerCharacterMatcher.MatchAsciiLetter(s, ref currentChar, ref currentPosition, lastPosition))
    {
        return 0;
    }

    // move past any other characters that make up the tag name
    ScannerCharacterMatcher.MatchHtmlTagNameCharacter(s, ref currentChar, ref currentPosition, lastPosition);

    // loop until the end of the scannable region is reached or the tag is closed
    while (currentPosition <= lastPosition)
    {
        // move past any whitespace and remember whether there was any,
        // because an attribute name is only accepted after whitespace
        bool hadWhitespace = ScannerCharacterMatcher.MatchWhitespaces(s, ref currentChar, ref currentPosition, lastPosition);

        // check if the end of the tag has been reached
        if (currentChar == '>')
        {
            return currentPosition - pos + 1;
        }

        // a '/' is only valid as part of the self-closing "/>" terminator
        if (currentChar == '/')
        {
            if (currentPosition == lastPosition)
            {
                return 0;
            }

            currentChar = s[++currentPosition];
            return (currentChar == '>') ? currentPosition - pos + 1 : 0;
        }

        // check if arrived at the attribute value
        if (currentChar == '=')
        {
            if (!hadAttribute || currentPosition == lastPosition)
            {
                return 0;
            }

            // move past the '=' symbol and any whitespace
            currentChar = s[++currentPosition];
            ScannerCharacterMatcher.MatchWhitespaces(s, ref currentChar, ref currentPosition, lastPosition);

            if (currentChar == '\'' || currentChar == '\"')
            {
                char quote = currentChar;

                // An opening quote on the last scannable character can never be closed.
                // Without this guard the advance below would index past lastPosition,
                // throwing IndexOutOfRangeException when sourceLength equals s.Length.
                if (currentPosition >= lastPosition)
                {
                    return 0;
                }

                // move past the opening quote and everything up to the matching quote
                currentChar = s[++currentPosition];
                ScannerCharacterMatcher.MatchAnythingExcept(s, ref currentChar, ref currentPosition, lastPosition, quote);

                // the value must be closed, and something (at least '>') must follow the closing quote
                if (currentChar != quote || currentPosition == lastPosition)
                {
                    return 0;
                }

                currentChar = s[++currentPosition];
            }
            else
            {
                // an unquoted value must have at least one character
                if (!ScannerCharacterMatcher.MatchAnythingExceptWhitespaces(s, ref currentChar, ref currentPosition, lastPosition, '\"', '\'', '=', '<', '>', '`'))
                {
                    return 0;
                }
            }

            // the value has been consumed; another '=' is not allowed until a new attribute name is read
            hadAttribute = false;
            continue;
        }

        // the attribute must be preceded by a whitespace
        if (!hadWhitespace)
        {
            return 0;
        }

        // if the end has not been found then there is just one possible alternative - an attribute;
        // validate that the attribute name starts with a correct character
        if (!ScannerCharacterMatcher.MatchAsciiLetter(s, ref currentChar, ref currentPosition, lastPosition, '_', ':'))
        {
            return 0;
        }

        // match any remaining characters in the attribute name
        ScannerCharacterMatcher.MatchAsciiLetterOrDigit(s, ref currentChar, ref currentPosition, lastPosition, '_', ':', '.', '-');

        hadAttribute = true;
    }

    return 0;
}