/// <summary>
/// Try to match email autolink after first &lt;, returning num of chars matched.
/// </summary>
/// <param name="s">The string being scanned.</param>
/// <param name="pos">Index of the first character of the candidate e-mail address (the character after the opening &lt;).</param>
/// <param name="sourceLength">Number of characters of <paramref name="s"/> that may be examined; the last usable index is <c>sourceLength - 1</c>.</param>
/// <returns>
/// The number of characters matched, counted from <paramref name="pos"/> up to and including the closing &gt;,
/// or 0 if the text at <paramref name="pos"/> is not an e-mail autolink.
/// </returns>
public static int scan_autolink_email(string s, int pos, int sourceLength)
{
    /*!re2c
      [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
        [@]
        [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
        ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
        [>] { return (p - start); }
      .? { return 0; }
    */

    // Quick rejection: this implementation requires at least one dot in the domain,
    // so the shortest candidate it can accept is "a@b.c>" (6 characters).
    // Only bail out when fewer than 6 characters remain, so that such a minimal
    // address is still recognized when it sits at the very end of the input.
    if (pos + 6 > sourceLength)
    {
        return 0;
    }

    char c = s[pos];

    // the local part must contain at least one character
    if (c == '@')
    {
        return 0;
    }

    int i = pos;
    int ln = sourceLength - 1;

    // consume the local part up to (but not including) the '@'
    while (i <= ln)
    {
        if (c == '@')
        {
            break;
        }

        if ((c < 'a' || c > 'z')
            && (c < 'A' || c > 'Z')
            && (c < '0' || c > '9')
            && ".!#$%&'*+/=?^_`{|}~-".IndexOf(c) == -1)
        {
            return 0;
        }

        // ran out of input before seeing '@'
        if (i == ln)
        {
            return 0;
        }

        c = s[++i];
    }

    // move past '@'
    if (i == ln)
    {
        return 0;
    }

    c = s[++i];
    bool hadDot = false;

    // consume dot-separated domain labels until the closing '>'
    while (true)
    {
        var domainStart = i;

        // A label has to begin with a letter or digit. The helper below is told to
        // accept '-' as well (hyphens are legal inside a label), so a leading hyphen
        // must be rejected explicitly before delegating to it.
        if (c == '-')
        {
            return 0;
        }

        if (!ScannerCharacterMatcher.MatchAsciiLetterOrDigit(s, ref c, ref i, ln, '-'))
        {
            return 0;
        }

        // a label may not end with a hyphen and may not be longer than 63 characters
        if (s[i - 1] == '-' || i - domainStart > 63)
        {
            return 0;
        }

        if (c == '>')
        {
            // a domain without any dot is not treated as an autolink here
            return hadDot ? i - pos + 1 : 0;
        }

        // anything other than a label separator ends the match; a trailing dot
        // at the end of the input cannot be followed by another label
        if (c != '.' || i == ln)
        {
            return 0;
        }

        hadDot = true;
        c = s[++i];
    }
}
/// <summary>
/// Tries to match an HTML open tag whose tag name begins at <paramref name="pos"/>:
/// the tag name, any attributes (with optional values), an optional "/" and the closing "&gt;".
/// </summary>
/// <param name="s">The string being scanned.</param>
/// <param name="pos">Index of the first character of the tag name.</param>
/// <param name="sourceLength">Number of characters of <paramref name="s"/> that may be examined; the last usable index is <c>sourceLength - 1</c>.</param>
/// <returns>
/// The number of characters matched, counted from <paramref name="pos"/> up to and including the closing "&gt;",
/// or 0 if no open tag could be matched.
/// </returns>
private static int _scanHtmlTagOpenTag(string s, int pos, int sourceLength)
{
    var lastPosition = sourceLength - 1;

    // the minimum length valid tag is "a>"
    if (lastPosition < pos + 1)
    {
        return 0;
    }

    // index of the character currently being examined
    var currentPosition = pos;

    // the character at the current position
    char currentChar = s[currentPosition];

    // set once an attribute name has been parsed and cleared again after its value;
    // an '=' is only legal while this is true
    bool hadAttribute = false;

    // The tag name must start with an ASCII letter
    if (!ScannerCharacterMatcher.MatchAsciiLetter(s, ref currentChar, ref currentPosition, lastPosition))
    {
        return 0;
    }

    // Move past any other characters that make up the tag name
    ScannerCharacterMatcher.MatchHtmlTagNameCharacter(s, ref currentChar, ref currentPosition, lastPosition);

    // loop until the tag is closed, becomes invalid, or the end of the input is reached
    while (currentPosition <= lastPosition)
    {
        // Move past any whitespaces, remembering whether there were any
        bool hadWhitespace = ScannerCharacterMatcher.MatchWhitespaces(s, ref currentChar, ref currentPosition, lastPosition);

        // check if the end of the tag has been reached
        if (currentChar == '>')
        {
            return currentPosition - pos + 1;
        }

        // a '/' is only valid as part of the self-closing "/>" sequence
        if (currentChar == '/')
        {
            if (currentPosition == lastPosition)
            {
                return 0;
            }

            currentChar = s[++currentPosition];
            return (currentChar == '>') ? currentPosition - pos + 1 : 0;
        }

        // check if arrived at the attribute value
        if (currentChar == '=')
        {
            // a value needs a preceding attribute name and at least one more character
            if (!hadAttribute || currentPosition == lastPosition)
            {
                return 0;
            }

            // move past the '=' symbol and any whitespaces
            currentChar = s[++currentPosition];
            ScannerCharacterMatcher.MatchWhitespaces(s, ref currentChar, ref currentPosition, lastPosition);

            if (currentChar == '\'' || currentChar == '\"')
            {
                // An opening quote that is the last available character can never be
                // closed; bail out instead of reading past the end of the input.
                if (currentPosition == lastPosition)
                {
                    return 0;
                }

                char quote = currentChar;
                currentChar = s[++currentPosition];

                // skip the quoted value up to the matching quote character
                ScannerCharacterMatcher.MatchAnythingExcept(s, ref currentChar, ref currentPosition, lastPosition, quote);

                // the value must be terminated by the same quote, and something
                // (at least the closing '>') has to follow it
                if (currentChar != quote || currentPosition == lastPosition)
                {
                    return 0;
                }

                currentChar = s[++currentPosition];
            }
            else
            {
                // an unquoted value must have at least one character
                if (!ScannerCharacterMatcher.MatchAnythingExceptWhitespaces(s, ref currentChar, ref currentPosition, lastPosition, '\"', '\'', '=', '<', '>', '`'))
                {
                    return 0;
                }
            }

            hadAttribute = false;
            continue;
        }

        // the attribute must be preceded by a whitespace
        if (!hadWhitespace)
        {
            return 0;
        }

        // if the end has not been found then there is just one possible alternative - an attribute
        // validate that the attribute name starts with a correct character
        if (!ScannerCharacterMatcher.MatchAsciiLetter(s, ref currentChar, ref currentPosition, lastPosition, '_', ':'))
        {
            return 0;
        }

        // match any remaining characters in the attribute name
        ScannerCharacterMatcher.MatchAsciiLetterOrDigit(s, ref currentChar, ref currentPosition, lastPosition, '_', ':', '.', '-');

        hadAttribute = true;
    }

    return 0;
}