/// <summary>
/// Returns the regex fragment used for link/image hrefs that may contain
/// balanced parentheses, building it on first use and caching it in
/// <c>_nestedParensPattern</c>.
/// </summary>
/// <returns>The cached pattern string.</returns>
private static string GetNestedParensPattern()
{
    // Already built by an earlier call: hand back the cached string.
    if (Markdown._nestedParensPattern != null)
    {
        return Markdown._nestedParensPattern;
    }

    // Opening half: an atomic group that accepts either a run of characters
    // that are neither parens nor whitespace, or a literal "(".
    // RepeatString is defined elsewhere; presumably it repeats the fragment
    // the given number of times (6), giving six nesting levels -- confirm.
    string openingHalf = Markdown.RepeatString("\r\n (?> # Atomic matching\r\n [^()\\s]+ # Anything other than parens or whitespace\r\n |\r\n \\(\r\n ", 6);

    // Closing half: a literal ")" followed by the end of the group opened above.
    string closingHalf = Markdown.RepeatString(" \\)\r\n )*", 6);

    Markdown._nestedParensPattern = string.Concat(openingHalf, closingHalf);
    return Markdown._nestedParensPattern;
}
/// <summary>
/// Returns the regex fragment used for link text that may contain balanced
/// square brackets. The string is assembled once and then served from
/// <c>_nestedBracketsPattern</c>.
/// </summary>
/// <returns>The cached pattern string.</returns>
private static string GetNestedBracketsPattern()
{
    if (Markdown._nestedBracketsPattern == null)
    {
        // Each opening fragment starts an atomic group matching either a run of
        // non-bracket characters or a literal "["; each closing fragment matches
        // the "]" and ends that group. Both halves are requested with the same
        // count (6) so the groups balance.
        string openingHalf = Markdown.RepeatString("\r\n (?> # Atomic matching\r\n [^\\[\\]]+ # Anything other than brackets\r\n |\r\n \\[\r\n ", 6);
        string closingHalf = Markdown.RepeatString(" \\]\r\n )*", 6);

        Markdown._nestedBracketsPattern = string.Concat(openingHalf, closingHalf);
    }

    return Markdown._nestedBracketsPattern;
}
// Builds the regex source text that the static constructor compiles into
// _blocksHtml (with Multiline | IgnorePatternWhitespace). The result is
// rebuilt on every call; nothing is cached here.
//
// Locals, in order of appearance:
//   str  - tag names for "group a" (ins|del); substituted for $block_tags_a_re.
//   str1 - tag names for "group b" (p, div, h1-h6, ...); substituted for
//          $block_tags_b_re.
//   str2 - optional tag-attribute fragment; substituted for $attr and also
//          embedded inside str3.
//   str3 - content fragment that tolerates nested tags of the same name. It is
//          assembled from two RepeatString(..., 6) halves around ".*?" and
//          refers to the tag name through backreference \2.
//          (RepeatString is defined elsewhere; presumably it repeats the
//          fragment six times, i.e. six nesting levels -- confirm.)
//   str4 - copy of str3 with \2 rewritten to \3, for the group-a alternative
//          whose tag name is captured by group 3.
//   str5 - the template. Its alternatives are: group-b block tags, group-a
//          tags, <hr>, standalone HTML comments, and <? ... ?> / <% ... %>
//          processing instructions.
//
// Placeholder substitution: $less_than_tab becomes "3". $content2 is replaced
// before $content because "$content" is a prefix of "$content2"; doing it the
// other way round would corrupt the $content2 placeholder.
//
// NOTE(review): in this view the str5 literal is split across a physical line
// break ("...<hr />. " / "It was easier..."). A regular C# string literal
// cannot span lines, and inside an IgnorePatternWhitespace pattern a newline
// would end the '#' comment. Confirm against the original file that this is a
// single line.
private static string GetBlockPattern() { string str = "ins|del"; string str1 = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math"; string str2 = "\r\n (?>\t\t\t\t # optional tag attributes\r\n \\s\t\t\t # starts with whitespace\r\n (?>\r\n [^>\"/]+\t # text outside quotes\r\n |\r\n /+(?!>)\t\t # slash not followed by >\r\n |\r\n \"[^\"]*\"\t\t # text inside double quotes (tolerate >)\r\n |\r\n '[^']*'\t # text inside single quotes (tolerate >)\r\n )*\r\n )?\t\r\n "; string str3 = string.Concat(Markdown.RepeatString(string.Concat("\r\n (?>\r\n [^<]+\t\t\t # content without tag\r\n |\r\n <\\2\t\t\t # nested opening tag\r\n ", str2, " # attributes\r\n (?>\r\n />\r\n |\r\n >"), 6), ".*?", Markdown.RepeatString("\r\n </\\2\\s*>\t # closing nested tag\r\n )\r\n |\t\t\t\t\r\n <(?!/\\2\\s*> # other tags with a different name\r\n )\r\n )*", 6)); string str4 = str3.Replace("\\2", "\\3"); string str5 = "\r\n (?>\r\n (?>\r\n (?<=\\n) # Starting after a blank line\r\n | # or\r\n \\A\\n? # the beginning of the doc\r\n )\r\n ( # save in $1\r\n\r\n # Match from `\\n<tag>` to `</tag>\\n`, handling nested tags \r\n # in between.\r\n \r\n [ ]{0,$less_than_tab}\r\n <($block_tags_b_re) # start tag = $2\r\n $attr> # attributes followed by > and \\n\r\n $content # content, support nesting\r\n </\\2> # the matching end tag\r\n [ ]* # trailing spaces\r\n (?=\\n+|\\Z) # followed by a newline or end of document\r\n\r\n | # Special version for tags of group a.\r\n\r\n [ ]{0,$less_than_tab}\r\n <($block_tags_a_re) # start tag = $3\r\n $attr>[ ]*\\n # attributes followed by >\r\n $content2 # content, support nesting\r\n </\\3> # the matching end tag\r\n [ ]* # trailing spaces\r\n (?=\\n+|\\Z) # followed by a newline or end of document\r\n \r\n | # Special case just for <hr />. 
It was easier to make a special \r\n # case than to make the other regex more complicated.\r\n \r\n [ ]{0,$less_than_tab}\r\n <(hr) # start tag = $2\r\n $attr # attributes\r\n /?> # the matching end tag\r\n [ ]*\r\n (?=\\n{2,}|\\Z) # followed by a blank line or end of document\r\n \r\n | # Special case for standalone HTML comments:\r\n \r\n [ ]{0,$less_than_tab}\r\n (?s:\r\n <!-- .*? -->\r\n )\r\n [ ]*\r\n (?=\\n{2,}|\\Z) # followed by a blank line or end of document\r\n \r\n | # PHP and ASP-style processor instructions (<? and <%)\r\n \r\n [ ]{0,$less_than_tab}\r\n (?s:\r\n <([?%]) # $2\r\n .*?\r\n \\2>\r\n )\r\n [ ]*\r\n (?=\\n{2,}|\\Z) # followed by a blank line or end of document\r\n \r\n )\r\n )"; str5 = str5.Replace("$less_than_tab", 3.ToString()); str5 = str5.Replace("$block_tags_b_re", str1); str5 = str5.Replace("$block_tags_a_re", str); str5 = str5.Replace("$attr", str2); return(str5.Replace("$content2", str4).Replace("$content", str3)); }
/// <summary>
/// Type initializer: compiles every regular expression the converter uses and
/// fills the escape lookup tables. Runs once, before the first use of the type.
/// </summary>
/// <remarks>
/// Most patterns are written in free-spacing form (IgnorePatternWhitespace),
/// so the "# ..." text inside the literals is regex comment, not pattern.
/// </remarks>
static Markdown()
{
    // Indentation width that marks a code block, and the largest indent that
    // still counts as "not a code block".
    const int tabWidth = 4;
    const int lessThanTab = 3;

    // ---- whitespace housekeeping -------------------------------------------
    Markdown._newlinesLeadingTrailing = new Regex("^\\n+|\\n+\\z", RegexOptions.Compiled);
    Markdown._newlinesMultiple = new Regex("\\n{2,}", RegexOptions.Compiled);
    Markdown._leadingWhitespace = new Regex("^[ ]*", RegexOptions.Compiled);

    // ---- link definitions: [id]: url "title" --------------------------------
    Markdown._linkDef = new Regex(string.Format("\r\n ^[ ]{{0,{0}}}\\[(.+)\\]: # id = $1\r\n [ ]*\r\n \\n? # maybe *one* newline\r\n [ ]*\r\n <?(\\S+?)>? # url = $2\r\n [ ]*\r\n \\n? # maybe one newline\r\n [ ]*\r\n (?:\r\n (?<=\\s) # lookbehind for whitespace\r\n [\"(]\r\n (.+?) # title = $3\r\n [\")]\r\n [ ]*\r\n )? # title is optional\r\n (?:\\n+|\\Z)", lessThanTab), RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);

    // ---- raw HTML -------------------------------------------------------------
    Markdown._blocksHtml = new Regex(Markdown.GetBlockPattern(), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
    Markdown._htmlTokens = new Regex(string.Concat("\r\n (<!(?:--.*?--\\s*)+>)| # match <!-- foo -->\r\n (<\\?.*?\\?>)| # match <?foo?> ", Markdown.RepeatString(" \r\n (<[A-Za-z\\/!$](?:[^<>]|", 6), Markdown.RepeatString(")*>)", 6), " # match <tag> and </tag>"), RegexOptions.Multiline | RegexOptions.ExplicitCapture | RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);

    // ---- anchors ----------------------------------------------------------------
    Markdown._anchorRef = new Regex(string.Format("\r\n ( # wrap whole match in $1\r\n \\[\r\n ({0}) # link text = $2\r\n \\]\r\n\r\n [ ]? # one optional space\r\n (?:\\n[ ]*)? # one optional newline followed by spaces\r\n\r\n \\[\r\n (.*?) # id = $3\r\n \\]\r\n )", Markdown.GetNestedBracketsPattern()), RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
    Markdown._anchorInline = new Regex(string.Format("\r\n ( # wrap whole match in $1\r\n \\[\r\n ({0}) # link text = $2\r\n \\]\r\n \\( # literal paren\r\n [ ]*\r\n ({1}) # href = $3\r\n [ ]*\r\n ( # $4\r\n (['\"]) # quote char = $5\r\n (.*?) # title = $6\r\n \\5 # matching quote\r\n [ ]* # ignore any spaces between closing quote and )\r\n )? # title is optional\r\n \\)\r\n )", Markdown.GetNestedBracketsPattern(), Markdown.GetNestedParensPattern()), RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
    Markdown._anchorRefShortcut = new Regex("\r\n ( # wrap whole match in $1\r\n \\[\r\n ([^\\[\\]]+) # link text = $2; can't contain [ or ]\r\n \\]\r\n )", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);

    // ---- images -------------------------------------------------------------------
    Markdown._imagesRef = new Regex("\r\n ( # wrap whole match in $1\r\n !\\[\r\n (.*?) # alt text = $2\r\n \\]\r\n\r\n [ ]? # one optional space\r\n (?:\\n[ ]*)? # one optional newline followed by spaces\r\n\r\n \\[\r\n (.*?) # id = $3\r\n \\]\r\n\r\n )", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
    Markdown._imagesInline = new Regex(string.Format("\r\n ( # wrap whole match in $1\r\n !\\[\r\n (.*?) # alt text = $2\r\n \\]\r\n \\s? # one optional whitespace character\r\n \\( # literal paren\r\n [ ]*\r\n ({0}) # href = $3\r\n [ ]*\r\n ( # $4\r\n (['\"]) # quote char = $5\r\n (.*?) # title = $6\r\n \\5 # matching quote\r\n [ ]*\r\n )? # title is optional\r\n \\)\r\n )", Markdown.GetNestedParensPattern()), RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);

    // ---- headers and rules ----------------------------------------------------------
    Markdown._headerSetext = new Regex("\r\n ^(.+?)\r\n [ ]*\r\n \\n\r\n (=+|-+) # $1 = string of ='s or -'s\r\n [ ]*\r\n \\n+", RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);
    Markdown._headerAtx = new Regex("\r\n ^(\\#{1,6}) # $1 = string of #'s\r\n [ ]*\r\n (.+?) # $2 = Header text\r\n [ ]*\r\n \\#* # optional closing #'s (not counted)\r\n \\n+", RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);
    Markdown._horizontalRules = new Regex("\r\n ^[ ]{0,3} # Leading space\r\n ([-*_]) # $1: First marker\r\n (?> # Repeated marker group\r\n [ ]{0,2} # Zero, one, or two spaces.\r\n \\1 # Marker character\r\n ){2,} # Group repeated at least twice\r\n [ ]* # Trailing spaces\r\n $ # End of line.\r\n ", RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);

    // ---- lists ------------------------------------------------------------------------
    // {0} = "any list marker" (bullet or number), {1} = maximum leading indent.
    string anyListMarker = string.Format("(?:{0}|{1})", "[*+-]", "\\d+[.]");
    Markdown._wholeList = string.Format("\r\n ( # $1 = whole list\r\n ( # $2\r\n [ ]{{0,{1}}}\r\n ({0}) # $3 = first list item marker\r\n [ ]+\r\n )\r\n (?s:.+?)\r\n ( # $4\r\n \\z\r\n |\r\n \\n{{2,}}\r\n (?=\\S)\r\n (?! # Negative lookahead for another list item marker\r\n [ ]*\r\n {0}[ ]+\r\n )\r\n )\r\n )", anyListMarker, lessThanTab);
    // Nested lists may start on any line; top-level lists only after a blank
    // line or at the very beginning of the text.
    Markdown._listNested = new Regex(string.Concat("^", Markdown._wholeList), RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);
    Markdown._listTopLevel = new Regex(string.Concat("(?:(?<=\\n\\n)|\\A\\n?)", Markdown._wholeList), RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);

    // ---- code ---------------------------------------------------------------------------
    Markdown._codeBlock = new Regex(string.Format("\r\n (?:\\n\\n|\\A\\n?)\r\n ( # $1 = the code block -- one or more lines, starting with a space\r\n (?:\r\n (?:[ ]{{{0}}}) # Lines must start with a tab-width of spaces\r\n .*\\n+\r\n )+\r\n )\r\n ((?=^[ ]{{0,{0}}}\\S)|\\Z) # Lookahead for non-space at line-start, or end of doc", tabWidth), RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);
    Markdown._codeSpan = new Regex("\r\n (?<!\\\\) # Character before opening ` can't be a backslash\r\n (`+) # $1 = Opening run of `\r\n (.+?) # $2 = The code block\r\n (?<!`)\r\n \\1\r\n (?!`)", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);

    // ---- emphasis -------------------------------------------------------------------------
    Markdown._bold = new Regex("(\\*\\*|__) (?=\\S) (.+?[*_]*) (?<=\\S) \\1", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
    Markdown._strictBold = new Regex("([\\W_]|^) (\\*\\*|__) (?=\\S) ([^\\r]*?\\S[\\*_]*) \\2 ([\\W_]|$)", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
    Markdown._italic = new Regex("(\\*|_) (?=\\S) (.+?) (?<=\\S) \\1", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
    Markdown._strictItalic = new Regex("([\\W_]|^) (\\*|_) (?=\\S) ([^\\r\\*_]*?\\S) \\2 ([\\W_]|$)", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);

    // ---- blockquotes and bare URLs ------------------------------------------------------------
    Markdown._blockquote = new Regex("\r\n ( # Wrap whole match in $1\r\n (\r\n ^[ ]*>[ ]? # '>' at the start of a line\r\n .+\\n # rest of the first line\r\n (.+\\n)* # subsequent consecutive lines\r\n \\n* # blanks\r\n )+\r\n )", RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);
    Markdown._autolinkBare = new Regex("(^|\\s)(https?|ftp)(://[-A-Z0-9+&@#/%?=~_|\\[\\]\\(\\)!:,\\.;]*[-A-Z0-9+&@#/%=~_|\\[\\]])($|\\W)", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // ---- outdenting and encoding ------------------------------------------------------------------
    Markdown._outDent = new Regex(string.Concat("^[ ]{1,", tabWidth, "}"), RegexOptions.Multiline | RegexOptions.Compiled);
    Markdown._codeEncoder = new Regex("&|<|>|\\\\|\\*|_|\\{|\\}|\\[|\\]", RegexOptions.Compiled);

    // Matches an '&' that does NOT start a complete entity reference.
    // The three entity forms (decimal, hex, named) are grouped so that the
    // terminating ';' is required for every one of them, and the hex form
    // accepts one or more digits. Previously only the named form required
    // ';', so text such as "&#169 " was left unencoded. ExplicitCapture keeps
    // the extra parentheses from creating capture groups.
    Markdown._amps = new Regex("&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
    Markdown._angles = new Regex("<(?![A-Za-z/?\\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

    // Matches U+001A, digits, U+001A. The hash-key format itself is produced
    // by GetHashKey, defined elsewhere; presumably these are its keys -- confirm.
    Markdown._unescapes = new Regex("\u001a\\d+\u001a", RegexOptions.Compiled);
    Markdown._problemUrlChars = "\"'*()[]$:".ToCharArray();

    // ---- escape tables ------------------------------------------------------------------------------
    Markdown._escapeTable = new Dictionary<string, string>();
    Markdown._invertedEscapeTable = new Dictionary<string, string>();
    Markdown._backslashEscapeTable = new Dictionary<string, string>();

    // Every character that may be backslash-escaped gets a hash key; the
    // three tables map char -> key, key -> char, and "\char" -> key.
    string escapableChars = "\\`*_{}[]()>#+-.!";
    List<string> backslashAlternatives = new List<string>(escapableChars.Length);
    foreach (char escapable in escapableChars)
    {
        string literal = escapable.ToString();
        string backslashed = string.Concat("\\", literal);
        string hashKey = Markdown.GetHashKey(literal);

        Markdown._escapeTable.Add(literal, hashKey);
        Markdown._invertedEscapeTable.Add(hashKey, literal);
        Markdown._backslashEscapeTable.Add(backslashed, hashKey);

        backslashAlternatives.Add(Regex.Escape(backslashed));
    }

    // One alternation of all backslash-escaped sequences, e.g. \\\\|\\`|\\*|...
    Markdown._backslashEscapes = new Regex(string.Join("|", backslashAlternatives.ToArray()), RegexOptions.Compiled);
}