// Parses the part of a link (or image) that follows the link text, starting at the
// current position of the subject. Three outcomes are possible:
//   * "(url "title")" - an inline link; a new Reference with the cleaned url/title is returned
//     and the subject is moved past the closing parenthesis.
//   * "[label]"       - a reference link; returns Reference.SelfReference for an empty label,
//     the looked-up reference if one exists, or Reference.InvalidReference (with the subject
//     rolled back) otherwise.
//   * anything else   - the subject is rolled back; returns a placeholder Reference when
//     supportPlaceholderBrackets is set, otherwise null.
static Reference ParseLinkDetails(Subject subj, bool supportPlaceholderBrackets)
{
    int labelEnd = subj.Position;
    var next = peek_char(subj);

    if (next == '(')
    {
        // try to parse an explicit (inline) link
        var text = subj.Buffer;
        int limit = subj.Length;

        int leadingSpaces = Scanner.scan_spacechars(text, labelEnd + 1, limit);
        if (leadingSpaces > -1)
        {
            // the url starts right after '(' and any spaces following it
            int urlStart = labelEnd + 1 + leadingSpaces;
            int urlLength = Scanner.scan_link_url(text, urlStart, limit);
            if (urlLength > -1)
            {
                int urlEnd = urlStart + urlLength;
                int titleStart = urlEnd + Scanner.scan_spacechars(text, urlEnd, limit);

                // a title is only looked for when something separates it from the url
                int titleEnd = titleStart;
                if (titleStart != urlEnd)
                {
                    titleEnd += Scanner.scan_link_title(text, titleStart, limit);
                }

                int closingPos = titleEnd + Scanner.scan_spacechars(text, titleEnd, limit);
                if (closingPos < limit && text[closingPos] == ')')
                {
                    subj.Position = closingPos + 1;
                    string url = CleanUrl(text.Substring(urlStart, urlEnd - urlStart));
                    string title = CleanTitle(text.Substring(titleStart, titleEnd - titleStart));
                    return new Reference() { Title = title, Url = url };
                }
            }
        }
    }
    else if (next == '[')
    {
        var label = ParseReferenceLabel(subj);
        if (label != null)
        {
            if (label.Value.Length == 0)
            {
                return Reference.SelfReference;
            }

            var details = LookupReference(subj.DocumentData, label.Value);
            if (details != null)
            {
                return details;
            }

            // rollback the subject but return InvalidReference so that the caller knows not to
            // parse 'foo' from [foo][bar].
            subj.Position = labelEnd;
            return Reference.InvalidReference;
        }
    }

    // nothing matched: restore the subject position before building the fallback
    subj.Position = labelEnd;

    if (!supportPlaceholderBrackets)
    {
        return null;
    }

    // the placeholder text runs from the start of the last pending inline up to
    // (but not including) the character just before the current position
    int placeholderStart = subj.LastPendingInline.StartPosition;
    return new Reference()
    {
        Url = subj.Buffer.Substring(placeholderStart, subj.Position - placeholderStart - 1),
        IsPlaceholder = true
    };
}
// Process one line at a time, modifying the block tree.
// The method works in three phases:
//   1. walk down the chain of open blocks and check which of them this line continues;
//   2. try to open new container blocks at the current offset;
//   3. add the remaining text to the resulting container (or to the current paragraph
//      when the line is a lazy continuation).
// curptr is changed to point to the currently open block, except when the line is
// treated as a lazy paragraph continuation - in that case it is left untouched.
public static void IncorporateLine(LineInfo line, ref Block curptr)
{
    var ln = line.Line;

    Block last_matched_container;

    // offset is the char position in the line
    var offset = 0;

    // column is the virtual position in the line that takes TAB expansion into account
    var column = 0;

    // the adjustment to the virtual position `column` that points to the number of spaces
    // from the TAB that have not been included in any indent.
    var remainingSpaces = 0;

    // the char position of the first non-space char
    int first_nonspace;

    // the virtual position of the first non-space char, that includes TAB expansion
    int first_nonspace_column;

    int matched;
    int i;
    ListData data;
    bool all_matched = true;
    Block cur = curptr;
    var blank = false;
    char curChar;
    int indent;

    // container starts at the document root.
    var container = cur.Top;

    // for each containing block, try to parse the associated line start.
    // bail out on failure: container will point to the last matching block.
    while (container.LastChild != null && container.LastChild.IsOpen)
    {
        container = container.LastChild;

        FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar);

        indent = first_nonspace_column - column + remainingSpaces;
        blank = curChar == '\n';

        switch (container.Tag)
        {
            case BlockTag.BlockQuote:
                {
                    // a block quote continues only if the line starts with '>' indented by at most 3
                    if (indent <= 3 && curChar == '>')
                    {
                        AdvanceOffset(ln, indent + 1, true, ref offset, ref column, ref remainingSpaces);
                        AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces);
                    }
                    else
                    {
                        all_matched = false;
                    }

                    break;
                }

            case BlockTag.ListItem:
                {
                    // a list item continues if the line is indented at least up to the item content
                    if (indent >= container.ListData.MarkerOffset + container.ListData.Padding)
                    {
                        AdvanceOffset(ln, container.ListData.MarkerOffset + container.ListData.Padding, true, ref offset, ref column, ref remainingSpaces);
                    }
                    else if (blank && container.FirstChild != null)
                    {
                        // if container->first_child is NULL, then the opening line
                        // of the list item was blank after the list marker; in this
                        // case, we are done with the list item.
                        AdvanceOffset(ln, first_nonspace - offset, false, ref offset, ref column, ref remainingSpaces);
                    }
                    else
                    {
                        all_matched = false;
                    }

                    break;
                }

            case BlockTag.IndentedCode:
                {
                    if (indent >= CODE_INDENT)
                    {
                        AdvanceOffset(ln, CODE_INDENT, true, ref offset, ref column, ref remainingSpaces);
                    }
                    else if (blank)
                    {
                        // blank lines stay part of the indented code block
                        AdvanceOffset(ln, first_nonspace - offset, false, ref offset, ref column, ref remainingSpaces);
                    }
                    else
                    {
                        all_matched = false;
                    }

                    break;
                }

            case BlockTag.AtxHeading:
            case BlockTag.SetextHeading:
                {
                    // a heading can never contain more than one line
                    all_matched = false;
                    if (blank)
                    {
                        container.IsLastLineBlank = true;
                    }

                    break;
                }

            case BlockTag.FencedCode:
                {
                    // -1 means we've seen closer
                    if (container.FencedCodeData.FenceLength == -1)
                    {
                        all_matched = false;
                        if (blank)
                        {
                            container.IsLastLineBlank = true;
                        }
                    }
                    else
                    {
                        // skip optional spaces of fence offset
                        i = container.FencedCodeData.FenceOffset;
                        while (i > 0 && ln[offset] == ' ')
                        {
                            offset++;
                            column++;
                            i--;
                        }
                    }

                    break;
                }

            case BlockTag.HtmlBlock:
                {
                    // only HTML blocks of type InterruptingBlock or above are ended by a blank line;
                    // all other block types can accept blanks
                    if (blank && container.HtmlBlockType >= HtmlBlockType.InterruptingBlock)
                    {
                        container.IsLastLineBlank = true;
                        all_matched = false;
                    }

                    break;
                }

            case BlockTag.Paragraph:
                {
                    // a blank line ends a paragraph
                    if (blank)
                    {
                        container.IsLastLineBlank = true;
                        all_matched = false;
                    }

                    break;
                }
        }

        if (!all_matched)
        {
            container = container.Parent;  // back up to last matching block
            break;
        }
    }

    last_matched_container = container;

    // check to see if we've hit 2nd blank line, break out of list:
    if (blank && container.IsLastLineBlank)
    {
        BreakOutOfLists(ref container, line);
    }

    // the line can be a lazy continuation only if the innermost open block is a paragraph
    var maybeLazy = cur.Tag == BlockTag.Paragraph;

    // unless last matched container is code block, try new container starts:
    while (container.Tag != BlockTag.FencedCode &&
           container.Tag != BlockTag.IndentedCode &&
           container.Tag != BlockTag.HtmlBlock)
    {
        FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar);

        indent = first_nonspace_column - column;
        blank = curChar == '\n';

        var indented = indent >= CODE_INDENT;

        if (!indented && curChar == '>')
        {
            // block quote start: consume the '>' and one optional following space
            AdvanceOffset(ln, first_nonspace + 1 - offset, false, ref offset, ref column, ref remainingSpaces);
            AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces);

            container = CreateChildBlock(container, line, BlockTag.BlockQuote, first_nonspace);
        }
        else if (!indented && curChar == '#' && 0 != (matched = Scanner.scan_atx_heading_start(ln, first_nonspace, ln.Length, out i)))
        {
            // ATX heading: the scanner's out value `i` is passed on to HeadingData
            AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces);
            container = CreateChildBlock(container, line, BlockTag.AtxHeading, first_nonspace);
            container.Heading = new HeadingData(i);
        }
        else if (!indented && (curChar == '`' || curChar == '~') && 0 != (matched = Scanner.scan_open_code_fence(ln, first_nonspace, ln.Length)))
        {
            // fenced code start: remember the fence char, its length and its indentation
            container = CreateChildBlock(container, line, BlockTag.FencedCode, first_nonspace);
            container.FencedCodeData = new FencedCodeData();
            container.FencedCodeData.FenceChar = curChar;
            container.FencedCodeData.FenceLength = matched;
            container.FencedCodeData.FenceOffset = first_nonspace - offset;

            AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces);
        }
        else if (!indented && curChar == '<' &&
            (0 != (matched = (int)Scanner.scan_html_block_start(ln, first_nonspace, ln.Length))
            || (container.Tag != BlockTag.Paragraph && 0 != (matched = (int)Scanner.scan_html_block_start_7(ln, first_nonspace, ln.Length)))
            ))
        {
            // HTML block start; the second scanner is only tried when the container is not a paragraph
            container = CreateChildBlock(container, line, BlockTag.HtmlBlock, first_nonspace);
            container.HtmlBlockType = (HtmlBlockType)matched;
            // note, we don't adjust offset because the tag is part of the text
        }
        else if (!indented && container.Tag == BlockTag.Paragraph && (curChar == '=' || curChar == '-')
                && 0 != (matched = Scanner.scan_setext_heading_line(ln, first_nonspace, ln.Length)))
        {
            // setext underline: the existing paragraph is converted into a heading in place
            container.Tag = BlockTag.SetextHeading;
            container.Heading = new HeadingData(matched);
            AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces);
        }
        else if (!indented
            && !(container.Tag == BlockTag.Paragraph && !all_matched)
            && 0 != (Scanner.scan_thematic_break(ln, first_nonspace, ln.Length)))
        {
            // it's only now that we know the line is not part of a setext heading:
            container = CreateChildBlock(container, line, BlockTag.ThematicBreak, first_nonspace);
            Finalize(container, line);
            container = container.Parent;
            AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces);
        }
        else if ((!indented || container.Tag == BlockTag.List)
            && 0 != (matched = ParseListMarker(ln, first_nonspace, out data)))
        {
            // compute padding:
            AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces);

            // remember the state right after the marker so that it can be restored below
            var prevOffset = offset;
            var prevColumn = column;
            var prevRemainingSpaces = remainingSpaces;

            while (column - prevColumn <= CODE_INDENT)
            {
                if (!AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces))
                {
                    break;
                }
            }

            // column - prevColumn = number of whitespace columns consumed after the marker
            if (column == prevColumn)
            {
                // no spaces at all
                data.Padding = matched + 1;
            }
            else if (column - prevColumn > CODE_INDENT || ln[offset] == '\n')
            {
                data.Padding = matched + 1;

                // too many (or none) spaces, ignoring everything but the first one
                offset = prevOffset;
                column = prevColumn;
                remainingSpaces = prevRemainingSpaces;
                AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces);
            }
            else
            {
                data.Padding = matched + column - prevColumn;
            }

            // check container; if it's a list, see if this list item
            // can continue the list; otherwise, create a list container.

            data.MarkerOffset = indent;

            if (container.Tag != BlockTag.List || !ListsMatch(container.ListData, data))
            {
                container = CreateChildBlock(container, line, BlockTag.List, first_nonspace);
                container.ListData = data;
            }

            // add the list item
            container = CreateChildBlock(container, line, BlockTag.ListItem, first_nonspace);
            container.ListData = data;
        }
        else if (indented && !maybeLazy && !blank)
        {
            // indented code block (not tried while the line may still be a lazy paragraph continuation)
            AdvanceOffset(ln, CODE_INDENT, true, ref offset, ref column, ref remainingSpaces);
            container = CreateChildBlock(container, line, BlockTag.IndentedCode, offset);
        }
        else
        {
            break;
        }

        if (AcceptsLines(container.Tag))
        {
            // if it's a line container, it can't contain other containers
            break;
        }

        maybeLazy = false;
    }

    // what remains at offset is a text line. add the text to the
    // appropriate container.

    FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar);
    indent = first_nonspace_column - column;
    blank = curChar == '\n';

    if (blank && container.LastChild != null)
    {
        container.LastChild.IsLastLineBlank = true;
    }

    // block quote lines are never blank as they start with >
    // and we don't count blanks in fenced code for purposes of tight/loose
    // lists or breaking out of lists. we also don't set last_line_blank
    // on an empty list item.
    container.IsLastLineBlank = (blank &&
                                  container.Tag != BlockTag.BlockQuote &&
                                  container.Tag != BlockTag.SetextHeading &&
                                  container.Tag != BlockTag.FencedCode &&
                                  !(container.Tag == BlockTag.ListItem &&
                                    container.FirstChild == null &&
                                    container.SourcePosition >= line.LineOffset));

    // the flag is kept only on the innermost container: clear it on every ancestor
    Block cont = container;
    while (cont.Parent != null)
    {
        cont.Parent.IsLastLineBlank = false;
        cont = cont.Parent;
    }

    if (cur != last_matched_container &&
        container == last_matched_container &&
        !blank &&
        cur.Tag == BlockTag.Paragraph &&
        cur.StringContent.Length > 0)
    {
        // lazy paragraph continuation: the text goes to the still open paragraph
        AddLine(cur, line, ln, offset, remainingSpaces);
    }
    else
    {
        // not a lazy continuation

        // finalize any blocks that were not matched and set cur to container:
        while (cur != last_matched_container)
        {
            Finalize(cur, line);
            cur = cur.Parent;

            if (cur == null)
            {
                throw new CommonMarkException("Cannot finalize container block. Last matched container tag = " + last_matched_container.Tag);
            }
        }

        if (container.Tag == BlockTag.IndentedCode)
        {
            AddLine(container, line, ln, offset, remainingSpaces);
        }
        else if (container.Tag == BlockTag.FencedCode)
        {
            if ((indent <= 3
              && curChar == container.FencedCodeData.FenceChar)
              && (0 != Scanner.scan_close_code_fence(ln, first_nonspace, container.FencedCodeData.FenceLength, ln.Length)))
            {
                // if closing fence, set fence length to -1. it will be closed when the next line is processed.
                container.FencedCodeData.FenceLength = -1;
            }
            else
            {
                AddLine(container, line, ln, offset, remainingSpaces);
            }
        }
        else if (container.Tag == BlockTag.HtmlBlock)
        {
            AddLine(container, line, ln, offset, remainingSpaces);

            // the line that contains the end condition still belongs to the HTML block
            if (Scanner.scan_html_block_end(container.HtmlBlockType, ln, first_nonspace, ln.Length))
            {
                Finalize(container, line);
                container = container.Parent;
            }
        }
        else if (blank)
        {
            // blank line and the container is not a code or HTML block: there is no text to add
        }
        else if (container.Tag == BlockTag.AtxHeading)
        {
            int p = ln.Length - 1;

            // trim trailing spaces
            while (p >= 0 && (ln[p] == ' ' || ln[p] == '\n'))
            {
                p--;
            }

            // if string ends in #s, remove these:
            while (p >= 0 && ln[p] == '#')
            {
                p--;
            }

            // there must be a space before the closing #s; otherwise keep the whole line
            if (p < 0 || ln[p] != ' ')
            {
                p = ln.Length - 1;
            }

            AddLine(container, line, ln, first_nonspace, remainingSpaces, p - first_nonspace + 1);
            Finalize(container, line);
            container = container.Parent;
        }
        else if (AcceptsLines(container.Tag))
        {
            AddLine(container, line, ln, first_nonspace, remainingSpaces);
        }
        else if (container.Tag != BlockTag.ThematicBreak && container.Tag != BlockTag.SetextHeading)
        {
            // create paragraph container for line
            container = CreateChildBlock(container, line, BlockTag.Paragraph, first_nonspace);
            AddLine(container, line, ln, first_nonspace, remainingSpaces);
        }
        else
        {
            Utilities.Warning("Line {0} with container type {1} did not match any condition:\n\"{2}\"", line.LineNumber, container.Tag, ln);
        }

        curptr = container;
    }
}
/// <summary>
/// Unescapes a string: removes backslashes that precede escapable punctuation or symbol
/// characters and replaces recognized character entities with their decoded value.
/// The input string is not modified; the result is returned.
/// </summary>
/// <param name="url">The string to unescape.</param>
/// <returns>
/// The unescaped string. When nothing had to be changed the same instance that was passed
/// in <paramref name="url"/> is returned.
/// </returns>
public static string Unescape(string url)
{
    // position from which the next '\' or '&' is searched
    int searchPos = 0;

    // start of the input that has not yet been copied to the result
    int lastPos = 0;

    int match;
    char c;
    char[] search = new[] { '\\', '&' };

    // created lazily so that strings without anything to unescape are returned as-is
    StringBuilder sb = null;

    while ((searchPos = url.IndexOfAny(search, searchPos)) != -1)
    {
        c = url[searchPos];
        if (c == '\\')
        {
            searchPos++;

            // a trailing backslash is kept literally
            if (url.Length == searchPos)
            {
                break;
            }

            c = url[searchPos];
            if (Utilities.IsEscapableSymbol(c))
            {
                if (sb == null)
                {
                    sb = new StringBuilder(url.Length);
                }

                // copy everything up to, but not including, the backslash
                sb.Append(url, lastPos, searchPos - lastPos - 1);
                lastPos = searchPos;

                // step over the escaped character: it is literal text and must not be
                // interpreted again as the start of another escape ("\\*") or of an
                // entity ("\&amp;").
                searchPos++;
            }
        }
        else if (c == '&')
        {
            string namedEntity;
            int numericEntity;
            match = Scanner.scan_entity(url, searchPos, url.Length - searchPos, out namedEntity, out numericEntity);
            if (match == 0)
            {
                // not an entity - the ampersand is kept literally
                searchPos++;
            }
            else
            {
                searchPos += match;

                if (namedEntity != null)
                {
                    // named entities that cannot be decoded are left in the text unchanged
                    var decoded = EntityDecoder.DecodeEntity(namedEntity);
                    if (decoded != null)
                    {
                        if (sb == null)
                        {
                            sb = new StringBuilder(url.Length);
                        }

                        sb.Append(url, lastPos, searchPos - match - lastPos);
                        sb.Append(decoded);
                        lastPos = searchPos;
                    }
                }
                else if (numericEntity > 0)
                {
                    var decoded = EntityDecoder.DecodeEntity(numericEntity);

                    if (sb == null)
                    {
                        sb = new StringBuilder(url.Length);
                    }

                    sb.Append(url, lastPos, searchPos - match - lastPos);

                    if (decoded != null)
                    {
                        sb.Append(decoded);
                    }
                    else
                    {
                        // numeric entities that cannot be decoded become U+FFFD
                        sb.Append('\uFFFD');
                    }

                    lastPos = searchPos;
                }
            }
        }
    }

    if (sb == null)
    {
        return url;
    }

    // copy the remaining tail
    sb.Append(url, lastPos, url.Length - lastPos);
    return sb.ToString();
}
/// <summary>
/// Tries to read a list item marker at the given position: either a bullet
/// (<c>+</c>, <c>•</c>, or <c>*</c>/<c>-</c> when the line is not a thematic break) or a number
/// followed by <c>.</c> or <c>)</c>. The marker must be followed by a whitespace character.
/// </summary>
/// <param name="ln">The line being parsed.</param>
/// <param name="pos">The position in <paramref name="ln"/> where the marker is expected to start.</param>
/// <param name="data">Receives the details of the marker on success; <c>null</c> on failure.</param>
/// <returns>The length of the marker, or 0 if there is no list marker at the position.</returns>
/// <remarks>Original: int parse_list_marker(string ln, int pos, ref ListData dataptr)</remarks>
private static int ParseListMarker(string ln, int pos, out ListData data)
{
    data = null;

    int lineLength = ln.Length;
    int markerStart = pos;
    char ch = ln[pos];

    bool isBullet = ch == '+'
        || ch == '•'
        || ((ch == '*' || ch == '-') && 0 == Scanner.scan_thematic_break(ln, pos, lineLength));

    if (isBullet)
    {
        pos++;
        if (pos == lineLength || !Utilities.IsWhitespace(ln[pos]))
        {
            return 0;
        }

        data = new ListData
        {
            BulletChar = ch,
            Start = 1
        };

        return pos - markerStart;
    }

    if (ch < '0' || ch > '9')
    {
        return 0;
    }

    // ordered list: accumulate the start number
    int number = ch - '0';
    while (pos < lineLength - 1)
    {
        ch = ln[++pos];

        // We limit to 9 digits to avoid overflow, This also seems to be the limit for 'start' in some browsers.
        if (ch < '0' || ch > '9' || number >= 100000000)
        {
            break;
        }

        number = number * 10 + (ch - '0');
    }

    // the number must be followed by a delimiter that is not the last char of the line
    bool isDelimiter = ch == '.' || ch == ')';
    if (pos >= lineLength - 1 || !isDelimiter)
    {
        return 0;
    }

    pos++;
    if (pos == lineLength || !Utilities.IsWhitespace(ln[pos]))
    {
        return 0;
    }

    data = new ListData
    {
        ListType = ListType.Ordered,
        BulletChar = '\0',
        Start = number,
        Delimiter = ch == '.' ? ListDelimiter.Period : ListDelimiter.Parenthesis
    };

    return pos - markerStart;
}
// Parse a link reference definition: [label]: url "optional title"
// Assumes the subject begins with the '[' character.
// If a definition is found it is added to the reference map of the subject.
// Returns 0 (and restores the subject position) if no reference was found, otherwise
// the position of the subject after the reference has been parsed.
public static int ParseReference(Subject subj)
{
    string url;
    string title;
    int matchlen;
    int beforetitle;
    var startPos = subj.Position;

    // parse label:
    var lab = ParseReferenceLabel(subj);
    if (lab == null || lab.Value.Length > Reference.MaximumReferenceLabelLength)
    {
        goto INVALID;
    }

    // colon:
    if (peek_char(subj) == ':')
    {
        subj.Position++;
    }
    else
    {
        goto INVALID;
    }

    // parse link url:
    spnl(subj);
    matchlen = Scanner.scan_link_url(subj.Buffer, subj.Position, subj.Length);

    // A definition needs a non-empty url. The inline link parser in this file tests the
    // result of scan_link_url with "> -1", so the scanner presumably reports failure with a
    // negative value; such a value must be rejected here as well because passing a
    // negative length to Substring throws.
    if (matchlen <= 0)
    {
        goto INVALID;
    }

    url = subj.Buffer.Substring(subj.Position, matchlen);
    url = CleanUrl(url);
    subj.Position += matchlen;

    // parse optional link_title
    beforetitle = subj.Position;
    spnl(subj);
    matchlen = Scanner.scan_link_title(subj.Buffer, subj.Position, subj.Length);
    if (matchlen > 0)
    {
        title = subj.Buffer.Substring(subj.Position, matchlen);
        title = CleanTitle(title);
        subj.Position += matchlen;
    }
    else
    {
        // no title: undo the whitespace skipped while looking for it
        subj.Position = beforetitle;
        title = string.Empty;
    }

    // parse final spaces and newline:
    while (peek_char(subj) == ' ')
    {
        subj.Position++;
    }

    if (peek_char(subj) == '\n')
    {
        subj.Position++;
    }
    else if (peek_char(subj) != '\0')
    {
        // anything other than a line ending or the '\0' returned by peek_char invalidates the definition
        goto INVALID;
    }

    // insert reference into refmap
    AddReference(subj.ReferenceMap, lab.Value, url, title);

    return subj.Position;

INVALID:
    subj.Position = startPos;
    return 0;
}