/// <summary>
/// Consumes the newline at the current position together with any spaces that begin the
/// following line, and returns the corresponding break inline.
/// Assumes the subject has a newline at the current position.
/// </summary>
/// <returns>
/// A hard line break when the two characters directly before the newline are spaces,
/// otherwise a soft break.
/// </returns>
static Syntax.Inline handle_newline(Subject subj)
{
    int newlineIndex = subj.Position;

    // step over the newline itself
    advance(subj);

    // spaces at the start of the next line are dropped
    while (peek_char(subj) == ' ')
        advance(subj);

    bool precededByTwoSpaces = newlineIndex > 1
        && BString.bchar(subj.Buffer, newlineIndex - 1) == ' '
        && BString.bchar(subj.Buffer, newlineIndex - 2) == ' ';

    if (precededByTwoSpaces)
        return make_linebreak();

    return make_softbreak();
}
/// <summary>
/// Parses an autolink (URI or e-mail) or a raw HTML tag.
/// Assumes the subject has a '&lt;' character at the current position.
/// </summary>
/// <returns>
/// A link inline for an autolink, a raw HTML inline for a tag, or a plain "&lt;" string inline
/// when nothing matches.
/// </returns>
static Inline handle_pointy_brace(Subject subj)
{
    // advance past first <
    subj.Position++;

    // a URI autolink takes precedence
    var length = Scanner.scan_autolink_uri(subj.Buffer, subj.Position, subj.Length);
    if (length > 0)
        return CreateAutolinkInline(subj, length, string.Empty);

    // then an e-mail autolink; its target gets the mailto: scheme
    length = Scanner.scan_autolink_email(subj.Buffer, subj.Position, subj.Length);
    if (length > 0)
        return CreateAutolinkInline(subj, length, "mailto:");

    // finally an HTML tag
    length = Scanner.scan_html_tag(subj.Buffer, subj.Position, subj.Length);
    if (length > 0)
    {
        // the raw HTML inline includes the opening '<' that was already skipped
        var html = new Inline(InlineTag.RawHtml, subj.Buffer, subj.Position - 1, length + 1);
        html.SourcePosition = subj.Position - 1;
        subj.Position += length;
        html.SourceLastPosition = subj.Position;
        return html;
    }

    // if nothing matches, just return the opening <
    return new Inline("<", subj.Position - 1, subj.Position);
}

/// <summary>
/// Builds the link inline for an autolink whose scanner match (of <paramref name="matchlen"/>
/// characters) starts at the current position, and moves the subject past the match.
/// The last matched character is excluded from the link text.
/// </summary>
private static Inline CreateAutolinkInline(Subject subj, int matchlen, string urlPrefix)
{
    var contents = subj.Buffer.Substring(subj.Position, matchlen - 1);
    var label = ParseStringEntities(contents);
    var link = Inline.CreateLink(label, urlPrefix + contents, string.Empty);

    link.SourcePosition = subj.Position - 1;
    label.SourcePosition = subj.Position;

    subj.Position += matchlen;

    link.SourceLastPosition = subj.Position;
    label.SourceLastPosition = subj.Position - 1;
    return link;
}
/// <summary>
/// Closes an open block and performs the tag-specific clean-up of its content.
/// Does nothing when the block is already closed.
/// </summary>
/// <param name="b">The block to close.</param>
/// <param name="line">Information about the line on which the block is being closed.</param>
public static void Finalize(Block b, LineInfo line)
{
    if (!b.IsOpen)
        return;

    b.IsOpen = false;

    if (line.IsTrackingPositions)
    {
        // (b.SourcePosition >= line.LineOffset) determines if the block started on this line.
        var startedOnThisLine = b.SourcePosition >= line.LineOffset;
        var endOffset = (startedOnThisLine && line.Line != null) ? line.Line.Length : 0;
        b.SourceLastPosition = line.CalculateOrigin(endOffset, false);
    }

#pragma warning disable 0618
    b.EndLine = (line.LineNumber > b.StartLine) ? line.LineNumber - 1 : line.LineNumber;
#pragma warning restore 0618

    switch (b.Tag)
    {
        case BlockTag.Paragraph:
        {
            var content = b.StringContent;
            if (!content.StartsWith('['))
                break;

            var subj = new Subject(b.Top.ReferenceMap);
            content.FillSubject(subj);
            var initialPos = subj.Position;

            // consume as many leading reference definitions as can be parsed
            while (subj.Position < subj.Buffer.Length
                && subj.Buffer[subj.Position] == '['
                && 0 != InlineMethods.ParseReference(subj))
            {
            }

            if (subj.Position == initialPos)
                break;

            // keep only what follows the parsed definitions
            content.Replace(subj.Buffer, subj.Position, subj.Buffer.Length - subj.Position);

            if (content.PositionTracker != null)
                content.PositionTracker.AddBlockOffset(subj.Position - initialPos);

            if (Utilities.IsFirstLineBlank(subj.Buffer, subj.Position))
                b.Tag = BlockTag.ReferenceDefinition;

            break;
        }

        case BlockTag.IndentedCode:
            b.StringContent.RemoveTrailingBlankLines();
            break;

        case BlockTag.FencedCode:
        {
            // first line of contents becomes info
            var infoLength = b.StringContent.IndexOf('\n') + 1;
            var infoLine = b.StringContent.TakeFromStart(infoLength, true);
            b.FencedCodeData.Info = InlineMethods.Unescape(infoLine.Trim());
            break;
        }

        case BlockTag.List:
        {
            // tight by default; any qualifying blank line makes the list loose
            b.ListData.IsTight = true;

            for (var item = b.FirstChild; item != null && b.ListData.IsTight; item = item.NextSibling)
            {
                // non-final list item ending with a blank line
                if (item.IsLastLineBlank && item.NextSibling != null)
                {
                    b.ListData.IsTight = false;
                    break;
                }

                // look for blank lines between the children of the item
                for (var child = item.FirstChild; child != null; child = child.NextSibling)
                {
                    if (EndsWithBlankLine(child) && (item.NextSibling != null || child.NextSibling != null))
                    {
                        b.ListData.IsTight = false;
                        break;
                    }
                }
            }

            break;
        }
    }
}
/// <summary>
/// Parses a backslash escape, a backslash line break, or a literal backslash.
/// The backslash is taken to be at the current position (it is skipped unconditionally).
/// </summary>
/// <returns>
/// A string inline with the escaped symbol, a line break inline when the backslash is followed
/// by a newline, or a string inline containing just the backslash.
/// </returns>
private static Inline handle_backslash(Subject subj)
{
    subj.Position++;

    var atEnd = subj.Position >= subj.Length;
    if (atEnd)
        return new Inline("\\", subj.Position - 1, subj.Position);

    var escaped = subj.Buffer[subj.Position];

    if (escaped == '\n')
    {
        // backslash directly before a newline is a hard line break
        subj.Position++;
        var hardBreak = new Inline(InlineTag.LineBreak);
        hardBreak.SourcePosition = subj.Position - 2;
        hardBreak.SourceLastPosition = subj.Position;
        return hardBreak;
    }

    if (!Utilities.IsEscapableSymbol(escaped))
    {
        // not escapable: the backslash is literal and the next character is left unconsumed
        return new Inline("\\", subj.Position - 1, subj.Position);
    }

    // which symbols are escapable is decided by Utilities.IsEscapableSymbol
    subj.Position++;
    return new Inline(escaped.ToString(), subj.Position - 2, subj.Position);
}
/// <summary>
/// Parses the entity at the current position and advances the subject past it.
/// Assumes that there is a <c>&amp;</c> at the current position.
/// </summary>
/// <returns>
/// The decoded entity; the original entity text when a named entity cannot be decoded;
/// U+FFFD when a numeric entity cannot be decoded; or "&amp;" (consuming a single character)
/// when no entity is matched.
/// </returns>
private static string ParseEntity(Subject subj)
{
    string namedEntity;
    int codePoint;
    var start = subj.Position;

    var matched = Scanner.scan_entity(subj.Buffer, subj.Position, subj.Length - subj.Position, out namedEntity, out codePoint);
    if (matched <= 0)
    {
        // not an entity - only the ampersand is consumed
        subj.Position++;
        return "&";
    }

    subj.Position += matched;

    if (namedEntity != null)
    {
        var decodedName = EntityDecoder.DecodeEntity(namedEntity);
        if (decodedName != null)
            return decodedName;

        // unknown named entity: keep the source text as it was written
        return subj.Buffer.Substring(start, matched);
    }

    if (codePoint > 0)
    {
        var decodedNumber = EntityDecoder.DecodeEntity(codePoint);
        if (decodedNumber != null)
            return decodedNumber;
    }

    return "\uFFFD";
}
/// <summary>
/// Handles a '[' that is not part of an image marker by delegating to the three-argument
/// overload with <c>isImage</c> set to <c>false</c>.
/// </summary>
private static Inline HandleLeftSquareBracket(Subject subj, CommonMarkSettings settings)
{
    const bool isImage = false;
    return HandleLeftSquareBracket(subj, isImage, settings);
}
/// <summary>
/// Completes the handling of a ']' for which a '[' opener exists on the inline stack.
/// When <paramref name="details"/> is not null the opener's inline is converted into a link or
/// image; otherwise the opener is removed from the stack and a literal "]" is appended.
/// </summary>
/// <param name="opener">The stack entry of the opening bracket.</param>
/// <param name="subj">The subject, positioned after the closing bracket (and any link details).</param>
/// <param name="details">The resolved link target, or <c>null</c> if this is not a link.</param>
/// <param name="parameters">Parameters passed on to the inline stack operations.</param>
internal static void MatchSquareBracketStack(InlineStack opener, Subject subj, Reference details, InlineParserParameters parameters)
{
    if (details == null)
    {
        // this looked like a link, but was not.
        // remove the opener stack entry but leave the inbetween intact
        InlineStack.RemoveStackEntry(opener, subj, opener, parameters);

        var closingBracket = new Inline("]", subj.Position - 1, subj.Position);
        subj.LastInline.LastSibling.NextSibling = closingBracket;
        subj.LastInline = closingBracket;
        return;
    }

    var link = opener.StartingInline;
    var isImage = (opener.Flags & InlineStack.InlineStackFlags.ImageLink) != 0;

    // reuse the opener's inline as the link: what followed it becomes its children
    link.Tag = isImage ? InlineTag.Image : InlineTag.Link;
    link.FirstChild = link.NextSibling;
    link.NextSibling = null;
    link.SourceLastPosition = subj.Position;
    link.TargetUrl = details.Url;
    link.LiteralContent = details.Title;

    if (!isImage)
    {
        // since there cannot be nested links, mark earlier link openers as "inactive"
        for (var entry = opener.Previous;
            entry != null && entry.Priority <= InlineStack.InlineStackPriority.Links;
            entry = entry.Previous)
        {
            if (entry.Delimeter != '[' || entry.Flags != opener.Flags)
                continue;

            // an already inactive entry means everything before it was handled earlier
            if (entry.DelimeterCount == -1)
                break;

            entry.DelimeterCount = -1;
        }
    }

    InlineStack.RemoveStackEntry(opener, subj, null, parameters);
    subj.LastInline = link;
}
/// <summary>
/// Parses the whole subject into a chain of sibling inlines and then post-processes the
/// entries left on the inline stack.
/// </summary>
/// <param name="subj">The subject to parse.</param>
/// <param name="refmap">Not used by this method.</param>
/// <param name="parameters">The parser parameters passed to <c>ParseInline</c> and the stack post-processing.</param>
/// <returns>The first parsed inline, or <c>null</c> when the subject length is zero.</returns>
public static Inline parse_inlines(Subject subj, Dictionary<string, Reference> refmap, InlineParserParameters parameters)
{
    var end = subj.Length;
    if (end == 0)
        return null;

    var head = ParseInline(subj, parameters);
    subj.LastInline = head.LastSibling;

    while (subj.Position < end)
    {
        var next = ParseInline(subj, parameters);

        // a null result means the parser produced nothing to append for this step
        if (next == null)
            continue;

        subj.LastInline.NextSibling = next;
        subj.LastInline = next.LastSibling;
    }

    InlineStack.PostProcessInlineStack(
        subj,
        subj.FirstPendingInline,
        subj.LastPendingInline,
        InlineStack.InlineStackPriority.Maximum,
        parameters);

    return head;
}
/// <summary>
/// Advances the subject over zero or more spaces, including at most one newline.
/// </summary>
private static void spnl(Subject subj)
{
    var newlineConsumed = false;

    for (var end = subj.Length; subj.Position < end; subj.Position++)
    {
        var current = subj.Buffer[subj.Position];
        if (current == ' ')
            continue;

        // anything other than the first newline ends the run
        if (current != '\n' || newlineConsumed)
            return;

        newlineConsumed = true;
    }
}
/// <summary>
/// Consumes the newline at the current position together with the spaces that begin the
/// following line, and returns the corresponding break inline.
/// Assumes the subject has a newline at the current position.
/// </summary>
/// <returns>
/// A line break inline (whose source range starts at the two preceding spaces) when the newline
/// is preceded by two spaces, otherwise a soft break inline covering only the newline.
/// </returns>
static Inline handle_newline(Subject subj)
{
    var newlineAt = subj.Position;

    // skip over newline
    subj.Position++;

    // skip spaces at beginning of line
    for (var limit = subj.Length; subj.Position < limit && subj.Buffer[subj.Position] == ' '; subj.Position++)
    {
    }

    var isHardBreak = newlineAt > 1
        && subj.Buffer[newlineAt - 1] == ' '
        && subj.Buffer[newlineAt - 2] == ' ';

    if (!isHardBreak)
    {
        var softBreak = new Inline(InlineTag.SoftBreak);
        softBreak.SourcePosition = newlineAt;
        softBreak.SourceLastPosition = newlineAt + 1;
        return softBreak;
    }

    var hardBreak = new Inline(InlineTag.LineBreak);
    hardBreak.SourcePosition = newlineAt - 2;
    hardBreak.SourceLastPosition = newlineAt + 1;
    return hardBreak;
}
/// <summary>
/// Parses one inline element from the subject. The subject position is updated to after the element.
/// </summary>
/// <param name="subj">The subject; its position must be inside the buffer.</param>
/// <param name="parameters">Supplies the per-character parser table and the special characters.</param>
/// <returns>
/// The result of the parser registered for the current character, or - when there is none - a
/// string inline running up to the next special character (or the end of the subject).
/// </returns>
public static Inline ParseInline(Subject subj, InlineParserParameters parameters)
{
    var handlers = parameters.Parsers;
    var stopCharacters = parameters.SpecialCharacters;

    var first = subj.Buffer[subj.Position];

    // characters outside the table have no dedicated parser
    var handler = first < handlers.Length ? handlers[first] : null;
    if (handler != null)
        return handler(subj);

    var start = subj.Position;

    // read until the next special character;
    // +1 is so that any special character at the current position is ignored.
    var stop = subj.Buffer.IndexOfAny(stopCharacters, start + 1, subj.Length - start - 1);
    if (stop == -1)
        stop = subj.Length;

    subj.Position = stop;

    // if we're at a newline, strip trailing spaces.
    var stoppedAtNewline = stop < subj.Length && subj.Buffer[stop] == '\n';
    if (stoppedAtNewline)
    {
        while (stop > start && subj.Buffer[stop - 1] == ' ')
            stop--;
    }

    return new Inline(subj.Buffer, start, stop - start, start, stop, first);
}
/// <summary>
/// Advances the subject over zero or more spaces, including at most one newline.
/// </summary>
private static void spnl(Subject subj)
{
    bool newlineTaken = false;

    while (true)
    {
        if (peek_char(subj) != ' ')
        {
            // only the first newline may be consumed
            if (newlineTaken)
                break;

            newlineTaken = peek_char(subj) == '\n';
            if (!newlineTaken)
                break;
        }

        advance(subj);
    }
}
/// <summary>
/// Parses a single inline element at the current position, advancing the subject.
/// </summary>
/// <returns>The parsed inline, or <c>null</c> when there is no character left to read.</returns>
public static Syntax.Inline parse_inline(Subject subj)
{
    char? next = peek_char(subj);
    if (next == null)
        return null;

    switch (next)
    {
        case '\n':
            return handle_newline(subj);

        case '`':
            return handle_backticks(subj);

        case '\\':
            return handle_backslash(subj);

        case '&':
            return handle_entity(subj);

        case '<':
            return handle_pointy_brace(subj);

        case '_':
            return HandleEmphasis(subj, '_');

        case '*':
            return HandleEmphasis(subj, '*');

        case '[':
            return handle_left_bracket(subj);

        case '!':
        {
            advance(subj);

            if (peek_char(subj) != '[')
                return make_str("!");

            // "![" - parse as a link and convert it to an image if it really was a link
            Syntax.Inline bracketed = handle_left_bracket(subj);
            if (bracketed != null && bracketed.Tag == InlineTag.Link)
            {
                bracketed.Tag = InlineTag.Image;
                return bracketed;
            }

            return append_inlines(make_str("!"), bracketed);
        }

        default:
        {
            string text;

            // plain text runs until the next special character
            int stop = BString.binchr(subj.Buffer, subj.Position, "\n\\`&_*[]<!");

            if (stop == subj.Position)
            {
                // current char is special: read a 1-character str
                text = take_one(subj);
            }
            else
            {
                bool foundSpecial = stop != -1;
                if (!foundSpecial)
                {
                    // special char not found, take whole rest of buffer
                    stop = subj.Buffer.Length;
                }

                text = BString.bmidstr(subj.Buffer, subj.Position, stop - subj.Position);
                subj.Position = stop;

                // if we're at a newline, strip trailing whitespace.
                if (foundSpecial && peek_char(subj) == '\n')
                    text = text.TrimEnd();
            }

            return make_str(text);
        }
    }
}
/// <summary>
/// Parses inlines until the end of the subject is reached, linking them into one chain.
/// </summary>
/// <returns>The first parsed inline, or <c>null</c> when the subject is already at its end.</returns>
public static Syntax.Inline parse_inlines_while(Subject subj)
{
    Syntax.Inline head = null;

    while (!is_eof(subj))
    {
        Syntax.Inline parsed = parse_inline(subj);

        if (head == null)
            head = parsed;
        else
            subj.LastInline.Next = parsed;

        // the parsed inline may itself be a chain - remember its tail
        subj.LastInline = parsed.LastSibling;
    }

    return head;
}
/// <summary>
/// Handles a run of delimiter characters (such as emphasis markers) at the current position.
/// If the run can close and a matching opener exists on the inline stack, the two are matched;
/// otherwise the run is returned as text and, when it can open or close, recorded on the stack.
/// </summary>
/// <param name="subj">The subject, positioned at the first delimiter character.</param>
/// <param name="singleCharTag">The tag produced by one delimiter, or 0 if a single delimiter has no meaning.</param>
/// <param name="doubleCharTag">The tag produced by two delimiters, or 0 if a double delimiter has no meaning.</param>
/// <param name="parameters">Parameters passed on to the inline stack operations.</param>
/// <returns>
/// <c>null</c> when the run was consumed by matching an opener, otherwise a string inline
/// containing the delimiter characters.
/// </returns>
private static Inline HandleOpenerCloser(Subject subj, InlineTag singleCharTag, InlineTag doubleCharTag, InlineParserParameters parameters)
{
    bool canOpen, canClose;
    var c = subj.Buffer[subj.Position];
    var numdelims = ScanEmphasisDelimeters(subj, c, out canOpen, out canClose);

    if (canClose)
    {
        // walk the stack and find a matching opener, if there is one
        var istack = InlineStack.FindMatchingOpener(subj.LastPendingInline, InlineStack.InlineStackPriority.Emphasis, c, out canClose);
        if (istack != null)
        {
            var useDelims = MatchInlineStack(istack, subj, numdelims, null, singleCharTag, doubleCharTag, parameters);

            // MatchInlineStack returns 0 (and changes nothing) when only a single delimiter could
            // be matched but no single-char tag is defined. Rewinding in that case would put the
            // subject back at the start of the same run and repeat this exact call forever, so
            // the run is instead treated as text below.
            if (useDelims > 0)
            {
                // if the closer was not fully used, move back a char or two and try again.
                if (useDelims < numdelims)
                {
                    subj.Position = subj.Position - numdelims + useDelims;

                    // use recursion only if it will not be very deep.
                    if (numdelims < 10)
                        return HandleOpenerCloser(subj, singleCharTag, doubleCharTag, parameters);
                }

                return null;
            }
        }
    }

    var inlText = new Inline(subj.Buffer, subj.Position - numdelims, numdelims, subj.Position - numdelims, subj.Position, c);

    if (canOpen || canClose)
    {
        // remember the run so that a later closer (or the post-processing pass) can match it
        var istack = new InlineStack();
        istack.DelimeterCount = numdelims;
        istack.Delimeter = c;
        istack.StartingInline = inlText;
        istack.Priority = InlineStack.InlineStackPriority.Emphasis;
        istack.Flags = (canOpen ? InlineStack.InlineStackFlags.Opener : 0)
                     | (canClose ? InlineStack.InlineStackFlags.Closer : 0);

        InlineStack.AppendStackEntry(istack, subj);
    }

    return inlText;
}
/// <summary>
/// Parses the contents of [..] for a reference label. Only intended for reference definition
/// labels and reference lookups, because nested inlines are not parsed.
///
/// The subject must start with '[' or with spaces/newlines followed by '['.
/// A backslash skips the character after it; an unescaped '[' inside the label, or a missing
/// ']' within <c>Reference.MaximumReferenceLabelLength</c> characters, makes the label invalid.
/// </summary>
/// <returns>
/// The label text between the brackets (the subject is moved past the closing ']'), or
/// <c>null</c> with the subject position restored when no valid label is found.
/// </returns>
private static StringPart? ParseReferenceLabel(Subject subj)
{
    var startPos = subj.Position;
    var source = subj.Buffer;
    var limit = subj.Length;

    // skip whitespace up to and including the opening bracket
    for (; subj.Position < limit; subj.Position++)
    {
        var lead = source[subj.Position];
        if (lead == ' ' || lead == '\n')
            continue;

        if (lead != '[')
        {
            subj.Position = startPos;
            return null;
        }

        subj.Position++;
        break;
    }

    var labelStart = subj.Position;

    // the closing bracket is only searched for within the maximum label length
    limit = labelStart + Reference.MaximumReferenceLabelLength;
    if (limit > source.Length)
        limit = source.Length;

    var cursor = source.IndexOfAny(BracketSpecialCharacters, labelStart, limit - labelStart);
    while (cursor > -1)
    {
        var found = source[cursor];

        if (found == '[')
            break;

        if (found != '\\')
        {
            // closing bracket: the label is everything in between
            subj.Position = cursor + 1;
            return new StringPart(source, labelStart, cursor - labelStart);
        }

        // backslash: skip it together with the character it escapes
        cursor += 2;
        if (cursor >= limit)
            break;

        cursor = source.IndexOfAny(BracketSpecialCharacters, cursor, limit - cursor);
    }

    subj.Position = startPos;
    return null;
}
/// <summary>
/// Handles a '!' at the current position: when it is followed by '[' an image opener is
/// started, otherwise the exclamation mark is returned as plain text.
/// </summary>
private static Inline HandleExclamation(Subject subj, CommonMarkSettings settings)
{
    // move past the '!'
    subj.Position++;

    var opensImage = peek_char(subj) == '[';
    if (!opensImage)
        return new Inline("!", subj.Position - 1, subj.Position);

    return HandleLeftSquareBracket(subj, true, settings);
}
/// <summary>
/// Parses a link reference definition (<c>[label]: url "title"</c>) at the current position
/// and, when one is found, adds it to the reference map of the subject.
/// Assumes the string begins with a '[' character.
/// </summary>
/// <param name="subj">The subject to parse; on failure its position is restored.</param>
/// <param name="settings">Settings passed on to <c>AddReference</c>.</param>
/// <returns>
/// 0 if no reference definition was found, otherwise the position of the subject after the
/// definition.
/// </returns>
public static int ParseReference(Subject subj, CommonMarkSettings settings)
{
    string title;
    var startPos = subj.Position;

    // parse label:
    var lab = ParseReferenceLabel(subj);
    if (lab == null || lab.Value.Length > Reference.MaximumReferenceLabelLength)
        goto INVALID;

    if (!Scanner.HasNonWhitespace(lab.Value))
        goto INVALID;

    // colon:
    if (peek_char(subj) == ':')
        subj.Position++;
    else
        goto INVALID;

    // parse link url:
    spnl(subj);
    var matchlen = Scanner.scan_link_url(subj.Buffer, subj.Position, subj.Length);
    if (matchlen == 0)
        goto INVALID;

    var url = subj.Buffer.Substring(subj.Position, matchlen);
    url = CleanUrl(url);
    subj.Position += matchlen;

    // parse optional link_title
    var beforetitle = subj.Position;
    spnl(subj);

    matchlen = Scanner.scan_link_title(subj.Buffer, subj.Position, subj.Length);
    if (matchlen > 0)
    {
        title = subj.Buffer.Substring(subj.Position, matchlen);
        title = CleanTitle(title);
        subj.Position += matchlen;
    }
    else
    {
        subj.Position = beforetitle;
        title = string.Empty;
    }

    char c;

    // parse final spaces and newline:
    while ((c = peek_char(subj)) == ' ')
        subj.Position++;

    if (c == '\n')
    {
        subj.Position++;
    }
    else if (c != '\0')
    {
        if (matchlen > 0)
        {
            // The would-be title is followed by other text, so it is not a title. The
            // definition can still be valid if the url itself is at the end of its line:
            // rewind before the title and discard it, because that text stays in the input
            // and must not also be recorded as the title of the reference.
            subj.Position = beforetitle;
            title = string.Empty;

            while ((c = peek_char(subj)) == ' ')
                subj.Position++;

            if (c == '\n')
                subj.Position++;
            else if (c != '\0')
                goto INVALID;
        }
        else
        {
            goto INVALID;
        }
    }

    // insert reference into refmap
    AddReference(subj.ReferenceMap, lab.Value, url, title, settings);

    return subj.Position;

INVALID:
    subj.Position = startPos;
    return 0;
}
/// <summary>
/// Handles a '[' at the current position (for images the preceding '!' has already been
/// consumed): creates the literal "[" or "![" inline, moves past the bracket and records an
/// opener entry with link priority on the inline stack.
/// </summary>
/// <param name="subj">The subject, positioned at the '[' character.</param>
/// <param name="isImage"><c>true</c> when the bracket was preceded by '!'.</param>
/// <param name="settings">Not used by this method.</param>
/// <returns>The string inline holding the opening characters.</returns>
private static Inline HandleLeftSquareBracket(Subject subj, bool isImage, CommonMarkSettings settings)
{
    // the image variant also covers the '!' just before the bracket
    var openerText = isImage
        ? new Inline("![", subj.Position - 1, subj.Position + 1)
        : new Inline("[", subj.Position, subj.Position + 1);

    // move past the '['
    subj.Position++;

    var entry = new InlineStack
    {
        Delimeter = '[',
        StartingInline = openerText,
        StartPosition = subj.Position,
        Priority = InlineStack.InlineStackPriority.Links,
        Flags = InlineStack.InlineStackFlags.Opener
              | (isImage ? InlineStack.InlineStackFlags.ImageLink : InlineStack.InlineStackFlags.None)
    };

    InlineStack.AppendStackEntry(entry, subj);

    return openerText;
}
/// <summary>
/// Returns the character at the current position of the subject without advancing,
/// or <c>'\0'</c> when the position is at or past the subject length.
/// </summary>
private static char peek_char(Subject subj)
{
    if (subj.Position < subj.Length)
        return subj.Buffer[subj.Position];

    return '\0';
}
/// <summary>
/// Handles a ']' at the current position. If a '[' opener is found on the inline stack, link
/// details (inline or reference) are resolved and the opener is matched; otherwise the bracket
/// is returned as plain text.
/// </summary>
/// <returns>
/// <c>null</c> when an active opener was processed (the result is attached by
/// <c>MatchSquareBracketStack</c>), otherwise a string inline containing "]".
/// </returns>
/// <exception cref="NotSupportedException">
/// No opener was found but the stack search reported that the bracket could still close;
/// stack priorities higher than Links are not supported here.
/// </exception>
private static Inline HandleRightSquareBracket(Subject subj, CommonMarkSettings settings)
{
    // move past ']'
    subj.Position++;

    bool canClose;
    var opener = InlineStack.FindMatchingOpener(subj.LastPendingInline, InlineStack.InlineStackPriority.Links, '[', out canClose);

    if (opener == null)
    {
        var literal = new Inline("]", subj.Position - 1, subj.Position);

        if (canClose)
        {
            // note that the current implementation will not work if there are other inlines with
            // priority higher than Links. To support that, the parsed link details would have to
            // be stored on a closer entry that is appended to the stack here.
            throw new NotSupportedException("It is not supported to have inline stack priority higher than Links.");
        }

        return literal;
    }

    // if the opener is "inactive" then it means that there was a nested link
    if (opener.DelimeterCount == -1)
    {
        InlineStack.RemoveStackEntry(opener, subj, opener, settings.InlineParserParameters);
        return new Inline("]", subj.Position - 1, subj.Position);
    }

    var labelEnd = subj.Position;

    // try parsing details for '[foo](/url "title")' or '[foo][bar]'
    var details = ParseLinkDetails(subj, settings);

    // try lookup of the brackets themselves
    if (details == null || details == Reference.SelfReference)
    {
        var labelStart = opener.StartPosition;
        var label = new StringPart(subj.Buffer, labelStart, labelEnd - labelStart - 1);
        details = LookupReference(subj.ReferenceMap, label, settings);
    }

    if (details == Reference.InvalidReference)
        details = null;

    MatchSquareBracketStack(opener, subj, details, settings.InlineParserParameters);
    return null;
}
/// <summary>
/// Searches the subject, starting at its current position, for a run of backticks that is
/// exactly <paramref name="openticklength"/> characters long.
/// </summary>
/// <returns>
/// <c>0</c> if such a run cannot be found (the subject position is left unchanged), otherwise
/// the position right after the closing run, which is also stored in the subject.
/// </returns>
private static int ScanToClosingBackticks(Subject subj, int openticklength)
{
    // note (from the original implementation): searching with string.IndexOf("````", ...)
    // was measured ~2x slower, while jumping to the next backtick with IndexOf('`') was
    // ~1.5x faster than testing every character in the loop.

    var text = subj.Buffer;
    var length = text.Length;
    var index = subj.Position;
    var run = 0;

    while (index < length)
    {
        if (text[index] == '`')
        {
            run++;
            index++;
            continue;
        }

        // a run has just ended (or none has started) - check if it has the wanted length
        if (run == openticklength)
        {
            subj.Position = index;
            return index;
        }

        // jump directly to the next backtick, if there is any
        var nextTick = text.IndexOf('`', index, length - index);
        if (nextTick == -1)
            return 0;

        run = 0;
        index = nextTick;
    }

    // the closing run may sit at the very end of the buffer
    if (run == openticklength)
    {
        subj.Position = length;
        return length;
    }

    return 0;
}
/// <summary>
/// Parses the entity at the current position and wraps the result in a new string inline
/// whose source range covers the consumed characters.
/// Assumes that there is a <c>&amp;</c> at the current position.
/// </summary>
private static Inline HandleEntity(Subject subj)
{
    var start = subj.Position;

    // ParseEntity advances the subject, so the end position is read afterwards
    var text = ParseEntity(subj);

    return new Inline(text, start, subj.Position);
}
/// <summary>
/// Parses a backtick code span, or the raw backticks when no closing run is found.
/// Assumes that the subject has a backtick at the current position.
/// </summary>
/// <returns>
/// A code inline with whitespace-normalized content, or a string inline containing only the
/// opening backticks (the subject is then positioned right after them).
/// </returns>
static Inline handle_backticks(Subject subj)
{
    var openerStart = subj.Position;
    var limit = subj.Length;

    // consume the opening run of backticks
    while (subj.Position < limit && subj.Buffer[subj.Position] == '`')
        subj.Position++;

    var contentStart = subj.Position;
    var tickCount = contentStart - openerStart;

    var closerEnd = ScanToClosingBackticks(subj, tickCount);
    if (closerEnd == 0)
    {
        // closing not found - rewind to right after the opening ticks
        subj.Position = contentStart;
        return new Inline(new string('`', tickCount), openerStart, contentStart);
    }

    // the content lies between the opening run and the closing run
    var code = new Inline(InlineTag.Code, NormalizeWhitespace(subj.Buffer, contentStart, closerEnd - contentStart - tickCount));
    code.SourcePosition = openerStart;
    code.SourceLastPosition = closerEnd;
    return code;
}
/// <summary>
/// Creates a new <see cref="Inline"/> element that represents string content, with the
/// HTML entities in the given text decoded.
/// A single Inline is returned, built from one string.
/// </summary>
private static Inline ParseStringEntities(string s)
{
    // The most recent segment is kept in `pending` and only moved into the builder when
    // another segment follows, so that a text made of a single segment needs no builder.
    string pending = null;
    StringBuilder combined = null;

    var subj = new Subject(s, null);

    while (peek_char(subj) != '\0')
    {
        if (pending != null)
        {
            if (combined == null)
                combined = new StringBuilder(pending, s.Length);
            else
                combined.Append(pending);
        }

        if (peek_char(subj) == '&')
        {
            pending = ParseEntity(subj);
        }
        else
        {
            // plain text up to the next ampersand (or the end of the subject)
            var nextAmpersand = subj.Buffer.IndexOf('&', subj.Position);
            if (nextAmpersand == -1)
                nextAmpersand = subj.Length;

            pending = subj.Buffer.Substring(subj.Position, nextAmpersand - subj.Position);
            subj.Position = nextAmpersand;
        }
    }

    if (combined == null)
        return new Inline(pending);

    combined.Append(pending);
    return new Inline(combined.ToString());
}
/// <summary>
/// Scans the subject for a run of the given delimiter character starting at the current
/// position and determines whether the run could open and/or close an element.
/// When the run is not empty the subject is moved past it.
/// </summary>
/// <param name="subj">The subject to scan.</param>
/// <param name="delimeter">The delimiter character to count.</param>
/// <param name="canOpen">Set to <c>true</c> if the run can act as an opener.</param>
/// <param name="canClose">Set to <c>true</c> if the run can act as a closer.</param>
/// <returns>The number of delimiter characters in the run (0 if there is none).</returns>
private static int ScanEmphasisDelimeters(Subject subj, char delimeter, out bool canOpen, out bool canClose)
{
    int runStart = subj.Position;
    int limit = subj.Length;

    int count = 0;
    while (runStart + count < limit && subj.Buffer[runStart + count] == delimeter)
        count++;

    if (count == 0)
    {
        canOpen = false;
        canClose = false;
        return 0;
    }

    int runEnd = runStart + count;

    // the start and the end of the subject are treated like a newline
    char charBefore = runStart == 0 ? '\n' : subj.Buffer[runStart - 1];
    subj.Position = runEnd;
    char charAfter = runEnd == limit ? '\n' : subj.Buffer[runEnd];

    bool beforeIsSpace, beforeIsPunctuation, afterIsSpace, afterIsPunctuation;
    Utilities.CheckUnicodeCategory(charBefore, out beforeIsSpace, out beforeIsPunctuation);
    Utilities.CheckUnicodeCategory(charAfter, out afterIsSpace, out afterIsPunctuation);

    // can open: not followed by a space, and if followed by punctuation then preceded by
    // a space or punctuation. Closing is the mirror image of that rule.
    bool opens = !afterIsSpace && (!afterIsPunctuation || beforeIsSpace || beforeIsPunctuation);
    bool closes = !beforeIsSpace && (!beforeIsPunctuation || afterIsSpace || afterIsPunctuation);

    canOpen = opens;
    canClose = closes;

    if (delimeter == '_')
    {
        // a run that could do both only opens when preceded by punctuation
        // and only closes when followed by punctuation
        canOpen = opens && (!closes || beforeIsPunctuation);
        canClose = closes && (!opens || afterIsPunctuation);
    }

    if (delimeter == '$')
    {
        // a '$' run directly followed by a digit cannot open
        canOpen = canOpen && !char.IsDigit(charAfter);
    }

    return count;
}
/// <summary>
/// Parses what follows the closing ']' of a potential link: either an inline target
/// <c>(url "title")</c> or a second reference label <c>[bar]</c>.
/// </summary>
/// <returns>
/// A new <see cref="Reference"/> for an inline target; the looked-up reference for a known
/// label; <c>Reference.SelfReference</c> for an empty label; <c>Reference.InvalidReference</c>
/// for a label that is not defined; or <c>null</c> when nothing matched. Except when a target
/// or a label was accepted, the subject position is restored.
/// </returns>
static Reference ParseLinkDetails(Subject subj, CommonMarkSettings settings)
{
    var rollbackPos = subj.Position;
    var next = peek_char(subj);

    if (next == '(')
    {
        // try to parse an explicit link:
        var spaces = Scanner.scan_spacechars(subj.Buffer, subj.Position + 1, subj.Length);
        if (spaces > -1)
        {
            var urlLength = Scanner.scan_link_url(subj.Buffer, subj.Position + 1 + spaces, subj.Length);
            if (urlLength > -1)
            {
                var urlStart = subj.Position + 1 + spaces; // after (
                var urlEnd = urlStart + urlLength;
                var titleStart = urlEnd + Scanner.scan_spacechars(subj.Buffer, urlEnd, subj.Length);

                // a title is only scanned when there are spaces between it and the url
                var titleEnd = titleStart;
                if (titleStart != urlEnd)
                    titleEnd = titleStart + Scanner.scan_link_title(subj.Buffer, titleStart, subj.Length);

                var closePos = titleEnd + Scanner.scan_spacechars(subj.Buffer, titleEnd, subj.Length);

                if (closePos < subj.Length && subj.Buffer[closePos] == ')')
                {
                    subj.Position = closePos + 1;

                    var url = CleanUrl(subj.Buffer.Substring(urlStart, urlEnd - urlStart));
                    var title = CleanTitle(subj.Buffer.Substring(titleStart, titleEnd - titleStart));

                    return new Reference() { Title = title, Url = url };
                }
            }
        }
    }
    else if (next == '[' || next == ' ' || next == '\n')
    {
        var label = ParseReferenceLabel(subj);
        if (label != null)
        {
            if (label.Value.Length == 0)
                return Reference.SelfReference;

            var found = LookupReference(subj.ReferenceMap, label.Value, settings);
            if (found != null)
                return found;

            // rollback the subject but return InvalidReference so that the caller knows not to
            // parse 'foo' from [foo][bar].
            subj.Position = rollbackPos;
            return Reference.InvalidReference;
        }
    }

    // rollback the subject position because didn't match anything.
    subj.Position = rollbackPos;
    return null;
}
/// <summary>
/// Matches an opener on the inline stack with a closing delimiter run and creates (or reuses)
/// the inline element that wraps everything in between.
/// </summary>
/// <param name="opener">The stack entry of the opening delimiter run.</param>
/// <param name="subj">The subject; only used for stack removal and when <paramref name="closer"/> is null.</param>
/// <param name="closingDelimeterCount">The number of delimiter characters in the closing run.</param>
/// <param name="closer">
/// <c>null</c> when called during the initial parsing (the closing run is just before the
/// current subject position), or the closer's stack entry when called from the second pass
/// over previously unmatched stack entries.
/// </param>
/// <param name="singleCharTag">The tag for a single delimiter, or 0 if not available.</param>
/// <param name="doubleCharTag">The tag for a double delimiter, or 0 if not available.</param>
/// <param name="parameters">Parameters passed on to the inline stack operations.</param>
/// <returns>
/// The number of delimiter characters used from the closer; 0 (with nothing modified) when
/// only a single delimiter could be used but no single-char tag is available.
/// </returns>
internal static int MatchInlineStack(InlineStack opener, Subject subj, int closingDelimeterCount, InlineStack closer, InlineTag singleCharTag, InlineTag doubleCharTag, InlineParserParameters parameters)
{
    // calculate the actual number of delimeters used from this closer
    int useDelims;
    var openerDelims = opener.DelimeterCount;

    if (closingDelimeterCount < 3 || openerDelims < 3)
    {
        // short runs: use the smaller of the two counts
        useDelims = openerDelims < closingDelimeterCount ? openerDelims : closingDelimeterCount;

        if (useDelims == 2 && doubleCharTag == 0)
            useDelims = 1;

        if (useDelims == 1 && singleCharTag == 0)
            return 0;
    }
    else if (singleCharTag == 0)
    {
        useDelims = 2;
    }
    else if (doubleCharTag == 0)
    {
        useDelims = 1;
    }
    else
    {
        useDelims = closingDelimeterCount % 2 == 0 ? 2 : 1;
    }

    InlineTag tag = useDelims == 1 ? singleCharTag : doubleCharTag;
    Inline element = opener.StartingInline;

    if (openerDelims == useDelims)
    {
        // the opener is completely used up - remove the stack entry and reuse the inline element
        element.Tag = tag;
        element.LiteralContent = null;
        element.FirstChild = element.NextSibling;
        element.NextSibling = null;

        InlineStack.RemoveStackEntry(opener, subj, closer?.Previous, parameters);
    }
    else
    {
        // the opener will only partially be used - the stack entry remains (truncated) and a
        // new inline is added right after the shortened text.
        opener.DelimeterCount -= useDelims;
        element.LiteralContent = element.LiteralContent.Substring(0, opener.DelimeterCount);
        element.SourceLastPosition -= useDelims;

        element.NextSibling = new Inline(tag, element.NextSibling);
        element = element.NextSibling;

        element.SourcePosition = opener.StartingInline.SourcePosition + opener.DelimeterCount;
    }

    if (closer == null)
    {
        // called during the initial subject parsing: the closer characters are just before the
        // current position and nothing located after the matched element has been parsed yet.
        if (subj != null)
        {
            element.SourceLastPosition = subj.Position - closingDelimeterCount + useDelims;
            subj.LastInline = element;
        }

        return useDelims;
    }

    // called from the second pass over unmatched stack entries: there can be other elements
    // after the closer, so the sibling chain has to be re-linked around it.
    var closerInline = closer.StartingInline;
    closer.DelimeterCount -= useDelims;

    if (closer.DelimeterCount > 0)
    {
        // a new inline element must be created for the leftover delimiters because the old one
        // has to be the one that finalizes the children of the emphasis
        var leftover = new Inline(closerInline.LiteralContent.Substring(useDelims));

        var boundary = closerInline.SourcePosition + useDelims;
        element.SourceLastPosition = boundary;
        leftover.SourcePosition = boundary;
        leftover.SourceLength = closer.DelimeterCount;
        leftover.NextSibling = closerInline.NextSibling;

        closerInline.LiteralContent = null;
        closerInline.NextSibling = null;

        closer.StartingInline = leftover;
        element.NextSibling = leftover;
    }
    else
    {
        // the closer is used up completely
        element.SourceLastPosition = closerInline.SourceLastPosition;

        closerInline.LiteralContent = null;
        element.NextSibling = closerInline.NextSibling;
        closerInline.NextSibling = null;
    }

    return useDelims;
}
/// <summary>
/// Walks through the block, its children and siblings, parsing string content into inline
/// content for paragraphs and headers.
/// </summary>
/// <param name="block">The document level block from which to start the processing.</param>
/// <param name="refmap">The reference mapping used when parsing links.</param>
/// <param name="settings">The settings that supply the inline parsers and their special characters.</param>
public static void ProcessInlines(Block block, Dictionary<string, Reference> refmap, CommonMarkSettings settings)
{
    Stack<Inline> inlineStack = null;

    // siblings that still have to be visited after the children of an earlier block
    var pending = new Stack<Block>();

    var parsers = settings.InlineParsers;
    var specialCharacters = settings.InlineParserSpecialCharacters;
    var subj = new Subject(refmap);

    while (block != null)
    {
        var tag = block.Tag;
        var holdsInlineText = tag == BlockTag.Paragraph
            || tag == BlockTag.AtxHeader
            || tag == BlockTag.SETextHeader;

        StringContent content = holdsInlineText ? block.StringContent : null;
        if (content != null)
        {
            content.FillSubject(subj);

            // the position the subject starts at; used to adjust the tracked offsets below
            int delta = subj.Position;

            block.InlineContent = InlineMethods.parse_inlines(subj, refmap, parsers, specialCharacters);
            block.StringContent = null;

            var tracker = content.PositionTracker;
            if (tracker != null)
            {
                tracker.AddBlockOffset(-delta);
                AdjustInlineSourcePosition(block.InlineContent, tracker, ref inlineStack);
            }
        }

        // depth-first traversal without recursion
        if (block.FirstChild != null)
        {
            if (block.NextSibling != null)
                pending.Push(block.NextSibling);

            block = block.FirstChild;
        }
        else if (block.NextSibling != null)
        {
            block = block.NextSibling;
        }
        else
        {
            block = pending.Count > 0 ? pending.Pop() : null;
        }
    }
}
/// <summary>
/// Parses a link (or the link portion of an image) starting at the current position, or
/// returns a plain-text fallback.
/// </summary>
/// <returns>
/// A link inline for an inline link or a resolved reference link; the label wrapped in
/// literal "[" and "]" when a label was found but no link; or just "[" when there is no label.
/// </returns>
static Syntax.Inline handle_left_bracket(Subject subj)
{
    string rawlabel = "";
    string secondLabel = "";
    int spaces;
    int urlLength;
    Syntax.Inline labelInlines;

    bool hasLabel = link_label(subj, ref rawlabel);
    int labelEnd = subj.Position;

    if (!hasLabel)
    {
        // no label at all, so this is not a link
        advance(subj); // advance past [
        return make_str("[");
    }

    if (peek_char(subj) == '('
        && ((spaces = Scanner.scan_spacechars(subj.Buffer, subj.Position + 1)) > -1)
        && ((urlLength = Scanner.scan_link_url(subj.Buffer, subj.Position + 1 + spaces)) > -1))
    {
        // try to parse an explicit link:
        int urlStart = subj.Position + 1 + spaces; // after (
        int urlEnd = urlStart + urlLength;
        int titleStart = urlEnd + Scanner.scan_spacechars(subj.Buffer, urlEnd);

        // a title is only scanned when there are spaces between it and the url
        int titleEnd = titleStart;
        if (titleStart != urlEnd)
            titleEnd = titleStart + Scanner.scan_link_title(subj.Buffer, titleStart);

        int closePos = titleEnd + Scanner.scan_spacechars(subj.Buffer, titleEnd);

        if (BString.bchar(subj.Buffer, closePos) == ')')
        {
            subj.Position = closePos + 1;

            string url = CleanUrl(BString.bmidstr(subj.Buffer, urlStart, urlEnd - urlStart));
            string title = CleanTitle(BString.bmidstr(subj.Buffer, titleStart, titleEnd - titleStart));

            labelInlines = parse_inlines(rawlabel, null);
            return make_link(labelInlines, url, title);
        }

        // a label was matched but the target is not terminated: emit the label as text
        subj.Position = labelEnd;
        labelInlines = parse_inlines(rawlabel, subj.ReferenceMap);
        return append_inlines(make_str("["), append_inlines(labelInlines, make_str("]")));
    }

    // Check for reference link.
    // First, see if there's another label:
    subj.Position = subj.Position + Scanner.scan_spacechars(subj.Buffer, labelEnd);
    string referenceLabel = rawlabel;

    if (peek_char(subj) == '[' && link_label(subj, ref secondLabel))
    {
        // a nonempty second label replaces the first one for the lookup
        if (secondLabel != null && secondLabel.Length > 0)
            referenceLabel = secondLabel;
    }
    else
    {
        subj.Position = labelEnd;
    }

    // look the label up in the reference map of the subject
    Reference reference = lookup_reference(subj.ReferenceMap, referenceLabel);
    if (reference != null)
    {
        labelInlines = parse_inlines(rawlabel, null);
        return make_link(labelInlines, reference.Url, reference.Title);
    }

    // unknown reference: emit the label as bracketed text
    subj.Position = labelEnd;
    labelInlines = parse_inlines(rawlabel, subj.ReferenceMap);
    return append_inlines(make_str("["), append_inlines(labelInlines, make_str("]")));
}