Exemplo n.º 1
0
        // Parses what follows a link label (or the link part of an image): either an inline
        // "(url title)" group or a "[reference]" label.
        //
        // Returns the parsed or looked-up Reference on success, Reference.SelfReference for an
        // empty "[]" label and Reference.InvalidReference when the label is not found by
        // LookupReference. When nothing matches, the subject position is restored and the result
        // is a placeholder Reference (if supportPlaceholderBrackets is set) or null.
        static Reference ParseLinkDetails(Subject subj, bool supportPlaceholderBrackets)
        {
            // position to restore when no link details can be parsed
            int rollbackPosition = subj.Position;

            var next = peek_char(subj);

            if (next == '(')
            {
                // try to parse an explicit (inline) link; the URL scan only runs when the
                // space scan reported a non-negative length
                int leadingSpaces = Scanner.scan_spacechars(subj.Buffer, subj.Position + 1, subj.Length);
                int urlLength     = -1;
                if (leadingSpaces > -1)
                {
                    urlLength = Scanner.scan_link_url(subj.Buffer, subj.Position + 1 + leadingSpaces, subj.Length);
                }

                if (urlLength > -1)
                {
                    int urlStart   = subj.Position + 1 + leadingSpaces; // just after '(' and any spaces
                    int urlEnd     = urlStart + urlLength;
                    int titleStart = urlEnd + Scanner.scan_spacechars(subj.Buffer, urlEnd, subj.Length);

                    // a title is only scanned when it is separated from the URL by spaces
                    int titleEnd = titleStart;
                    if (titleStart != urlEnd)
                    {
                        titleEnd = titleStart + Scanner.scan_link_title(subj.Buffer, titleStart, subj.Length);
                    }

                    int closingParen = titleEnd + Scanner.scan_spacechars(subj.Buffer, titleEnd, subj.Length);

                    if (closingParen < subj.Length && subj.Buffer[closingParen] == ')')
                    {
                        subj.Position = closingParen + 1;

                        string url   = CleanUrl(subj.Buffer.Substring(urlStart, urlEnd - urlStart));
                        string title = CleanTitle(subj.Buffer.Substring(titleStart, titleEnd - titleStart));

                        return new Reference()
                        {
                            Title = title, Url = url
                        };
                    }
                }
            }
            else if (next == '[')
            {
                var label = ParseReferenceLabel(subj);
                if (label != null)
                {
                    if (label.Value.Length == 0)
                    {
                        return Reference.SelfReference;
                    }

                    var found = LookupReference(subj.DocumentData, label.Value);
                    if (found != null)
                    {
                        return found;
                    }

                    // restore the position but report InvalidReference so that the caller knows
                    // not to parse 'foo' from [foo][bar].
                    subj.Position = rollbackPosition;
                    return Reference.InvalidReference;
                }
            }

            // nothing matched: restore the subject position
            subj.Position = rollbackPosition;

            if (!supportPlaceholderBrackets)
            {
                return null;
            }

            // placeholder: the text between the start of the last pending inline and the
            // character before the current position becomes the Url
            return new Reference()
            {
                Url = subj.Buffer.Substring(subj.LastPendingInline.StartPosition, subj.Position - subj.LastPendingInline.StartPosition - 1),
                IsPlaceholder = true
            };
        }
Exemplo n.º 2
0
        // Processes one input line, updating the block tree that contains curptr.
        //
        // The method works in four phases:
        //   1. Starting at curptr.Top, walk down the chain of open last children and check, for
        //      each one, whether this line satisfies the continuation rule of that block.
        //   2. Unless the last matched container is a fenced code, indented code or HTML block,
        //      try to open new containers (block quote, ATX heading, fenced code, HTML block,
        //      setext heading, thematic break, list / list item, indented code) at the current offset.
        //   3. Update the IsLastLineBlank flags on the resulting container and its ancestors.
        //   4. Add the remaining text to the appropriate block - either as a lazy paragraph
        //      continuation, or after finalizing the open blocks that were not matched.
        //
        // Nothing is returned. curptr is set to the block that is current after this line,
        // except for a lazy paragraph continuation, where curptr is left unchanged.
        public static void IncorporateLine(LineInfo line, ref Block curptr)
        {
            var ln = line.Line;

            // the deepest open block whose continuation rule was satisfied in phase 1
            Block last_matched_container;

            // offset is the char position in the line
            var offset = 0;

            // column is the virtual position in the line that takes TAB expansion into account
            var column = 0;

            // the adjustment to the virtual position `column` that points to the number of spaces from the TAB that have not been included in any indent.
            var remainingSpaces = 0;

            // the char position of the first non-space char
            int first_nonspace;

            // the virtual position of the first non-space char, that includes TAB expansion
            int first_nonspace_column;

            int      matched;
            int      i;
            ListData data;
            // becomes false as soon as one open block fails to match this line
            bool     all_matched = true;
            Block    cur         = curptr;
            // true when the first non-space char at the current offset is '\n'
            var      blank       = false;
            char     curChar;
            int      indent;

            // container starts at the document root.
            var container = cur.Top;

            // for each containing block, try to parse the associated line start.
            // bail out on failure:  container will point to the last matching block.

            while (container.LastChild != null && container.LastChild.IsOpen)
            {
                container = container.LastChild;

                FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar);

                // indentation in columns, including spaces left over from a partially consumed TAB
                indent = first_nonspace_column - column + remainingSpaces;
                blank  = curChar == '\n';

                switch (container.Tag)
                {
                case BlockTag.BlockQuote:
                {
                    // continues only with a '>' marker indented by at most 3 columns
                    if (indent <= 3 && curChar == '>')
                    {
                        AdvanceOffset(ln, indent + 1, true, ref offset, ref column, ref remainingSpaces);
                        AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces);
                    }
                    else
                    {
                        all_matched = false;
                    }

                    break;
                }

                case BlockTag.ListItem:
                {
                    // continues when the line is indented at least to the item's content column
                    if (indent >= container.ListData.MarkerOffset + container.ListData.Padding)
                    {
                        AdvanceOffset(ln, container.ListData.MarkerOffset + container.ListData.Padding, true, ref offset, ref column, ref remainingSpaces);
                    }
                    else if (blank && container.FirstChild != null)
                    {
                        // if container->first_child is NULL, then the opening line
                        // of the list item was blank after the list marker; in this
                        // case, we are done with the list item.
                        AdvanceOffset(ln, first_nonspace - offset, false, ref offset, ref column, ref remainingSpaces);
                    }
                    else
                    {
                        all_matched = false;
                    }

                    break;
                }

                case BlockTag.IndentedCode:
                {
                    // continues with CODE_INDENT or more columns of indentation, or with a blank line
                    if (indent >= CODE_INDENT)
                    {
                        AdvanceOffset(ln, CODE_INDENT, true, ref offset, ref column, ref remainingSpaces);
                    }
                    else if (blank)
                    {
                        AdvanceOffset(ln, first_nonspace - offset, false, ref offset, ref column, ref remainingSpaces);
                    }
                    else
                    {
                        all_matched = false;
                    }

                    break;
                }

                case BlockTag.AtxHeading:
                case BlockTag.SetextHeading:
                {
                    // a heading can never contain more than one line
                    all_matched = false;
                    if (blank)
                    {
                        container.IsLastLineBlank = true;
                    }

                    break;
                }

                case BlockTag.FencedCode:
                {
                    // -1 means we've seen closer
                    if (container.FencedCodeData.FenceLength == -1)
                    {
                        all_matched = false;
                        if (blank)
                        {
                            container.IsLastLineBlank = true;
                        }
                    }
                    else
                    {
                        // skip optional spaces of fence offset
                        i = container.FencedCodeData.FenceOffset;
                        while (i > 0 && ln[offset] == ' ')
                        {
                            offset++;
                            column++;
                            i--;
                        }
                    }

                    break;
                }

                case BlockTag.HtmlBlock:
                {
                    // a blank line stops HTML blocks whose type is InterruptingBlock or greater;
                    // HTML blocks of every other type accept blank lines
                    if (blank && container.HtmlBlockType >= HtmlBlockType.InterruptingBlock)
                    {
                        container.IsLastLineBlank = true;
                        all_matched = false;
                    }

                    break;
                }

                case BlockTag.Paragraph:
                {
                    // a blank line does not continue a paragraph
                    if (blank)
                    {
                        container.IsLastLineBlank = true;
                        all_matched = false;
                    }

                    break;
                }
                }

                if (!all_matched)
                {
                    container = container.Parent;  // back up to last matching block
                    break;
                }
            }

            last_matched_container = container;

            // check to see if we've hit 2nd blank line, break out of list:
            if (blank && container.IsLastLineBlank)
            {
                BreakOutOfLists(ref container, line);
            }

            // a lazy continuation is only possible while the current open block is a paragraph
            var maybeLazy = cur.Tag == BlockTag.Paragraph;

            // unless last matched container is code block, try new container starts:
            while (container.Tag != BlockTag.FencedCode &&
                   container.Tag != BlockTag.IndentedCode &&
                   container.Tag != BlockTag.HtmlBlock)
            {
                FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar);

                // note: unlike the continuation loop above, remainingSpaces is not added here
                indent = first_nonspace_column - column;
                blank  = curChar == '\n';

                var indented = indent >= CODE_INDENT;

                if (!indented && curChar == '>')
                {
                    // block quote: consume the marker and one optional following space
                    AdvanceOffset(ln, first_nonspace + 1 - offset, false, ref offset, ref column, ref remainingSpaces);
                    AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces);

                    container = CreateChildBlock(container, line, BlockTag.BlockQuote, first_nonspace);
                }
                else if (!indented && curChar == '#' && 0 != (matched = Scanner.scan_atx_heading_start(ln, first_nonspace, ln.Length, out i)))
                {
                    // ATX heading: the scanner's out value i is passed to HeadingData
                    AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces);
                    container         = CreateChildBlock(container, line, BlockTag.AtxHeading, first_nonspace);
                    container.Heading = new HeadingData(i);
                }
                else if (!indented && (curChar == '`' || curChar == '~') && 0 != (matched = Scanner.scan_open_code_fence(ln, first_nonspace, ln.Length)))
                {
                    // fenced code: remember the fence char, length and indentation; they are used
                    // later to strip indentation from content lines and to detect the closing fence
                    container = CreateChildBlock(container, line, BlockTag.FencedCode, first_nonspace);
                    container.FencedCodeData             = new FencedCodeData();
                    container.FencedCodeData.FenceChar   = curChar;
                    container.FencedCodeData.FenceLength = matched;
                    container.FencedCodeData.FenceOffset = first_nonspace - offset;

                    AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces);
                }
                else if (!indented && curChar == '<' &&
                         (0 != (matched = (int)Scanner.scan_html_block_start(ln, first_nonspace, ln.Length)) ||
                          (container.Tag != BlockTag.Paragraph && 0 != (matched = (int)Scanner.scan_html_block_start_7(ln, first_nonspace, ln.Length)))
                         ))
                {
                    // HTML block: the second scanner is only tried when the container is not a paragraph
                    container = CreateChildBlock(container, line, BlockTag.HtmlBlock, first_nonspace);
                    container.HtmlBlockType = (HtmlBlockType)matched;
                    // note, we don't adjust offset because the tag is part of the text
                }
                else if (!indented && container.Tag == BlockTag.Paragraph && (curChar == '=' || curChar == '-') &&
                         0 != (matched = Scanner.scan_setext_heading_line(ln, first_nonspace, ln.Length)))
                {
                    // setext underline: the existing paragraph is converted into a heading in place
                    // and the rest of the line (up to the last char) is skipped
                    container.Tag     = BlockTag.SetextHeading;
                    container.Heading = new HeadingData(matched);
                    AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces);
                }
                else if (!indented &&
                         !(container.Tag == BlockTag.Paragraph && !all_matched) &&
                         0 != (Scanner.scan_thematic_break(ln, first_nonspace, ln.Length)))
                {
                    // it's only now that we know the line is not part of a setext heading:
                    container = CreateChildBlock(container, line, BlockTag.ThematicBreak, first_nonspace);
                    Finalize(container, line);
                    container = container.Parent;
                    AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces);
                }
                else if ((!indented || container.Tag == BlockTag.List) &&
                         0 != (matched = ParseListMarker(ln, first_nonspace, out data)))
                {
                    // compute padding:
                    AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces);

                    // remember the position right after the marker so that it can be restored below
                    var prevOffset          = offset;
                    var prevColumn          = column;
                    var prevRemainingSpaces = remainingSpaces;

                    // consume optional spaces one step at a time until more than CODE_INDENT
                    // columns have been consumed or AdvanceOptionalSpace returns false
                    while (column - prevColumn <= CODE_INDENT)
                    {
                        if (!AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces))
                        {
                            break;
                        }
                    }

                    // column - prevColumn = number of columns of spacing consumed after the marker
                    if (column == prevColumn)
                    {
                        // no spaces at all
                        data.Padding = matched + 1;
                    }
                    else if (column - prevColumn > CODE_INDENT || ln[offset] == '\n')
                    {
                        data.Padding = matched + 1;

                        // too many spaces, or only a newline follows them:
                        // ignoring everything but the first one
                        offset          = prevOffset;
                        column          = prevColumn;
                        remainingSpaces = prevRemainingSpaces;
                        AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces);
                    }
                    else
                    {
                        data.Padding = matched + column - prevColumn;
                    }

                    // check container; if it's a list, see if this list item
                    // can continue the list; otherwise, create a list container.

                    data.MarkerOffset = indent;

                    if (container.Tag != BlockTag.List || !ListsMatch(container.ListData, data))
                    {
                        container          = CreateChildBlock(container, line, BlockTag.List, first_nonspace);
                        container.ListData = data;
                    }

                    // add the list item
                    container          = CreateChildBlock(container, line, BlockTag.ListItem, first_nonspace);
                    container.ListData = data;
                }
                else if (indented && !maybeLazy && !blank)
                {
                    // indented code: not started while the line could still be a lazy paragraph
                    // continuation, and never by a blank line
                    AdvanceOffset(ln, CODE_INDENT, true, ref offset, ref column, ref remainingSpaces);
                    container = CreateChildBlock(container, line, BlockTag.IndentedCode, offset);
                }
                else
                {
                    // no new container starts here
                    break;
                }

                if (AcceptsLines(container.Tag))
                {
                    // if it's a line container, it can't contain other containers
                    break;
                }

                // a new container was opened, so the line is no longer a lazy continuation candidate
                maybeLazy = false;
            }

            // what remains at offset is a text line.  add the text to the
            // appropriate container.

            FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar);
            indent = first_nonspace_column - column;
            blank  = curChar == '\n';

            if (blank && container.LastChild != null)
            {
                container.LastChild.IsLastLineBlank = true;
            }

            // block quote lines are never blank as they start with >
            // and we don't count blanks in fenced code for purposes of tight/loose
            // lists or breaking out of lists.  we also don't set last_line_blank
            // on an empty list item.
            container.IsLastLineBlank = (blank &&
                                         container.Tag != BlockTag.BlockQuote &&
                                         container.Tag != BlockTag.SetextHeading &&
                                         container.Tag != BlockTag.FencedCode &&
                                         !(container.Tag == BlockTag.ListItem &&
                                           container.FirstChild == null &&
                                           container.SourcePosition >= line.LineOffset));

            Block cont = container;

            // clear the flag on every ancestor of the container
            while (cont.Parent != null)
            {
                cont.Parent.IsLastLineBlank = false;
                cont = cont.Parent;
            }

            // lazy continuation: some open block did not match, no new container was opened,
            // the line is not blank and the current block is a paragraph that already has content
            if (cur != last_matched_container &&
                container == last_matched_container &&
                !blank &&
                cur.Tag == BlockTag.Paragraph &&
                cur.StringContent.Length > 0)
            {
                AddLine(cur, line, ln, offset, remainingSpaces);
            }
            else
            { // not a lazy continuation
                // finalize any blocks that were not matched and set cur to container:
                while (cur != last_matched_container)
                {
                    Finalize(cur, line);
                    cur = cur.Parent;

                    // running off the top of the tree means last_matched_container
                    // is not an ancestor of the original current block
                    if (cur == null)
                    {
                        throw new CommonMarkException("Cannot finalize container block. Last matched container tag = " + last_matched_container.Tag);
                    }
                }

                if (container.Tag == BlockTag.IndentedCode)
                {
                    AddLine(container, line, ln, offset, remainingSpaces);
                }
                else if (container.Tag == BlockTag.FencedCode)
                {
                    // a closing fence uses the same fence char, is indented by at most 3 columns
                    // and must be accepted by scan_close_code_fence for the recorded fence length
                    if ((indent <= 3 &&
                         curChar == container.FencedCodeData.FenceChar) &&
                        (0 != Scanner.scan_close_code_fence(ln, first_nonspace, container.FencedCodeData.FenceLength, ln.Length)))
                    {
                        // if closing fence, set fence length to -1. it will be closed when the next line is processed.
                        container.FencedCodeData.FenceLength = -1;
                    }
                    else
                    {
                        AddLine(container, line, ln, offset, remainingSpaces);
                    }
                }
                else if (container.Tag == BlockTag.HtmlBlock)
                {
                    AddLine(container, line, ln, offset, remainingSpaces);

                    // the line is added first; if it also matches the end condition of this
                    // HTML block type, the block is finalized right away
                    if (Scanner.scan_html_block_end(container.HtmlBlockType, ln, first_nonspace, ln.Length))
                    {
                        Finalize(container, line);
                        container = container.Parent;
                    }
                }
                else if (blank)
                {
                    // a blank line outside of code and HTML blocks contributes no content
                }
                else if (container.Tag == BlockTag.AtxHeading)
                {
                    int p = ln.Length - 1;

                    // trim trailing spaces
                    while (p >= 0 && (ln[p] == ' ' || ln[p] == '\n'))
                    {
                        p--;
                    }

                    // if string ends in #s, remove these:
                    while (p >= 0 && ln[p] == '#')
                    {
                        p--;
                    }

                    // the closing #s must be preceded by a space; otherwise they are
                    // part of the heading text and the whole line is kept
                    if (p < 0 || ln[p] != ' ')
                    {
                        p = ln.Length - 1;
                    }

                    AddLine(container, line, ln, first_nonspace, remainingSpaces, p - first_nonspace + 1);
                    Finalize(container, line);
                    container = container.Parent;
                }
                else if (AcceptsLines(container.Tag))
                {
                    AddLine(container, line, ln, first_nonspace, remainingSpaces);
                }
                else if (container.Tag != BlockTag.ThematicBreak && container.Tag != BlockTag.SetextHeading)
                {
                    // create paragraph container for line
                    container = CreateChildBlock(container, line, BlockTag.Paragraph, first_nonspace);
                    AddLine(container, line, ln, first_nonspace, remainingSpaces);
                }
                else
                {
                    // non-blank text left over in a thematic break or setext heading container
                    Utilities.Warning("Line {0} with container type {1} did not match any condition:\n\"{2}\"", line.LineNumber, container.Tag, ln);
                }

                curptr = container;
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Unescapes a string: removes backslashes that precede escapable symbol characters and
        /// replaces recognized named and numeric character entities with their decoded text.
        /// </summary>
        /// <param name="url">The text to unescape. The instance itself is not modified.</param>
        /// <returns>
        /// The unescaped text, or the <paramref name="url"/> instance itself when no replacement
        /// was necessary.
        /// </returns>
        public static string Unescape(string url)
        {
            int  searchPos = 0; // where the next scan for '\' or '&' starts
            int  lastPos   = 0; // start of the input that has not been copied to sb yet
            int  match;
            char c;

            char[]        search = new[] { '\\', '&' };
            StringBuilder sb     = null; // allocated lazily, only if something has to change

            while ((searchPos = url.IndexOfAny(search, searchPos)) != -1)
            {
                c = url[searchPos];
                if (c == '\\')
                {
                    searchPos++;

                    // a trailing backslash has nothing to escape
                    if (url.Length == searchPos)
                    {
                        break;
                    }

                    c = url[searchPos];
                    if (Utilities.IsEscapableSymbol(c))
                    {
                        if (sb == null)
                        {
                            sb = new StringBuilder(url.Length);
                        }

                        // copy everything up to, but not including, the backslash; the escaped
                        // character itself stays in the pending (not yet copied) range
                        sb.Append(url, lastPos, searchPos - lastPos - 1);
                        lastPos = searchPos;

                        // Step over the escaped character. Without this the next scan would
                        // start on it, so an escaped '\' or '&' would wrongly be handled as the
                        // start of another escape or entity (e.g. "\\*" losing its backslash).
                        searchPos++;
                    }

                    // when the character is not escapable the backslash is literal and the
                    // scan resumes at that character
                }
                else if (c == '&')
                {
                    string namedEntity;
                    int    numericEntity;
                    match = Scanner.scan_entity(url, searchPos, url.Length - searchPos, out namedEntity, out numericEntity);
                    if (match == 0)
                    {
                        // not an entity; keep the '&' as is
                        searchPos++;
                    }
                    else
                    {
                        searchPos += match;

                        if (namedEntity != null)
                        {
                            // named entities that cannot be decoded are left in the text untouched
                            var decoded = EntityDecoder.DecodeEntity(namedEntity);
                            if (decoded != null)
                            {
                                if (sb == null)
                                {
                                    sb = new StringBuilder(url.Length);
                                }

                                sb.Append(url, lastPos, searchPos - match - lastPos);
                                sb.Append(decoded);
                                lastPos = searchPos;
                            }
                        }
                        else if (numericEntity > 0)
                        {
                            var decoded = EntityDecoder.DecodeEntity(numericEntity);

                            if (sb == null)
                            {
                                sb = new StringBuilder(url.Length);
                            }

                            sb.Append(url, lastPos, searchPos - match - lastPos);

                            if (decoded != null)
                            {
                                sb.Append(decoded);
                            }
                            else
                            {
                                // numeric entities that cannot be decoded become U+FFFD
                                sb.Append('\uFFFD');
                            }

                            lastPos = searchPos;
                        }
                    }
                }
            }

            // nothing was replaced: hand back the original instance
            if (sb == null)
            {
                return url;
            }

            sb.Append(url, lastPos, url.Length - lastPos);
            return sb.ToString();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Tries to read a list item marker (bullet or ordered) that starts at
        /// <paramref name="pos"/> in <paramref name="ln"/>.
        /// </summary>
        /// <param name="ln">The line being parsed.</param>
        /// <param name="pos">The index of the first character of the candidate marker.</param>
        /// <param name="data">Receives the marker details on success; <c>null</c> on failure.</param>
        /// <returns>The length of the marker in characters, or 0 if there is no marker.</returns>
        /// <remarks>Original: int parse_list_marker(string ln, int pos, ref ListData dataptr)</remarks>
        private static int ParseListMarker(string ln, int pos, out ListData data)
        {
            data = null;

            int  lineLength  = ln.Length;
            int  markerStart = pos;
            char current     = ln[pos];

            // '+' and '•' are always bullets; '*' and '-' only when the line is not a thematic break
            bool isBullet = current == '+' || current == '•';
            if (!isBullet && (current == '*' || current == '-'))
            {
                isBullet = Scanner.scan_thematic_break(ln, pos, lineLength) == 0;
            }

            if (isBullet)
            {
                pos++;

                // the bullet must be followed by whitespace
                if (pos == lineLength || !Utilities.IsWhitespace(ln[pos]))
                {
                    return 0;
                }

                data = new ListData()
                {
                    BulletChar = current,
                    Start      = 1
                };

                return pos - markerStart;
            }

            bool startsWithDigit = current >= '0' && current <= '9';
            if (!startsWithDigit)
            {
                return 0;
            }

            int number = current - '0';

            while (pos < lineLength - 1)
            {
                current = ln[++pos];

                // We limit to 9 digits to avoid overflow, This also seems to be the limit for 'start' in some browsers.
                bool isDigit = current >= '0' && current <= '9';
                if (!isDigit || number >= 100000000)
                {
                    break;
                }

                number = number * 10 + (current - '0');
            }

            // the digits must be followed by '.' or ')' and at least one more character
            bool isDelimiter = current == '.' || current == ')';
            if (pos >= lineLength - 1 || !isDelimiter)
            {
                return 0;
            }

            pos++;

            // the delimiter must be followed by whitespace
            if (pos == lineLength || !Utilities.IsWhitespace(ln[pos]))
            {
                return 0;
            }

            data = new ListData()
            {
                ListType   = ListType.Ordered,
                BulletChar = '\0',
                Start      = number,
                Delimiter  = current == '.' ? ListDelimiter.Period : ListDelimiter.Parenthesis
            };

            return pos - markerStart;
        }
Exemplo n.º 5
0
        // Parses a link reference definition: "[label]: url" with an optional title.
        // Assumes the subject is positioned on the '[' character.
        //
        // On success the reference is added to subj.ReferenceMap and the subject position after
        // the definition is returned. If no valid definition is found, the subject position is
        // restored to where it was on entry and 0 is returned.
        public static int ParseReference(Subject subj)
        {
            string url      = null;
            string title    = null;
            int    matchlen = 0;
            int    beforetitle;
            var    startPos = subj.Position;

            // parse label:
            var lab = ParseReferenceLabel(subj);

            if (lab == null || lab.Value.Length > Reference.MaximumReferenceLabelLength)
            {
                goto INVALID;
            }

            // colon:
            if (peek_char(subj) == ':')
            {
                subj.Position++;
            }
            else
            {
                goto INVALID;
            }

            // parse link url:
            spnl(subj);
            matchlen = Scanner.scan_link_url(subj.Buffer, subj.Position, subj.Length);

            // An empty URL is not valid here. A negative result is rejected as well:
            // ParseLinkDetails treats it as the scanner's failure value, and passing a
            // negative length to Substring below would throw ArgumentOutOfRangeException.
            if (matchlen <= 0)
            {
                goto INVALID;
            }

            url            = subj.Buffer.Substring(subj.Position, matchlen);
            url            = CleanUrl(url);
            subj.Position += matchlen;

            // parse optional link_title
            beforetitle = subj.Position;
            spnl(subj);
            matchlen = Scanner.scan_link_title(subj.Buffer, subj.Position, subj.Length);
            if (matchlen > 0)
            {
                title          = subj.Buffer.Substring(subj.Position, matchlen);
                title          = CleanTitle(title);
                subj.Position += matchlen;
            }
            else
            {
                // no title: undo the whitespace skipped while looking for one
                subj.Position = beforetitle;
                title         = string.Empty;
            }

            // parse final spaces and newline:
            while (peek_char(subj) == ' ')
            {
                subj.Position++;
            }

            // the definition must be followed by a newline or by '\0' from peek_char
            if (peek_char(subj) == '\n')
            {
                subj.Position++;
            }
            else if (peek_char(subj) != '\0')
            {
                goto INVALID;
            }

            // insert reference into refmap
            AddReference(subj.ReferenceMap, lab.Value, url, title);

            return subj.Position;

INVALID:
            // roll back everything that was consumed
            subj.Position = startPos;
            return 0;
        }