/// <summary>
        /// Parses a header that starts with a hash.
        /// </summary>
        /// <param name="markdown"> The markdown text. </param>
        /// <param name="start"> The location of the first hash character. </param>
        /// <param name="end"> The location of the end of the line. </param>
        /// <returns> A parsed header block, or <c>null</c> if this is not a header. </returns>
        internal static HeaderBlock ParseHashPrefixedHeader(string markdown, int start, int end)
        {
            // This type of header starts with one or more '#' characters, followed by the header
            // text, optionally followed by any number of hash characters.
            var result = new HeaderBlock();

            // Figure out how many consecutive hash characters there are.
            int pos = start;

            while (pos < end && markdown[pos] == '#' && pos - start < 6)
            {
                pos++;
            }

            result.HeaderLevel = pos - start;
            if (result.HeaderLevel == 0)
            {
                return(null);
            }

            // Ignore any hashes at the end of the line.
            while (pos < end && markdown[end - 1] == '#')
            {
                end--;
            }

            // Parse the inline content.
            result.Inlines = Common.ParseInlineChildren(markdown, pos, end);
            return(result);
        }
        /// <summary>
        /// Parses a two-line header.
        /// </summary>
        /// <param name="markdown"> The markdown text. </param>
        /// <param name="firstLineStart"> The location of the start of the first line. </param>
        /// <param name="firstLineEnd"> The location of the end of the first line. </param>
        /// <param name="secondLineStart"> The location of the start of the second line. </param>
        /// <param name="secondLineEnd"> The location of the end of the second line. </param>
        /// <returns> A parsed header block, or <c>null</c> if this is not a header. </returns>
        internal static HeaderBlock ParseUnderlineStyleHeader(string markdown, int firstLineStart, int firstLineEnd, int secondLineStart, int secondLineEnd)
        {
            // This type of header starts with some text on the first line, followed by one or more
            // underline characters ('=' or '-') on the second line.
            // The second line can have whitespace after the underline characters, but not before
            // or between each character.

            // Check the second line is valid.
            if (secondLineEnd <= secondLineStart)
            {
                return(null);
            }

            // Figure out what the underline character is ('=' or '-').
            char underlineChar = markdown[secondLineStart];

            if (underlineChar != '=' && underlineChar != '-')
            {
                return(null);
            }

            // Read past consecutive underline characters.
            int pos = secondLineStart + 1;

            for (; pos < secondLineEnd; pos++)
            {
                char c = markdown[pos];
                if (c != underlineChar)
                {
                    break;
                }

                pos++;
            }

            // The rest of the line must be whitespace.
            for (; pos < secondLineEnd; pos++)
            {
                char c = markdown[pos];
                if (c != ' ' && c != '\t')
                {
                    return(null);
                }

                pos++;
            }

            var result = new HeaderBlock();

            result.HeaderLevel = underlineChar == '=' ? 1 : 2;

            // Parse the inline content.
            result.Inlines = Common.ParseInlineChildren(markdown, firstLineStart, firstLineEnd);
            return(result);
        }
Beispiel #3
0
        /// <summary>
        /// Parses a markdown document.
        /// </summary>
        /// <param name="markdown"> The markdown text. </param>
        /// <param name="start"> The position to start parsing. </param>
        /// <param name="end"> The position to stop parsing. </param>
        /// <param name="quoteDepth"> The current nesting level for block quoting. </param>
        /// <param name="actualEnd"> Set to the position at which parsing ended.  This can be
        /// different from <paramref name="end"/> when the parser is being called recursively.
        /// </param>
        /// <returns> A list of parsed blocks. </returns>
        internal static List <MarkdownBlock> Parse(string markdown, int start, int end, int quoteDepth, out int actualEnd)
        {
            // We need to parse out the list of blocks.
            // Some blocks need to start on a new paragraph (code, lists and tables) while other
            // blocks can start on any line (headers, horizontal rules and quotes).
            // Text that is outside of any other block becomes a paragraph.
            var  blocks                 = new List <MarkdownBlock>();
            int  startOfLine            = start;
            bool lineStartsNewParagraph = true;
            var  paragraphText          = new StringBuilder();

            // These are needed to parse underline-style header blocks.
            int previousStartOfLine = start;
            int previousEndOfLine   = start;

            // Go line by line.
            while (startOfLine < end)
            {
                // Find the first non-whitespace character.
                int  nonSpacePos             = startOfLine;
                char nonSpaceChar            = '\0';
                int  realStartOfLine         = startOfLine; // i.e. including quotes.
                int  expectedQuotesRemaining = quoteDepth;
                while (true)
                {
                    while (nonSpacePos < end)
                    {
                        char c = markdown[nonSpacePos];
                        if (c == '\r' || c == '\n')
                        {
                            // The line is either entirely whitespace, or is empty.
                            break;
                        }

                        if (c != ' ' && c != '\t')
                        {
                            // The line has content.
                            nonSpaceChar = c;
                            break;
                        }

                        nonSpacePos++;
                    }

                    // When parsing blocks in a blockquote context, we need to count the number of
                    // quote characters ('>').  If there are less than expected AND this is the
                    // start of a new paragraph, then stop parsing.
                    if (expectedQuotesRemaining == 0)
                    {
                        break;
                    }

                    if (nonSpaceChar == '>')
                    {
                        // Expected block quote characters should be ignored.
                        expectedQuotesRemaining--;
                        nonSpacePos++;
                        nonSpaceChar = '\0';
                        startOfLine  = nonSpacePos;

                        // Ignore the first space after the quote character, if there is one.
                        if (startOfLine < end && markdown[startOfLine] == ' ')
                        {
                            startOfLine++;
                            nonSpacePos++;
                        }
                    }
                    else
                    {
                        // There were less block quote characters than expected.
                        // But it doesn't matter if this is not the start of a new paragraph.
                        if (!lineStartsNewParagraph || nonSpaceChar == '\0')
                        {
                            break;
                        }

                        // This must be the end of the blockquote.  End the current paragraph, if any.
                        actualEnd = previousEndOfLine;
                        if (paragraphText.Length > 0)
                        {
                            blocks.Add(ParagraphBlock.Parse(paragraphText.ToString()));
                        }

                        return(blocks);
                    }
                }

                // Find the end of the current line.
                int startOfNextLine;
                int endOfLine = Common.FindNextSingleNewLine(markdown, nonSpacePos, end, out startOfNextLine);

                if (nonSpaceChar == '\0')
                {
                    // The line is empty or nothing but whitespace.
                    lineStartsNewParagraph = true;

                    // End the current paragraph.
                    if (paragraphText.Length > 0)
                    {
                        blocks.Add(ParagraphBlock.Parse(paragraphText.ToString()));
                        paragraphText.Clear();
                    }
                }
                else
                {
                    // This is a header if the line starts with a hash character,
                    // or if the line starts with '-' or a '=' character and has no other characters.
                    // Or a quote if the line starts with a greater than character (optionally preceded by whitespace).
                    // Or a horizontal rule if the line contains nothing but 3 '*', '-' or '_' characters (with optional whitespace).
                    MarkdownBlock newBlockElement = null;
                    if (nonSpaceChar == '#' && nonSpacePos == startOfLine)
                    {
                        // Hash-prefixed header.
                        newBlockElement = HeaderBlock.ParseHashPrefixedHeader(markdown, startOfLine, endOfLine);
                    }
                    else if ((nonSpaceChar == '-' || nonSpaceChar == '=') && nonSpacePos == startOfLine && paragraphText.Length > 0)
                    {
                        // Underline style header. These are weird because you don't know you've
                        // got one until you've gone past it.
                        // Note: we intentionally deviate from reddit here in that we only
                        // recognize this type of header if the previous line is part of a
                        // paragraph.  For example if you have this, the header at the bottom is
                        // ignored:
                        //   a|b
                        //   -|-
                        //   1|2
                        //   ===
                        newBlockElement = HeaderBlock.ParseUnderlineStyleHeader(markdown, previousStartOfLine, previousEndOfLine, startOfLine, endOfLine);

                        if (newBlockElement != null)
                        {
                            // We're going to have to remove the header text from the pending
                            // paragraph by prematurely ending the current paragraph.
                            // We already made sure that there is a paragraph in progress.
                            paragraphText.Length = paragraphText.Length - (previousEndOfLine - previousStartOfLine);
                        }
                    }

                    // These characters overlap with the underline-style header - this check should go after that one.
                    if (newBlockElement == null && (nonSpaceChar == '*' || nonSpaceChar == '-' || nonSpaceChar == '_'))
                    {
                        newBlockElement = HorizontalRuleBlock.Parse(markdown, startOfLine, endOfLine);
                    }

                    if (newBlockElement == null && lineStartsNewParagraph)
                    {
                        // Some block elements must start on a new paragraph (tables, lists and code).
                        int endOfBlock = startOfNextLine;
                        if (nonSpaceChar == '*' || nonSpaceChar == '+' || nonSpaceChar == '-' || (nonSpaceChar >= '0' && nonSpaceChar <= '9'))
                        {
                            newBlockElement = ListBlock.Parse(markdown, realStartOfLine, end, quoteDepth, out endOfBlock);
                        }

                        if (newBlockElement == null && (nonSpacePos > startOfLine || nonSpaceChar == '`'))
                        {
                            newBlockElement = CodeBlock.Parse(markdown, realStartOfLine, end, quoteDepth, out endOfBlock);
                        }

                        if (newBlockElement == null)
                        {
                            newBlockElement = TableBlock.Parse(markdown, realStartOfLine, endOfLine, end, quoteDepth, out endOfBlock);
                        }

                        if (newBlockElement != null)
                        {
                            startOfNextLine = endOfBlock;
                        }
                    }

                    // This check needs to go after the code block check.
                    if (newBlockElement == null && nonSpaceChar == '>')
                    {
                        newBlockElement = QuoteBlock.Parse(markdown, realStartOfLine, end, quoteDepth, out startOfNextLine);
                    }

                    // This check needs to go after the code block check.
                    if (newBlockElement == null && nonSpaceChar == '[')
                    {
                        newBlockElement = LinkReferenceBlock.Parse(markdown, startOfLine, endOfLine);
                    }

                    // Block elements start new paragraphs.
                    lineStartsNewParagraph = newBlockElement != null;

                    if (newBlockElement == null)
                    {
                        // The line contains paragraph text.
                        if (paragraphText.Length > 0)
                        {
                            // If the previous two characters were both spaces, then append a line break.
                            if (paragraphText.Length > 2 && paragraphText[paragraphText.Length - 1] == ' ' && paragraphText[paragraphText.Length - 2] == ' ')
                            {
                                // Replace the two spaces with a line break.
                                paragraphText[paragraphText.Length - 2] = '\r';
                                paragraphText[paragraphText.Length - 1] = '\n';
                            }
                            else
                            {
                                paragraphText.Append(" ");
                            }
                        }

                        // Add the last paragraph if we are at the end of the input text.
                        if (startOfNextLine >= end)
                        {
                            if (paragraphText.Length == 0)
                            {
                                // Optimize for single line paragraphs.
                                blocks.Add(ParagraphBlock.Parse(markdown.Substring(startOfLine, endOfLine - startOfLine)));
                            }
                            else
                            {
                                // Slow path.
                                paragraphText.Append(markdown.Substring(startOfLine, endOfLine - startOfLine));
                                blocks.Add(ParagraphBlock.Parse(paragraphText.ToString()));
                            }
                        }
                        else
                        {
                            paragraphText.Append(markdown.Substring(startOfLine, endOfLine - startOfLine));
                        }
                    }
                    else
                    {
                        // The line contained a block.  End the current paragraph, if any.
                        if (paragraphText.Length > 0)
                        {
                            blocks.Add(ParagraphBlock.Parse(paragraphText.ToString()));
                            paragraphText.Clear();
                        }

                        blocks.Add(newBlockElement);
                    }
                }

                // Repeat.
                previousStartOfLine = startOfLine;
                previousEndOfLine   = endOfLine;
                startOfLine         = startOfNextLine;
            }

            actualEnd = startOfLine;
            return(blocks);
        }