/// <summary> /// Parsing helper. /// </summary> /// <returns> <c>true</c> if any of the list items were parsed using the block parser. </returns> private static bool ReplaceStringBuilders(ListBlock list) { bool usedBlockParser = false; foreach (var listItem in list.Items) { // Use the inline parser if there is one paragraph, use the block parser otherwise. var useBlockParser = listItem.Blocks.Count(block => block.Type == MarkdownBlockType.ListItemBuilder) > 1; // Recursively replace any child lists. foreach (var block in listItem.Blocks) { if (block is ListBlock && ReplaceStringBuilders((ListBlock)block)) { useBlockParser = true; } } // Parse the text content of the list items. var newBlockList = new List <MarkdownBlock>(); foreach (var block in listItem.Blocks) { if (block is ListItemBuilder) { var blockText = ((ListItemBuilder)block).Builder.ToString(); if (useBlockParser) { // Parse the list item as a series of blocks. int actualEnd; newBlockList.AddRange(MarkdownDocument.Parse(blockText, 0, blockText.Length, quoteDepth: 0, actualEnd: out actualEnd)); usedBlockParser = true; } else { // Don't allow blocks. var paragraph = new ParagraphBlock(); paragraph.Inlines = Common.ParseInlineChildren(blockText, 0, blockText.Length); newBlockList.Add(paragraph); } } else { newBlockList.Add(block); } } listItem.Blocks = newBlockList; } return(usedBlockParser); }
/// <summary> /// Parses a list block. /// </summary> /// <param name="markdown"> The markdown text. </param> /// <param name="start"> The location of the first character in the block. </param> /// <param name="maxEnd"> The location to stop parsing. </param> /// <param name="quoteDepth"> The current nesting level for block quoting. </param> /// <param name="actualEnd"> Set to the end of the block when the return value is non-null. </param> /// <returns> A parsed list block, or <c>null</c> if this is not a list block. </returns> internal static ListBlock Parse(string markdown, int start, int maxEnd, int quoteDepth, out int actualEnd) { var russianDolls = new List <NestedListInfo>(); int russianDollIndex = -1; bool previousLineWasBlank = false; ListItemBlock currentListItem = null; actualEnd = start; foreach (var lineInfo in Common.ParseLines(markdown, start, maxEnd, quoteDepth)) { // Is this line blank? if (lineInfo.IsLineBlank) { // The line is blank, which means the next line which contains text may end the list (or it may not...). previousLineWasBlank = true; } else { // Does the line contain a list item? ListItemPreamble listItemPreamble = null; if (lineInfo.FirstNonWhitespaceChar - lineInfo.StartOfLine < (russianDollIndex + 2) * 4) { listItemPreamble = ParseItemPreamble(markdown, lineInfo.FirstNonWhitespaceChar, lineInfo.EndOfLine); } if (listItemPreamble != null) { // Yes, this line contains a list item. // Determining the nesting level is done as follows: // 1. If this is the first line, then the list is not nested. // 2. If the number of spaces at the start of the line is equal to that of // an existing list, then the nesting level is the same as that list. // 3. Otherwise, if the number of spaces is 0-4, then the nesting level // is one level deep. // 4. Otherwise, if the number of spaces is 5-8, then the nesting level // is two levels deep (but no deeper than one level more than the // previous list item). // 5. Etcetera. ListBlock listToAddTo = null; int spaceCount = lineInfo.FirstNonWhitespaceChar - lineInfo.StartOfLine; russianDollIndex = russianDolls.FindIndex(rd => rd.SpaceCount == spaceCount); if (russianDollIndex >= 0) { // Add the new list item to an existing list. listToAddTo = russianDolls[russianDollIndex].List; // Don't add new list items to items higher up in the list. russianDolls.RemoveRange(russianDollIndex + 1, russianDolls.Count - (russianDollIndex + 1)); } else { russianDollIndex = Math.Max(1, 1 + ((spaceCount - 1) / 4)); if (russianDollIndex < russianDolls.Count) { // Add the new list item to an existing list. listToAddTo = russianDolls[russianDollIndex].List; // Don't add new list items to items higher up in the list. russianDolls.RemoveRange(russianDollIndex + 1, russianDolls.Count - (russianDollIndex + 1)); } else { // Create a new list. listToAddTo = new ListBlock { Style = listItemPreamble.Style, Items = new List <ListItemBlock>() }; if (russianDolls.Count > 0) { currentListItem.Blocks.Add(listToAddTo); } russianDollIndex = russianDolls.Count; russianDolls.Add(new NestedListInfo { List = listToAddTo, SpaceCount = spaceCount }); } } // Add a new list item. currentListItem = new ListItemBlock() { Blocks = new List <MarkdownBlock>() }; listToAddTo.Items.Add(currentListItem); // Add the rest of the line to the builder. AppendTextToListItem(currentListItem, markdown, listItemPreamble.ContentStartPos, lineInfo.EndOfLine); } else { // No, this line contains text. // Is there even a list in progress? if (currentListItem == null) { actualEnd = start; return(null); } // 0 spaces = end of the list. // 1-4 spaces = first level. // 5-8 spaces = second level, etc. if (previousLineWasBlank) { // This is the start of a new paragraph. int spaceCount = lineInfo.FirstNonWhitespaceChar - lineInfo.StartOfLine; if (spaceCount == 0) { break; } russianDollIndex = Math.Min(russianDollIndex, (spaceCount - 1) / 4); ListBlock listToAddTo = russianDolls[russianDollIndex].List; currentListItem = listToAddTo.Items[listToAddTo.Items.Count - 1]; currentListItem.Blocks.Add(new ListItemBuilder()); AppendTextToListItem(currentListItem, markdown, Math.Min(lineInfo.FirstNonWhitespaceChar, lineInfo.StartOfLine + ((russianDollIndex + 1) * 4)), lineInfo.EndOfLine); } else { // Inline text. AppendTextToListItem(currentListItem, markdown, lineInfo.FirstNonWhitespaceChar, lineInfo.EndOfLine); } } // The line was not blank. previousLineWasBlank = false; } // Go to the next line. actualEnd = lineInfo.EndOfLine; } var result = russianDolls[0].List; ReplaceStringBuilders(result); return(result); }
/// <summary> /// Parses a markdown document. /// </summary> /// <param name="markdown"> The markdown text. </param> /// <param name="start"> The position to start parsing. </param> /// <param name="end"> The position to stop parsing. </param> /// <param name="quoteDepth"> The current nesting level for block quoting. </param> /// <param name="actualEnd"> Set to the position at which parsing ended. This can be /// different from <paramref name="end"/> when the parser is being called recursively. /// </param> /// <returns> A list of parsed blocks. </returns> internal static List <MarkdownBlock> Parse(string markdown, int start, int end, int quoteDepth, out int actualEnd) { // We need to parse out the list of blocks. // Some blocks need to start on a new paragraph (code, lists and tables) while other // blocks can start on any line (headers, horizontal rules and quotes). // Text that is outside of any other block becomes a paragraph. var blocks = new List <MarkdownBlock>(); int startOfLine = start; bool lineStartsNewParagraph = true; var paragraphText = new StringBuilder(); // These are needed to parse underline-style header blocks. int previousStartOfLine = start; int previousEndOfLine = start; // Go line by line. while (startOfLine < end) { // Find the first non-whitespace character. int nonSpacePos = startOfLine; char nonSpaceChar = '\0'; int realStartOfLine = startOfLine; // i.e. including quotes. int expectedQuotesRemaining = quoteDepth; while (true) { while (nonSpacePos < end) { char c = markdown[nonSpacePos]; if (c == '\r' || c == '\n') { // The line is either entirely whitespace, or is empty. break; } if (c != ' ' && c != '\t') { // The line has content. nonSpaceChar = c; break; } nonSpacePos++; } // When parsing blocks in a blockquote context, we need to count the number of // quote characters ('>'). If there are less than expected AND this is the // start of a new paragraph, then stop parsing. if (expectedQuotesRemaining == 0) { break; } if (nonSpaceChar == '>') { // Expected block quote characters should be ignored. expectedQuotesRemaining--; nonSpacePos++; nonSpaceChar = '\0'; startOfLine = nonSpacePos; // Ignore the first space after the quote character, if there is one. if (startOfLine < end && markdown[startOfLine] == ' ') { startOfLine++; nonSpacePos++; } } else { // There were less block quote characters than expected. // But it doesn't matter if this is not the start of a new paragraph. if (!lineStartsNewParagraph || nonSpaceChar == '\0') { break; } // This must be the end of the blockquote. End the current paragraph, if any. actualEnd = previousEndOfLine; if (paragraphText.Length > 0) { blocks.Add(ParagraphBlock.Parse(paragraphText.ToString())); } return(blocks); } } // Find the end of the current line. int startOfNextLine; int endOfLine = Common.FindNextSingleNewLine(markdown, nonSpacePos, end, out startOfNextLine); if (nonSpaceChar == '\0') { // The line is empty or nothing but whitespace. lineStartsNewParagraph = true; // End the current paragraph. if (paragraphText.Length > 0) { blocks.Add(ParagraphBlock.Parse(paragraphText.ToString())); paragraphText.Clear(); } } else { // This is a header if the line starts with a hash character, // or if the line starts with '-' or a '=' character and has no other characters. // Or a quote if the line starts with a greater than character (optionally preceded by whitespace). // Or a horizontal rule if the line contains nothing but 3 '*', '-' or '_' characters (with optional whitespace). MarkdownBlock newBlockElement = null; if (nonSpaceChar == '#' && nonSpacePos == startOfLine) { // Hash-prefixed header. newBlockElement = HeaderBlock.ParseHashPrefixedHeader(markdown, startOfLine, endOfLine); } else if ((nonSpaceChar == '-' || nonSpaceChar == '=') && nonSpacePos == startOfLine && paragraphText.Length > 0) { // Underline style header. These are weird because you don't know you've // got one until you've gone past it. // Note: we intentionally deviate from reddit here in that we only // recognize this type of header if the previous line is part of a // paragraph. For example if you have this, the header at the bottom is // ignored: // a|b // -|- // 1|2 // === newBlockElement = HeaderBlock.ParseUnderlineStyleHeader(markdown, previousStartOfLine, previousEndOfLine, startOfLine, endOfLine); if (newBlockElement != null) { // We're going to have to remove the header text from the pending // paragraph by prematurely ending the current paragraph. // We already made sure that there is a paragraph in progress. paragraphText.Length = paragraphText.Length - (previousEndOfLine - previousStartOfLine); } } // These characters overlap with the underline-style header - this check should go after that one. if (newBlockElement == null && (nonSpaceChar == '*' || nonSpaceChar == '-' || nonSpaceChar == '_')) { newBlockElement = HorizontalRuleBlock.Parse(markdown, startOfLine, endOfLine); } if (newBlockElement == null && lineStartsNewParagraph) { // Some block elements must start on a new paragraph (tables, lists and code). int endOfBlock = startOfNextLine; if (nonSpaceChar == '*' || nonSpaceChar == '+' || nonSpaceChar == '-' || (nonSpaceChar >= '0' && nonSpaceChar <= '9')) { newBlockElement = ListBlock.Parse(markdown, realStartOfLine, end, quoteDepth, out endOfBlock); } if (newBlockElement == null && (nonSpacePos > startOfLine || nonSpaceChar == '`')) { newBlockElement = CodeBlock.Parse(markdown, realStartOfLine, end, quoteDepth, out endOfBlock); } if (newBlockElement == null) { newBlockElement = TableBlock.Parse(markdown, realStartOfLine, endOfLine, end, quoteDepth, out endOfBlock); } if (newBlockElement != null) { startOfNextLine = endOfBlock; } } // This check needs to go after the code block check. if (newBlockElement == null && nonSpaceChar == '>') { newBlockElement = QuoteBlock.Parse(markdown, realStartOfLine, end, quoteDepth, out startOfNextLine); } // This check needs to go after the code block check. if (newBlockElement == null && nonSpaceChar == '[') { newBlockElement = LinkReferenceBlock.Parse(markdown, startOfLine, endOfLine); } // Block elements start new paragraphs. lineStartsNewParagraph = newBlockElement != null; if (newBlockElement == null) { // The line contains paragraph text. if (paragraphText.Length > 0) { // If the previous two characters were both spaces, then append a line break. if (paragraphText.Length > 2 && paragraphText[paragraphText.Length - 1] == ' ' && paragraphText[paragraphText.Length - 2] == ' ') { // Replace the two spaces with a line break. paragraphText[paragraphText.Length - 2] = '\r'; paragraphText[paragraphText.Length - 1] = '\n'; } else { paragraphText.Append(" "); } } // Add the last paragraph if we are at the end of the input text. if (startOfNextLine >= end) { if (paragraphText.Length == 0) { // Optimize for single line paragraphs. blocks.Add(ParagraphBlock.Parse(markdown.Substring(startOfLine, endOfLine - startOfLine))); } else { // Slow path. paragraphText.Append(markdown.Substring(startOfLine, endOfLine - startOfLine)); blocks.Add(ParagraphBlock.Parse(paragraphText.ToString())); } } else { paragraphText.Append(markdown.Substring(startOfLine, endOfLine - startOfLine)); } } else { // The line contained a block. End the current paragraph, if any. if (paragraphText.Length > 0) { blocks.Add(ParagraphBlock.Parse(paragraphText.ToString())); paragraphText.Clear(); } blocks.Add(newBlockElement); } } // Repeat. previousStartOfLine = startOfLine; previousEndOfLine = endOfLine; startOfLine = startOfNextLine; } actualEnd = startOfLine; return(blocks); }