/// <summary>
/// Parses a PARAGRAPH_CLOSE.
/// </summary>
/// <param name="lastNode">The most recently parsed line node.</param>
/// <returns>
/// An extra paragraph, or <see cref="EMPTY_LINE_NODE"/>.
/// <c>null</c> if the parsing attempt failed (no line break at the current position).
/// </returns>
private LineNode ParseLineEnd(LineNode lastNode)
{
    Debug.Assert(lastNode != null);

    // Only a compact (i.e. still unclosed) paragraph is a candidate for closing.
    var lastParagraph = lastNode as Paragraph;
    var openParagraph = (lastParagraph != null && lastParagraph.Compact) ? lastParagraph : null;

    // A paragraph is closed by two line breaks (\n\n) or by \n followed by a terminator,
    // so we look ahead here. Note that one \n will be consumed in ParseWikitext, and
    // that this look-ahead also bypasses the \n terminator defined in WIKITEXT.
    //
    // For the last non-empty line, with
    //      TERM    Terminators
    //      PC      Compact/unclosed paragraph
    //      P       Closed paragraph
    // the outcomes are
    //      abc TERM                PC[|abc|]
    //      abc\n TERM              P[|abc|]
    //      abc\n\s*?\n TERM        P[|abc|]PC[||]
    //
    // The MediaWiki editor trims trailing whitespace automatically, leaving a single \n
    // after the content. That \n is removed when the page is transcluded.
    var firstBreakPosition = linePosition;

    // The first \n is consumed outside of any parsing context, i.e. without fallback.
    if (ConsumeToken(@"\n") == null)
    {
        return null;
    }

    ParseStart();
    // Whitespace between the two \n, assuming a second \n or TERM follows it.
    var gap = ConsumeToken(@"[\f\r\t\v\x85\p{Z}]+");

    if (openParagraph == null)
    {
        // The last node cannot be a closed paragraph: ParseLineEnd is invoked immediately
        // after a node has been parsed, and only ParseLineEnd can close a paragraph.
        Debug.Assert(!(lastNode is Paragraph), "Last node cannot be a closed paragraph.");

        // So the last node is a LINE node of another kind (LIST_ITEM/HEADING).
        // We have already consumed a '\n' and the whitespace after it.
        if (!NeedsTerminate(Terminator.Get(@"\n")))
        {
            // abc \n def
            // Not the end of a paragraph. Roll back to just after the first \n,
            // which stays consumed.
            Fallback();
            return EMPTY_LINE_NODE;
        }

        // abc \n WHITE_SPACE TERM  -->  [|abc|] PC[|WHITE_SPACE|]
        //        ^ CurrentContext  ^ We are here now.
        // Note that TERM excludes \n here.
        var whitespaceParagraph = new Paragraph();
        if (gap != null)
        {
            // Same line info that ParseSuccessful would assign to a PlainText.
            var gapText = new PlainText(gap);
            gapText.SetLineInfo(CurrentContext.StartingLineNumber, CurrentContext.StartingLinePosition,
                lineNumber, linePosition);
            whitespaceParagraph.Inlines.Add(gapText);
        }
        return ParseSuccessful(whitespaceParagraph);
    }

    // From here on there is an unclosed paragraph, which needs another \n or TERM to close.
    var gapEndPosition = linePosition;

    if (ConsumeToken(@"\n") == null)
    {
        // There is no second \n.
        if (NeedsTerminate())
        {
            // abc \n trailingWs TERM   P[|abc|]
            //                   ^ We are here.
            // A terminator closes the last paragraph as well.
            openParagraph.AppendWithLineInfo("\n" + gap,
                // do not forget the position of the leading '\n'
                CurrentContext.StartingLineNumber - 1, firstBreakPosition,
                lineNumber, linePosition);
            return ParseSuccessful(EMPTY_LINE_NODE, false);
        }

        // The last paragraph remains unclosed (compact).
        // The first '\n' has been consumed, yet the paragraph has no trailing '\n'.
        // For continued PlainText, ParseCompactParagraph will add that '\n';
        // append an empty node so it can do so with proper line info.
        openParagraph.AppendWithLineInfo("",
            CurrentContext.StartingLineNumber - 1, firstBreakPosition,
            CurrentContext.StartingLineNumber - 1, firstBreakPosition);
        // Roll back so the caller can either continue with PlainText, or discover the
        // next node (a Heading, for example) and leave the last paragraph compact.
        Fallback();
        return EMPTY_LINE_NODE;
    }

    // Two line breaks received: close the last paragraph.
    openParagraph.AppendWithLineInfo("\n" + gap,
        // do not forget the position of the leading '\n'
        CurrentContext.StartingLineNumber - 1, firstBreakPosition,
        CurrentContext.StartingLineNumber, gapEndPosition);

    // Note that TERM excludes \n in this check.
    if (!NeedsTerminate(Terminator.Get(@"\n")))
    {
        // The ordinary case: the last paragraph is closed now.
        return ParseSuccessful(EMPTY_LINE_NODE, false);
    }

    // Special case:
    //      abc \n trailingWs \n TERM  -->  P[|abc\ntrailingWs|]PC[||]
    //                           ^ We are here.
    // Once this function returns, WIKITEXT parsing stops because a TERM is met,
    // so the trailing empty paragraph has to be produced here.
    var trailingParagraph = new Paragraph();
    trailingParagraph.SetLineInfo(lineNumber, linePosition, lineNumber, linePosition);
    return ParseSuccessful(trailingParagraph, false);
}
/// <summary>
/// Parses a HEADING.
/// </summary>
/// <returns>
/// The parsed heading, or <c>null</c> if no heading level yields a valid heading
/// at the current position (e.g. <c>^=== Title</c>).
/// </returns>
private Heading ParseHeading()
{
    // The leading run of '=' gives the highest level to try, assuming the line is a valid heading.
    var leadingBars = LookAheadToken("={1,6}");
    if (leadingBars == null)
    {
        return null;
    }

    // Every heading is required to terminate with \n or EOF, so this is not recognized
    //      {{{ARG|== Default Heading ==}}}\n
    // while this works
    //      {{{ARG|== Default Heading ==
    //      }}}
    // and this should be recognized as a heading
    //      == {{T|
    //      arg1}} ==

    // Try each level, from the highest possible one downwards.
    for (var level = leadingBars.Length; level > 0; level--)
    {
        var barPattern = "={" + level + "}";
        // An early-stopping pattern is used.
        // E.g. for ==abc=={{def}}=={{ghi}}
        // the first call to ParseRun stops at =={{
        // so parsing has to continue, yielding the segments
        //      abc, {{def}}, {{ghi}}
        var closingPattern = barPattern + "(?!=)";

        ParseStart(closingPattern, false);      // <-- A
        var openingBar = ConsumeToken(barPattern);
        Debug.Assert(openingBar != null);

        var heading = new Heading();
        var segments = new List<IInlineContainer>();
        for (;;)
        {
            ParseStart();                       // <-- B
            var run = new Run();
            var runParsed = ParseRun(RunParsingMode.Run, run, true);
            if (!runParsed && LookAheadToken(closingPattern) == null)
            {
                // Nothing more to parse, and ParseRun was stopped by a terminator
                // other than the heading terminator. Stop and analyze.
                Fallback();
                break;
            }
            if (ConsumeToken(barPattern) == null)
            {
                // The run has been parsed but is not followed by "===":
                // treat it as the suffix. Stop and analyze.
                heading.Suffix = run;
                Accept();
                break;
            }
            // The run is a proper segment; its context (B) stays open for now.
            segments.Add(run);
        }

        // No non-whitespace plain text is allowed after the heading.
        var isValid = heading.Suffix == null
                      || !heading.Suffix.Inlines.OfType<PlainText>()
                          .Any(pt => !string.IsNullOrWhiteSpace(pt.Content));
        if (isValid)
        {
            heading.Level = level;
            // Concatenate the segments, putting "===" back between neighbours.
            for (var index = 0; index < segments.Count; index++)
            {
                if (index > 0)
                {
                    // The bar ends right where the following segment starts.
                    var following = (IWikitextLineInfo)segments[index];
                    var bar = new PlainText(new string('=', level));
                    bar.SetLineInfo(following.StartLineNumber, following.StartLinePosition - level,
                        following.StartLineNumber, following.StartLinePosition);
                    heading.Inlines.Add(bar);
                }
                heading.Inlines.AddFrom(segments[index].Inlines);
            }
            // A heading must have some content.
            isValid = heading.Inlines.Count != 0;
        }

        if (isValid)
        {
            // Move forward: accept one context (B) per collected segment...
            for (var pending = segments.Count; pending > 0; pending--)
            {
                Accept();
            }
            // ...then the heading context itself (A).
            return ParseSuccessful(heading);
        }

        // This level failed: unwind one context (B) per collected segment...
        for (var pending = segments.Count; pending > 0; pending--)
        {
            Fallback();
        }
        // ...then the heading context itself (A), and try the next lower level.
        Fallback();
    }

    // Failed (e.g. ^=== Title )
    return null;
}