/// <summary>
/// Opens a new parsing context at the current position and pushes it onto the context stack.
/// </summary>
/// <param name="terminatorExpr">
/// Terminator expression for the new context; when <c>null</c>, the context is created with a <c>null</c> terminator.
/// </param>
/// <param name="overridesTerminator">Forwarded unchanged to the new <see cref="ParsingContext"/>.</param>
private void ParseStart(string terminatorExpr, bool overridesTerminator)
{
    // Resolve the terminator only when an expression was actually supplied.
    var terminator = terminatorExpr != null ? Terminator.Get(terminatorExpr) : null;

    // The context records where parsing started: position, line number and line position.
    var startedContext = new ParsingContext(terminator, overridesTerminator, position, lineNumber, linePosition);
    contextStack.Push(startedContext);
}
/// <summary>
/// Attempts to parse a wikilink such as <c>[[target]]</c> or <c>[[target|text]]</c>.
/// </summary>
/// <returns>
/// The result of <c>ParseSuccessful</c> for the parsed link, or the result of
/// <c>ParseFailed</c> when the input at the current position is not an acceptable wikilink.
/// </returns>
private WikiLink ParseWikiLink()
{
    // "[[" is one of the terminators because a wikilink cannot nest itself.
    ParseStart(@"\||\n|\[\[|\]\]", true);
    if (ConsumeToken(@"\[\[") == null)
    {
        return ParseFailed<WikiLink>();
    }

    // Target: a failed run is tolerated only when the options allow an empty target.
    var linkTarget = new Run();
    var targetParsed = ParseRun(RunParsingMode.ExpandableText, linkTarget, true);
    if (!targetParsed)
    {
        if (!options.AllowEmptyWikiLinkTarget)
        {
            return ParseFailed<WikiLink>();
        }
        linkTarget = null;
    }

    var link = new WikiLink { Target = linkTarget };

    // Optional display text after the first pipe.
    var pipeConsumed = ConsumeToken(@"\|") != null;
    if (pipeConsumed)
    {
        var linkText = new Run();
        // The text part accepts pipes, so "|" is dropped from the terminator set.
        CurrentContext.Terminator = Terminator.Get(@"\n|\[\[|\]\]");
        // Per the original notes: [[target|]] yields an empty Run as Text,
        // while [[target]] leaves Text as null.
        if (ParseRun(RunParsingMode.ExpandableText, linkText, true))
        {
            link.Text = linkText;
        }
    }

    // Closing mark: either present, or inferred when the options permit it.
    if (ConsumeToken(@"\]\]") != null)
    {
        return ParseSuccessful(link);
    }
    if (options.AllowClosingMarkInference)
    {
        link.SetInferredClosingMark();
        return ParseSuccessful(link);
    }
    return ParseFailed<WikiLink>();
}
/// <summary>
/// Parses the value of a tag attribute, honoring the way the value is quoted.
/// </summary>
/// <param name="quoteType">The quoting style the value is expected to use.</param>
/// <returns>
/// The result of <c>ParseSuccessful</c> for the parsed value, or the result of
/// <c>ParseFailed</c> when the opening or closing quote cannot be matched.
/// </returns>
private Wikitext ParseAttributeValue(ValueQuoteType quoteType)
{
    Wikitext parsedValue;
    ParseStart(null, true);

    if (quoteType == ValueQuoteType.None)
    {
        // Unquoted: the value ends at whitespace, '>' or "/>".
        CurrentContext.Terminator = Terminator.Get(@"[>\s]|/>");
        parsedValue = ParseWikitext();
        return ParseSuccessful(parsedValue, false);
    }
    else if (quoteType == ValueQuoteType.SingleQuotes)
    {
        if (ConsumeToken("\'") != null)
        {
            // Right angle brackets are still not allowed inside the quotes.
            CurrentContext.Terminator = Terminator.Get("[>\']|/>");
            parsedValue = ParseWikitext();
            // The closing quote must be followed by whitespace or '>'.
            if (ConsumeToken("\'(?=\\s|>)") != null)
            {
                return ParseSuccessful(parsedValue, false);
            }
            // Otherwise the input looks like
            //      <tag attr='value'value>
            // and this attempt fails (the original notes say such input is treated as unquoted text).
        }
    }
    else if (quoteType == ValueQuoteType.DoubleQuotes)
    {
        if (ConsumeToken("\"") != null)
        {
            // Right angle brackets are still not allowed inside the quotes.
            CurrentContext.Terminator = Terminator.Get("[>\"]|/>");
            parsedValue = ParseWikitext();
            // The closing quote must be followed by whitespace or '>'.
            if (ConsumeToken("\"(?=\\s|>)") != null)
            {
                return ParseSuccessful(parsedValue, false);
            }
        }
    }
    else
    {
        // Unknown quote type: flag it in debug builds, then fail like any other mismatch.
        Debug.Assert(false);
    }

    return ParseFailed<Wikitext>();
}
/// <summary>
/// Parses a PARAGRAPH_CLOSE, i.e. the line break(s) that follow the line node parsed last.
/// </summary>
/// <param name="lastNode">The most recently parsed line node. Must not be <c>null</c>.</param>
/// <returns>
/// An extra paragraph, or <see cref="EMPTY_LINE_NODE"/>.
/// <c>null</c> when there is no line break at the current position.
/// </returns>
private LineNode ParseLineEnd(LineNode lastNode)
{
    Debug.Assert(lastNode != null);

    // Only a compact (i.e. still unclosed) paragraph is a candidate for closing here.
    var lastParagraph = lastNode as Paragraph;
    var openParagraph = lastParagraph != null && lastParagraph.Compact ? lastParagraph : null;

    // Two line breaks (\n\n), or \n followed by a terminator, close the paragraph,
    // so this method looks ahead. One \n will be consumed in ParseWikitext, and this
    // look-ahead also bypasses the \n terminator defined in WIKITEXT.
    //
    // For the last non-empty line
    //      TERM    Terminators
    //      PC      Compact/unclosed paragraph
    //      P       Closed paragraph
    //
    //      abc TERM                PC[|abc|]
    //      abc\n TERM              P[|abc|]
    //      abc\n\s*?\n TERM        P[|abc|]PC[||]
    //
    // The MediaWiki editor trims trailing whitespace automatically and leaves one \n
    // after the content; that \n is removed when the page is transcluded.
    var firstBreakPosition = linePosition;

    // The first \n is consumed before the context below is opened, so it is not
    // covered by that context's fallback.
    if (ConsumeToken(@"\n") == null)
    {
        return null;
    }
    ParseStart();

    // Whitespace between the two \n, assuming a second \n or TERM follows it.
    var trailingWs = ConsumeToken(@"[\f\r\t\v\x85\p{Z}]+");

    if (openParagraph == null)
    {
        // ParseLineEnd runs right after a node has been parsed, and only ParseLineEnd
        // can close a paragraph, so the last node cannot be a closed paragraph here.
        Debug.Assert(!(lastNode is Paragraph), "Last node cannot be a closed paragraph.");

        // The last node is a LINE node of another type (LIST_ITEM/HEADING).
        // One \n and the whitespace after it are already consumed.
        if (!NeedsTerminate(Terminator.Get(@"\n")))
        {
            //      abc \n def
            // Not the end of a paragraph: fall back (the first \n stays consumed).
            Fallback();
            return EMPTY_LINE_NODE;
        }

        //      abc \n WHITE_SPACE TERM  -->  [|abc|] PC[|WHITE_SPACE|]
        //          ^ current context    ^ current position
        // Per the original notes, TERM excludes \n in this check.
        var whitespaceParagraph = new Paragraph();
        if (trailingWs != null)
        {
            // Mirrors what ParseSuccessful does for PlainText.
            var whitespaceText = new PlainText(trailingWs);
            whitespaceText.SetLineInfo(CurrentContext.StartingLineNumber, CurrentContext.StartingLinePosition,
                lineNumber, linePosition);
            whitespaceParagraph.Inlines.Add(whitespaceText);
        }
        return ParseSuccessful(whitespaceParagraph);
    }

    // From here on there is an unclosed paragraph; a second \n or a TERM closes it.
    var trailingWsEndsAt = linePosition;
    if (ConsumeToken(@"\n") == null)
    {
        // No second \n.
        if (NeedsTerminate())
        {
            //      abc \n trailingWs TERM   P[|abc|]
            //                        ^ current position
            // A terminator follows, so close the last paragraph.
            openParagraph.AppendWithLineInfo("\n" + trailingWs,
                // do not forget the position of the leading '\n'
                CurrentContext.StartingLineNumber - 1, firstBreakPosition,
                lineNumber, linePosition);
            return ParseSuccessful(EMPTY_LINE_NODE, false);
        }

        // The paragraph stays unclosed (compact).
        // The first '\n' is consumed, but the paragraph has no trailing '\n' yet.
        // Per the original notes, ParseCompactParagraph adds that '\n' for continued
        // PlainText; the empty node appended here gives it LineInfo to attach to.
        openParagraph.AppendWithLineInfo("",
            CurrentContext.StartingLineNumber - 1, firstBreakPosition,
            CurrentContext.StartingLineNumber - 1, firstBreakPosition);
        // Fall back so parsing can either continue the PlainText or discover the next
        // node (a Heading, for example) and leave the last paragraph compact.
        Fallback();
        return EMPTY_LINE_NODE;
    }

    // Second \n consumed: close the last paragraph.
    openParagraph.AppendWithLineInfo("\n" + trailingWs,
        // do not forget the position of the leading '\n'
        CurrentContext.StartingLineNumber - 1, firstBreakPosition,
        CurrentContext.StartingLineNumber, trailingWsEndsAt);

    // Per the original notes, TERM excludes \n in this check.
    if (!NeedsTerminate(Terminator.Get(@"\n")))
    {
        // Ordinary case: the last paragraph is now closed.
        return ParseSuccessful(EMPTY_LINE_NODE, false);
    }

    // Special case:
    //      abc \n trailingWs \n TERM  -->  P[|abc\ntrailingWs|]PC[||]
    //                           ^ current position
    // WIKITEXT parsing stops after this method returns because a TERM is received,
    // so the trailing empty paragraph has to be produced here.
    var emptyParagraph = new Paragraph();
    emptyParagraph.SetLineInfo(lineNumber, linePosition, lineNumber, linePosition);
    return ParseSuccessful(emptyParagraph, false);
}
/// <summary>
/// Attempts to parse an external link, either bracketed (<c>[url text]</c>) or a bare URL.
/// </summary>
/// <returns>
/// The result of <c>ParseSuccessful</c> for the parsed link, or the result of
/// <c>ParseFailed</c> when no acceptable external link is found at the current position.
/// </returns>
private ExternalLink ParseExternalLink()
{
    ParseStart(@"[\s\]\|]", true);
    var hasBrackets = ConsumeToken(@"\[") != null;

    // Parse the target.
    Run linkTarget;
    if (!hasBrackets)
    {
        // Conservative: without brackets only URL text is accepted.
        var url = ParseUrlText();
        if (url == null)
        {
            return ParseFailed<ExternalLink>();
        }
        linkTarget = new Run(url);
        linkTarget.SetLineInfo(url);
    }
    else
    {
        // Aggressive: inside brackets an expandable URL run is parsed.
        linkTarget = new Run();
        if (!ParseRun(RunParsingMode.ExpandableUrl, linkTarget, true))
        {
            if (!options.AllowEmptyExternalLinkTarget)
            {
                return ParseFailed<ExternalLink>();
            }
            linkTarget = null;
        }
    }

    var link = new ExternalLink { Target = linkTarget, Brackets = hasBrackets };
    if (!hasBrackets)
    {
        // A bare URL has neither display text nor a closing bracket.
        return ParseSuccessful(link);
    }

    // Parse the display text: exactly one space or tab separates it from the target.
    if (ConsumeToken(@"[ \t]") != null)
    {
        CurrentContext.Terminator = Terminator.Get(@"[\]\n]");
        var linkText = new Run();
        // Text is set only when ParseRun reports success; with no separator
        // after the target (e.g. [http://target]) Text stays null.
        if (ParseRun(RunParsingMode.Run, linkText, true))
        {
            link.Text = linkText;
        }
    }

    // A bracketed link must be closed by ']'.
    if (ConsumeToken(@"\]") == null)
    {
        return ParseFailed(link);
    }
    return ParseSuccessful(link);
}