示例#1
0
        /// <summary>
        /// Creates a new <see cref="ParsingContext"/> that records the current position and line
        /// information, and pushes it onto the context stack.
        /// </summary>
        /// <param name="terminatorExpr">
        /// The terminator expression for the new context. When <c>null</c>, the context is created
        /// with a <c>null</c> terminator.
        /// </param>
        /// <param name="overridesTerminator">Forwarded unchanged to the new context.</param>
        private void ParseStart(string terminatorExpr, bool overridesTerminator)
        {
            // Resolve the terminator first; a null expression means "no terminator of its own".
            var terminator = terminatorExpr != null ? Terminator.Get(terminatorExpr) : null;
            var newContext = new ParsingContext(terminator, overridesTerminator,
                                                position, lineNumber, linePosition);

            contextStack.Push(newContext);
        }
示例#2
0
        /// <summary>
        /// Attempts to parse a wikilink of the form <c>[[target]]</c> or <c>[[target|text]]</c>.
        /// </summary>
        /// <returns>The result of <c>ParseSuccessful</c> on the parsed node, or the result of <c>ParseFailed</c>.</returns>
        private WikiLink ParseWikiLink()
        {
            // "[[" is part of the terminator because a wikilink cannot nest itself.
            ParseStart(@"\||\n|\[\[|\]\]", true);

            var hasOpeningMark = ConsumeToken(@"\[\[") != null;
            if (!hasOpeningMark)
            {
                return ParseFailed<WikiLink>();
            }

            // Target part.
            var linkTarget = new Run();
            var targetParsed = ParseRun(RunParsingMode.ExpandableText, linkTarget, true);
            if (!targetParsed)
            {
                if (!options.AllowEmptyWikiLinkTarget)
                {
                    return ParseFailed<WikiLink>();
                }
                // An empty target is tolerated by the options; represent it as null.
                linkTarget = null;
            }

            var link = new WikiLink();
            link.Target = linkTarget;

            // Optional text part, introduced by a pipe.
            var hasPipe = ConsumeToken(@"\|") != null;
            if (hasPipe)
            {
                var linkText = new Run();
                // Inside the text a pipe is ordinary content, so drop it from the terminator.
                CurrentContext.Terminator = Terminator.Get(@"\n|\[\[|\]\]");
                // [[target|]]  -> Text is an empty Run
                // [[target]]   -> Text stays null
                var textParsed = ParseRun(RunParsingMode.ExpandableText, linkText, true);
                if (textParsed)
                {
                    link.Text = linkText;
                }
            }

            // Closing mark.
            var hasClosingMark = ConsumeToken(@"\]\]") != null;
            if (hasClosingMark)
            {
                return ParseSuccessful(link);
            }
            if (!options.AllowClosingMarkInference)
            {
                return ParseFailed<WikiLink>();
            }
            // Missing "]]" is accepted; flag the closing mark as inferred.
            link.SetInferredClosingMark();
            return ParseSuccessful(link);
        }
示例#3
0
        /// <summary>
        /// Attempts to parse a tag attribute value using the specified quoting style.
        /// </summary>
        /// <param name="quoteType">The quoting style to try.</param>
        /// <returns>
        /// The result of <c>ParseSuccessful</c> on the parsed value; or the result of <c>ParseFailed</c>
        /// when the opening quote is missing or the closing quote is not followed by
        /// whitespace or <c>&gt;</c>.
        /// </returns>
        private Wikitext ParseAttributeValue(ValueQuoteType quoteType)
        {
            ParseStart(null, true);

            if (quoteType == ValueQuoteType.None)
            {
                // Unquoted values end at whitespace, '>' or "/>".
                CurrentContext.Terminator = Terminator.Get(@"[>\s]|/>");
                Wikitext unquotedValue = ParseWikitext();
                return ParseSuccessful(unquotedValue, false);
            }

            if (quoteType == ValueQuoteType.SingleQuotes)
            {
                var hasOpeningQuote = ConsumeToken("\'") != null;
                if (hasOpeningQuote)
                {
                    // Right angle brackets are still not allowed inside the quotes.
                    CurrentContext.Terminator = Terminator.Get("[>\']|/>");
                    Wikitext quotedValue = ParseWikitext();
                    var properlyClosed = ConsumeToken("\'(?=\\s|>)") != null;
                    if (properlyClosed)
                    {
                        return ParseSuccessful(quotedValue, false);
                    }
                    // Otherwise the input looks like
                    //     <tag attr='value'value>
                    // This attempt fails here; the original comment says such input is
                    // treated as unquoted text.
                }
            }
            else if (quoteType == ValueQuoteType.DoubleQuotes)
            {
                var hasOpeningQuote = ConsumeToken("\"") != null;
                if (hasOpeningQuote)
                {
                    // Right angle brackets are still not allowed inside the quotes.
                    CurrentContext.Terminator = Terminator.Get("[>\"]|/>");
                    Wikitext quotedValue = ParseWikitext();
                    var properlyClosed = ConsumeToken("\"(?=\\s|>)") != null;
                    if (properlyClosed)
                    {
                        return ParseSuccessful(quotedValue, false);
                    }
                }
            }
            else
            {
                // Unknown quote type.
                Debug.Assert(false);
            }

            return ParseFailed<Wikitext>();
        }
示例#4
0
        /// <summary>
        /// Parses a PARAGRAPH_CLOSE, i.e. the line break(s) following the latest parsed line node.
        /// </summary>
        /// <remarks>
        /// One <c>\n</c> is always consumed first. If <paramref name="lastNode"/> is a compact
        /// (unclosed) paragraph, a second <c>\n</c> or a terminator closes it and the consumed
        /// text is appended to that paragraph. Otherwise the method either produces an extra
        /// paragraph (when a non-<c>\n</c> terminator follows) or falls back.
        /// </remarks>
        /// <param name="lastNode">The latest parsed node. Must not be <c>null</c>.</param>
        /// <returns>The extra paragraph, or <see cref="EMPTY_LINE_NODE"/>. If parsing attempt failed, <c>null</c>.</returns>
        private LineNode ParseLineEnd(LineNode lastNode)
        {
            Debug.Assert(lastNode != null);
            var unclosedParagraph = lastNode as Paragraph;

            // Only a compact paragraph counts as "unclosed"; any other paragraph is ignored here.
            if (unclosedParagraph != null && !unclosedParagraph.Compact)
            {
                unclosedParagraph = null;
            }
            // 2 line breaks (\n\n) or \n Terminator closes the paragraph,
            // so do a look-ahead here. Note that a \n will be consumed in ParseWikitext .
            // Note that this look-ahead will also bypass the \n terminator defined in WIKITEXT

            // For the last non-empty line
            // TERM     Terminators
            // PC       Compact/unclosed paragraph
            // P        Closed paragraph
            // abc TERM             PC[|abc|]
            // abc\n TERM           P[|abc|]
            // abc\n\s*?\n TERM     P[|abc|]PC[||]
            // Note that MediaWiki editor will automatically trim the trailing whitespaces,
            // leaving a \n after the content. This one \n will be removed when the page is transcluded.

            // Remember where the first \n starts, so it can be attributed to the last paragraph later.
            var lastLinePosition = linePosition;

            // Here we consume a \n without fallback.
            if (ConsumeToken(@"\n") == null)
            {
                // No line break at the current position: nothing to parse.
                return(null);
            }
            // NOTE(review): the parameterless ParseStart overload is not visible in this chunk;
            // presumably it opens a context here, just after the first \n, so that the
            // Fallback() calls below rewind to this point -- confirm.
            ParseStart();
            // Whitespaces between 2 \n, assuming there's a second \n or TERM after trailingWs
            var trailingWs = ConsumeToken(@"[\f\r\t\v\x85\p{Z}]+");

            if (unclosedParagraph != null)
            {
                // We're going to consume another \n or TERM to close the paragraph.
                // Already consumed a \n, attempt to consume another \n
                var trailingWsEndsAt = linePosition;
                if (ConsumeToken(@"\n") != null)
                {
                    // Close the last paragraph.
                    unclosedParagraph.AppendWithLineInfo("\n" + trailingWs,
                                                         // don't forget the position of leading '\n'
                                                         CurrentContext.StartingLineNumber - 1, lastLinePosition,
                                                         CurrentContext.StartingLineNumber, trailingWsEndsAt);
                    // 2 Line breaks received.
                    // Check for the special case. Note here TERM excludes \n
                    if (NeedsTerminate(Terminator.Get(@"\n")))
                    {
                        // This is a special case.
                        // abc \n trailingWs \n TERM --> P[|abc\ntrailingWs|]PC[||]
                        //                      ^ We are here.
                        // When the function returns, WIKITEXT parsing will stop
                        // because a TERM will be received.
                        // We need to correct this.
                        // Emit an empty paragraph positioned at the current location.
                        var anotherparagraph = new Paragraph();
                        anotherparagraph.SetLineInfo(lineNumber, linePosition, lineNumber, linePosition);
                        return(ParseSuccessful(anotherparagraph, false));
                    }
                    // The last paragraph will be closed now.
                    return(ParseSuccessful(EMPTY_LINE_NODE, false));
                }
                // The attempt to consume the 2nd \n failed.
                if (NeedsTerminate())
                {
                    // abc \n trailingWs TERM   P[|abc|]
                    //                   ^ We are here.
                    // If we need to terminate, then close the last paragraph.
                    unclosedParagraph.AppendWithLineInfo("\n" + trailingWs,
                                                         // don't forget the position of leading '\n'
                                                         CurrentContext.StartingLineNumber - 1, lastLinePosition,
                                                         lineNumber, linePosition);
                    return(ParseSuccessful(EMPTY_LINE_NODE, false));
                }
                // The last paragraph is still not closed (i.e. compact paragraph).
                // (A)
                // Note here we have still consumed the first '\n', while the last paragraph has no trailing '\n'.
                // For continued PlainText, we will add a '\n' in ParseCompactParagraph.
                // Add an empty node so ParseCompactParagraph can add a '\n' with LineInfo.
                unclosedParagraph.AppendWithLineInfo("", CurrentContext.StartingLineNumber - 1, lastLinePosition,
                                                     CurrentContext.StartingLineNumber - 1, lastLinePosition);
                // Fallback so we can either continue parsing PlainText,
                // or discover the next, for example, Heading, and leave the last paragraph compact.
                Fallback();
                return(EMPTY_LINE_NODE);
            }
            else
            {
                // Last node cannot be a closed paragraph.
                // It can't because ParseLineEnd is invoked immediately after a last node is parsed,
                // and only ParseLineEnd can close a paragraph.
                Debug.Assert(!(lastNode is Paragraph), "Last node cannot be a closed paragraph.");
                // Rather, last node is LINE node of other type (LIST_ITEM/HEADING).
                // Remember we've already consumed a '\n' , and the spaces after it.
                // The situation here is just like the "special case" mentioned above.
                if (NeedsTerminate(Terminator.Get(@"\n")))
                {
                    // abc \n WHITE_SPACE TERM  -->  [|abc|] PC[|WHITE_SPACE|]
                    //        ^ CurCntxt  ^ We are here now.
                    // Note here TERM excludes \n
                    var anotherparagraph = new Paragraph();
                    if (trailingWs != null)
                    {
                        // Keep the whitespace after the \n as the content of the new paragraph.
                        var pt = new PlainText(trailingWs);
                        // Actually the same as what we do in ParseSuccessful for PlainText.
                        pt.SetLineInfo(CurrentContext.StartingLineNumber, CurrentContext.StartingLinePosition,
                                       lineNumber, linePosition);
                        anotherparagraph.Inlines.Add(pt);
                    }
                    return(ParseSuccessful(anotherparagraph));
                }
            }
            // abc \n def
            // That's not the end of a paragraph. Fallback to before the 1st \n .
            // Note here we have already consumed a \n .
            Fallback();
            return(EMPTY_LINE_NODE);
        }
示例#5
0
        /// <summary>
        /// Attempts to parse an external link, either bracketed (<c>[url text]</c>) or a bare URL.
        /// </summary>
        /// <returns>The result of <c>ParseSuccessful</c> on the parsed node, or the result of <c>ParseFailed</c>.</returns>
        private ExternalLink ParseExternalLink()
        {
            ParseStart(@"[\s\]\|]", true);
            var hasBrackets = ConsumeToken(@"\[") != null;

            // Target part.
            Run linkTarget;
            if (!hasBrackets)
            {
                // Conservative: without brackets, only URL text is accepted.
                var urlText = ParseUrlText();
                if (urlText == null)
                {
                    return ParseFailed<ExternalLink>();
                }
                linkTarget = new Run(urlText);
                linkTarget.SetLineInfo(urlText);
            }
            else
            {
                // Aggressive: inside brackets, the target is parsed as an expandable URL run.
                linkTarget = new Run();
                var targetParsed = ParseRun(RunParsingMode.ExpandableUrl, linkTarget, true);
                if (!targetParsed)
                {
                    if (!options.AllowEmptyExternalLinkTarget)
                    {
                        return ParseFailed<ExternalLink>();
                    }
                    // An empty target is tolerated by the options; represent it as null.
                    linkTarget = null;
                }
            }

            var link = new ExternalLink
            {
                Target = linkTarget,
                Brackets = hasBrackets
            };

            if (!hasBrackets)
            {
                // A bare URL has neither display text nor a closing bracket.
                return ParseSuccessful(link);
            }

            // Text part: only present when a space or tab follows the target.
            var hasSeparator = ConsumeToken(@"[ \t]") != null;
            if (hasSeparator)
            {
                CurrentContext.Terminator = Terminator.Get(@"[\]\n]");
                var linkText = new Run();
                // [http://target  ]  -> Text == " "
                // [http://target ]   -> Text is an empty Run
                // [http://target]    -> Text stays null
                var textParsed = ParseRun(RunParsingMode.Run, linkText, true);
                if (textParsed)
                {
                    link.Text = linkText;
                }
            }

            // A bracketed link must be closed by "]".
            var hasClosingBracket = ConsumeToken(@"\]") != null;
            if (!hasClosingBracket)
            {
                return ParseFailed(link);
            }
            return ParseSuccessful(link);
        }