Esempio n. 1
0
        /// <summary>
        /// Parses a PARAGRAPH_CLOSE, i.e. the line break(s) that follow the last parsed line node.
        /// </summary>
        /// <param name="lastNode">The most recently parsed node. Must not be <c>null</c>.</param>
        /// <returns>
        /// An extra paragraph, or <see cref="EMPTY_LINE_NODE"/>.
        /// <c>null</c> if there is no <c>\n</c> to consume at the current position.
        /// </returns>
        private LineNode ParseLineEnd(LineNode lastNode)
        {
            Debug.Assert(lastNode != null);

            // Only a compact (i.e. still unclosed) paragraph is a candidate for being closed here.
            var lastParagraph = lastNode as Paragraph;
            var openParagraph = lastParagraph != null && lastParagraph.Compact ? lastParagraph : null;

            // A paragraph is closed by 2 line breaks (\n\n) or by \n followed by a terminator,
            // hence the look-ahead below. Note that a \n will be consumed in ParseWikitext,
            // and that this look-ahead also bypasses the \n terminator defined in WIKITEXT.
            //
            // Legend for the last non-empty line:
            //   TERM   terminators
            //   PC     compact/unclosed paragraph
            //   P      closed paragraph
            //
            //   abc TERM             PC[|abc|]
            //   abc\n TERM           P[|abc|]
            //   abc\n\s*?\n TERM     P[|abc|]PC[||]
            //
            // The MediaWiki editor automatically trims trailing whitespace, leaving one \n after
            // the content. That single \n is removed when the page is transcluded.
            var firstBreakPosition = linePosition;

            // The first \n is consumed unconditionally (outside of any fallback scope).
            if (ConsumeToken(@"\n") == null)
            {
                return null;
            }

            ParseStart();
            // Whitespace between the 2 line breaks, assuming a second \n or TERM follows it.
            var blanks = ConsumeToken(@"[\f\r\t\v\x85\p{Z}]+");

            if (openParagraph == null)
            {
                // The last node cannot be a closed paragraph: ParseLineEnd is invoked right after
                // a node is parsed, and only ParseLineEnd can close a paragraph.
                Debug.Assert(!(lastNode is Paragraph), "Last node cannot be a closed paragraph.");

                // So the last node is a LINE node of another type (LIST_ITEM/HEADING).
                // A '\n' and the whitespace after it have already been consumed.
                if (!NeedsTerminate(Terminator.Get(@"\n")))
                {
                    // abc \n def
                    // Not the end of a paragraph. Roll back to just after the 1st \n,
                    // which stays consumed.
                    Fallback();
                    return EMPTY_LINE_NODE;
                }

                // abc \n WHITE_SPACE TERM  -->  [|abc|] PC[|WHITE_SPACE|]
                //        ^ CurCntxt  ^ We are here now.
                // Here TERM excludes \n.
                var whitespaceParagraph = new Paragraph();
                if (blanks != null)
                {
                    // Same line info that ParseSuccessful would assign to a PlainText.
                    var blankText = new PlainText(blanks);
                    blankText.SetLineInfo(CurrentContext.StartingLineNumber, CurrentContext.StartingLinePosition,
                                          lineNumber, linePosition);
                    whitespaceParagraph.Inlines.Add(blankText);
                }
                return ParseSuccessful(whitespaceParagraph);
            }

            // From here on there is an unclosed paragraph; closing it takes another \n or a TERM.
            var blanksEndPosition = linePosition;

            if (ConsumeToken(@"\n") == null)
            {
                // No second \n available.
                if (NeedsTerminate())
                {
                    // abc \n trailingWs TERM   P[|abc|]
                    //                   ^ We are here.
                    // Terminating, so close the last paragraph.
                    // The span starts at the position of the leading '\n'.
                    openParagraph.AppendWithLineInfo("\n" + blanks,
                                                     CurrentContext.StartingLineNumber - 1, firstBreakPosition,
                                                     lineNumber, linePosition);
                    return ParseSuccessful(EMPTY_LINE_NODE, false);
                }

                // (A) The paragraph stays unclosed (compact).
                // The first '\n' has been consumed, yet the paragraph has no trailing '\n'.
                // For continued PlainText, ParseCompactParagraph adds that '\n'; the empty node
                // appended here lets it do so with proper line info.
                openParagraph.AppendWithLineInfo("", CurrentContext.StartingLineNumber - 1, firstBreakPosition,
                                                 CurrentContext.StartingLineNumber - 1, firstBreakPosition);
                // Roll back so the caller can either continue parsing PlainText, or discover
                // the next node (e.g. a Heading) and leave the last paragraph compact.
                Fallback();
                return EMPTY_LINE_NODE;
            }

            // 2 line breaks received: close the last paragraph.
            // The span starts at the position of the leading '\n'.
            openParagraph.AppendWithLineInfo("\n" + blanks,
                                             CurrentContext.StartingLineNumber - 1, firstBreakPosition,
                                             CurrentContext.StartingLineNumber, blanksEndPosition);

            // Special case (TERM excludes \n here):
            // abc \n trailingWs \n TERM --> P[|abc\ntrailingWs|]PC[||]
            //                      ^ We are here.
            // WIKITEXT parsing stops once this function returns because a TERM is pending,
            // so the trailing empty compact paragraph has to be produced right now.
            if (NeedsTerminate(Terminator.Get(@"\n")))
            {
                var emptyParagraph = new Paragraph();
                emptyParagraph.SetLineInfo(lineNumber, linePosition, lineNumber, linePosition);
                return ParseSuccessful(emptyParagraph, false);
            }

            // The last paragraph is closed now.
            return ParseSuccessful(EMPTY_LINE_NODE, false);
        }
Esempio n. 2
0
        /// <summary>
        /// Attempts to parse a HEADING at the current position.
        /// </summary>
        /// <returns>
        /// The result of <c>ParseSuccessful</c> for the assembled <see cref="Heading"/>;
        /// <c>null</c> if the input does not start with <c>=</c> or no heading level yields a valid heading.
        /// </returns>
        private Heading ParseHeading()
        {
            // Look ahead for the leading '=' run; its length is the highest level worth trying,
            // assuming the line is a valid heading.
            var leadingBars = LookAheadToken("={1,6}");

            if (leadingBars == null)
            {
                return null;
            }

            // All headings are required to terminate with \n or EOF, so this won't be recognized
            // {{{ARG|== Default Heading ==}}}\n
            // But this will work
            // {{{ARG|== Default Heading ==
            // }}}
            // And this should be recognized as a heading
            //  == {{T|
            //  arg1}} ==

            // Try each level, from the highest candidate down to 1.
            for (var level = leadingBars.Length; level > 0; level--)
            {
                var barPattern = "={" + level + "}";
                // Early-stopping terminator pattern.
                // E.g. for ==abc=={{def}}=={{ghi}}
                // the first call to ParseRun stops at =={{ ,
                // so parsing has to continue, producing the segment list
                // abc, {{def}}, {{ghi}}
                var closingBarPattern = barPattern + "(?!=)";
                ParseStart(closingBarPattern, false);   // <-- A
                // Keep the call outside of Debug.Assert so the token is consumed in every build.
                var openingBar = ConsumeToken(barPattern);
                Debug.Assert(openingBar != null);

                var heading = new Heading();
                var segments = new List<IInlineContainer>();
                while (true)
                {
                    ParseStart();                       // <-- B
                    var run = new Run();
                    var runParsed = ParseRun(RunParsingMode.Run, run, true);
                    if (!runParsed && LookAheadToken(closingBarPattern) == null)
                    {
                        // Nothing more to parse, and ParseRun was stopped by
                        // a terminator other than the heading terminator.
                        Fallback();
                        break;
                    }
                    if (ConsumeToken(barPattern) != null)
                    {
                        // Segment closed by a bar: keep it (its B context stays open) and go on.
                        segments.Add(run);
                        continue;
                    }
                    // The segment was parsed but is not terminated by a bar:
                    // treat this last segment as the suffix.
                    heading.Suffix = run;
                    Accept();
                    break;
                }

                // Non-whitespace plain text after the heading invalidates this level.
                var suffixHasText = heading.Suffix != null &&
                                    heading.Suffix.Inlines.OfType<PlainText>()
                                           .Any(pt => !string.IsNullOrWhiteSpace(pt.Content));
                var isValid = false;
                if (!suffixHasText)
                {
                    heading.Level = level;
                    // Concatenate the segments, re-inserting the bar between adjacent ones.
                    for (var index = 0; index < segments.Count; index++)
                    {
                        if (index > 0)
                        {
                            var segmentInfo = (IWikitextLineInfo)segments[index];
                            var barText = new PlainText(new string('=', level));
                            // The bar occupies the `level` characters right before the segment.
                            barText.SetLineInfo(segmentInfo.StartLineNumber, segmentInfo.StartLinePosition - level,
                                                segmentInfo.StartLineNumber, segmentInfo.StartLinePosition);
                            heading.Inlines.Add(barText);
                        }
                        heading.Inlines.AddFrom(segments[index].Inlines);
                    }
                    // A heading needs some content.
                    isValid = heading.Inlines.Count != 0;
                }

                if (isValid)
                {
                    // Move forward: close one B context per kept segment ...
                    for (var pending = segments.Count; pending > 0; pending--)
                    {
                        Accept();
                    }
                    // ... then close A.
                    return ParseSuccessful(heading);
                }

                // This level failed: discard one B context per kept segment ...
                for (var pending = segments.Count; pending > 0; pending--)
                {
                    Fallback();
                }
                // ... then discard A and try the next lower level.
                Fallback();
            }
            // Failed (E.g. ^=== Title )
            return null;
        }