Example #1
0
        /// <summary>
        /// Enumerates the lines that follow the first heading accepted by <paramref name="headingSelector"/>.
        /// </summary>
        /// <param name="root">The wikitext whose <c>Lines</c> are scanned in order.</param>
        /// <param name="headingSelector">Predicate used to pick the heading that opens the section.</param>
        /// <returns>
        /// The lines after the selected heading (the heading itself is not returned), up to but not including
        /// the next heading whose <c>Level</c> is less than or equal to the selected heading's <c>Level</c>.
        /// Nothing is yielded when no heading is accepted by the selector.
        /// </returns>
        public static IEnumerable <LineNode> ExtractSection(this Wikitext root, Func <Heading, bool> headingSelector)
        {
            Heading sectionHeading = null;

            foreach (var line in root.Lines)
            {
                if (sectionHeading == null)
                {
                    // Searching phase: nothing is emitted until a heading is accepted by the selector.
                    if (line is Heading candidate && headingSelector(candidate))
                    {
                        sectionHeading = candidate;
                    }
                    continue;
                }

                // Emitting phase: a heading with Level <= the section heading's Level closes the section.
                if (line is Heading next && sectionHeading.Level >= next.Level)
                {
                    yield break;
                }
                yield return line;
            }
        }
Example #2
0
        /// <summary>
        /// Builds the introduction text from the lines that precede the first <see cref="Heading"/>.
        /// </summary>
        /// <param name="root">The wikitext whose leading lines are read.</param>
        /// <returns>
        /// The trimmed plain text (converted with <c>NodePlainTextOptions.RemoveRefTags</c>) of each line kept
        /// by <c>NonEmptyLines()</c>, joined with <c>"\n"</c>; <c>null</c> when the joined text is empty.
        /// </returns>
        public static string ExtractIntro(this Wikitext root)
        {
            var introLines = root.Lines.TakeWhile(l => !(l is Heading)).NonEmptyLines();
            var plainLines = introLines.Select(l => l.ToPlainText(NodePlainTextOptions.RemoveRefTags).Trim());
            var joined     = string.Join("\n", plainLines);

            // An empty result is reported as null rather than as an empty string.
            return joined == "" ? null : joined;
        }
Example #3
0
 /// <summary>
 /// Creates a document from its text, its parsed root and the diagnostics associated with it.
 /// </summary>
 /// <param name="textDocument">The text document; must not be <c>null</c>.</param>
 /// <param name="root">The parsed wikitext root; must not be <c>null</c>.</param>
 /// <param name="diagnostics">The diagnostics; <c>null</c> or an empty collection is replaced by <c>Diagnostic.EmptyDiagnostics</c>.</param>
 /// <exception cref="ArgumentNullException"><paramref name="textDocument"/> or <paramref name="root"/> is <c>null</c>.</exception>
 public LintedWikitextDocument(TextDocument textDocument, Wikitext root, ICollection <Diagnostic> diagnostics)
 {
     if (textDocument == null)
     {
         throw new ArgumentNullException(nameof(textDocument));
     }
     if (root == null)
     {
         throw new ArgumentNullException(nameof(root));
     }

     TextDocument = textDocument;
     _Root        = root;

     // Only keep the caller's collection when it actually contains something.
     if (diagnostics != null && diagnostics.Count != 0)
     {
         Diagnostics = diagnostics;
     }
     else
     {
         Diagnostics = Diagnostic.EmptyDiagnostics;
     }
 }
        /// <summary>
        /// Builds a <see cref="Character"/> from a page that contains a "Charcat" template.
        /// </summary>
        /// <param name="root">The parsed page; must not be <c>null</c>.</param>
        /// <returns>A <see cref="Character"/> populated from the page introduction and the template arguments.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="root"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">No template whose normalized title is "Charcat" exists under <paramref name="root"/>.</exception>
        private Character BuildCat(Wikitext root)
        {
            if (root == null)
            {
                throw new ArgumentNullException(nameof(root));
            }

            // First() is intentional here: a page without the infobox is reported by an exception.
            var infobox = root.EnumDescendants()
                          .OfType <Template>()
                          .First(t => Utility.NormalizeTitle(t.Name) == "Charcat");

            var entity = new Character();

            entity.Intro = root.ExtractIntro();
            // Every argument is optional; a missing one leaves the corresponding property null.
            entity.Age = infobox.Arguments["age"]?.Value.StripText();
            entity.PastAffiliation = infobox.Arguments["pastaffie"]?.Value
                                     .EnumDescendants()
                                     .OfType <WikiLink>()
                                     .Select(l => l.Target.StripText())
                                     .ToArray();
            entity.CurrentAffiliation = infobox.Arguments["affie"]?.Value
                                        .EnumDescendants()
                                        .OfType <WikiLink>()
                                        .Select(l => l.Target.StripText())
                                        .ToArray();
            return entity;
        }
        /// <summary>
        /// Normalizes a template argument name or value in place by trimming its outer whitespace.
        /// </summary>
        /// <remarks>
        /// Leading whitespace is removed from the first inline of the first line, and trailing whitespace from
        /// the last inline of the last line, but only when that line is a <see cref="Paragraph"/> and that
        /// inline is a <see cref="PlainText"/>. A wikitext without lines, or a paragraph without inlines, is
        /// left untouched.
        /// </remarks>
        /// <param name="text">The wikitext to be manipulated.</param>
        internal static void NormalizeTemplateArgumentText(Wikitext text)
        {
            // FirstOrDefault/LastOrDefault are used instead of First/Last so that an empty argument
            // (no lines, or a paragraph with no inlines) is a no-op rather than an InvalidOperationException.
            if (text.Lines.FirstOrDefault() is Paragraph firstParagraph
                && firstParagraph.Inlines.FirstOrDefault() is PlainText firstPlainText)
            {
                firstPlainText.Content = firstPlainText.Content.TrimStart();
            }

            if (text.Lines.LastOrDefault() is Paragraph lastParagraph
                && lastParagraph.Inlines.LastOrDefault() is PlainText lastPlainText)
            {
                lastPlainText.Content = lastPlainText.Content.TrimEnd();
            }
        }
Example #6
0
 /// <summary>
 /// Parses the current document text and schedules the resulting AST to be applied through the
 /// <c>TextView</c> dispatcher, then clears <c>documentAstInvalidated</c>.
 /// </summary>
 private void Parse()
 {
     if (TextView == null)
     {
         // Nothing to parse; the invalidation flag is still cleared.
         documentAstInvalidated = false;
         return;
     }

     var parser = new WikitextParser();
     // The document text is read through the dispatcher's AutoInvoke helper.
     var source     = TextView.Dispatcher.AutoInvoke(() => TextView.Document.Text);
     var timer      = Stopwatch.StartNew();
     var parsedRoot = parser.Parse(source);

     Trace.WriteLine("Parsed " + source.Length + " chars in " + timer.Elapsed);

     // NOTE(review): the flag is cleared here and again after the dispatch below; the first reset
     // looks redundant, but it is kept so the ordering relative to BeginInvoke is unchanged.
     documentAstInvalidated = false;

     Action applyResult = () =>
     {
         AstRoot = parsedRoot;
         TextView.Redraw();
     };
     TextView.Dispatcher.BeginInvoke(applyResult);

     documentAstInvalidated = false;
 }
Example #7
0
        /// <summary>
        /// Classifies a page by matching its first non-empty introduction line against
        /// <c>tomMatcher</c> and <c>sheCatMatcher</c>.
        /// </summary>
        /// <param name="root">The parsed page.</param>
        /// <returns>
        /// <c>true</c> when <c>tomMatcher</c> matches; otherwise <c>false</c> when <c>sheCatMatcher</c> matches;
        /// <c>null</c> when neither matches or when there is no non-empty line before the first heading.
        /// </returns>
        public static bool? IsTom(Wikitext root)
        {
            var firstIntroLine = root.Lines
                                 .TakeWhile(l => !(l is Heading))
                                 .NonEmptyLines()
                                 .FirstOrDefault();

            if (firstIntroLine == null)
            {
                return null;
            }

            var plain = firstIntroLine.ToPlainText(NodePlainTextOptions.RemoveRefTags);

            // tomMatcher takes precedence when both matchers would match.
            if (tomMatcher.IsMatch(plain))
            {
                return true;
            }
            return sheCatMatcher.IsMatch(plain) ? false : (bool?)null;
        }
        /// <summary>
        /// Builds a <see cref="Volume"/> from a page that contains a "Book" template.
        /// </summary>
        /// <param name="root">The parsed page; must not be <c>null</c>.</param>
        /// <returns>A <see cref="Volume"/> populated from the introduction, the template arguments and the "Blurb" section.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="root"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">No template whose normalized title is "Book" exists under <paramref name="root"/>.</exception>
        private Volume BuildVolume(Wikitext root)
        {
            if (root == null)
            {
                throw new ArgumentNullException(nameof(root));
            }

            var infobox = root.EnumDescendants()
                          .OfType <Template>()
                          .First(t => Utility.NormalizeTitle(t.Name) == "Book");

            return new Volume
            {
                Intro       = root.ExtractIntro(),
                // Missing arguments (or an author without a wikilink) leave the property null.
                Author      = infobox.Arguments["author"]?.Value.FirstWikiLink()?.ToPlainText(),
                ReleaseDate = infobox.Arguments["publish date"]?.Value.ToPlainText(NodePlainTextOptions.RemoveRefTags),
                // One stripped line of the "Blurb" section per output line.
                Blurb       = string.Join("\n", root.ExtractSection("Blurb").Select(l => l.StripText())),
            };
        }
        /// <summary>
        /// Builds a <see cref="DisambiguationEntity"/> from the list items of a disambiguation page.
        /// </summary>
        /// <param name="root">The parsed page.</param>
        /// <returns>
        /// An entity with one topic per list item that contains a wikilink, and an intro taken from the
        /// first line whose first inline is not a <see cref="Template"/>.
        /// </returns>
        private DisambiguationEntity BuildDisambiguation(Wikitext root)
        {
            // Turns one list item into a topic: the first link is the target, the rest is the description.
            DisambiguationTopic ToTopic(ListItem item)
            {
                // Work on a clone so that removing the link does not modify the original tree.
                var copy = (ListItem)item.Clone();
                var link = copy.FirstWikiLink();
                link.Remove();

                var description = copy.StripText().Trim(' ', ',', '?', '.');
                return new DisambiguationTopic
                {
                    Target      = link.Target.StripText(),
                    Description = description,
                };
            }

            var topics = root.EnumDescendants()
                         .OfType <ListItem>()
                         .Where(item => item.FirstWikiLink() != null)
                         .Select(item => ToTopic(item))
                         .ToArray();

            var introLine = root.Lines.FirstOrDefault(l => !(l.Inlines.FirstNode is Template));

            return new DisambiguationEntity
            {
                Intro  = introLine?.StripText(),
                Topics = topics,
            };
        }
Example #10
0
        /// <summary>
        /// WIKITEXT
        /// </summary>
        /// <remarks>
        /// An empty WIKITEXT contains nothing. Thus the parsing should always be successful.
        /// Lines are read one after another until <c>NeedsTerminate()</c> reports a terminator or
        /// <c>ParseLineEnd</c> returns <c>null</c>.
        /// </remarks>
        /// <returns>The parsed <see cref="Wikitext"/> node, passed through <c>ParseSuccessful</c>.</returns>
        private Wikitext ParseWikitext()
        {
            cancellationToken.ThrowIfCancellationRequested();
            ParseStart();

            var      node         = new Wikitext();
            LineNode previousLine = null;

            if (NeedsTerminate())
            {
                return ParseSuccessful(node);
            }

            while (true)
            {
                var current = ParseLine(previousLine);

                // EMPTY_LINE_NODE is a sentinel: it is neither stored nor remembered as the previous line.
                if (current != EMPTY_LINE_NODE)
                {
                    previousLine = current;
                    node.Lines.Add(current);
                }

                var trailing = ParseLineEnd(previousLine);

                if (trailing == null)
                {
                    // Failed to read a \n , which means we've reached a terminator.
                    // This is guaranteed in ParseLineEnd
                    Debug.Assert(NeedsTerminate());
                    return ParseSuccessful(node);
                }

                // Otherwise, check whether we meet a terminator before reading another line.
                if (trailing != EMPTY_LINE_NODE)
                {
                    node.Lines.Add(trailing);
                }
                if (NeedsTerminate())
                {
                    return ParseSuccessful(node);
                }
            }
        }
Example #11
0
        /// <summary>
        /// Follows the given templates to the pages named by their first argument and returns the wikilinks
        /// of the first such page that contains one of the headers listed in <c>HeadersToSearch</c>.
        /// </summary>
        /// <param name="mainTemplates">Templates whose first argument is used as the title of the page to fetch.</param>
        /// <param name="site">The site the linked pages are fetched from.</param>
        /// <returns>
        /// A lazily evaluated sequence of every <see cref="WikiLink"/> on the first matching page, or an empty
        /// list when no page matches.
        /// </returns>
        private IEnumerable <WikiLink> GetMainTemplatePageLinks(IEnumerable <Template> mainTemplates, WikiSite site)
        {
            foreach (Template template in mainTemplates)
            {
                // A template without arguments names no page; skip it instead of letting First() throw
                // InvalidOperationException and abort the scan of the remaining templates.
                var pageArgument = template.Arguments.FirstOrDefault();
                if (pageArgument == null)
                {
                    Log.Information("Skipping template {Template} because it has no arguments", template.ToString());
                    continue;
                }

                var linkedPage = new WikiPage(site, pageArgument.ToString());
                Log.Information("Processing page {PageName}", linkedPage.Title);

                // This method is synchronous, so the refresh is awaited by blocking; failures surface
                // wrapped in an AggregateException, as with any Task.Wait() call.
                linkedPage.RefreshAsync(PageQueryOptions.FetchContent | PageQueryOptions.ResolveRedirects).Wait();
                if (!linkedPage.Exists)
                {
                    continue;
                }

                var pageText = new WikitextParser().Parse(linkedPage.Content);
                // A header matches on either its plain-text or its raw wikitext representation.
                var hasMatchingHeader = pageText.EnumDescendants()
                                        .OfType <Heading>()
                                        .Any(y => HeadersToSearch.Contains(y.ToPlainText()) || HeadersToSearch.Contains(y.ToString()));

                if (hasMatchingHeader)
                {
                    // Only the first matching page is used.
                    return pageText.Lines.SelectMany(x => x.EnumDescendants().OfType <WikiLink>());
                }
            }
            return new List <WikiLink>();
        }
Example #12
0
        /// <summary>
        /// Splits a pair of wikitext fragments into rows of keys and rows of values, then delegates to the
        /// <c>ParseFamily(keyRows, valueRows)</c> overload.
        /// </summary>
        /// <remarks>
        /// Key rows are built from the plain text of <paramref name="familyt"/> and are separated by
        /// <c>br</c> tags. Value rows are built from the wikilink targets of <paramref name="familyl"/>; a new
        /// row starts at every <c>br</c> tag, and also when adding a link's display text plus a two-character
        /// separator would push a non-empty row past 32 characters.
        /// </remarks>
        /// <param name="familyt">The fragment that supplies the keys; must not be <c>null</c>.</param>
        /// <param name="familyl">The fragment that supplies the values; must not be <c>null</c>.</param>
        /// <returns>The result of the <c>ParseFamily(keyRows, valueRows)</c> overload.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="familyt"/> or <paramref name="familyl"/> is <c>null</c>.</exception>
        public static IDictionary <string, IList <string> > ParseFamily(Wikitext familyt, Wikitext familyl)
        {
            if (familyt == null)
            {
                throw new ArgumentNullException(nameof(familyt));
            }
            if (familyl == null)
            {
                throw new ArgumentNullException(nameof(familyl));
            }

            const int charsPerLine = 32;

            var    keys       = new List <string>();
            string pendingKey = null;

            // Depth-first walk collecting plain text; a <br> closes the current key row.
            void CollectKeys(Node node)
            {
                if (node is HtmlTag tag)
                {
                    if (string.Equals(tag.Name, "br", StringComparison.OrdinalIgnoreCase))
                    {
                        keys.Add(pendingKey);
                        pendingKey = null;
                    }
                    // Children of HTML tags are never visited.
                    return;
                }
                if (node is PlainText plain)
                {
                    pendingKey += plain.Content;
                    return;
                }
                foreach (var child in node.EnumChildren())
                {
                    CollectKeys(child);
                }
            }

            var rows       = new List <IList <string> >();
            var pendingRow = new List <string>();
            var usedChars  = 0;

            // Closes the current value row and starts an empty one.
            void StartNewRow()
            {
                rows.Add(pendingRow);
                pendingRow = new List <string>();
                usedChars  = 0;
            }

            // Depth-first walk collecting link targets; rows break on <br> or on the width limit.
            void CollectValues(Node node)
            {
                if (node is HtmlTag tag)
                {
                    if (string.Equals(tag.Name, "br", StringComparison.OrdinalIgnoreCase))
                    {
                        StartNewRow();
                    }
                    return;
                }
                if (node is WikiLink link)
                {
                    // Width is measured on the display text, plus 2 for the ", " separator.
                    var width = link.ToPlainText().Trim().Length + 2;
                    if (usedChars > 0 && usedChars + width > charsPerLine)
                    {
                        StartNewRow();
                    }
                    // The stored value is the link target, not the display text.
                    pendingRow.Add(link.Target.ToPlainText().Trim());
                    usedChars += width;
                    return;
                }
                foreach (var child in node.EnumChildren())
                {
                    CollectValues(child);
                }
            }

            CollectKeys(familyt);
            keys.Add(pendingKey);      // flush the trailing key row
            CollectValues(familyl);
            rows.Add(pendingRow);      // flush the trailing value row
            return ParseFamily(keys, rows);
        }
Example #13
0
 /// <summary>
 /// Returns the first non-empty line of text that appears before the first <see cref="Heading"/>.
 /// </summary>
 /// <param name="text">The wikitext whose leading lines are read.</param>
 /// <returns>
 /// The trimmed plain text (converted with <c>NodePlainTextOptions.RemoveRefTags</c>) of the first line
 /// that is not empty after trimming, or <c>null</c> when there is none before the first heading.
 /// </returns>
 public static string ExtractIntro(this Wikitext text)
 {
     foreach (var line in text.Lines)
     {
         if (line is Heading)
         {
             // The introduction ends at the first heading.
             break;
         }

         var plain = line.ToPlainText(NodePlainTextOptions.RemoveRefTags).Trim();
         if (!string.IsNullOrEmpty(plain))
         {
             return plain;
         }
     }
     return null;
 }
Example #14
0
        /// <summary>
        /// Collects, for each requested header, the nodes from that header up to the next header of the same
        /// or a lower <c>Level</c> value.
        /// </summary>
        /// <param name="allHeaders">Every header that may terminate a section.</param>
        /// <param name="headersToGetContentFor">The headers whose content is collected.</param>
        /// <param name="wikiPageText">The page whose descendants are scanned.</param>
        /// <returns>
        /// The collected nodes of all requested headers, concatenated in request order. The start of a section
        /// is located by comparing each node's <c>ToString()</c> with the header's.
        /// </returns>
        private List <Node> GetContentBetweenHeaders(IEnumerable <Heading> allHeaders, IEnumerable <Heading> headersToGetContentFor, Wikitext wikiPageText)
        {
            var contentNodes = new List <Node>();

            foreach (Heading header in headersToGetContentFor)
            {
                // Materialize the terminating headers once per requested header. The original deferred
                // query was re-enumerated (and re-filtered) for every descendant node visited.
                var boundaryHeaders = new HashSet <Node>(allHeaders.Where(x => x.Level <= header.Level));
                // The header's text does not change during the scan, so render it once.
                var headerText = header.ToString();

                var section = wikiPageText.EnumDescendants()
                              .SkipWhile(node => node.ToString() != headerText)
                              // The header itself is in boundaryHeaders, so it is explicitly let through.
                              .TakeWhile(node => node == header || !boundaryHeaders.Contains(node));

                contentNodes.AddRange(section);
            }
            return contentNodes;
        }
Example #15
0
        /// <summary>
        /// Creates the article container for <paramref name="wikiArticle"/> and fills it with the components
        /// translated from the AST.
        /// </summary>
        /// <param name="wikiArticle">The article the container is created for.</param>
        /// <param name="astRoot">The parsed AST root of the article.</param>
        /// <returns>The article container; it has no children added when the translator returns nothing.</returns>
        public async Task <IWikiContainer <IWikiArticle> > GetContainerAsync(IWikiArticle wikiArticle, Wikitext astRoot)
        {
            var plainText = astRoot.ToPlainText();
            var wikiData  = _wikiContentFactory.CreateWikiData(plainText, ContentModel.WikiText);

            // Root of the WikiComponent tree.
            var container = _wikiComponentFactory.CreateArticleContainer(wikiData, wikiArticle, astRoot, astRoot);

            // Translation starts from the first line node of the AST.
            var translated = await _astTranslator.TranslateNodesAsync(astRoot.Lines.FirstNode);

            if (!translated.Any())
            {
                _logger.LogWarning("AST translator returned empty list of article descendants.");
                return container;
            }

            container.AddChildren(translated);
            return container;
        }
Example #16
0
 /// <summary>
 /// Enumerates the lines of the section opened by the first heading whose trimmed plain text equals
 /// <paramref name="heading"/>, compared with <see cref="StringComparison.CurrentCultureIgnoreCase"/>.
 /// </summary>
 /// <param name="root">The wikitext to search.</param>
 /// <param name="heading">The heading text to look for.</param>
 /// <returns>The result of the predicate-based <c>ExtractSection</c> overload.</returns>
 public static IEnumerable <LineNode> ExtractSection(this Wikitext root, string heading)
 {
     bool HasRequestedTitle(Heading h)
     {
         var title = h.ToPlainText().Trim();
         return string.Equals(title, heading, StringComparison.CurrentCultureIgnoreCase);
     }

     return ExtractSection(root, h => HasRequestedTitle(h));
 }
 /// <summary>
 /// Builds an <see cref="UnknownEntity"/> that carries only the page introduction.
 /// </summary>
 /// <param name="root">The parsed page.</param>
 /// <returns>An <see cref="UnknownEntity"/> whose <c>Intro</c> is <c>root.ExtractIntro()</c>.</returns>
 private UnknownEntity BuildUnknown(Wikitext root)
 {
     var entity = new UnknownEntity();

     entity.Intro = root.ExtractIntro();
     return entity;
 }