public static IEnumerable<LineNode> ExtractSection(this Wikitext root, Func<Heading, bool> headingSelector)
{
    Heading currentHeading = null;
    foreach (var l in root.Lines)
    {
        if (l is Heading h)
        {
            if (currentHeading == null)
            {
                if (headingSelector(h))
                {
                    currentHeading = h;
                    continue;
                }
            }
            else if (currentHeading.Level >= h.Level)
            {
                // A heading of the same or a higher level ends the section.
                yield break;
            }
        }
        if (currentHeading != null)
        {
            yield return l;
        }
    }
}
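// Usage sketch for the selector-based overload above. The page text and the
// "History" heading are made up for illustration; WikitextParser is the
// MwParserFromScratch parser used elsewhere in these snippets.
var parser = new WikitextParser();
var page = parser.Parse("Intro.\n== History ==\nFirst event.\n== Trivia ==\nOther.");
// Yields the lines under "History" and stops at the next same-level heading ("Trivia").
var historyLines = page.ExtractSection(h => h.ToPlainText().Trim() == "History");
foreach (var line in historyLines)
{
    Console.WriteLine(line.ToPlainText());
}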
public static string ExtractIntro(this Wikitext root)
{
    var lines = root.Lines.TakeWhile(l => !(l is Heading)).NonEmptyLines();
    var s = string.Join("\n", lines.Select(l => l.ToPlainText(NodePlainTextOptions.RemoveRefTags).Trim()));
    if (s == "")
    {
        return null;
    }
    return s;
}
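// Sketch of ExtractIntro on a freshly parsed page: everything before the first
// heading comes back as trimmed plain text with <ref> tags removed, or null if
// the lead is empty. The sample wikitext is made up for illustration.
var leadPage = new WikitextParser().Parse("Lead paragraph.<ref>cite</ref>\n== Section ==\nBody.");
var intro = leadPage.ExtractIntro();   // expected: "Lead paragraph."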
public LintedWikitextDocument(TextDocument textDocument, Wikitext root, ICollection<Diagnostic> diagnostics)
{
    if (textDocument == null)
    {
        throw new ArgumentNullException(nameof(textDocument));
    }
    if (root == null)
    {
        throw new ArgumentNullException(nameof(root));
    }
    TextDocument = textDocument;
    _Root = root;
    Diagnostics = diagnostics == null || diagnostics.Count == 0
        ? Diagnostic.EmptyDiagnostics
        : diagnostics;
}
private Character BuildCat(Wikitext root)
{
    if (root == null)
    {
        throw new ArgumentNullException(nameof(root));
    }
    var infobox = root.EnumDescendants().OfType<Template>()
        .First(t => Utility.NormalizeTitle(t.Name) == "Charcat");
    var entity = new Character
    {
        Intro = root.ExtractIntro(),
        Age = infobox.Arguments["age"]?.Value.StripText(),
        PastAffiliation = infobox.Arguments["pastaffie"]?.Value.EnumDescendants()
            .OfType<WikiLink>().Select(l => l.Target.StripText()).ToArray(),
        CurrentAffiliation = infobox.Arguments["affie"]?.Value.EnumDescendants()
            .OfType<WikiLink>().Select(l => l.Target.StripText()).ToArray(),
    };
    return entity;
}
/// <summary>
/// Normalizes and manipulates a template argument name or value.
/// </summary>
/// <inheritdoc cref="NormalizeTemplateArgumentText(Wikitext)"/>
/// <param name="text">The wikitext to be manipulated.</param>
internal static void NormalizeTemplateArgumentText(Wikitext text)
{
    // Trim leading whitespace from the first plain-text node and trailing
    // whitespace from the last one, editing the AST in place.
    if (text.Lines.First() is Paragraph firstParagraph
        && firstParagraph.Inlines.First() is PlainText firstPlainText)
    {
        firstPlainText.Content = firstPlainText.Content.TrimStart();
    }
    if (text.Lines.Last() is Paragraph lastParagraph
        && lastParagraph.Inlines.Last() is PlainText lastPlainText)
    {
        lastPlainText.Content = lastPlainText.Content.TrimEnd();
    }
}
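// Hedged usage sketch: trim the whitespace around a template argument's value in
// place. The template text is made up for illustration; Arguments["name"] and
// .Value follow the same MwParserFromScratch API used in the infobox snippets here.
var arg = new WikitextParser().Parse("{{Infobox|name=  Firestar  }}")
    .EnumDescendants().OfType<Template>().First()
    .Arguments["name"];
NormalizeTemplateArgumentText(arg.Value);   // "  Firestar  " becomes "Firestar"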
private void Parse()
{
    if (TextView != null)
    {
        var parser = new WikitextParser();
        var text = TextView.Dispatcher.AutoInvoke(() => TextView.Document.Text);
        var sw = Stopwatch.StartNew();
        var ast = parser.Parse(text);
        Trace.WriteLine("Parsed " + text.Length + " chars in " + sw.Elapsed);
        documentAstInvalidated = false;
        TextView.Dispatcher.BeginInvoke((Action)(() =>
        {
            AstRoot = ast;
            TextView.Redraw();
        }));
    }
    documentAstInvalidated = false;
}
public static bool? IsTom(Wikitext root)
{
    var introLine = root.Lines.TakeWhile(l => !(l is Heading)).NonEmptyLines().FirstOrDefault();
    if (introLine == null)
    {
        return null;
    }
    var introContent = introLine.ToPlainText(NodePlainTextOptions.RemoveRefTags);
    if (tomMatcher.IsMatch(introContent))
    {
        return true;
    }
    if (sheCatMatcher.IsMatch(introContent))
    {
        return false;
    }
    return null;
}
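// The tomMatcher and sheCatMatcher fields are not part of this snippet. A purely
// hypothetical definition (an assumption, not the original project's patterns)
// could look like the following; it requires System.Text.RegularExpressions.
private static readonly Regex tomMatcher = new Regex(@"\b(tom|male)\b", RegexOptions.IgnoreCase);
private static readonly Regex sheCatMatcher = new Regex(@"\b(she-cat|female)\b", RegexOptions.IgnoreCase);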
private Volume BuildVolume(Wikitext root)
{
    if (root == null)
    {
        throw new ArgumentNullException(nameof(root));
    }
    var infobox = root.EnumDescendants().OfType<Template>()
        .First(t => Utility.NormalizeTitle(t.Name) == "Book");
    var entity = new Volume
    {
        Intro = root.ExtractIntro(),
        Author = infobox.Arguments["author"]?.Value.FirstWikiLink()?.ToPlainText(),
        ReleaseDate = infobox.Arguments["publish date"]?.Value.ToPlainText(NodePlainTextOptions.RemoveRefTags),
    };
    {
        var lines = root.ExtractSection("Blurb").Select(l => l.StripText());
        entity.Blurb = string.Join("\n", lines);
    }
    return entity;
}
private DisambiguationEntity BuildDisambiguation(Wikitext root)
{
    var items = root.EnumDescendants().OfType<ListItem>()
        .Select(l => Tuple.Create(l, l.FirstWikiLink()))
        .Where(t => t.Item2 != null)
        .Select(t =>
        {
            // Work on a clone so the original AST stays untouched.
            var line = (ListItem)t.Item1.Clone();
            var firstLink = line.FirstWikiLink();
            firstLink.Remove();
            var s = line.StripText().Trim(' ', ',', '?', '.');
            return new DisambiguationTopic
            {
                Target = firstLink.Target.StripText(),
                Description = s
            };
        }).ToArray();
    var entity = new DisambiguationEntity
    {
        Intro = root.Lines.FirstOrDefault(l => !(l.Inlines.FirstNode is Template))?.StripText(),
        Topics = items,
    };
    return entity;
}
/// <summary>
/// WIKITEXT
/// </summary>
/// <remarks>An empty WIKITEXT contains nothing. Thus the parsing should always be successful.</remarks>
private Wikitext ParseWikitext()
{
    cancellationToken.ThrowIfCancellationRequested();
    ParseStart();
    var node = new Wikitext();
    LineNode lastLine = null;
    if (NeedsTerminate())
    {
        return ParseSuccessful(node);
    }
NEXT_LINE:
    var line = ParseLine(lastLine);
    if (line != EMPTY_LINE_NODE)
    {
        lastLine = line;
        node.Lines.Add(line);
    }
    var extraPara = ParseLineEnd(lastLine);
    if (extraPara == null)
    {
        // Failed to read a \n, which means we've reached a terminator.
        // This is guaranteed in ParseLineEnd.
        Debug.Assert(NeedsTerminate());
        return ParseSuccessful(node);
    }
    // Otherwise, check whether we meet a terminator before reading another line.
    if (extraPara != EMPTY_LINE_NODE)
    {
        node.Lines.Add(extraPara);
    }
    if (NeedsTerminate())
    {
        return ParseSuccessful(node);
    }
    goto NEXT_LINE;
}
private IEnumerable<WikiLink> GetMainTemplatePageLinks(IEnumerable<Template> mainTemplates, WikiSite site)
{
    Wikitext pageText = null;
    IEnumerable<WikiLink> wikiLinks = new List<WikiLink>();
    foreach (Template template in mainTemplates)
    {
        var linkedPage = new WikiPage(site, template.Arguments.First().ToString());
        Log.Information("Processing page {PageName}", linkedPage.Title);
        linkedPage.RefreshAsync(PageQueryOptions.FetchContent | PageQueryOptions.ResolveRedirects).Wait();
        if (linkedPage.Exists)
        {
            pageText = new WikitextParser().Parse(linkedPage.Content);
            var matchingPageHeaders = pageText.EnumDescendants().OfType<Heading>()
                .Where(y => HeadersToSearch.Contains(y.ToPlainText()) || HeadersToSearch.Contains(y.ToString()));
            if (matchingPageHeaders.Any())
            {
                wikiLinks = pageText.Lines.SelectMany(x => x.EnumDescendants().OfType<WikiLink>());
                break;
            }
        }
    }
    return wikiLinks;
}
public static IDictionary<string, IList<string>> ParseFamily(Wikitext familyt, Wikitext familyl)
{
    if (familyt == null)
    {
        throw new ArgumentNullException(nameof(familyt));
    }
    if (familyl == null)
    {
        throw new ArgumentNullException(nameof(familyl));
    }
    const int charsPerLine = 32;
    var keyRows = new List<string>();
    string curKeyRow = null;

    // Collects the plain text of each <br>-separated row in the "keys" wikitext.
    void KeysVisitor(Node node)
    {
        switch (node)
        {
            case HtmlTag t:
                if (string.Equals(t.Name, "br", StringComparison.OrdinalIgnoreCase))
                {
                    keyRows.Add(curKeyRow);
                    curKeyRow = null;
                }
                return;
            case PlainText pt:
                curKeyRow += pt.Content;
                return;
        }
        foreach (var c in node.EnumChildren())
        {
            KeysVisitor(c);
        }
    }

    var valueRows = new List<IList<string>>();
    var curValueRow = new List<string>();
    var chars = 0;

    // Collects wiki-link targets row by row, wrapping when a row exceeds roughly charsPerLine characters.
    void ValuesVisitor(Node node)
    {
        switch (node)
        {
            case HtmlTag t:
                if (string.Equals(t.Name, "br", StringComparison.OrdinalIgnoreCase))
                {
                    valueRows.Add(curValueRow);
                    curValueRow = new List<string>();
                    chars = 0;
                }
                return;
            case WikiLink l:
                var text = l.ToPlainText().Trim();
                if (chars > 0 && chars + text.Length + 2 /* ", " */ > charsPerLine)
                {
                    valueRows.Add(curValueRow);
                    curValueRow = new List<string>();
                    chars = 0;
                }
                curValueRow.Add(l.Target.ToPlainText().Trim());
                chars += text.Length + 2;
                return;
        }
        foreach (var c in node.EnumChildren())
        {
            ValuesVisitor(c);
        }
    }

    KeysVisitor(familyt);
    keyRows.Add(curKeyRow);
    ValuesVisitor(familyl);
    valueRows.Add(curValueRow);
    return ParseFamily(keyRows, valueRows);
}
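// Usage sketch with made-up input: the first wikitext holds the <br/>-separated
// row labels, the second holds the matching rows of wiki links. The pairing of
// label rows with value rows happens in the ParseFamily(keyRows, valueRows)
// overload, which is not shown in this snippet.
var familyParser = new WikitextParser();
var labels = familyParser.Parse("Mother:<br/>Father:");
var links = familyParser.Parse("[[Nutmeg]]<br/>[[Jake]]");
var family = ParseFamily(labels, links);
// Presumably maps "Mother:" to ["Nutmeg"] and "Father:" to ["Jake"].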
public static string ExtractIntro(this Wikitext text)
{
    return text.Lines.TakeWhile(l => !(l is Heading))
        .Select(l => l.ToPlainText(NodePlainTextOptions.RemoveRefTags).Trim())
        .FirstOrDefault(l => !string.IsNullOrEmpty(l));
}
private List<Node> GetContentBetweenHeaders(IEnumerable<Heading> allHeaders, IEnumerable<Heading> headersToGetContentFor, Wikitext wikiPageText)
{
    List<Node> contentNodes = new List<Node>();
    foreach (Heading header in headersToGetContentFor)
    {
        var sameLevelOrAboveHeaders = allHeaders.Where(x => x.Level <= header.Level);
        var firstChunk = wikiPageText.EnumDescendants().SkipWhile(node => node.ToString() != header.ToString());
        var rangeChunk = firstChunk.TakeWhile(node => node == header || !sameLevelOrAboveHeaders.Contains(node));
        contentNodes.AddRange(rangeChunk);
    }
    return contentNodes;
}
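// Sketch of how the method above might be called: collect all headings from the
// parsed page first, then ask for the content under a particular one. The
// "History" heading name is made up for illustration.
var allHeaders = wikiPageText.EnumDescendants().OfType<Heading>().ToList();
var wantedHeaders = allHeaders.Where(h => h.ToPlainText().Trim() == "History");
var historyContent = GetContentBetweenHeaders(allHeaders, wantedHeaders, wikiPageText);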
public async Task<IWikiContainer<IWikiArticle>> GetContainerAsync(IWikiArticle wikiArticle, Wikitext astRoot)
{
    var wikiData = _wikiContentFactory.CreateWikiData(astRoot.ToPlainText(), ContentModel.WikiText);
    // Create the root of WikiComponent tree.
    var articleContainer = _wikiComponentFactory.CreateArticleContainer(wikiData, wikiArticle, astRoot, astRoot);
    var firstChild = astRoot.Lines.FirstNode;
    // Compose WikiComponent tree.
    var rootChildren = await _astTranslator.TranslateNodesAsync(firstChild);
    if (rootChildren.Any())
    {
        articleContainer.AddChildren(rootChildren);
    }
    else
    {
        _logger.LogWarning("AST translator returned empty list of article descendants.");
    }
    return articleContainer;
}
public static IEnumerable<LineNode> ExtractSection(this Wikitext root, string heading)
{
    return ExtractSection(root, h => h.ToPlainText().Trim()
        .Equals(heading, StringComparison.CurrentCultureIgnoreCase));
}
private UnknownEntity BuildUnknown(Wikitext root)
{
    return new UnknownEntity { Intro = root.ExtractIntro() };
}