/// <summary>
/// Collects, for each requested header, the run of page descendants that starts at the first
/// node whose text equals the header's text and ends just before the next header whose
/// <c>Level</c> is less than or equal to the requested header's <c>Level</c>.
/// </summary>
/// <param name="allHeaders">Headers used to detect where a section ends.</param>
/// <param name="headersToGetContentFor">Headers whose sections should be collected.</param>
/// <param name="wikiPageText">Parsed page whose descendants are scanned.</param>
/// <returns>
/// The collected nodes of all requested sections, appended in the order the headers were given.
/// </returns>
private List<Node> GetContentBetweenHeaders(IEnumerable<Heading> allHeaders, IEnumerable<Heading> headersToGetContentFor, Wikitext wikiPageText)
{
    var contentNodes = new List<Node>();
    foreach (Heading header in headersToGetContentFor)
    {
        // Materialise the terminating headers once per requested header. The previous deferred
        // Where(...) query was re-enumerated (with a linear Contains) for every node tested.
        var sectionTerminators = new HashSet<Node>(allHeaders.Where(x => x.Level <= header.Level));

        // The header text does not change while scanning, so compute it once.
        var headerText = header.ToString();

        var section = wikiPageText.EnumDescendants()
            .SkipWhile(node => node.ToString() != headerText)
            // The requested header is itself a terminator candidate (same level), so it is
            // explicitly allowed through; any other same-or-lower-Level header ends the section.
            .TakeWhile(node => node == header || !sectionTerminators.Contains(node));

        contentNodes.AddRange(section);
    }

    return contentNodes;
}
/// <summary>
/// Builds a <see cref="Character"/> from the page intro and the arguments of the first
/// template whose normalized name is "Charcat".
/// </summary>
/// <param name="root">Parsed page to read from.</param>
/// <returns>The populated character entity.</returns>
/// <exception cref="ArgumentNullException"><paramref name="root"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">The page contains no "Charcat" template.</exception>
private Character BuildCat(Wikitext root)
{
    if (root == null)
    {
        throw new ArgumentNullException(nameof(root));
    }

    var charcat = root.EnumDescendants()
        .OfType<Template>()
        .First(template => Utility.NormalizeTitle(template.Name) == "Charcat");

    var character = new Character();
    character.Intro = root.ExtractIntro();

    // Each optional argument yields null when it is absent from the infobox.
    character.Age = charcat.Arguments["age"]?.Value.StripText();

    // Affiliations are the stripped targets of every wiki link inside the argument value.
    character.PastAffiliation = charcat.Arguments["pastaffie"]?.Value
        .EnumDescendants()
        .OfType<WikiLink>()
        .Select(link => link.Target.StripText())
        .ToArray();
    character.CurrentAffiliation = charcat.Arguments["affie"]?.Value
        .EnumDescendants()
        .OfType<WikiLink>()
        .Select(link => link.Target.StripText())
        .ToArray();

    return character;
}
/// <summary>
/// Builds a <see cref="DisambiguationEntity"/> from a disambiguation page: one topic per list
/// item that contains a wiki link, plus an intro taken from the first line whose first inline
/// node is not a template.
/// </summary>
/// <param name="root">Parsed page to read from.</param>
/// <returns>The populated disambiguation entity.</returns>
/// <exception cref="ArgumentNullException"><paramref name="root"/> is <c>null</c>.</exception>
private DisambiguationEntity BuildDisambiguation(Wikitext root)
{
    // Same argument validation as the sibling Build* methods.
    if (root == null)
    {
        throw new ArgumentNullException(nameof(root));
    }

    var topics = root.EnumDescendants()
        .OfType<ListItem>()
        // Only list items that contain at least one wiki link describe a topic.
        .Where(item => item.FirstWikiLink() != null)
        .Select(item =>
        {
            // The link is removed from a clone, presumably so the parsed page itself stays
            // untouched; what remains of the line becomes the description.
            var line = (ListItem)item.Clone();
            var firstLink = line.FirstWikiLink();
            firstLink.Remove();
            var description = line.StripText().Trim(' ', ',', '?', '.');
            return new DisambiguationTopic
            {
                Target = firstLink.Target.StripText(),
                Description = description,
            };
        })
        .ToArray();

    return new DisambiguationEntity
    {
        // Null when every line starts with a template (or there are no lines).
        Intro = root.Lines.FirstOrDefault(l => !(l.Inlines.FirstNode is Template))?.StripText(),
        Topics = topics,
    };
}
/// <summary>
/// Builds a <see cref="Volume"/> from the page intro, the arguments of the first template
/// whose normalized name is "Book", and the "Blurb" section.
/// </summary>
/// <param name="root">Parsed page to read from.</param>
/// <returns>The populated volume entity.</returns>
/// <exception cref="ArgumentNullException"><paramref name="root"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">The page contains no "Book" template.</exception>
private Volume BuildVolume(Wikitext root)
{
    if (root == null)
    {
        throw new ArgumentNullException(nameof(root));
    }

    var bookBox = root.EnumDescendants()
        .OfType<Template>()
        .First(template => Utility.NormalizeTitle(template.Name) == "Book");

    var volume = new Volume();
    volume.Intro = root.ExtractIntro();

    // Optional arguments yield null when absent; the author is the first wiki link of its argument.
    volume.Author = bookBox.Arguments["author"]?.Value.FirstWikiLink()?.ToPlainText();
    volume.ReleaseDate = bookBox.Arguments["publish date"]?.Value.ToPlainText(NodePlainTextOptions.RemoveRefTags);

    // The blurb is the stripped text of each line in the "Blurb" section, one per output line.
    volume.Blurb = string.Join("\n", root.ExtractSection("Blurb").Select(line => line.StripText()));

    return volume;
}
/// <summary>
/// Infers linked/transcluded pages information, and stores it into global store.
/// </summary>
/// <param name="store">The store that is queried for known pages and receives the inferred ones.</param>
/// <returns>The number of pages for which information was inferred and passed to the store.</returns>
/// <exception cref="ArgumentNullException"><paramref name="store"/> is <c>null</c>.</exception>
public int InferTemplateInformation(PageInfoStore store)
{
    if (store == null)
    {
        throw new ArgumentNullException(nameof(store));
    }

    const string TemplatePrefix = "Template:";

    // Expanded transclusion title -> (argument name -> inferred argument info).
    var inferred = new Dictionary<string, Dictionary<string, TemplateArgumentInfo>>();
    foreach (var template in _Root.EnumDescendants().OfType<Template>())
    {
        if (template.IsMagicWord)
        {
            continue;
        }

        var name = template.Name?.ToString();
        if (string.IsNullOrEmpty(name))
        {
            continue;
        }

        name = MwParserUtility.NormalizeTitle(name);

        // Names still containing braces are skipped — presumably they are built from nested
        // templates/arguments and cannot be resolved to a fixed title here.
        if (name.Contains('{') || name.Contains('}'))
        {
            continue;
        }

        name = Utility.ExpandTransclusionTitle(name);

        // Start to infer it.
        if (!inferred.TryGetValue(name, out var parameters))
        {
            // Pages the store already knows about are left alone.
            if (store.ContainsPageInfo(name))
            {
                continue;
            }

            parameters = new Dictionary<string, TemplateArgumentInfo>();
            inferred.Add(name, parameters);
        }

        // Merge the argument names seen on this usage; the first occurrence of a name wins.
        foreach (var p in template.Arguments.EnumNameArgumentPairs())
        {
            if (parameters.ContainsKey(p.Key))
            {
                continue;
            }

            // TODO: Insert documentation here.
            parameters.Add(p.Key, new TemplateArgumentInfo(p.Key, null));
        }
    }

    foreach (var p in inferred)
    {
        var isTemplate = Utility.IsTemplateTitle(p.Key);
        string transclusionName;
        if (isTemplate)
        {
            // Ordinal comparison: the prefix is an identifier, not linguistic text.
            Debug.Assert(p.Key.StartsWith(TemplatePrefix, StringComparison.Ordinal));
            transclusionName = p.Key.Substring(TemplatePrefix.Length);
        }
        else
        {
            // Non-template pages are transcluded with a leading colon.
            transclusionName = ":" + p.Key;
        }

        var arguments = p.Value
            .OrderBy(p1 => p1.Key, TemplateArgumentNameComparer.Default)
            .Select(p1 => p1.Value)
            .ToArray();

        store.UpdatePageInfo(new PageInfo(p.Key, transclusionName, null, Prompts.InferredPageInfo,
            arguments, isTemplate ? PageType.Template : PageType.Page, true));
    }

    return inferred.Count;
}