Beispiel #1
0
        /// <summary>
        /// Collects, for every requested heading, the heading node itself plus the nodes that follow it
        /// in the page's descendant enumeration, up to (but excluding) the next heading from
        /// <paramref name="allHeaders"/> whose <c>Level</c> is less than or equal to the requested heading's.
        /// </summary>
        /// <param name="allHeaders">Headings that are candidates for terminating a section.</param>
        /// <param name="headersToGetContentFor">Headings whose following content should be collected.</param>
        /// <param name="wikiPageText">Parsed page whose descendants are scanned.</param>
        /// <returns>
        /// The collected nodes, concatenated in the order of <paramref name="headersToGetContentFor"/>.
        /// </returns>
        private List<Node> GetContentBetweenHeaders(IEnumerable<Heading> allHeaders, IEnumerable<Heading> headersToGetContentFor, Wikitext wikiPageText)
        {
            var contentNodes = new List<Node>();

            foreach (Heading header in headersToGetContentFor)
            {
                // The start of the section is located by text, so render the heading once
                // instead of once per scanned node.
                string headerText = header.ToString();

                // Snapshot the headings that end this section so that allHeaders is filtered
                // once per requested heading rather than once per scanned node.
                List<Node> sectionBoundaries = allHeaders
                    .Where(candidate => candidate.Level <= header.Level)
                    .Cast<Node>()
                    .ToList();

                var section = wikiPageText.EnumDescendants()
                    .SkipWhile(node => node.ToString() != headerText)
                    // The requested heading is itself one of the boundaries (its level equals its own),
                    // hence the explicit exemption for it.
                    .TakeWhile(node => node == header || !sectionBoundaries.Contains(node));

                contentNodes.AddRange(section);
            }

            return contentNodes;
        }
        /// <summary>
        /// Builds a <see cref="Character"/> from a parsed page, reading its fields from the first
        /// template whose normalized name is "Charcat".
        /// </summary>
        /// <param name="root">Parsed page content.</param>
        /// <returns>
        /// A character whose age and affiliations are <c>null</c> when the corresponding
        /// template argument ("age", "pastaffie", "affie") is absent.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="root"/> is <c>null</c>.</exception>
        private Character BuildCat(Wikitext root)
        {
            if (root == null)
            {
                throw new ArgumentNullException(nameof(root));
            }

            // First() throws when no descendant template is named "Charcat".
            var charcat = root.EnumDescendants()
                              .OfType<Template>()
                              .First(template => Utility.NormalizeTitle(template.Name) == "Charcat");

            var intro = root.ExtractIntro();
            var age = charcat.Arguments["age"]?.Value.StripText();

            // Affiliations are the stripped targets of every wikilink found inside the argument value.
            var formerAffiliations = charcat.Arguments["pastaffie"]?.Value
                .EnumDescendants()
                .OfType<WikiLink>()
                .Select(link => link.Target.StripText())
                .ToArray();
            var presentAffiliations = charcat.Arguments["affie"]?.Value
                .EnumDescendants()
                .OfType<WikiLink>()
                .Select(link => link.Target.StripText())
                .ToArray();

            return new Character
            {
                Intro = intro,
                Age = age,
                PastAffiliation = formerAffiliations,
                CurrentAffiliation = presentAffiliations,
            };
        }
        /// <summary>
        /// Builds a <see cref="DisambiguationEntity"/> from a parsed page: one topic for every list item
        /// that contains a wikilink, plus an intro taken from the first line that does not start with a template.
        /// </summary>
        /// <param name="root">Parsed page content.</param>
        /// <returns>The entity with its intro (possibly <c>null</c>) and topics.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="root"/> is <c>null</c>.</exception>
        private DisambiguationEntity BuildDisambiguation(Wikitext root)
        {
            // Same argument validation as the sibling Build* methods.
            if (root == null)
            {
                throw new ArgumentNullException(nameof(root));
            }

            var items = root.EnumDescendants()
                .OfType<ListItem>()
                .Where(item => item.FirstWikiLink() != null)
                .Select(item =>
                {
                    // Operate on a clone: the link is removed from the copy, not from root's tree.
                    var line      = (ListItem)item.Clone();
                    var firstLink = line.FirstWikiLink();
                    firstLink.Remove();

                    // What is left of the line, minus surrounding punctuation, is the description.
                    var description = line.StripText().Trim(' ', ',', '?', '.');
                    return new DisambiguationTopic
                    {
                        Target      = firstLink.Target.StripText(),
                        Description = description,
                    };
                })
                .ToArray();

            var entity = new DisambiguationEntity
            {
                Intro  = root.Lines.FirstOrDefault(l => !(l.Inlines.FirstNode is Template))?.StripText(),
                Topics = items,
            };

            return entity;
        }
        /// <summary>
        /// Builds a <see cref="Volume"/> from a parsed page, reading author and release date from the
        /// first template whose normalized name is "Book" and the blurb from the "Blurb" section.
        /// </summary>
        /// <param name="root">Parsed page content.</param>
        /// <returns>
        /// A volume whose author and release date are <c>null</c> when the corresponding
        /// template argument ("author", "publish date") is absent.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="root"/> is <c>null</c>.</exception>
        private Volume BuildVolume(Wikitext root)
        {
            if (root == null)
            {
                throw new ArgumentNullException(nameof(root));
            }

            // First() throws when no descendant template is named "Book".
            var book = root.EnumDescendants()
                           .OfType<Template>()
                           .First(template => Utility.NormalizeTitle(template.Name) == "Book");

            var intro = root.ExtractIntro();
            var author = book.Arguments["author"]?.Value.FirstWikiLink()?.ToPlainText();
            var releaseDate = book.Arguments["publish date"]?.Value.ToPlainText(NodePlainTextOptions.RemoveRefTags);

            // The blurb is the stripped text of each line of the "Blurb" section, one per output line.
            var blurbLines = root.ExtractSection("Blurb").Select(line => line.StripText());
            var blurb = string.Join("\n", blurbLines);

            return new Volume
            {
                Intro = intro,
                Author = author,
                ReleaseDate = releaseDate,
                Blurb = blurb,
            };
        }
Beispiel #5
0
        /// <summary>
        /// Infers page information for the templates/pages transcluded in this document from the
        /// arguments they are invoked with, and stores the inferred entries into <paramref name="store"/>.
        /// </summary>
        /// <param name="store">
        /// Store that receives the inferred entries. Pages it already has information for are skipped.
        /// </param>
        /// <returns>The number of pages for which information was inferred and stored.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="store"/> is <c>null</c>.</exception>
        public int InferTemplateInformation(PageInfoStore store)
        {
            if (store == null)
            {
                throw new ArgumentNullException(nameof(store));
            }

            const string templatePrefix = "Template:";

            // Transcluded page title -> (argument name -> inferred argument info).
            var inferred = new Dictionary<string, Dictionary<string, TemplateArgumentInfo>>();

            foreach (var template in _Root.EnumDescendants().OfType<Template>())
            {
                if (template.IsMagicWord)
                {
                    continue;
                }
                var name = template.Name?.ToString();
                if (string.IsNullOrEmpty(name))
                {
                    continue;
                }
                name = MwParserUtility.NormalizeTitle(name);
                // NOTE(review): names still containing braces are skipped, presumably because they are
                // built from parameters or nested transclusions and cannot be resolved here; confirm.
                if (name.Contains('{') || name.Contains('}'))
                {
                    continue;
                }
                name = Utility.ExpandTransclusionTitle(name);
                // Start to infer it.
                if (!inferred.TryGetValue(name, out var parameters))
                {
                    // The store already knows this page; do not overwrite it with inferred data.
                    if (store.ContainsPageInfo(name))
                    {
                        continue;
                    }
                    parameters = new Dictionary<string, TemplateArgumentInfo>();
                    inferred.Add(name, parameters);
                }
                foreach (var argument in template.Arguments.EnumNameArgumentPairs())
                {
                    // The first occurrence of an argument name wins; later ones add nothing new.
                    if (!parameters.ContainsKey(argument.Key))
                    {
                        // TODO: Insert documentation here.
                        parameters.Add(argument.Key, new TemplateArgumentInfo(argument.Key, null));
                    }
                }
            }

            foreach (var entry in inferred)
            {
                var isTemplate = Utility.IsTemplateTitle(entry.Key);
                string transclusionName;
                if (isTemplate)
                {
                    // Titles are identifiers, not linguistic text, so compare ordinally.
                    Debug.Assert(entry.Key.StartsWith(templatePrefix, StringComparison.Ordinal));
                    transclusionName = entry.Key.Substring(templatePrefix.Length);
                }
                else
                {
                    // Non-template pages get a leading colon in their transclusion name.
                    transclusionName = ":" + entry.Key;
                }

                var arguments = entry.Value
                    .OrderBy(pair => pair.Key, TemplateArgumentNameComparer.Default)
                    .Select(pair => pair.Value)
                    .ToArray();

                store.UpdatePageInfo(new PageInfo(entry.Key, transclusionName, null, Prompts.InferredPageInfo,
                                                  arguments, isTemplate ? PageType.Template : PageType.Page, true));
            }

            return inferred.Count;
        }