Example #1
        public async Task <Wikitext> GetArticleMwRootAsync(IWikiArticle wikiArticle)
        {
            #region Log article

            var articleDataString = wikiArticle.ArticleData.Substring(0, Math.Min(wikiArticle.ArticleData.Length, 50));
            _logger.LogInformation($"{nameof(GetArticleMwRootAsync)} parsing content:{Environment.NewLine}{articleDataString}");

            #endregion

            if (wikiArticle.ContentModel != ContentModel.WikiText)
            {
                var errorMessage = $"This implementation of {nameof(MwParserApi)} can only load an instance of {nameof(IWikiArticle)} with {ContentModel.WikiText} {nameof(ContentModel)}";
                _logger.LogError(errorMessage);
                throw new NotSupportedException(errorMessage);
            }

            // Build article AST
            _logger.LogDebug("Building article AST...");

            var astRoot = await Task.Run(
                () => _parser.Parse(wikiArticle.ArticleData));

            _logger.LogDebug("Done.");

            if (astRoot == null)
            {
                var errorMessage = $"{nameof(WikitextParser)} returned null AST root.";
                _logger.LogError(errorMessage);
                throw new ApplicationException(errorMessage);
            }

            return(astRoot);
        }
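A minimal caller sketch for the method above. The api and article parameter names, and the surrounding method, are assumptions for illustration only; they are not part of the original source.

        public async Task PrintHeadingsAsync(MwParserApi api, IWikiArticle article)
        {
            // Parse the article off the calling thread, then walk the resulting AST.
            Wikitext root = await api.GetArticleMwRootAsync(article);

            foreach (var heading in root.EnumDescendants().OfType<Heading>())
            {
                Console.WriteLine(heading.ToPlainText().Trim());
            }
        }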
        /// <summary>
        /// Parses wikitext and asserts:
        /// 1. whether the parsed AST can be converted back to the same wikitext as the input;
        /// 2. whether the parsed AST matches the expected dump.
        /// </summary>
        public Wikitext ParseAndAssert(string text, string expectedDump, WikitextParserOptions options)
        {
            if (options == null)
            {
                throw new ArgumentNullException(nameof(options));
            }
            var parser = new WikitextParser {
                Options = options
            };
            var root       = parser.Parse(text);
            var parsedText = root.ToString();

            Output.WriteLine("Original Text\n====================");
            Output.WriteLine(text);
            Output.WriteLine("Parsed Text\n====================");
            Output.WriteLine(parsedText);
            var rootExpr = Utility.Dump(root);

            Output.WriteLine("AST Dump\n====================");
            Output.WriteLine(EscapeString(rootExpr));
            if (expectedDump != rootExpr)
            {
                Assert.Equal(EscapeString(expectedDump), EscapeString(rootExpr));
            }
            if (!options.AllowClosingMarkInference)
            {
                Assert.Equal(text, parsedText);
            }
            return(root);
        }
Example #3
        private IEnumerable <WikiPage> GetStoryLinksPageList(WikiSite site, string pageTitle)
        {
            var targetPage = new WikiPage(site, pageTitle);

            targetPage.RefreshAsync(PageQueryOptions.FetchContent | PageQueryOptions.ResolveRedirects).Wait(); //Load page content

            //Get page text
            var parser       = new WikitextParser();
            var wikiPageText = parser.Parse(targetPage.Content);

            IEnumerable <Template> templateList = new List <Template>();
            var header = wikiPageText.Lines.SelectMany(x => x.EnumDescendants().OfType <Heading>()).Where(y => HeadersToSearch.Contains(y.ToPlainText())).SingleOrDefault();

            if (header != null)
            {
                templateList = header.EnumDescendants().OfType <Template>();
            }
            else
            {
                templateList = wikiPageText.EnumDescendants().OfType <Template>();
            }
            var storyLinkTemplates = templateList.Where(template => template.Name.Equals("storylink"));

            return(storyLinkTemplates.Select(template => new WikiPage(site, template.Arguments.Single().Value.ToPlainText())));
        }
        public Wikitext ParseWikitext(string text)
        {
            var parser = new WikitextParser();
            var root   = parser.Parse(text);

            return(root);
        }
Example #5
        /// <summary>
        /// Loads a page from a file and parses it.
        /// </summary>
        private static Wikitext LoadAndParse(string fileName)
        {
            var content = File.ReadAllText(fileName);
            var parser  = new WikitextParser();

            return(parser.Parse(content));
        }
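A short usage sketch for LoadAndParse; the file name is illustrative, not from the original source.

        private static void PrintPlainText()
        {
            // "SamplePage.wiki" is an assumed path for demonstration purposes.
            var ast = LoadAndParse("SamplePage.wiki");

            Console.WriteLine(ast.ToPlainText());
        }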
Example #6
        public void ParseBadText()
        {
            var wiki = @"
                          = Hello =
                          blah-blah. OK!";

            Assert.Throws <Sprache.ParseException>(() => WikitextParser.Parse(wiki));
        }
Example #7
        private void UpdatePageContentWithMessage(WikiPage page, string content, string editMessage)
        {
            var parser   = new WikitextParser();
            var wikiText = parser.Parse(content);

            page.Content = wikiText.ToString();
            page.UpdateContentAsync(editMessage).Wait();
        }
Example #8
        public static WikiJobRequest GetSampleContinuityLinkFixJobRequest()
        {
            var parser    = new WikitextParser();
            var ast       = parser.Parse("{{User:Tigerpaw28/Sandbox/Template:WikiBotRequest|type=Link Fix|username=Tigerpaw28|timestamp=14:58, 30 June 2020 (EDT)|before=[[Hot Shot (Armada)]]|after=[[Hot Shot (Armada)/Cartoon continuity]]|headers=Armada cartoon, Energon cartoon, Cybertron cartoon|media=Cartoon|pages=Optimus Prime (Armada)|comment=Test job|status=PendingPreApproval}}");
            var templates = ast.Lines.First <LineNode>().EnumDescendants().OfType <Template>();
            var request   = WikiJobRequestFactory.GetWikiJobRequest(JobType.ContinuityLinkFixJob, TimeZoneInfo.Local, templates.First());

            return(request);
        }
Example #9
        public static WikiJobRequest GetSampleLinkFixJobRequest()
        {
            var parser    = new WikitextParser();
            var ast       = parser.Parse("{{User:Tigerpaw28/Sandbox/Template:WikiBotRequest|type=Link Fix|username=Tigerpaw28|timestamp=14:58, 30 June 2020 (EDT)|before=[[Commercial]]|after=[[Commercial|Test]]|pages=Commercial|comment=Test job|status=PendingPreApproval}}");
            var templates = ast.Lines.First <LineNode>().EnumDescendants().OfType <Template>();
            var request   = WikiJobRequestFactory.GetWikiJobRequest(JobType.LinkFixJob, TimeZoneInfo.Local, templates.First());

            return(request);
        }
Example #10
        public void ParseSimpleTextTest()
        {
            var wiki     = "blah-blah. OK!";
            var doc      = WikitextParser.Parse(wiki);
            var elements = doc.Elements.Cast <WikitextPlainText>().ToArray();

            Assert.Single(elements);
            Assert.Equal(wiki, elements[0].Value);
        }
        public async Task <Entity> QueryByNameAsync(string name)
        {
            if (name == null)
            {
                throw new ArgumentNullException(nameof(name));
            }
            name = name.Trim();
            if (name == "")
            {
                return(null);
            }
            var site = await WikiFamily.GetSiteAsync(LanguageCode);

            var page = await FetchPageAsync(site, name);

            if (page == null)
            {
                return(null);
            }
            var parser   = new WikitextParser();
            var root     = parser.Parse(page.Content);
            var template = root.EnumDescendants().OfType <Template>()
                           .FirstOrDefault(t => distinguishingTemplates.Contains(Utility.NormalizeTitle(t.Name)));
            Entity entity;

            if (template != null)
            {
                switch (Utility.NormalizeTitle(template.Name))
                {
                case "Book":
                    entity = BuildVolume(root);
                    break;

                case "Charcat":
                    entity = BuildCat(root);
                    break;

                default:
                    Debug.Assert(false);
                    return(null);
                }
            }
            else if (await page.IsDisambiguationAsync())
            {
                entity = BuildDisambiguation(root);
            }
            else
            {
                entity = BuildUnknown(root);
            }
            entity.Name      = page.Title;
            entity.DetailUrl = Utility.GetPageUrl(site, page.Title);
            return(entity);
        }
Example #12
        private static void ParseAndPrint()
        {
            var parser = new WikitextParser();

            Console.WriteLine("Please input the wikitext to parse; use EOF (Ctrl+Z) to accept:");
            var ast = parser.Parse(ReadInput());

            Console.WriteLine("Parsed AST");
            PrintAst(ast, 0);
            Console.WriteLine("Plain text");
            Console.WriteLine(ast.ToPlainText());
        }
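ReadInput and PrintAst are referenced above but not shown in this example. A plausible sketch of such helpers, assuming stdin input and a depth-indented dump of node type names; this is not the original implementation.

        private static string ReadInput()
        {
            // Read everything from standard input until EOF (Ctrl+Z on Windows).
            return Console.In.ReadToEnd();
        }

        private static void PrintAst(Node node, int depth)
        {
            // Print the node's type name, indented by its depth, then recurse into its children.
            Console.WriteLine("{0}{1}", new string(' ', depth * 2), node.GetType().Name);
            foreach (var child in node.EnumChildren())
            {
                PrintAst(child, depth + 1);
            }
        }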
Example #13
        private WikiJobRequest BuildRequest()
        {
            var         parser    = new WikitextParser();
            var         ast       = parser.Parse("{{User:Tigerpaw28/Sandbox/Template:WikiBotRequest|type=Text Replacement|username=Tigerpaw28|timestamp=14:58, 30 June 2020 (EDT)|before=Deceptitran|after=not a Robot|comment=Test job|status=PendingPreApproval}}");
            var         templates = ast.Lines.First <LineNode>().EnumDescendants().OfType <Template>();
            var         request   = WikiJobRequestFactory.GetWikiJobRequest(JobType.TextReplacementJob, TimeZoneInfo.Local, templates.First());
            List <Page> pages     = new List <Page>();

            pages.Add(new Page(0, "Test"));
            pages.Add(new Page(1, "Commercial"));
            request.Pages = pages;
            return(request);
        }
Example #14
        public void ParseTextWithHeader()
        {
            var wiki     = @"
                          = Hello =

                          blah-blah. OK!";
            var doc      = WikitextParser.Parse(wiki);
            var elements = doc.Elements.Cast <IWikitextValue>().ToArray();

            Assert.Equal(2, elements.Length);
            Assert.Equal("Hello", elements[0].Value);
            Assert.Equal("blah-blah. OK!", elements[1].Value);
        }
Example #15
        static async Task WorkAsync(IAsyncEnumerable <WikiPage> pages)
        {
            const string templateName = "WbClientLite/SiteLinks";
            var          parser       = new WikitextParser();
            var          counter      = 0;

            using (var ie = pages.GetEnumerator())
            {
                while (await ie.MoveNext())
                {
                    var page = ie.Current;
                    counter++;
                    Console.Write("{0}: {1} ", counter, page);
                    var root = parser.Parse(page.Content);
                    if (root.EnumDescendants().OfType <Template>().Any(t => MwParserUtility.NormalizeTitle(t.Name) == templateName))
                    {
                        Console.WriteLine("Skipped");
                        continue;
                    }
                    var langLinks = root.EnumDescendants().OfType <WikiLink>().Where(l =>
                    {
                        var wl = WikiClientLibrary.WikiLink.Parse(page.Site, l.Target.ToString());
                        return(wl.InterwikiPrefix != null);
                    }).ToList();
                    // Remove old language links.
                    foreach (var link in langLinks)
                    {
                        if (link.PreviousNode is PlainText pt1 && string.IsNullOrWhiteSpace(pt1.Content))
                        {
                            pt1.Remove();
                        }
                        if (link.NextNode is PlainText pt2 && string.IsNullOrWhiteSpace(pt2.Content))
                        {
                            pt2.Remove();
                        }
                        var parent = link.ParentNode;
                        link.Remove();
                        if (!parent.EnumChildren().Any())
                        {
                            parent.Remove();
                        }
                    }
                    // Insert new template.
                    root.Lines.Add(new Paragraph(new PlainText("\n"), new Template(new Run(new PlainText(templateName)))));
                    page.Content = root.ToString();
                    await page.UpdateContentAsync("使用CrystalPool提供的语言链接。", true, true); // Edit summary: "Use the language links provided by CrystalPool."

                    Console.WriteLine("Done");
                }
            }
        }
Example #16
        static void SimpleDemo()
        {
            // Fills the missing template parameters.
            var parser        = new WikitextParser();
            var templateNames = new [] { "Expand section", "Cleanup" };
            var text          = @"==Hello==<!--comment-->
{{Expand section|
  date=2010-10-05
}}
{{Cleanup}}
This is a nice '''paragraph'''.
==References==
{{Reflist}}
";
            var ast           = parser.Parse(text);
            // Convert the code snippets to nodes
            var dateName  = parser.Parse("date");
            var dateValue = parser.Parse(DateTime.Now.ToString("yyyy-MM-dd"));

            Console.WriteLine("Issues:");
            // Search and set
            foreach (var t in ast.EnumDescendants().OfType <Template>()
                     .Where(t => templateNames.Contains(MwParserUtility.NormalizeTitle(t.Name))))
            {
                // Get the argument by name.
                var date = t.Arguments["date"];
                if (date != null)
                {
                    // To print the wikitext instead of user-friendly text, use ToString()
                    Console.WriteLine("{0} ({1})", t.Name.ToPlainText(), date.Value.ToPlainText());
                }
                // Update/Add the argument
                t.Arguments.SetValue(dateName, dateValue);
            }
            Console.WriteLine();
            Console.WriteLine("Wikitext:");
            Console.WriteLine(ast.ToString());
        }
Example #17
        protected override void OnRefreshDocumentOutline()
        {
            base.OnRefreshDocumentOutline();
            // Show document headings
            var parser       = new WikitextParser();
            var documentText = TextBox.Text;

            Heading[] headings = null;
            if (!string.IsNullOrWhiteSpace(documentText))
            {
                var root = parser.Parse(documentText);
                headings = root.EnumDescendants().OfType <Heading>().ToArray();
            }
            Dispatcher.AutoInvoke(() =>
            {
                DocumentOutline.Clear();
                if (headings == null)
                {
                    return;
                }
                var levelStack = new Stack <Tuple <Heading, DocumentOutlineItem> >();
                foreach (var h in headings)
                {
                    var outline = new DocumentOutlineItem
                    {
                        Text           = string.Join(null, h.Inlines).Trim(),
                        OutlineContext = h
                    };
                    outline.DoubleClick += OutlineItem_DoubleClick;
                    while (levelStack.Count > 0)
                    {
                        var lastLevel = levelStack.Peek().Item1.Level;
                        if (lastLevel < h.Level)
                        {
                            // Append as child item.
                            levelStack.Peek().Item2.Children.Add(outline);
                            goto NEXT;
                        }
                        // Sibling or upper levels.
                        levelStack.Pop();
                    }
                    // levelStack.Count == 0
                    DocumentOutline.Add(outline);
                    levelStack.Push(Tuple.Create(h, outline));
                    NEXT:
                    ;
                }
            });
        }
Example #18
        private static void ParseAndPrint(List <Page> pages, string title)
        {
            var page = pages.FirstOrDefault(x => x.Title == title);

            if (page == null)
            {
                Console.WriteLine("Page '{0}' not found.", title);
                return;
            }

            var parser = new WikitextParser();
            var ast    = parser.Parse(page.Text);

            // PrintAst(ast, 0);
            Console.WriteLine(ast.ToPlainText());
        }
Example #19
        /// <summary>
        /// Builds <see cref="PageInfo"/> from a MediaWiki page with its content fetched.
        /// </summary>
        public static PageInfo BuildBasicInfo(Page page, WikitextParser parser)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }
            if (parser == null)
            {
                throw new ArgumentNullException(nameof(parser));
            }
            var info = new PageInfo
            {
                Title            = page.Title,
                LastRevisionId   = page.LastRevisionId,
                LastRevisionTime = page.LastRevision?.TimeStamp ?? DateTime.MinValue,
                LastRevisionUser = page.LastRevision?.UserName,
                ContentLength    = page.ContentLength,
            };

            if (page.IsRedirect)
            {
                info.Description = page.Content;
            }
            else if (!string.IsNullOrWhiteSpace(page.Content))
            {
                if (page.InferContentModel() == ContentModels.Wikitext)
                {
                    var p = parser.Parse(page.Content);
                    // Search for leading line.
                    var leadingLine = p.EnumChildren()
                                      .OfType <Paragraph>()
                                      .FirstOrDefault(line => line.Inlines.OfType <PlainText>()
                                                      .Any(pt => !string.IsNullOrWhiteSpace(pt.Content)));
                    if (leadingLine != null)
                    {
                        info.Description = leadingLine.ToString();
                    }
                    // Collect template arguments, if any.
                    info.TemplateArguments = p.EnumDescendants()
                                             .OfType <ArgumentReference>()
                                             .Select(r => new TemplateArgumentInfo(r.Name.ToString().Trim()))
                                             .Distinct(TemplateArgumentInfoComparer.Default)
                                             .ToArray();
                }
            }
            return(info);
        }
Example #20
 private void Parse()
 {
     if (TextView != null)
     {
         var parser = new WikitextParser();
         var text   = TextView.Dispatcher.AutoInvoke(() => TextView.Document.Text);
         var sw     = Stopwatch.StartNew();
         var ast    = parser.Parse(text);
         Trace.WriteLine("Parsed " + text.Length + " chars in " + sw.Elapsed);
         documentAstInvalidated = false;
         TextView.Dispatcher.BeginInvoke((Action)(() =>
         {
             AstRoot = ast;
             TextView.Redraw();
         }));
     }
     documentAstInvalidated = false;
 }
Example #21
        public static async Task <string> GetWikiPage(string title)
        {
            var requestContent = new Dictionary <string, string>
            {
                { "format", "json" },
                { "action", "query" },
                { "prop", "revisions" },
                { "rvlimit", "1" },
                { "rvprop", "content" },
                { "titles", title },
                { "redirects", "resolve" }
            };

            using (var client = new HttpClient())
                using (var postVars = new FormUrlEncodedContent(requestContent))
                {
                    var response = await client.PostAsync(endpointUrl, postVars);

                    var stringContent = await response.Content.ReadAsStringAsync();

                    var root    = JObject.Parse(stringContent);
                    var content = (string)root["query"]["pages"].Children <JProperty>().First().Value["revisions"][0]["*"];
                    var parser  = new WikitextParser();

                    var result = parser.Parse(content);

                    var lines     = result.EnumChildren().ToList();
                    var textLines = lines.Select(c => ExtractStringFromQuery(c))
                                    .Where(c => c != "\n")
                                    .Select(c => c.Trim())
                                    .ToList();

                    var taken = textLines
                                .SkipWhile(c => String.IsNullOrEmpty(c))
                                .TakeWhile(c => !String.IsNullOrEmpty(c) && !c.StartsWith("**"))
                                .Take(3)
                                .ToList();

                    return(string.Join("\n", taken));
                }
        }
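endpointUrl and ExtractStringFromQuery are defined elsewhere in that class. A plausible sketch under the assumption that the helper simply flattens an AST node to plain text; these are not the original definitions.

        private static readonly string endpointUrl = "https://en.wikipedia.org/w/api.php";

        private static string ExtractStringFromQuery(Node node)
        {
            // Flatten the node (a top-level line of the parsed page) to readable text.
            return node.ToPlainText();
        }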
Example #22
        public static List <WikiJobRequest> GetSampleJobRequests(bool includePages)
        {
            var requests = new List <WikiJobRequest>();
            var array    = GetRawRequestArray();
            var parser   = new WikitextParser();

            for (int x = 0; x < 5; x++)
            {
                var ast       = parser.Parse(array[x]);
                var templates = ast.Lines.First <LineNode>().EnumDescendants().OfType <Template>();
                var request   = WikiJobRequestFactory.GetWikiJobRequest(JobType.TextReplacementJob, TimeZoneInfo.Local, templates.First());
                request.ID = x + 2;
                if (!includePages)
                {
                    request.Pages = null;
                }
                requests.Add(request);
            }

            return(requests);
        }
Example #23
        /// <summary>
        /// Fetches a page from the English Wikipedia and parses it.
        /// </summary>
        private static Wikitext FetchAndParse(string title)
        {
            if (title == null)
            {
                throw new ArgumentNullException(nameof(title));
            }
            const string EndPointUrl    = "https://en.wikipedia.org/w/api.php";
            var          client         = new HttpClient();
            var          requestContent = new Dictionary <string, string>
            {
                { "format", "json" },
                { "action", "query" },
                { "prop", "revisions" },
                { "rvlimit", "1" },
                { "rvprop", "content" },
                { "titles", title }
            };
            var response = client.PostAsync(EndPointUrl, new FormUrlEncodedContent(requestContent)).Result;
            var root     = JObject.Parse(response.Content.ReadAsStringAsync().Result);
            var content  = (string)root["query"]["pages"].Children <JProperty>().First().Value["revisions"][0]["*"];
            var parser   = new WikitextParser();

            return(parser.Parse(content));
        }
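A short usage sketch for FetchAndParse; the article title is illustrative.

        private static void DemoFetchAndParse()
        {
            // "Wikipedia" is only an example title; any existing English Wikipedia page works.
            var ast = FetchAndParse("Wikipedia");

            Console.WriteLine(ast.ToPlainText());
        }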
Example #24
        public IEnumerable <string> FactsFromPage(WikiPage page)
        {
            var root       = wikiParser.Parse(page.Content);
            var infoboxCat = root.EnumDescendants().OfType <Template>()
                             .FirstOrDefault(t => MwParserUtility.NormalizeTitle(t.Name) == "Charcat");

            if (infoboxCat == null)
            {
                Console.WriteLine("No {{Charcat}} found.");
                yield return("% No {{Charcat}} found.");

                yield break;
            }
            var atom = AtomExpr(page.Title.Trim());

            yield return($"name({atom}, \"{StripDabTitle(page.Title)}\").");

            switch (EnWikiHelper.IsTom(root))
            {
            case true:
                yield return($"male({atom}).");

                break;

            case false:
                yield return($"female({atom}).");

                break;

            default:
                break;
            }
            var affie = infoboxCat.Arguments["affie"];

            if (affie != null)
            {
                foreach (var aff in affie.EnumDescendants().OfType <WikiLink>())
                {
                    yield return($"belongsto({atom}, {AtomExpr(aff.Target.ToPlainText().Trim())}).");
                }
            }
            var familyt = infoboxCat.Arguments["familyt"];
            var familyl = infoboxCat.Arguments["familyl"];

            if (familyt != null && familyl != null)
            {
                var familyDict = EnWikiHelper.ParseFamily(familyt.Value, familyl.Value);
                familyDict.TryGetValue("mother", out var mothers);
                familyDict.TryGetValue("father", out var fathers);
                var mother = mothers?.FirstOrDefault();
                var father = fathers?.FirstOrDefault();
                if (mother != null || father != null)
                {
                    Debug.Assert(mothers == null || mothers.Count <= 1);
                    Debug.Assert(fathers == null || fathers.Count <= 1);
                    yield return($"child({atom}, {AtomExpr(father ?? "x")}, {AtomExpr(mother ?? "x")}).");
                }
                else
                {
                    yield return("% No parent found.");
                }
            }
            else
            {
                Console.WriteLine("No familyt/familyl.");
                yield return("% No {{Charcat |familyl= |familyt= }} found.");
            }
            var apps = infoboxCat.Arguments["mentor"];

            if (apps != null)
            {
                foreach (var appr in apps.EnumDescendants().OfType <WikiLink>())
                {
                    yield return($"apprentice({atom}, {AtomExpr(appr.Target.ToPlainText().Trim())}).");
                }
            }
        }
Example #25
        public async void UpdateRequests(List <WikiJobRequest> requests)
        {
            using (var client = new WikiClient
            {
                ClientUserAgent = "WCLQuickStart/1.0 (your user name or contact information here)"
            })

            {
                try
                {
                    // You can create multiple WikiSite instances on the same WikiClient to share the state.
                    var site = _wikiAccessLogic.GetLoggedInWikiSite(_wikiLoginConfig, client, _log);

                    var page = new WikiPage(site, _wikiRequestPage);

                    _log.Information("Pulling requests from job request page for status update.");

                    // Fetch content of job request page so we can update it
                    await page.RefreshAsync(PageQueryOptions.FetchContent
                                            | PageQueryOptions.ResolveRedirects);

                    var parser   = new WikitextParser();
                    var wikiText = parser.Parse(page.Content);

                    foreach (WikiJobRequest request in requests)
                    {
                        _log.Information($"Processing request ID: {request.ID} with raw {request.RawRequest}");
                        //Find corresponding template in the page content
                        var templates        = wikiText.Lines.SelectMany(x => x.EnumDescendants().OfType <Template>());
                        var requestTemplates = templates.Where(template => template.Name.ToPlainText().Equals(_botRequestTemplate));
                        _log.Information($"{requestTemplates.ToList().Count} templates found for template {_botRequestTemplate}");
                        _log.Information($"Template id: {requestTemplates.First().Arguments.SingleOrDefault(arg => arg.Name.ToPlainText().Equals("id"))}");
                        var singletemplate = requestTemplates.First(template => template.EqualsJob(request));

                        if (singletemplate.Arguments.SingleOrDefault(arg => arg.Name.ToPlainText().Equals("status")) == null) //Status argument doesn't exist in the template
                        {
                            var templateArgument = new TemplateArgument {
                                Name = parser.Parse("status"), Value = parser.Parse(request.Status.ToString())
                            };
                            singletemplate.Arguments.Add(templateArgument);
                        }
                        else //Status argument exists
                        {
                            singletemplate.Arguments.Single(arg => arg.Name.ToPlainText().Equals("status")).Value = parser.Parse(request.Status.ToString());
                        }

                        if (singletemplate.Arguments.SingleOrDefault(arg => arg.Name.ToPlainText().Equals("id")) == null) //ID argument doesn't exist in the template
                        {
                            var templateArgument = new TemplateArgument {
                                Name = parser.Parse("id"), Value = parser.Parse(request.ID.ToString())
                            };
                            singletemplate.Arguments.Add(templateArgument);
                        }

                        request.RawRequest = singletemplate.ToString();
                        _database.UpdateRaw(request.ID, request.RawRequest); //TODO: Make batch operation
                    }

                    //Update the content of the page object and push it live
                    await UpdatePageContent(wikiText.ToString(), "Updating request ids and statuses", page);


                    // We're done here
                    await site.LogoutAsync();
                }
                catch (Exception ex)
                {
                    _log.Error(ex, "An error occurred while trying to update requests: ");
                }
            }
        }
Example #26
        public async Task PopulateRelationsAsync()
        {
            var processedEntities = GetProcessedEntities();
            await zhWarriorsSite.Initialization;
            var counter = 0;

            foreach (var catg in GetCatsToProcess(processedEntities).Buffer(50))
            {
                await catg.Select(t => t.ZhPage).RefreshAsync(PageQueryOptions.FetchContent);

                foreach (var (id, title, page) in catg)
                {
                    counter++;
                    Logger.LogInformation("[{}] Processing {} -> {}", counter, title, id);
                    try
                    {
                        await EditEntityAsync(new Entity(Site, id), page);

                        processedEntities.Add(id);
                    }
                    catch (KeyNotFoundException)
                    {
                        Logger.LogWarning("Missing entity.");
                    }
                    WriteProcessedEntities(processedEntities);
                }
            }

            async Task EditEntityAsync(Entity entity, WikiPage page)
            {
                var root    = parser.Parse(page.Content);
                var infobox = root.EnumDescendants().TemplatesWithTitle("Infobox cat").FirstOrDefault();

                if (infobox == null)
                {
                    Logger.LogError("No {{Infobox cat}} found.");
                    return;
                }
                var father  = infobox.Arguments["father"]?.Value.EnumDescendants().OfType <WikiLink>().FirstOrDefault()?.Target.ToPlainText();
                var mother  = infobox.Arguments["mother"]?.Value.EnumDescendants().OfType <WikiLink>().FirstOrDefault()?.Target.ToPlainText();
                var mates   = infobox.Arguments["mate"]?.Value.EnumDescendants().OfType <WikiLink>().Select(l => l.Target.ToPlainText()).ToList();
                var fosters = infobox.Arguments["foster_father"]?.Value.EnumDescendants()
                              .Concat(infobox.Arguments["foster_mother"]?.Value.EnumDescendants() ?? Enumerable.Empty <Node>())
                              .OfType <WikiLink>().Select(l => l.Target.ToPlainText()).ToList();
                var mentors = infobox.Arguments["mentor"]?.Value.EnumDescendants().OfType <WikiLink>().Select(l => l.Target.ToPlainText()).ToList();

                Console.WriteLine(father);
                Console.WriteLine(mother);
                Console.WriteLine(string.Join(";", mates));
                Console.WriteLine(string.Join(";", fosters));
                Console.WriteLine(string.Join(";", mentors));

                var claims = new List <Claim>();

                if (father != null)
                {
                    var f = CPRepository.EntityFromZhSiteLink(father);
                    if (f == null)
                    {
                        WriteMissingEntity(father);
                        throw new KeyNotFoundException();
                    }
                    claims.Add(new Claim("P88", f, BuiltInDataTypes.WikibaseItem));
                }
                if (mother != null)
                {
                    var m = CPRepository.EntityFromZhSiteLink(mother);
                    if (m == null)
                    {
                        WriteMissingEntity(mother);
                        throw new KeyNotFoundException();
                    }
                    claims.Add(new Claim("P89", m, BuiltInDataTypes.WikibaseItem));
                }
                if (fosters != null)
                {
                    foreach (var foster in fosters)
                    {
                        var f = CPRepository.EntityFromZhSiteLink(foster);
                        if (f == null)
                        {
                            WriteMissingEntity(foster);
                            throw new KeyNotFoundException();
                        }
                        claims.Add(new Claim("P99", f, BuiltInDataTypes.WikibaseItem));
                    }
                }
                if (mates != null)
                {
                    var index = 1;
                    foreach (var mate in mates)
                    {
                        var f = CPRepository.EntityFromZhSiteLink(mate);
                        if (f == null)
                        {
                            WriteMissingEntity(mate);
                            throw new KeyNotFoundException();
                        }
                        claims.Add(new Claim("P100", f, BuiltInDataTypes.WikibaseItem)
                        {
                            Qualifiers = { new Snak("P53", index.ToString(), BuiltInDataTypes.String) }
                        });
                        index++;
                    }
                }
                if (mentors != null)
                {
                    foreach (var mentor in mentors)
                    {
                        var f = CPRepository.EntityFromZhSiteLink(mentor);
                        if (f == null)
                        {
                            WriteMissingEntity(mentor);
                            throw new KeyNotFoundException();
                        }
                        claims.Add(new Claim("P86", f, BuiltInDataTypes.WikibaseItem));
                    }
                }
                if (claims.Any())
                {
                    await entity.EditAsync(claims.Select(c => new EntityEditEntry(nameof(entity.Claims), c)),
                                           "Populate relations from zhwarriorswiki.", EntityEditOptions.Bot);
                }
            }
        }
Example #27
        public override void Execute()
        {
            SetJobStart();

            try
            {
                using (var client = new WikiClient())
                {
                    var site           = _wikiAccessLogic.GetLoggedInWikiSite(WikiConfig, client, Log);
                    var parser         = new WikitextParser();
                    var wikiText       = parser.Parse(FromText);
                    var fromLinkTarget = wikiText.Lines.SelectMany(x => x.EnumDescendants().OfType <WikiLink>()).FirstOrDefault().Target.ToPlainText();


                    var PageList = GetBackLinksPageList(site, fromLinkTarget);

                    string filename   = "";
                    string diff       = "";
                    string filePath   = "";
                    var    folderName = Request.ID.ToString();
                    var    folderPath = Path.Combine(Configuration["DiffDirectory"], folderName);
                    if (!Directory.Exists(folderPath))
                    {
                        Directory.CreateDirectory(folderPath);
                    }

                    foreach (WikiPage page in PageList)
                    {
                        Log.Information("Processing page {PageName}", page.Title);

                        page.RefreshAsync(PageQueryOptions.FetchContent | PageQueryOptions.ResolveRedirects).Wait(); //Load page content

                        var beforeContent = page.Content;
                        var wikiPageText  = parser.Parse(beforeContent);
                        IEnumerable <WikiLink> wikiLinks = null;
                        if (string.IsNullOrWhiteSpace(string.Join(' ', HeadersToSearch)))
                        {
                            wikiLinks = wikiPageText.Lines.SelectMany(x => x.EnumDescendants().OfType <WikiLink>());
                        }
                        else
                        {
                            var header = wikiPageText.Lines.SelectMany(x => x.EnumDescendants().OfType <Heading>()).Where(y => HeadersToSearch.Contains(y.ToPlainText())).Single();
                            wikiLinks = header.EnumDescendants().OfType <WikiLink>();
                        }
                        var matchingLinks = wikiLinks.Where(link => CompareLinks(link.Target.ToString(), fromLinkTarget)).ToList();

                        if (!matchingLinks.Any() || page.Title.Equals(Configuration["WikiRequestPage"], StringComparison.OrdinalIgnoreCase))
                        {
                            Request.Pages.RemoveAll(x => x.Name.Equals(page.Title, StringComparison.OrdinalIgnoreCase));
                        }
                        else
                        {
                            foreach (WikiLink link in matchingLinks)
                            {
                                Log.Debug($"Link target starts: {link.Target}");
                                var newTarget = parser.Parse(ToText).Lines.SelectMany(x => x.EnumDescendants().OfType <WikiLink>()).FirstOrDefault().Target.ToPlainText();
                                if (link.Text == null && (!link.Target.ToPlainText().Contains("(") && newTarget.Contains("(")))
                                {
                                    link.Text = new Run(new PlainText(link.Target.ToPlainText())); //Maintain original link text if the link had no custom text and no disambig
                                }
                                link.Target = new Run(new PlainText(newTarget));
                                Log.Debug($"Link target ends: {link.Target}");
                            }
                            Log.Debug($"Content after: {wikiPageText}");


                            var afterContent = wikiPageText.ToString();

                            if (Request.Status != JobStatus.Approved) //Create diffs for approval
                            {
                                Log.Information("Generating diff for page {PageName}", page.Title);
                                Utilities.GenerateAndSaveDiff(beforeContent, afterContent, page.Title, Request.ID, Configuration["DiffDirectory"], folderName);
                                //var wikiDiff = new WikiDiff();
                                //diff = $"{WikiDiff.DiffHead()}</head><body>{WikiDiff.TableHeader}{wikiDiff.GetDiff(beforeContent, afterContent, 1)}</table></body></html>";
                                //filename = "Diff-" + Request.ID + "-" + page.Title + ".txt"; //Set filename for this page
                                //filename = Utilities.SanitizeFilename(filename, '_');

                                //filePath = Path.Combine(Configuration["DiffDirectory"], folderName, filename);
                                //File.WriteAllText(filePath, diff);
                                JobData.SaveWikiJobRequest(Request); //Save page list
                            }
                            else //Apply changes
                            {
                                Log.Information("Applying replacement for page {PageName}", page.Title);
                                var editMessage = $"{WikiConfig["Username"]} Text Replacement {FromText} => {ToText}";
                                ((TFWikiJobRetriever)Retriever).UpdatePageContent(afterContent, editMessage, page).Wait();
                            }
                        }
                        Thread.Sleep(1000 * _throttleSpeedInSeconds);
                    }
                }
                Thread.Sleep(1000);
            }
            catch (Exception ex)
            {
                Request.Status = JobStatus.Failed;
                Log.Error(ex, $"TextReplacementJob with ID: {Request.ID} failed.");
            }
            finally
            {
                SetJobEnd();
                SaveRequest();
            }
        }
Example #28
        public override void Execute()
        {
            SetJobStart();
            List <WikiPage> AlreadyUpdatedPages = new List <WikiPage>();
            List <WikiPage> NoUpdateNeededPages = new List <WikiPage>();

            try
            {
                using (var client = new WikiClient())
                {
                    var site           = _wikiAccessLogic.GetLoggedInWikiSite(WikiConfig, client, Log);
                    var parser         = new WikitextParser();
                    var wikiText       = parser.Parse(FromText);
                    var fromLinkTarget = wikiText.Lines.SelectMany(x => x.EnumDescendants().OfType <WikiLink>()).FirstOrDefault().Target.ToPlainText();
                    wikiText = parser.Parse(ToText);
                    var toLinkTarget = wikiText.Lines.SelectMany(x => x.EnumDescendants().OfType <WikiLink>()).FirstOrDefault().Target.ToPlainText();

                    var PageList          = GetBackLinksPageList(site, fromLinkTarget);
                    var storyLinkPageList = GetStoryLinksPageList(site, toLinkTarget);

                    //string filename = "";
                    //string diff = "";
                    //string filePath = "";
                    var folderName = Request.ID.ToString();
                    var folderPath = Path.Combine(Configuration["DiffDirectory"], folderName);
                    if (!Directory.Exists(folderPath))
                    {
                        Directory.CreateDirectory(folderPath);
                    }

                    foreach (WikiPage page in PageList)
                    {
                        Log.Information("Processing page {PageName}", page.Title);

                        IEnumerable <WikiLink> wikiLinks = null;

                        page.RefreshAsync(PageQueryOptions.FetchContent | PageQueryOptions.ResolveRedirects).Wait(); //Load page content

                        //Get page text
                        var beforeContent = page.Content;
                        var wikiPageText  = parser.Parse(beforeContent);

                        if (string.IsNullOrWhiteSpace(string.Join(' ', HeadersToSearch)))
                        {
                            throw new Exception("No continuity header specified");
                        }
                        else
                        {
                            //Get any wiki links under the header
                            var headers         = wikiPageText.EnumDescendants().OfType <Heading>();
                            var matchingHeaders = headers.Where(y => HeadersToSearch.Contains(y.ToPlainText()) || HeadersToSearch.Contains(y.ToString()));
                            //Need to handle cases like Armada Megatron where links are on cartoon/comic/whatever subpage. Look for subpage header based on media type, get subpage, add to Pages list and get links.
                            if (matchingHeaders.Any())
                            {
                                var contentNodes = GetContentBetweenHeaders(headers, matchingHeaders, wikiPageText);
                                wikiLinks = contentNodes.SelectMany(x => x.EnumDescendants().OfType <WikiLink>());
                            }
                            else
                            {
                                var templates = wikiPageText.Lines.SelectMany(x => x.EnumDescendants().OfType <Template>());

                                var mediaTemplate = templates.Where(template => template.Name.ToPlainText().Equals(GetTemplateNameByMedia(Media))).SingleOrDefault();

                                if (mediaTemplate != null || HasFileTemplateForMedia(templates, Media))
                                {
                                    wikiLinks = wikiPageText.Lines.SelectMany(x => x.EnumDescendants().OfType <WikiLink>());
                                }
                                else
                                {
                                    var mainTemplates = templates.Where(template => template.Name.ToPlainText().Equals("main"));
                                    if (mainTemplates.Any())
                                    {
                                        wikiLinks = GetMainTemplatePageLinks(mainTemplates, site);
                                    }
                                }
                            }
                        }
                        var matchingLinks = wikiLinks?.Where(link => CompareLinks(link.Target.ToString(), fromLinkTarget)).ToList();

                        if (matchingLinks == null || !matchingLinks.Any() || page.Title.Equals(Configuration["WikiRequestPage"], StringComparison.OrdinalIgnoreCase))
                        {
                            Request.Pages.RemoveAll(x => x.Name.Equals(page.Title, StringComparison.OrdinalIgnoreCase));
                            if (wikiLinks?.Where(link => CompareLinks(link.Target.ToString(), toLinkTarget)).Any() ?? false)
                            {
                                AlreadyUpdatedPages.Add(page);
                            }
                            else
                            {
                                NoUpdateNeededPages.Add(page);
                            }
                        }
                        else
                        {
                            foreach (WikiLink link in matchingLinks)
                            {
                                Log.Debug($"Link target starts: {link.Target}");
                                var newTarget = parser.Parse(ToText).Lines.SelectMany(x => x.EnumDescendants().OfType <WikiLink>()).FirstOrDefault().Target.ToPlainText();
                                if (link.Text == null)
                                {
                                    link.Text = new Run(new PlainText(link.Target.ToPlainText())); //Maintain original link text if the link had no custom text
                                }
                                link.Target = new Run(new PlainText(newTarget));
                                Log.Debug($"Link target ends: {link.Target}");
                            }
                            Log.Debug($"Content after: {wikiPageText}");


                            var afterContent = wikiPageText.ToString();

                            if (Request.Status != JobStatus.Approved) //Create diffs for approval
                            {
                                Log.Information("Generating diff for page {PageName}", page.Title);

                                Utilities.GenerateAndSaveDiff(beforeContent, afterContent, page.Title, Request.ID, Configuration["DiffDirectory"], folderName);

                                JobData.SaveWikiJobRequest(Request); //Save page list
                            }
                            else //Apply changes
                            {
                                Log.Information("Applying replacement for page {PageName}", page.Title);
                                var editMessage = $"{WikiConfig["Username"]} Text Replacement {FromText} => {ToText}";
                                ((TFWikiJobRetriever)Retriever).UpdatePageContent(afterContent, editMessage, page).Wait();
                            }
                        }

                        Thread.Sleep(1000 * _throttleSpeedInSeconds);
                    }
                }
                Thread.Sleep(1000);
            }
            catch (Exception ex)
            {
                Request.Status = JobStatus.Failed;
                Log.Error(ex, $"TextReplacementJob with ID: {Request.ID} failed.");
            }
            finally
            {
                SetJobEnd();
                SaveRequest();
            }
        }