/// <summary>
/// Creates one Wikibase item per chapter for every book returned by the SPARQL query below.
/// </summary>
/// <remarks>
/// For each book, the <c>{{Chapters/b|…}}</c> template is rendered on the wiki at
/// warriors.wikia.com and every link (<c>a[@href]</c>) in the rendered HTML is treated as a chapter.
/// Labels, aliases and descriptions are generated for "en", "zh-cn" and "zh-tw"; each new item is
/// linked to its book, and to the previously handled chapter when there is one.
/// </remarks>
/// <returns>A task that completes when all books have been processed.</returns>
public async Task RunAsync()
{
    var enWw = new WikiSite(Site.WikiClient, "http://warriors.wikia.com/api.php");

    // Disabled alternative query (kept for reference): resolves the wiki site link of each book
    // instead of its English label.
    // var books = CPRepository.ExecuteQuery(@"
    //SELECT ?book ?link {
    //    { ?book wdt:P3 wd:Q46. } UNION { ?book wdt:P3 wd:Q116. }
    //    ?link schema:isPartOf <http://warriors.wikia.com/>;
    //          schema:about ?book.
    //}")
    //     .Select(r => (book: (UriNode)r["book"], link: (UriNode)r["link"]))
    //     .ToList();

    // Every entity whose P3 is Q46 or Q116, together with its English label.
    var books = CPRepository.ExecuteQuery(@" SELECT ?book ?label { { ?book wdt:P3 wd:Q46. } UNION { ?book wdt:P3 wd:Q116. } ?book rdfs:label ?label. FILTER (lang(?label) = 'en') }")
        .Select(r => (id: CPRepository.StripEntityUri(((UriNode)r["book"]).Uri), label: ((LiteralNode)r["label"]).Value));

    await enWw.Initialization;

    foreach (var book in books)
    {
        // Id of the most recently handled chapter of this book; used for the P48 qualifier.
        string lastChapterId = null;
        var bookItem = new Entity(Site, book.id);
        var tlabel = book.label;

        // Start rendering the chapter list first so that it overlaps with the entity refresh.
        var parsingTask = enWw.ParseContentAsync("{{Chapters/b|" + tlabel + "}}", null, null, ParsingOptions.None);
        await bookItem.RefreshAsync(EntityQueryOptions.FetchLabels | EntityQueryOptions.FetchAliases,
            new[] { "en", "zh-cn", "zh-tw" });

        // Fall back en <- zh-cn <- zh-tw when a translated label is missing.
        var labelEn = bookItem.Labels["en"];
        var labelCn = bookItem.Labels["zh-cn"] ?? labelEn;
        var labelTw = bookItem.Labels["zh-tw"] ?? labelCn;
        Logger.LogInformation("{}, {}, {}", labelEn, labelCn, labelTw);

        var doc = new HtmlDocument();
        doc.LoadHtml((await parsingTask).Content);
        var nodes = doc.DocumentNode.SelectNodes("//a[@href]");

        // A null result means the rendered output contains no links. Retry exactly once with a
        // " (Book)" suffix, unless the label already contains a parenthesis. Only the parse is
        // repeated: the labels fetched above do not depend on the template argument.
        if (nodes == null && !tlabel.Contains('('))
        {
            tlabel += " (Book)";
            var retryResult = await enWw.ParseContentAsync("{{Chapters/b|" + tlabel + "}}", null, null, ParsingOptions.None);
            doc = new HtmlDocument();
            doc.LoadHtml(retryResult.Content);
            nodes = doc.DocumentNode.SelectNodes("//a[@href]");
        }

        if (nodes == null)
        {
            Logger.LogError("No chapter information found.");
            continue;
        }

        foreach (var node in nodes)
        {
            var chLabels = new WbMonolingualTextCollection();
            var chAliases = new WbMonolingualTextsCollection();
            var chDescriptions = new WbMonolingualTextCollection();
            var text = node.InnerText.Trim();
            var n = TryMatchChapterNumber(text);
            // Value of the P53 qualifier: the chapter number, "0" for the prologue,
            // "E" for the epilogue, or null (no qualifier) for any other link text.
            var nId = n?.ToString();

            if (n == null)
            {
                // The link text is not a numbered chapter.
                switch (text.ToLowerInvariant())
                {
                    case "prologue":
                        chLabels["en"] = labelEn + ", Prologue";
                        chDescriptions["en"] = "prologue chapter of " + labelEn;
                        foreach (var a in bookItem.Aliases["en"])
                        {
                            chAliases.Add("en", a + "-0");
                        }
                        chLabels["zh-cn"] = "《" + labelCn + "》引子";
                        chDescriptions["zh-cn"] = "《" + labelCn + "》的引子章节";
                        chLabels["zh-tw"] = "《" + labelTw + "》序章";
                        chDescriptions["zh-tw"] = "《" + labelTw + "》的序章";
                        chAliases.Add("zh-cn", labelCn + " 引子");
                        chAliases.Add("zh-tw", labelTw + " 序章");
                        chAliases.Add("zh-cn", labelCn + " 0");
                        chAliases.Add("zh-tw", labelTw + " 0");
                        nId = "0";
                        break;

                    case "epilogue":
                        chLabels["en"] = labelEn + ", Epilogue";
                        chDescriptions["en"] = "epilogue chapter of " + labelEn;
                        foreach (var a in bookItem.Aliases["en"])
                        {
                            chAliases.Add("en", a + "-E");
                        }
                        chLabels["zh-cn"] = "《" + labelCn + "》尾声";
                        chDescriptions["zh-cn"] = "《" + labelCn + "》的尾声章节";
                        chLabels["zh-tw"] = "《" + labelTw + "》尾聲";
                        chDescriptions["zh-tw"] = "《" + labelTw + "》的尾聲章節";
                        chAliases.Add("zh-cn", labelCn + " 尾声");
                        chAliases.Add("zh-tw", labelTw + " 尾聲");
                        nId = "E";
                        break;

                    default:
                        // Any other title: use the link text itself, plus its abbreviation for aliases.
                        chLabels["en"] = labelEn + ", " + text;
                        chDescriptions["en"] = "a chapter of " + labelEn;
                        var abbr = GetAbbr(text);
                        foreach (var a in bookItem.Aliases["en"])
                        {
                            chAliases.Add("en", a + "-" + abbr);
                        }
                        chLabels["zh-cn"] = "《" + labelCn + "》" + text;
                        chDescriptions["zh-cn"] = "《" + labelCn + "》的一个章节";
                        chLabels["zh-tw"] = "《" + labelTw + "》" + text;
                        chDescriptions["zh-tw"] = "《" + labelTw + "》的一個章節";
                        chAliases.Add("zh-cn", labelCn + " " + abbr);
                        chAliases.Add("zh-tw", labelTw + " " + abbr);
                        break;
                }
            }
            else
            {
                // Numbered chapter.
                chLabels["en"] = labelEn + ", Chapter " + n;
                chDescriptions["en"] = "Chapter " + n + " of " + labelEn;
                foreach (var a in bookItem.Aliases["en"])
                {
                    chAliases.Add("en", a + "-" + n);
                }
                var zhOrdinal = Utility.GetOrdinalZh(n.Value);
                chLabels["zh-cn"] = "《" + labelCn + "》第" + zhOrdinal + "章";
                chDescriptions["zh-cn"] = "《" + labelCn + "》的第" + zhOrdinal + "章";
                chLabels["zh-tw"] = "《" + labelTw + "》第" + zhOrdinal + "章";
                chDescriptions["zh-tw"] = "《" + labelTw + "》的第" + zhOrdinal + "章";
                chAliases.Add("zh-cn", labelCn + " " + n);
                chAliases.Add("zh-tw", labelTw + " " + n);
            }

            // Skip chapters that already have an entity with the same English label.
            string cid = CPRepository.EntityFromLabel(chLabels["en"]);
            if (cid != null)
            {
                Logger.LogWarning("Entity exists.");
                lastChapterId = cid;
                // Books whose English label contains "Hollyleaf's Story" move on to the next link;
                // for every other book the remaining links are abandoned.
                if (labelEn.Contains("Hollyleaf's Story"))
                {
                    continue;
                }
                break;
            }

            // Drop translations that merely repeat a fallback label.
            if (labelEn == labelCn)
            {
                chLabels.Remove("zh-cn");
                chAliases.Remove("zh-cn");
                chDescriptions.Remove("zh-cn");
            }
            if (labelEn == labelTw || labelCn == labelTw)
            {
                chLabels.Remove("zh-tw");
                chAliases.Remove("zh-tw");
                chDescriptions.Remove("zh-tw");
            }

            //foreach (var l in chLabels) Console.WriteLine(l);
            //foreach (var l in chAliases) Console.WriteLine(l);
            //foreach (var l in chDescriptions) Console.WriteLine(l);

            // NOTE(review): P3/Q109 presumably mean "instance of"/"chapter" — confirm against the repository.
            var claims = new List<Claim>
            {
                new Claim("P3", "Q109", BuiltInDataTypes.WikibaseItem),
            };

            // P50 points at the book; P53 carries the chapter identifier and P48 the previous chapter.
            var bookClaim = new Claim("P50", book.id, BuiltInDataTypes.WikibaseItem);
            if (nId != null)
            {
                bookClaim.Qualifiers.Add(new Snak("P53", nId, BuiltInDataTypes.String));
            }
            if (lastChapterId != null)
            {
                bookClaim.Qualifiers.Add(new Snak("P48", lastChapterId, BuiltInDataTypes.WikibaseItem));
            }
            claims.Add(bookClaim);

            var chEntity = new Entity(Site, EntityType.Item);
            var edits = new List<EntityEditEntry>();
            edits.AddRange(chLabels.Select(l => new EntityEditEntry(nameof(chEntity.Labels), l)));
            edits.AddRange(chAliases.Select(l => new EntityEditEntry(nameof(chEntity.Aliases), l)));
            edits.AddRange(chDescriptions.Select(l => new EntityEditEntry(nameof(chEntity.Descriptions), l)));
            edits.AddRange(claims.Select(c => new EntityEditEntry(nameof(chEntity.Claims), c)));

            // Links carrying the "new" CSS class get no site link.
            // NOTE(review): presumably these are red links to pages that do not exist — confirm.
            if (!node.HasClass("new"))
            {
                var title = WebUtility.UrlDecode(node.GetAttributeValue("href", "").Replace("/wiki/", ""));
                edits.Add(new EntityEditEntry(nameof(chEntity.SiteLinks), new EntitySiteLink("enwarriorswiki", title)));
            }

            await chEntity.EditAsync(edits, "Populate chapter.", EntityEditOptions.Bulk | EntityEditOptions.Bot);
            lastChapterId = chEntity.Id;
        }
    }
}
/// <summary>
/// Reads the <c>{{Infobox cat}}</c> template of each pending page and adds the affiliations found
/// in its <c>past_affiliation</c> and <c>current_affiliation</c> arguments to the matching entity
/// as P76 claims.
/// </summary>
/// <remarks>
/// Pages are fetched in batches of 50. The set of processed entity ids is written back after
/// every entity, whether or not the edit succeeded.
/// </remarks>
/// <returns>A task that completes when all pending entities have been handled.</returns>
public async Task PopulateAffiliationsAsync()
{
    var processedEntities = GetProcessedEntities();
    await zhWarriorsSite.Initialization;
    var counter = 0;

    foreach (var catg in GetCatsToProcess(processedEntities).Buffer(50))
    {
        // Fetch the content of the whole batch in one go.
        await catg.Select(t => t.ZhPage).RefreshAsync(PageQueryOptions.FetchContent);

        foreach (var (id, title, page) in catg)
        {
            counter++;
            Logger.LogInformation("[{}] Processing {} -> {}", counter, title, id);
            try
            {
                await EditEntityAsync(new Entity(Site, id), page);
                processedEntities.Add(id);
            }
            catch (KeyNotFoundException)
            {
                // The entity is not marked as processed in this case.
                Logger.LogWarning("Missing entity.");
            }
            WriteProcessedEntities(processedEntities);
        }

        // Extracts the affiliations from the page's infobox and writes them to the entity.
        async Task EditEntityAsync(Entity entity, WikiPage page)
        {
            var root = parser.Parse(page.Content);
            var infobox = root.EnumDescendants().TemplatesWithTitle("Infobox cat").FirstOrDefault();
            if (infobox == null)
            {
                Logger.LogError("No {{Infobox cat}} found.");
                return;
            }

            var pastAff = await ExtractAffiliationsEx(infobox.Arguments["past_affiliation"]?.Value);
            var curAff = await ExtractAffiliationsEx(infobox.Arguments["current_affiliation"]?.Value);

            var claims = new List<Claim>();
            foreach (var (AffId, PosId) in pastAff.Concat(curAff))
            {
                // An affiliation that could not be resolved to an entity becomes a "some value" snak.
                Claim c;
                if (AffId != null)
                {
                    c = new Claim("P76", AffId, BuiltInDataTypes.WikibaseItem);
                }
                else
                {
                    c = new Claim(new Snak("P76", SnakType.SomeValue));
                }
                if (PosId != null)
                {
                    c.Qualifiers.Add(new Snak("P92", PosId, BuiltInDataTypes.WikibaseItem));
                }

                // Both ids may be null here, so guard both label lookups.
                Logger.LogInformation("Affiliation: {}, Pos: {}",
                    AffId == null ? null : CPRepository.LabelFromEntity(AffId, "en"),
                    PosId == null ? null : CPRepository.LabelFromEntity(PosId, "en"));
                claims.Add(c);
            }

            if (claims.Any())
            {
                await entity.EditAsync(claims.Select(c => new EntityEditEntry(nameof(entity.Claims), c)),
                    "Populate affiliations from zhwarriorswiki.", EntityEditOptions.Bot);
            }

            // Resolves every raw affiliation under afNode to an (affiliation id, position id) pair.
            // Either element of a pair may be null when no matching entity is found.
            async Task<IList<(string AffId, string PosId)>> ExtractAffiliationsEx(Node afNode)
            {
                var rawAffiliations = afNode == null ? null : ExtractAffiliations(afNode);
                if (rawAffiliations == null || rawAffiliations.Count == 0)
                {
                    return new List<(string AffId, string PosId)>();
                }

                async Task<(string, string)> SubTask(string aff, string book, string location)
                {
                    // Prefer the site link; fall back to a label match.
                    var affid = CPRepository.EntityFromZhSiteLink(aff) ?? CPRepository.EntityFromLabel(aff);

                    // A location ending with "章" is searched as "<book>-<location>" first.
                    if (location != null && location.EndsWith("章"))
                    {
                        var pos = (await Site.SearchItemsAsync(book + "-" + location)).FirstOrDefault();
                        if (pos != null)
                        {
                            return (affid, pos);
                        }
                    }

                    // Otherwise fall back to the book itself, caching the search result per book.
                    if (book != null)
                    {
                        if (!bookLocationCacheDict.TryGetValue(book, out var pos))
                        {
                            pos = (await Site.SearchItemsAsync(book)).FirstOrDefault();
                            bookLocationCacheDict.TryAdd(book, pos);
                        }
                        if (pos != null)
                        {
                            return (affid, pos);
                        }
                    }

                    return (affid, null);
                }

                // Resolve all affiliations concurrently.
                var processed = await Task.WhenAll(rawAffiliations.Select(aff => SubTask(aff.Name, aff.Book, aff.Location)));
                return processed;
            }
        }
    }
}