/// <summary>
/// Exercises <c>WbMonolingualTextsCollection</c>: construction from a dictionary, adding texts,
/// replacing a language's texts through the indexer, and lookups using language codes whose
/// letter casing differs from the casing used on insertion.
/// </summary>
public void WbMonolingualTextsCollectionTest()
{
    // Start from a collection holding a single English text.
    var seed = new Dictionary<string, IEnumerable<string>>
    {
        ["en"] = new[] { "Wikipedia" },
    };
    var texts = new WbMonolingualTextsCollection(seed);

    // New (language, text) pairs are accepted.
    Assert.True(texts.Add("zh-hans", "维基百科"));
    Assert.True(texts.Add("zh-Hant", "維基百科"));
    var japaneseEntry = new WbMonolingualText("ja", "ウィキペディア");
    Assert.True(texts.Add(japaneseEntry));

    // Adding the very same pair a second time is reported as a no-op.
    Assert.False(texts.Add("zh-hans", "维基百科"));

    // A second, different text for an existing language is accepted.
    Assert.True(texts.Add("en", "WP"));

    // Assign the texts of a language through the indexer.
    texts["ru"] = new[] { "Википедия" };

    // en ×2, zh-hans, zh-Hant, ja, ru.
    var asCollection = (ICollection<WbMonolingualText>)texts;
    Assert.Equal(6, asCollection.Count);

    // Lookups below deliberately use different casing than the insertions above.
    Assert.True(texts.ContainsLanguage("zh-HANS"));

    var expectedTraditional = new WbMonolingualText("zh-hanT", "維基百科");
    Assert.Contains(expectedTraditional, texts);

    var japaneseText = texts["JA"].Single();
    Assert.Equal("ウィキペディア", japaneseText);

    var expectedRussian = new WbMonolingualText("RU", "Википедия");
    var actualRussian = texts.TryGetMonolingualTexts("ru").Single();
    Assert.Equal(expectedRussian, actualRussian);

    ShallowTrace(texts);
}
/// <summary>
/// Creates one Wikibase item per chapter for every book returned by the repository query.
/// </summary>
/// <remarks>
/// <para>Books are the entities whose <c>P3</c> is <c>Q46</c> or <c>Q116</c> and that have an English label.
/// For each book the <c>{{Chapters/b|…}}</c> template is expanded on the wiki at warriors.wikia.com and every
/// link (<c>a[@href]</c>) in the rendered HTML is treated as one chapter.</para>
/// <para>For each chapter, English / zh-cn / zh-tw labels, descriptions and aliases are generated, a
/// <c>P3 → Q109</c> claim and a <c>P50 → book</c> claim are attached, and the item is created with the edit
/// summary "Populate chapter.". If an entity with the chapter's English label already exists, the rest of the
/// book is skipped (except for books whose English label contains "Hollyleaf's Story", where only that
/// chapter is skipped).</para>
/// </remarks>
/// <returns>A task that completes once every book has been processed.</returns>
public async Task RunAsync()
{
    const string wikiPathPrefix = "/wiki/";

    // Wikitext that is expanded on the wiki to list the chapters of the given book title.
    string ChapterListWikitext(string title) => "{{Chapters/b|" + title + "}}";

    var enWw = new WikiSite(Site.WikiClient, "http://warriors.wikia.com/api.php");

    // Note: an earlier variant of this query located books through their
    // schema:isPartOf <http://warriors.wikia.com/> site link instead of their English label.
    var books = CPRepository.ExecuteQuery(@" SELECT ?book ?label { { ?book wdt:P3 wd:Q46. } UNION { ?book wdt:P3 wd:Q116. } ?book rdfs:label ?label. FILTER (lang(?label) = 'en') }")
        .Select(r => (id: CPRepository.StripEntityUri(((UriNode)r["book"]).Uri), label: ((LiteralNode)r["label"]).Value));
    await enWw.Initialization;

    foreach (var book in books)
    {
        // ID of the chapter handled just before the current one (created or already existing);
        // used as the P48 qualifier of the next chapter.
        string lastChapterId = null;
        var bookItem = new Entity(Site, book.id);
        var tlabel = book.label;

        // The parse request is issued before the refresh is awaited; its result is awaited afterwards.
        var parsingTask = enWw.ParseContentAsync(ChapterListWikitext(tlabel), null, null, ParsingOptions.None);
        await bookItem.RefreshAsync(EntityQueryOptions.FetchLabels | EntityQueryOptions.FetchAliases,
            new[] { "en", "zh-cn", "zh-tw" });

        // Missing Chinese labels fall back: zh-cn -> en, zh-tw -> zh-cn.
        var labelEn = bookItem.Labels["en"];
        var labelCn = bookItem.Labels["zh-cn"] ?? labelEn;
        var labelTw = bookItem.Labels["zh-tw"] ?? labelCn;
        Logger.LogInformation("{}, {}, {}", labelEn, labelCn, labelTw);

        var doc = new HtmlDocument();
        doc.LoadHtml((await parsingTask).Content);
        var nodes = doc.DocumentNode.SelectNodes("//a[@href]");

        // No links found: retry exactly once with a " (Book)" suffix, unless the title already
        // carries a parenthesised part. Only the template expansion is repeated; the entity
        // labels fetched above do not depend on the retried title.
        if (nodes == null && !tlabel.Contains('('))
        {
            tlabel += " (Book)";
            var retryResult = await enWw.ParseContentAsync(ChapterListWikitext(tlabel), null, null, ParsingOptions.None);
            doc = new HtmlDocument();
            doc.LoadHtml(retryResult.Content);
            nodes = doc.DocumentNode.SelectNodes("//a[@href]");
        }

        if (nodes == null)
        {
            Logger.LogError("No chapter information found.");
            continue;
        }

        foreach (var node in nodes)
        {
            var chLabels = new WbMonolingualTextCollection();
            var chAliases = new WbMonolingualTextsCollection();
            var chDescriptions = new WbMonolingualTextCollection();
            var text = node.InnerText.Trim();

            // n is null when the link text is not recognised as a numbered chapter.
            var n = TryMatchChapterNumber(text);
            // Chapter identifier stored as the P53 qualifier: the number, "0" (prologue), "E" (epilogue),
            // or null for any other unnumbered chapter.
            var nId = n?.ToString();

            if (n == null)
            {
                switch (text.ToLowerInvariant())
                {
                    case "prologue":
                        chLabels["en"] = labelEn + ", Prologue";
                        chDescriptions["en"] = "prologue chapter of " + labelEn;
                        foreach (var a in bookItem.Aliases["en"])
                        {
                            chAliases.Add("en", a + "-0");
                        }
                        chLabels["zh-cn"] = "《" + labelCn + "》引子";
                        chDescriptions["zh-cn"] = "《" + labelCn + "》的引子章节";
                        chLabels["zh-tw"] = "《" + labelTw + "》序章";
                        chDescriptions["zh-tw"] = "《" + labelTw + "》的序章";
                        chAliases.Add("zh-cn", labelCn + " 引子");
                        chAliases.Add("zh-tw", labelTw + " 序章");
                        chAliases.Add("zh-cn", labelCn + " 0");
                        chAliases.Add("zh-tw", labelTw + " 0");
                        nId = "0";
                        break;
                    case "epilogue":
                        chLabels["en"] = labelEn + ", Epilogue";
                        chDescriptions["en"] = "epilogue chapter of " + labelEn;
                        foreach (var a in bookItem.Aliases["en"])
                        {
                            chAliases.Add("en", a + "-E");
                        }
                        chLabels["zh-cn"] = "《" + labelCn + "》尾声";
                        chDescriptions["zh-cn"] = "《" + labelCn + "》的尾声章节";
                        chLabels["zh-tw"] = "《" + labelTw + "》尾聲";
                        chDescriptions["zh-tw"] = "《" + labelTw + "》的尾聲章節";
                        chAliases.Add("zh-cn", labelCn + " 尾声");
                        chAliases.Add("zh-tw", labelTw + " 尾聲");
                        nId = "E";
                        break;
                    default:
                        // Any other unnumbered chapter keeps its link text; aliases use GetAbbr(text).
                        chLabels["en"] = labelEn + ", " + text;
                        chDescriptions["en"] = "a chapter of " + labelEn;
                        var abbr = GetAbbr(text);
                        foreach (var a in bookItem.Aliases["en"])
                        {
                            chAliases.Add("en", a + "-" + abbr);
                        }
                        chLabels["zh-cn"] = "《" + labelCn + "》" + text;
                        chDescriptions["zh-cn"] = "《" + labelCn + "》的一个章节";
                        chLabels["zh-tw"] = "《" + labelTw + "》" + text;
                        chDescriptions["zh-tw"] = "《" + labelTw + "》的一個章節";
                        chAliases.Add("zh-cn", labelCn + " " + abbr);
                        chAliases.Add("zh-tw", labelTw + " " + abbr);
                        break;
                }
            }
            else
            {
                chLabels["en"] = labelEn + ", Chapter " + n;
                chDescriptions["en"] = "Chapter " + n + " of " + labelEn;
                foreach (var a in bookItem.Aliases["en"])
                {
                    chAliases.Add("en", a + "-" + n);
                }
                var zhOrdinal = Utility.GetOrdinalZh(n.Value);
                chLabels["zh-cn"] = "《" + labelCn + "》第" + zhOrdinal + "章";
                chDescriptions["zh-cn"] = "《" + labelCn + "》的第" + zhOrdinal + "章";
                chLabels["zh-tw"] = "《" + labelTw + "》第" + zhOrdinal + "章";
                chDescriptions["zh-tw"] = "《" + labelTw + "》的第" + zhOrdinal + "章";
                chAliases.Add("zh-cn", labelCn + " " + n);
                chAliases.Add("zh-tw", labelTw + " " + n);
            }

            // Do not create a duplicate when an entity with this English label already exists.
            string existingId = CPRepository.EntityFromLabel(chLabels["en"]);
            if (existingId != null)
            {
                Logger.LogWarning("Entity exists.");
                lastChapterId = existingId;
                // Special case: for "Hollyleaf's Story" only this chapter is skipped and the remaining
                // links are still processed. NOTE(review): the reason is not evident from this method.
                if (labelEn.Contains("Hollyleaf's Story"))
                {
                    continue;
                }
                // For every other book an existing chapter ends processing of the whole book.
                break;
            }

            // Drop Chinese texts that would merely repeat a label of another language
            // (e.g. because the book's label fell back to it above).
            if (labelEn == labelCn)
            {
                chLabels.Remove("zh-cn");
                chAliases.Remove("zh-cn");
                chDescriptions.Remove("zh-cn");
            }
            if (labelEn == labelTw || labelCn == labelTw)
            {
                chLabels.Remove("zh-tw");
                chAliases.Remove("zh-tw");
                chDescriptions.Remove("zh-tw");
            }

            // P50 links the chapter to its book; P53 carries the chapter identifier and
            // P48 the ID of the previously handled chapter, when available.
            var bookClaim = new Claim("P50", book.id, BuiltInDataTypes.WikibaseItem);
            if (nId != null)
            {
                bookClaim.Qualifiers.Add(new Snak("P53", nId, BuiltInDataTypes.String));
            }
            if (lastChapterId != null)
            {
                bookClaim.Qualifiers.Add(new Snak("P48", lastChapterId, BuiltInDataTypes.WikibaseItem));
            }
            var claims = new List<Claim>
            {
                new Claim("P3", "Q109", BuiltInDataTypes.WikibaseItem),
                bookClaim,
            };

            var chEntity = new Entity(Site, EntityType.Item);
            var edits = new List<EntityEditEntry>();
            edits.AddRange(chLabels.Select(l => new EntityEditEntry(nameof(chEntity.Labels), l)));
            edits.AddRange(chAliases.Select(l => new EntityEditEntry(nameof(chEntity.Aliases), l)));
            edits.AddRange(chDescriptions.Select(l => new EntityEditEntry(nameof(chEntity.Descriptions), l)));
            edits.AddRange(claims.Select(c => new EntityEditEntry(nameof(chEntity.Claims), c)));

            // Links carrying the "new" class get no site link.
            if (!node.HasClass("new"))
            {
                // Strip only the leading "/wiki/" path prefix; a title that itself contains
                // "/wiki/" must keep that text intact.
                var href = node.GetAttributeValue("href", "");
                if (href.StartsWith(wikiPathPrefix, System.StringComparison.Ordinal))
                {
                    href = href.Substring(wikiPathPrefix.Length);
                }
                var title = WebUtility.UrlDecode(href);
                edits.Add(new EntityEditEntry(nameof(chEntity.SiteLinks), new EntitySiteLink("enwarriorswiki", title)));
            }

            await chEntity.EditAsync(edits, "Populate chapter.", EntityEditOptions.Bulk | EntityEditOptions.Bot);
            lastChapterId = chEntity.Id;
        }
    }
}