Example #1
0
        /// <summary>
        /// Creates one Wikibase item for every chapter of every book returned by the SPARQL query below.
        /// </summary>
        /// <remarks>
        /// For each book the chapter list is obtained by expanding <c>{{Chapters/b|LABEL}}</c> on
        /// warriors.wikia.com and collecting every <c>&lt;a href&gt;</c> in the rendered HTML. When the
        /// expansion yields no links and the label has no parenthesis yet, the expansion is retried once
        /// with <c>" (Book)"</c> appended to the label. The book entity itself is refreshed only once;
        /// the retry repeats nothing but the template expansion.
        /// Each new chapter item receives en / zh-cn / zh-tw labels, aliases and descriptions, a
        /// <c>P3 = Q109</c> claim, and a <c>P50 = book</c> claim qualified with <c>P53</c> (chapter
        /// number id, when known) and <c>P48</c> (id of the chapter handled immediately before, when any).
        /// </remarks>
        /// <returns>A task that completes when all books have been processed.</returns>
        public async Task RunAsync()
        {
            var enWw = new WikiSite(Site.WikiClient, "http://warriors.wikia.com/api.php");
            // Earlier variant of the query that selected the book's warriors.wikia.com site link
            // instead of its English label; kept for reference.
            //            var books = CPRepository.ExecuteQuery(@"
            //SELECT ?book ?link {
            //    { ?book wdt:P3 wd:Q46. } UNION { ?book wdt:P3 wd:Q116. }
            //    ?link   schema:isPartOf <http://warriors.wikia.com/>;
            //            schema:about ?book.
            //}")
            //                .Select(r => (book: (UriNode)r["book"], link: (UriNode)r["link"]))
            //                .ToList();
            var books = CPRepository.ExecuteQuery(@"
SELECT ?book ?label {
{ ?book wdt:P3 wd:Q46. } UNION { ?book wdt:P3 wd:Q116. }
?book rdfs:label ?label. FILTER (lang(?label) = 'en')
}")
                        .Select(r => (id: CPRepository.StripEntityUri(((UriNode)r["book"]).Uri), label: ((LiteralNode)r["label"]).Value));
            await enWw.Initialization;

            // Wikitext whose expansion lists the chapters of the page named pageLabel.
            string ChapterListWikitext(string pageLabel) => "{{Chapters/b|" + pageLabel + "}}";

            foreach (var book in books)
            {
                string lastChapterId = null;
                var    bookItem      = new Entity(Site, book.id);
                var    tlabel        = book.label;

                // Start the template expansion first so that it overlaps with the entity refresh.
                var parsingTask = enWw.ParseContentAsync(ChapterListWikitext(tlabel), null, null, ParsingOptions.None);
                await bookItem.RefreshAsync(EntityQueryOptions.FetchLabels | EntityQueryOptions.FetchAliases, new[] { "en", "zh-cn", "zh-tw" });

                // Missing Chinese labels fall back to the next-best label (zh-cn -> en, zh-tw -> zh-cn).
                var labelEn = bookItem.Labels["en"];
                var labelCn = bookItem.Labels["zh-cn"] ?? labelEn;
                var labelTw = bookItem.Labels["zh-tw"] ?? labelCn;
                Logger.LogInformation("{}, {}, {}", labelEn, labelCn, labelTw);

                var doc = new HtmlDocument();
                doc.LoadHtml((await parsingTask).Content);
                var nodes = doc.DocumentNode.SelectNodes("//a[@href]");
                if (nodes == null && !tlabel.Contains('('))
                {
                    // No links found: retry exactly once with the " (Book)" suffix.
                    // Only the expansion is repeated; the entity data fetched above is still valid.
                    tlabel += " (Book)";
                    var retried = await enWw.ParseContentAsync(ChapterListWikitext(tlabel), null, null, ParsingOptions.None);
                    doc = new HtmlDocument();
                    doc.LoadHtml(retried.Content);
                    nodes = doc.DocumentNode.SelectNodes("//a[@href]");
                }
                if (nodes == null)
                {
                    Logger.LogError("No chapter information found.");
                    continue;
                }
                foreach (var node in nodes)
                {
                    var chLabels       = new WbMonolingualTextCollection();
                    var chAliases      = new WbMonolingualTextsCollection();
                    var chDescriptions = new WbMonolingualTextCollection();
                    var text           = node.InnerText.Trim();
                    // n is null when the link text is not a numbered chapter.
                    var n   = TryMatchChapterNumber(text);
                    var nId = n?.ToString();
                    if (n == null)
                    {
                        switch (text.ToLowerInvariant())
                        {
                        case "prologue":
                            chLabels["en"]       = labelEn + ", Prologue";
                            chDescriptions["en"] = "prologue chapter of " + labelEn;
                            foreach (var a in bookItem.Aliases["en"])
                            {
                                chAliases.Add("en", a + "-0");
                            }
                            chLabels["zh-cn"]       = "《" + labelCn + "》引子";
                            chDescriptions["zh-cn"] = "《" + labelCn + "》的引子章节";
                            chLabels["zh-tw"]       = "《" + labelTw + "》序章";
                            chDescriptions["zh-tw"] = "《" + labelTw + "》的序章";
                            chAliases.Add("zh-cn", labelCn + " 引子");
                            chAliases.Add("zh-tw", labelTw + " 序章");
                            chAliases.Add("zh-cn", labelCn + " 0");
                            chAliases.Add("zh-tw", labelTw + " 0");
                            nId = "0";
                            break;

                        case "epilogue":
                            chLabels["en"]       = labelEn + ", Epilogue";
                            chDescriptions["en"] = "epilogue chapter of " + labelEn;
                            foreach (var a in bookItem.Aliases["en"])
                            {
                                chAliases.Add("en", a + "-E");
                            }
                            chLabels["zh-cn"]       = "《" + labelCn + "》尾声";
                            chDescriptions["zh-cn"] = "《" + labelCn + "》的尾声章节";
                            chLabels["zh-tw"]       = "《" + labelTw + "》尾聲";
                            chDescriptions["zh-tw"] = "《" + labelTw + "》的尾聲章節";
                            chAliases.Add("zh-cn", labelCn + " 尾声");
                            chAliases.Add("zh-tw", labelTw + " 尾聲");
                            nId = "E";
                            break;

                        default:
                            // Any other named chapter: the link text is used verbatim and nId stays null,
                            // so no P53 qualifier is emitted for it.
                            chLabels["en"]       = labelEn + ", " + text;
                            chDescriptions["en"] = "a chapter of " + labelEn;
                            var abbr = GetAbbr(text);
                            foreach (var a in bookItem.Aliases["en"])
                            {
                                chAliases.Add("en", a + "-" + abbr);
                            }
                            chLabels["zh-cn"]       = "《" + labelCn + "》" + text;
                            chDescriptions["zh-cn"] = "《" + labelCn + "》的一个章节";
                            chLabels["zh-tw"]       = "《" + labelTw + "》" + text;
                            chDescriptions["zh-tw"] = "《" + labelTw + "》的一個章節";
                            chAliases.Add("zh-cn", labelCn + " " + abbr);
                            chAliases.Add("zh-tw", labelTw + " " + abbr);
                            break;
                        }
                    }
                    else
                    {
                        chLabels["en"]       = labelEn + ", Chapter " + n;
                        chDescriptions["en"] = "Chapter " + n + " of " + labelEn;
                        foreach (var a in bookItem.Aliases["en"])
                        {
                            chAliases.Add("en", a + "-" + n);
                        }
                        var zhOrdinal = Utility.GetOrdinalZh(n.Value);
                        chLabels["zh-cn"]       = "《" + labelCn + "》第" + zhOrdinal + "章";
                        chDescriptions["zh-cn"] = "《" + labelCn + "》的第" + zhOrdinal + "章";
                        chLabels["zh-tw"]       = "《" + labelTw + "》第" + zhOrdinal + "章";
                        chDescriptions["zh-tw"] = "《" + labelTw + "》的第" + zhOrdinal + "章";
                        chAliases.Add("zh-cn", labelCn + " " + n);
                        chAliases.Add("zh-tw", labelTw + " " + n);
                    }

                    // Skip chapters that already have an entity with the same English label.
                    string cid = null;
                    if ((cid = CPRepository.EntityFromLabel(chLabels["en"])) != null)
                    {
                        Logger.LogWarning("Entity exists.");
                        lastChapterId = cid;
                        // Special case: for "Hollyleaf's Story" keep scanning the remaining chapters;
                        // for every other book an existing chapter ends processing of the whole book.
                        if (labelEn.Contains("Hollyleaf's Story"))
                        {
                            continue;
                        }
                        break;
                    }

                    // Drop Chinese texts that were built from a fallback label (no real translation).
                    if (labelEn == labelCn)
                    {
                        chLabels.Remove("zh-cn");
                        chAliases.Remove("zh-cn");
                        chDescriptions.Remove("zh-cn");
                    }
                    if (labelEn == labelTw || labelCn == labelTw)
                    {
                        chLabels.Remove("zh-tw");
                        chAliases.Remove("zh-tw");
                        chDescriptions.Remove("zh-tw");
                    }

                    var claims = new List<Claim>
                    {
                        new Claim("P3", "Q109", BuiltInDataTypes.WikibaseItem),
                    };
                    {
                        // P50 links the chapter to its book; qualifiers carry the chapter number id (P53)
                        // and the previously handled chapter (P48).
                        var c = new Claim("P50", book.id, BuiltInDataTypes.WikibaseItem);
                        if (nId != null)
                        {
                            c.Qualifiers.Add(new Snak("P53", nId, BuiltInDataTypes.String));
                        }
                        if (lastChapterId != null)
                        {
                            c.Qualifiers.Add(new Snak("P48", lastChapterId, BuiltInDataTypes.WikibaseItem));
                        }
                        claims.Add(c);
                    }

                    var chEntity = new Entity(Site, EntityType.Item);
                    var edits    = new List<EntityEditEntry>();
                    edits.AddRange(chLabels.Select(l => new EntityEditEntry(nameof(chEntity.Labels), l)));
                    edits.AddRange(chAliases.Select(l => new EntityEditEntry(nameof(chEntity.Aliases), l)));
                    edits.AddRange(chDescriptions.Select(l => new EntityEditEntry(nameof(chEntity.Descriptions), l)));
                    edits.AddRange(claims.Select(c => new EntityEditEntry(nameof(chEntity.Claims), c)));
                    // Links carrying the "new" CSS class get no site link
                    // (presumably red links to pages that do not exist yet - confirm).
                    if (!node.HasClass("new"))
                    {
                        var title = WebUtility.UrlDecode(node.GetAttributeValue("href", "").Replace("/wiki/", ""));
                        edits.Add(new EntityEditEntry(nameof(chEntity.SiteLinks), new EntitySiteLink("enwarriorswiki", title)));
                    }
                    await chEntity.EditAsync(edits, "Populate chapter.", EntityEditOptions.Bulk | EntityEditOptions.Bot);

                    // The chapter just created is the "previous chapter" of the next one.
                    lastChapterId = chEntity.Id;
                }
            }
        }
Example #2
0
        /// <summary>
        /// Adds affiliation claims (<c>P76</c>) to the entities yielded by <c>GetCatsToProcess</c>, based on
        /// the <c>Infobox cat</c> template found in each entity's zhwarriorswiki page.
        /// </summary>
        /// <remarks>
        /// Pages are refreshed in batches of 50. An entity id is added to the processed set once
        /// <c>EditEntityAsync</c> returns without a <see cref="KeyNotFoundException"/>, and the set is
        /// written out after every item, whether or not that item succeeded.
        /// </remarks>
        /// <returns>A task that completes when all batches have been processed.</returns>
        public async Task PopulateAffiliationsAsync()
        {
            var processedEntities = GetProcessedEntities();
            await zhWarriorsSite.Initialization;
            var counter = 0;

            foreach (var catg in GetCatsToProcess(processedEntities).Buffer(50))
            {
                // Fetch the content of the whole batch up front.
                await catg.Select(t => t.ZhPage).RefreshAsync(PageQueryOptions.FetchContent);

                foreach (var(id, title, page) in catg)
                {
                    counter++;
                    Logger.LogInformation("[{}] Processing {} -> {}", counter, title, id);
                    try
                    {
                        await EditEntityAsync(new Entity(Site, id), page);

                        processedEntities.Add(id);
                    }
                    catch (KeyNotFoundException)
                    {
                        Logger.LogWarning("Missing entity.");
                    }
                    WriteProcessedEntities(processedEntities);
                }

                // Extracts past and current affiliations from the page's infobox and writes them
                // to the entity as P76 claims.
                async Task EditEntityAsync(Entity entity, WikiPage page)
                {
                    var root    = parser.Parse(page.Content);
                    var infobox = root.EnumDescendants().TemplatesWithTitle("Infobox cat").FirstOrDefault();

                    if (infobox == null)
                    {
                        Logger.LogError("No {{Infobox cat}} found.");
                        return;
                    }
                    var pastAff = await ExtractAffiliationsEx(infobox.Arguments["past_affiliation"]?.Value);

                    var curAff = await ExtractAffiliationsEx(infobox.Arguments["current_affiliation"]?.Value);

                    var claims = new List<Claim>();

                    foreach (var(AffId, PosId) in pastAff.Concat(curAff))
                    {
                        Claim c;
                        if (AffId != null)
                        {
                            c = new Claim("P76", AffId, BuiltInDataTypes.WikibaseItem);
                        }
                        else
                        {
                            // Affiliation could not be resolved to an entity: record "some value".
                            c = new Claim(new Snak("P76", SnakType.SomeValue));
                        }
                        if (PosId != null)
                        {
                            c.Qualifiers.Add(new Snak("P92", PosId, BuiltInDataTypes.WikibaseItem));
                        }
                        // AffId may legitimately be null here (see the SomeValue branch above), so the
                        // label lookup is guarded the same way as the one for PosId.
                        Logger.LogInformation("Affiliation: {}, Pos: {}",
                                              AffId == null ? null : CPRepository.LabelFromEntity(AffId, "en"),
                                              PosId == null ? null : CPRepository.LabelFromEntity(PosId, "en"));
                        claims.Add(c);
                    }
                    if (claims.Any())
                    {
                        await entity.EditAsync(claims.Select(c => new EntityEditEntry(nameof(entity.Claims), c)),
                                               "Populate affiliations from zhwarriorswiki.", EntityEditOptions.Bot);
                    }

                    // Resolves every raw affiliation in afNode to an (affiliation entity id, position
                    // entity id) pair; either element may be null when it cannot be resolved.
                    async Task<IList<(string AffId, string PosId)>> ExtractAffiliationsEx(Node afNode)
                    {
                        var rawAffiliations = afNode == null ? null : ExtractAffiliations(afNode);

                        if (rawAffiliations == null || rawAffiliations.Count == 0)
                        {
                            return new List<(string AffId, string PosId)>();
                        }

                        // Resolves one raw affiliation; the lookups of different affiliations run concurrently.
                        async Task<(string, string)> SubTask(string aff, string book, string location)
                        {
                            var affid = CPRepository.EntityFromZhSiteLink(aff) ?? CPRepository.EntityFromLabel(aff);

                            // A location ending in "章" is first searched as "<book>-<location>".
                            // NOTE(review): book is not null-checked here, so a null book searches for
                            // "-<location>" - confirm whether that is intended.
                            if (location != null && location.EndsWith("章"))
                            {
                                var chapterPos = (await Site.SearchItemsAsync(book + "-" + location)).FirstOrDefault();
                                if (chapterPos != null)
                                {
                                    return (affid, chapterPos);
                                }
                            }
                            // Otherwise fall back to the book itself; search results (including misses)
                            // are cached per book name.
                            if (book != null)
                            {
                                if (!bookLocationCacheDict.TryGetValue(book, out var bookPos))
                                {
                                    bookPos = (await Site.SearchItemsAsync(book)).FirstOrDefault();
                                    bookLocationCacheDict.TryAdd(book, bookPos);
                                }
                                if (bookPos != null)
                                {
                                    return (affid, bookPos);
                                }
                            }
                            return (affid, null);
                        }

                        var processed = await Task.WhenAll(rawAffiliations.Select(aff => SubTask(aff.Name, aff.Book, aff.Location)));

                        return processed;
                    }
                }
            }
        }