Ejemplo n.º 1
0
        /// <summary>
        /// Lists the cats returned by <see cref="GetCats"/> whose entity ID is not contained in
        /// <paramref name="processed"/>.
        /// </summary>
        /// <param name="processed">Entity IDs to leave out of the result.</param>
        /// <returns>
        /// For each remaining cat: the stripped entity ID, the title reported by the query, and a new
        /// <c>WikiPage</c> on <c>zhWarriorsSite</c> created from that title.
        /// </returns>
        private IEnumerable <(string Id, string Title, WikiPage ZhPage)> GetCatsToProcess(ICollection <string> processed)
        {
            var rows = GetCats();

            // The query runs eagerly above; the projection/filter below is a deferred LINQ pipeline,
            // so the membership test against `processed` happens while the caller enumerates.
            return from row in rows
                   let entityId = CPRepository.StripEntityUri(((IUriNode)row.Value("cat")).Uri)
                   let pageTitle = row.Value("title").AsValuedNode().AsString()
                   where !processed.Contains(entityId)
                   select (entityId, pageTitle, new WikiPage(zhWarriorsSite, pageTitle));
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Runs the SPARQL query that selects <c>?cat</c> and <c>?title</c> for every entity carrying
 /// <c>wdt:P3 wd:Q622</c> that has a link belonging to <c>https://warriors.huijiwiki.com/</c>.
 /// </summary>
 /// <returns>The result set produced by <c>CPRepository.ExecuteQuery</c>.</returns>
 private SparqlResultSet GetCats()
 {
     // NOTE(review): P3/Q622 presumably mean "instance of" / "cat" in this Wikibase — confirm.
     const string catsQuery = @"
             SELECT ?cat ?title {
                 ?cat wdt:P3 wd:Q622.
                 ?link   schema:isPartOf <https://warriors.huijiwiki.com/>;
                         schema:about ?cat;
                         schema:name ?title.
             }";

     return CPRepository.ExecuteQuery(catsQuery);
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Exports every page from a hard-coded title list that has no matching site link in the
        /// repository yet.
        /// </summary>
        /// <remarks>
        /// When <c>ExportEntityAsync</c> throws a <c>WikiClientException</c>, the exception is printed,
        /// the method waits for a key press, refreshes the page and tries the same page again.
        /// </remarks>
        public async Task RunAsync()
        {
            await zhWarriorsSite.Initialization;
            var categoryGenerator = new CategoryMembersGenerator(zhWarriorsSite, "没有图片的猫物")
            {
                PaginationSize = 50,
                MemberTypes    = CategoryMemberTypes.Page,
            };
            var pages           = categoryGenerator.EnumPagesAsync(PageQueryOptions.FetchContent);
            var titleSeparators = new[] { '\r', '\n' };
            var fixedPages      = @"
焦风
微光毛_(黑莓星的风暴)
".Split(titleSeparators, StringSplitOptions.RemoveEmptyEntries).Select(t => new WikiPage(zhWarriorsSite, t)).ToList();
            await fixedPages.RefreshAsync(PageQueryOptions.FetchContent | PageQueryOptions.ResolveRedirects);

            // NOTE: this replaces the category enumeration assigned above, so only the fixed
            // title list is actually processed.
            pages = fixedPages.ToAsyncEnumerable();
            var pageNumber = 0;

            await foreach (var page in pages)
            {
                pageNumber++;

                // Look for an existing site link with this exact title (language tag "zh").
                var lookup = CPRepository.CreateQuery(@"
                    SELECT ?link {
                        ?link   schema:isPartOf <https://warriors.huijiwiki.com/>;
                                schema:name @title.
                    }");
                lookup.SetLiteral("title", page.Title, "zh");
                var alreadyLinked = CPRepository.ExecuteQuery(lookup).Any();
                if (alreadyLinked)
                {
                    Logger.LogWarning("Exists {}", page);
                    continue;
                }
                Logger.LogInformation("[{}] Processing {}", pageNumber, page);

                // Keep retrying the same page until the export completes without a WikiClientException.
                while (true)
                {
                    try
                    {
                        await ExportEntityAsync(page);
                        break;
                    }
                    catch (WikiClientException ex)
                    {
                        Console.WriteLine(ex);
                        Console.ReadKey();
                        await page.RefreshAsync(PageQueryOptions.FetchContent);
                    }
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Walks the page members of category "猫物" on <c>zhWarriorsSite</c> and exports every page
        /// that has no matching site link in the repository yet.
        /// </summary>
        /// <remarks>
        /// When <c>ExportEntityAsync</c> throws a <c>WikiClientException</c>, the exception is printed,
        /// the method waits for a key press, refreshes the page and tries the same page again.
        /// </remarks>
        public async Task RunAsync()
        {
            await zhWarriorsSite.Initialization;
            var categoryGenerator = new CategoryMembersGenerator(zhWarriorsSite, "猫物")
            {
                PaginationSize = 50,
                MemberTypes    = CategoryMemberTypes.Page,
            };
            var pageNumber = 0;

            using (var pageEnumerator = categoryGenerator.EnumPagesAsync(PageQueryOptions.FetchContent).GetEnumerator())
            {
                while (await pageEnumerator.MoveNext())
                {
                    pageNumber++;
                    var page = pageEnumerator.Current;

                    // Look for an existing site link with this exact title (language tag "zh").
                    var lookup = CPRepository.CreateQuery(@"
                    SELECT ?link {
                        ?link   schema:isPartOf <https://warriors.huijiwiki.com/>;
                                schema:name @title.
                    }");
                    lookup.SetLiteral("title", page.Title, "zh");
                    var alreadyLinked = CPRepository.ExecuteQuery(lookup).Any();
                    if (alreadyLinked)
                    {
                        Logger.LogWarning("Exists {}", page);
                        continue;
                    }
                    Logger.LogInformation("[{}] Processing {}", pageNumber, page);

                    // Keep retrying the same page until the export completes without a WikiClientException.
                    while (true)
                    {
                        try
                        {
                            await ExportEntityAsync(page);
                            break;
                        }
                        catch (WikiClientException ex)
                        {
                            Console.WriteLine(ex);
                            Console.ReadKey();
                            await page.RefreshAsync(PageQueryOptions.FetchContent);
                        }
                    }
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Creates one chapter entity for every chapter link found for each book returned by the
        /// repository query.
        /// </summary>
        /// <remarks>
        /// For each book (entities with <c>wdt:P3</c> of <c>wd:Q46</c> or <c>wd:Q116</c> and an English
        /// label) the wikitext <c>{{Chapters/b|label}}</c> is parsed on the wiki at
        /// <c>http://warriors.wikia.com/api.php</c>, and every <c>&lt;a href&gt;</c> element of the
        /// resulting HTML is treated as one chapter. Each new chapter entity receives en / zh-cn / zh-tw
        /// labels, aliases and descriptions, a <c>P3 = Q109</c> claim, and a <c>P50 = book</c> claim
        /// qualified with <c>P53</c> (chapter identifier, when known) and <c>P48</c> (ID of the chapter
        /// handled just before it, when there is one).
        /// </remarks>
        public async Task RunAsync()
        {
            var enWw = new WikiSite(Site.WikiClient, "http://warriors.wikia.com/api.php");
            // Disabled variant of the book query that selected site links instead of labels:
            //            var books = CPRepository.ExecuteQuery(@"
            //SELECT ?book ?link {
            //    { ?book wdt:P3 wd:Q46. } UNION { ?book wdt:P3 wd:Q116. }
            //    ?link   schema:isPartOf <http://warriors.wikia.com/>;
            //            schema:about ?book.
            //}")
            //                .Select(r => (book: (UriNode)r["book"], link: (UriNode)r["link"]))
            //                .ToList();
            var books = CPRepository.ExecuteQuery(@"
SELECT ?book ?label {
{ ?book wdt:P3 wd:Q46. } UNION { ?book wdt:P3 wd:Q116. }
?book rdfs:label ?label. FILTER (lang(?label) = 'en')
}")
                        .Select(r => (id: CPRepository.StripEntityUri(((UriNode)r["book"]).Uri), label: ((LiteralNode)r["label"]).Value));
            await enWw.Initialization;

            foreach (var book in books)
            {
                // ID of the most recently handled chapter of this book; used as the P48 qualifier value.
                string lastChapterId = null;
                var    bookItem      = new Entity(Site, book.id);
                // Label passed to the Chapters/b template; may get " (Book)" appended on retry below.
                var    tlabel        = book.label;
RETRY:
                // Start the template parse, then refresh the book entity before awaiting the parse result.
                var parsingTask = enWw.ParseContentAsync("{{Chapters/b|" + tlabel + "}}", null, null, ParsingOptions.None);
                await bookItem.RefreshAsync(EntityQueryOptions.FetchLabels | EntityQueryOptions.FetchAliases, new[] { "en", "zh-cn", "zh-tw" });

                // Label fallback chain: zh-cn falls back to en, zh-tw falls back to zh-cn.
                var labelEn = bookItem.Labels["en"];
                var labelCn = bookItem.Labels["zh-cn"] ?? labelEn;
                var labelTw = bookItem.Labels["zh-tw"] ?? labelCn;
                Logger.LogInformation("{}, {}, {}", labelEn, labelCn, labelTw);
                var doc = new HtmlDocument();
                doc.LoadHtml((await parsingTask).Content);
                var nodes = doc.DocumentNode.SelectNodes("//a[@href]");
                if (nodes == null)
                {
                    // No links in the parsed output: retry once with a " (Book)" suffix, unless the
                    // label already contains a parenthesis.
                    if (!tlabel.Contains('('))
                    {
                        tlabel += " (Book)";
                        goto RETRY;
                    }
                    Logger.LogError("No chapter information found.");
                    continue;
                }
                foreach (var node in nodes)
                {
                    var chLabels       = new WbMonolingualTextCollection();
                    var chAliases      = new WbMonolingualTextsCollection();
                    var chDescriptions = new WbMonolingualTextCollection();
                    var text           = node.InnerText.Trim();
                    // n is the chapter number matched from the link text (null when none matched);
                    // nId is the string stored in the P53 qualifier.
                    var n   = TryMatchChapterNumber(text);
                    var nId = n?.ToString();
                    if (n == null)
                    {
                        // Non-numbered chapters: prologue, epilogue, or any other link text.
                        switch (text.ToLowerInvariant())
                        {
                        case "prologue":
                            chLabels["en"]       = labelEn + ", Prologue";
                            chDescriptions["en"] = "prologue chapter of " + labelEn;
                            foreach (var a in bookItem.Aliases["en"])
                            {
                                chAliases.Add("en", a + "-0");
                            }
                            chLabels["zh-cn"]       = "《" + labelCn + "》引子";
                            chDescriptions["zh-cn"] = "《" + labelCn + "》的引子章节";
                            chLabels["zh-tw"]       = "《" + labelTw + "》序章";
                            chDescriptions["zh-tw"] = "《" + labelTw + "》的序章";
                            chAliases.Add("zh-cn", labelCn + " 引子");
                            chAliases.Add("zh-tw", labelTw + " 序章");
                            chAliases.Add("zh-cn", labelCn + " 0");
                            chAliases.Add("zh-tw", labelTw + " 0");
                            // The prologue is identified as chapter "0".
                            nId = "0";
                            break;

                        case "epilogue":
                            chLabels["en"]       = labelEn + ", Epilogue";
                            chDescriptions["en"] = "epilogue chapter of " + labelEn;
                            foreach (var a in bookItem.Aliases["en"])
                            {
                                chAliases.Add("en", a + "-E");
                            }
                            chLabels["zh-cn"]       = "《" + labelCn + "》尾声";
                            chDescriptions["zh-cn"] = "《" + labelCn + "》的尾声章节";
                            chLabels["zh-tw"]       = "《" + labelTw + "》尾聲";
                            chDescriptions["zh-tw"] = "《" + labelTw + "》的尾聲章節";
                            chAliases.Add("zh-cn", labelCn + " 尾声");
                            chAliases.Add("zh-tw", labelTw + " 尾聲");
                            // The epilogue is identified as chapter "E".
                            nId = "E";
                            break;

                        default:
                            // Any other link text is used verbatim in the labels; nId stays null,
                            // so no P53 qualifier is written for it.
                            chLabels["en"]       = labelEn + ", " + text;
                            chDescriptions["en"] = "a chapter of " + labelEn;
                            var abbr = GetAbbr(text);
                            foreach (var a in bookItem.Aliases["en"])
                            {
                                chAliases.Add("en", a + "-" + abbr);
                            }
                            chLabels["zh-cn"]       = "《" + labelCn + "》" + text;
                            chDescriptions["zh-cn"] = "《" + labelCn + "》的一个章节";
                            chLabels["zh-tw"]       = "《" + labelTw + "》" + text;
                            chDescriptions["zh-tw"] = "《" + labelTw + "》的一個章節";
                            chAliases.Add("zh-cn", labelCn + " " + abbr);
                            chAliases.Add("zh-tw", labelTw + " " + abbr);
                            break;
                        }
                    }
                    else
                    {
                        // Numbered chapter.
                        chLabels["en"]       = labelEn + ", Chapter " + n;
                        chDescriptions["en"] = "Chapter " + n + " of " + labelEn;
                        foreach (var a in bookItem.Aliases["en"])
                        {
                            chAliases.Add("en", a + "-" + n);
                        }
                        var zhOrdinal = Utility.GetOrdinalZh(n.Value);
                        chLabels["zh-cn"]       = "《" + labelCn + "》第" + zhOrdinal + "章";
                        chDescriptions["zh-cn"] = "《" + labelCn + "》的第" + zhOrdinal + "章";
                        chLabels["zh-tw"]       = "《" + labelTw + "》第" + zhOrdinal + "章";
                        chDescriptions["zh-tw"] = "《" + labelTw + "》的第" + zhOrdinal + "章";
                        chAliases.Add("zh-cn", labelCn + " " + n);
                        chAliases.Add("zh-tw", labelTw + " " + n);
                    }
                    // If an entity with this English label already exists, remember it as the previous
                    // chapter and stop processing the remaining links of this book. Books whose English
                    // label contains "Hollyleaf's Story" are the exception: they move on to the next link.
                    string cid = null;
                    if ((cid = CPRepository.EntityFromLabel(chLabels["en"])) != null)
                    {
                        Logger.LogWarning("Entity exists.");
                        lastChapterId = cid;
                        if (labelEn.Contains("Hollyleaf's Story"))
                        {
                            continue;
                        }
                        break;
                    }
                    // Drop the Chinese texts that were built from a fallback label
                    // (i.e. the book label is identical to the language it fell back to).
                    if (labelEn == labelCn)
                    {
                        chLabels.Remove("zh-cn");
                        chAliases.Remove("zh-cn");
                        chDescriptions.Remove("zh-cn");
                    }
                    if (labelEn == labelTw || labelCn == labelTw)
                    {
                        chLabels.Remove("zh-tw");
                        chAliases.Remove("zh-tw");
                        chDescriptions.Remove("zh-tw");
                    }
                    //foreach (var l in chLabels) Console.WriteLine(l);
                    //foreach (var l in chAliases) Console.WriteLine(l);
                    //foreach (var l in chDescriptions) Console.WriteLine(l);
                    var claims = new List <Claim>
                    {
                        new Claim("P3", "Q109", BuiltInDataTypes.WikibaseItem),
                    };
                    {
                        // P50 links the chapter to its book; P53 carries the chapter identifier and
                        // P48 the previously handled chapter, each only when available.
                        var c = new Claim("P50", book.id, BuiltInDataTypes.WikibaseItem);
                        if (nId != null)
                        {
                            c.Qualifiers.Add(new Snak("P53", nId, BuiltInDataTypes.String));
                        }
                        if (lastChapterId != null)
                        {
                            c.Qualifiers.Add(new Snak("P48", lastChapterId, BuiltInDataTypes.WikibaseItem));
                        }
                        claims.Add(c);
                    }
                    // Collect all labels, aliases, descriptions and claims into one edit on a new item.
                    var chEntity = new Entity(Site, EntityType.Item);
                    var edits    = new List <EntityEditEntry>();
                    edits.AddRange(chLabels.Select(l => new EntityEditEntry(nameof(chEntity.Labels), l)));
                    edits.AddRange(chAliases.Select(l => new EntityEditEntry(nameof(chEntity.Aliases), l)));
                    edits.AddRange(chDescriptions.Select(l => new EntityEditEntry(nameof(chEntity.Descriptions), l)));
                    edits.AddRange(claims.Select(c => new EntityEditEntry(nameof(chEntity.Claims), c)));
                    // Links with CSS class "new" get no site link.
                    // NOTE(review): presumably "new" marks a link to a non-existent page — confirm.
                    if (!node.HasClass("new"))
                    {
                        var title = WebUtility.UrlDecode(node.GetAttributeValue("href", "").Replace("/wiki/", ""));
                        edits.Add(new EntityEditEntry(nameof(chEntity.SiteLinks), new EntitySiteLink("enwarriorswiki", title)));
                    }
                    await chEntity.EditAsync(edits, "Populate chapter.", EntityEditOptions.Bulk | EntityEditOptions.Bot);

                    lastChapterId = chEntity.Id;
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Reads the family and mentor links from the <c>{{Infobox cat}}</c> template of each
        /// unprocessed cat's page on <c>zhWarriorsSite</c> and writes them to the cat's entity as claims.
        /// </summary>
        /// <remarks>
        /// Cats are handled in batches of 50 pages. An entity ID is added to the processed set once its
        /// edit completes (or its page has no infobox); the set is written out after every cat. When a
        /// linked page has no entity, the title is recorded through <c>WriteMissingEntity</c>, the cat is
        /// skipped without being marked processed, and a warning is logged.
        /// </remarks>
        public async Task PopulateRelationsAsync()
        {
            var processedEntities = GetProcessedEntities();
            await zhWarriorsSite.Initialization;
            var counter = 0;

            foreach (var catg in GetCatsToProcess(processedEntities).Buffer(50))
            {
                // Fetch the content of the whole batch in one go.
                await catg.Select(t => t.ZhPage).RefreshAsync(PageQueryOptions.FetchContent);

                foreach (var(id, title, page) in catg)
                {
                    counter++;
                    Logger.LogInformation("[{}] Processing {} -> {}", counter, title, id);
                    try
                    {
                        await EditEntityAsync(new Entity(Site, id), page);

                        processedEntities.Add(id);
                    }
                    catch (KeyNotFoundException)
                    {
                        Logger.LogWarning("Missing entity.");
                    }
                    WriteProcessedEntities(processedEntities);
                }
            }

            // Builds the relation claims for one cat and submits them in a single edit.
            async Task EditEntityAsync(Entity entity, WikiPage page)
            {
                var root    = parser.Parse(page.Content);
                var infobox = root.EnumDescendants().TemplatesWithTitle("Infobox cat").FirstOrDefault();

                if (infobox == null)
                {
                    Logger.LogError("No {{Infobox cat}} found.");
                    return;
                }

                // Plain-text targets of all wikilinks inside the given infobox argument;
                // empty (never null) when the argument is absent.
                IEnumerable <string> LinkTargets(string argumentName)
                {
                    var argumentValue = infobox.Arguments[argumentName]?.Value;

                    return argumentValue == null
                           ? Enumerable.Empty <string>()
                           : argumentValue.EnumDescendants().OfType <WikiLink>().Select(l => l.Target.ToPlainText());
                }

                // Maps a linked page title to its entity ID; records the title and aborts this cat
                // when there is no such entity.
                string ResolveEntity(string linkedTitle)
                {
                    var entityId = CPRepository.EntityFromZhSiteLink(linkedTitle);

                    if (entityId == null)
                    {
                        WriteMissingEntity(linkedTitle);
                        throw new KeyNotFoundException($"No entity found for linked page \"{linkedTitle}\".");
                    }
                    return entityId;
                }

                var father = LinkTargets("father").FirstOrDefault();
                var mother = LinkTargets("mother").FirstOrDefault();
                var mates  = LinkTargets("mate").ToList();
                // Each foster argument is read independently, so a foster mother is still picked up
                // when no foster father is given.
                var fosters = LinkTargets("foster_father").Concat(LinkTargets("foster_mother")).ToList();
                var mentors = LinkTargets("mentor").ToList();

                // The lists are never null here, so string.Join cannot throw for absent arguments.
                Console.WriteLine(father);
                Console.WriteLine(mother);
                Console.WriteLine(string.Join(";", mates));
                Console.WriteLine(string.Join(";", fosters));
                Console.WriteLine(string.Join(";", mentors));

                var claims = new List <Claim>();

                if (father != null)
                {
                    claims.Add(new Claim("P88", ResolveEntity(father), BuiltInDataTypes.WikibaseItem));
                }
                if (mother != null)
                {
                    claims.Add(new Claim("P89", ResolveEntity(mother), BuiltInDataTypes.WikibaseItem));
                }
                foreach (var foster in fosters)
                {
                    claims.Add(new Claim("P99", ResolveEntity(foster), BuiltInDataTypes.WikibaseItem));
                }

                // Mates carry a 1-based P53 qualifier reflecting their order in the infobox.
                var mateIndex = 1;
                foreach (var mate in mates)
                {
                    claims.Add(new Claim("P100", ResolveEntity(mate), BuiltInDataTypes.WikibaseItem)
                    {
                        Qualifiers = { new Snak("P53", mateIndex.ToString(), BuiltInDataTypes.String) }
                    });
                    mateIndex++;
                }
                foreach (var mentor in mentors)
                {
                    claims.Add(new Claim("P86", ResolveEntity(mentor), BuiltInDataTypes.WikibaseItem));
                }

                if (claims.Any())
                {
                    await entity.EditAsync(claims.Select(c => new EntityEditEntry(nameof(entity.Claims), c)),
                                           "Populate relations from zhwarriorswiki.", EntityEditOptions.Bot);
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Reads the <c>past_affiliation</c> and <c>current_affiliation</c> arguments from the
        /// <c>{{Infobox cat}}</c> template of each unprocessed cat's page on <c>zhWarriorsSite</c> and
        /// writes them to the cat's entity as <c>P76</c> claims.
        /// </summary>
        /// <remarks>
        /// Cats are handled in batches of 50 pages. An entity ID is added to the processed set once its
        /// edit completes (or its page has no infobox); the set is written out after every cat.
        /// An affiliation that cannot be resolved to an entity is stored as a "some value" snak.
        /// </remarks>
        public async Task PopulateAffiliationsAsync()
        {
            var processedEntities = GetProcessedEntities();
            await zhWarriorsSite.Initialization;
            var counter = 0;

            foreach (var catg in GetCatsToProcess(processedEntities).Buffer(50))
            {
                // Fetch the content of the whole batch in one go.
                await catg.Select(t => t.ZhPage).RefreshAsync(PageQueryOptions.FetchContent);

                foreach (var(id, title, page) in catg)
                {
                    counter++;
                    Logger.LogInformation("[{}] Processing {} -> {}", counter, title, id);
                    try
                    {
                        await EditEntityAsync(new Entity(Site, id), page);

                        processedEntities.Add(id);
                    }
                    catch (KeyNotFoundException)
                    {
                        Logger.LogWarning("Missing entity.");
                    }
                    WriteProcessedEntities(processedEntities);
                }
            }

            // Builds the affiliation claims for one cat and submits them in a single edit.
            async Task EditEntityAsync(Entity entity, WikiPage page)
            {
                var root    = parser.Parse(page.Content);
                var infobox = root.EnumDescendants().TemplatesWithTitle("Infobox cat").FirstOrDefault();

                if (infobox == null)
                {
                    Logger.LogError("No {{Infobox cat}} found.");
                    return;
                }
                var pastAff = await ExtractAffiliationsEx(infobox.Arguments["past_affiliation"]?.Value);

                var curAff = await ExtractAffiliationsEx(infobox.Arguments["current_affiliation"]?.Value);

                var claims = new List <Claim>();

                foreach (var(AffId, PosId) in pastAff.Concat(curAff))
                {
                    Claim c;
                    if (AffId != null)
                    {
                        c = new Claim("P76", AffId, BuiltInDataTypes.WikibaseItem);
                    }
                    else
                    {
                        // Unresolved affiliation: keep the statement, but without a concrete value.
                        c = new Claim(new Snak("P76", SnakType.SomeValue));
                    }
                    if (PosId != null)
                    {
                        c.Qualifiers.Add(new Snak("P92", PosId, BuiltInDataTypes.WikibaseItem));
                    }
                    // AffId may be null (handled above), so guard the label lookup the same way
                    // as the one for PosId.
                    Logger.LogInformation("Affiliation: {}, Pos: {}",
                                          AffId == null ? null : CPRepository.LabelFromEntity(AffId, "en"),
                                          PosId == null ? null : CPRepository.LabelFromEntity(PosId, "en"));
                    claims.Add(c);
                }
                if (claims.Any())
                {
                    await entity.EditAsync(claims.Select(claim => new EntityEditEntry(nameof(entity.Claims), claim)),
                                           "Populate affiliations from zhwarriorswiki.", EntityEditOptions.Bot);
                }

                // Turns one infobox argument into (affiliation entity ID, position entity ID) pairs;
                // returns an empty list when the argument is absent or yields no affiliations.
                async Task <IList <(string AffId, string PosId)> > ExtractAffiliationsEx(Node afNode)
                {
                    var rawAffiliations = afNode == null ? null : ExtractAffiliations(afNode);

                    if (rawAffiliations == null || rawAffiliations.Count == 0)
                    {
                        return(new List <(string AffId, string PosId)>());
                    }

                    // Resolves a single raw affiliation. The position is looked up first by
                    // "book-location" (only for locations ending in "章"), then by the book alone.
                    async Task <(string, string)> SubTask(string aff, string book, string location)
                    {
                        var affid = CPRepository.EntityFromZhSiteLink(aff) ?? CPRepository.EntityFromLabel(aff);

                        // NOTE(review): book is not null-checked here, so a location without a book
                        // searches for "-<location>" — confirm whether that case can occur.
                        if (location != null && location.EndsWith("章"))
                        {
                            var pos = (await Site.SearchItemsAsync(book + "-" + location)).FirstOrDefault();
                            if (pos != null)
                            {
                                return(affid, pos);
                            }
                        }
                        if (book != null)
                        {
                            // Cache the per-book search result (including a miss) to avoid repeated searches.
                            if (!bookLocationCacheDict.TryGetValue(book, out var pos))
                            {
                                pos = (await Site.SearchItemsAsync(book)).FirstOrDefault();
                                bookLocationCacheDict.TryAdd(book, pos);
                            }
                            if (pos != null)
                            {
                                return(affid, pos);
                            }
                        }
                        return(affid, null);
                    }

                    // Resolve all affiliations of this argument concurrently.
                    var processed = await Task.WhenAll(rawAffiliations.Select(aff => SubTask(aff.Name, aff.Book, aff.Location)));

                    return(processed);
                }
            }
        }