/// <summary>
/// Builds an observable sequence of the members of a category, leaving out
/// every page whose <c>IsSpecialPage</c> flag is set.
/// </summary>
/// <param name="site">The site the category page is created on.</param>
/// <param name="categoryTitle">The title used to construct the category page.</param>
/// <returns>The filtered category members, converted with <c>ToObservable()</c>.</returns>
public static IObservable<WikiPage> CreateCategoryDownloader(WikiSite site, string categoryTitle)
{
    var categoryPage = new WikiPage(site, categoryTitle);
    var generator = new CategoryMembersGenerator(categoryPage);

    // The lambda parameter deliberately has a different name from the local
    // above: reusing the same name is error CS0136 on compilers older than
    // C# 8 and is confusing to read on newer ones. The element index is not
    // needed, so the single-argument predicate is used.
    return generator.EnumPagesAsync()
        .Where(member => !member.IsSpecialPage)
        .ToObservable();
}
/// <summary>
/// Walks a sequence of wiki pages and exports every page for which the
/// repository query on its title returns no result.
/// </summary>
/// <remarks>
/// A category enumeration is created first, but it is then replaced by a
/// hard-coded list of pages, so only that list is actually processed.
/// When an export throws <c>WikiClientException</c>, the exception is printed,
/// the method waits for a key press, refreshes the page and tries again.
/// </remarks>
public async Task RunAsync()
{
    await zhWarriorsSite.Initialization;

    var categoryGenerator = new CategoryMembersGenerator(zhWarriorsSite, "没有图片的猫物")
    {
        PaginationSize = 50,
        MemberTypes = CategoryMemberTypes.Page,
    };
    var pageSource = categoryGenerator.EnumPagesAsync(PageQueryOptions.FetchContent);

    // NOTE(review): the literal is split on CR/LF only; confirm the titles are
    // meant to sit on separate lines inside the verbatim string.
    var fixedTitles = @" 焦风 微光毛_(黑莓星的风暴) ".Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    var fixedPages = fixedTitles.Select(title => new WikiPage(zhWarriorsSite, title)).ToList();
    await fixedPages.RefreshAsync(PageQueryOptions.FetchContent | PageQueryOptions.ResolveRedirects);

    // Override the category enumeration with the fixed list.
    pageSource = fixedPages.ToAsyncEnumerable();

    var processed = 0;
    await foreach (var page in pageSource)
    {
        processed++;

        var existsQuery = CPRepository.CreateQuery(@" SELECT ?link { ?link schema:isPartOf <https://warriors.huijiwiki.com/>; schema:name @title. }");
        existsQuery.SetLiteral("title", page.Title, "zh");
        var alreadyPresent = CPRepository.ExecuteQuery(existsQuery).Any();
        if (alreadyPresent)
        {
            Logger.LogWarning("Exists {}", page);
            continue;
        }

        Logger.LogInformation("[{}] Processing {}", processed, page);

        // Retry until the export completes without a WikiClientException.
        while (true)
        {
            try
            {
                await ExportEntityAsync(page);
                break;
            }
            catch (WikiClientException ex)
            {
                Console.WriteLine(ex);
                Console.ReadKey();
                await page.RefreshAsync(PageQueryOptions.FetchContent);
            }
        }
    }
}
/// <summary>
/// Enumerates the members of "Category:BlogListingPage" on the Wikia test site
/// and checks that the titles are distinct and that the number of enumerated
/// pages equals the category's <c>MembersCount</c>.
/// </summary>
public async Task WikiaCategoryMembersGeneratorTest()
{
    var wikiaSite = await WikiaTestSiteAsync;

    var category = new CategoryPage(wikiaSite, "Category:BlogListingPage");
    await category.RefreshAsync();
    Output.WriteLine(category.ToString());

    var memberGenerator = new CategoryMembersGenerator(category);
    memberGenerator.PagingSize = 50;

    var members = memberGenerator.EnumPages().ToList();
    TracePages(members);
    AssertTitlesDistinct(members);

    Assert.Equal(category.MembersCount, members.Count);
}
/// <summary>
/// Entry point: logs in to the site at <c>Credentials.ApiEndpoint</c>, sets the
/// modification throttle time to one second, and hands the content-fetching
/// enumeration of the pages in category "猫物" to <c>WorkAsync</c>.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static async Task Main(string[] args)
{
    var site = new WikiSite(
        new WikiClient { ClientUserAgent = "ClientWikiMaintenance/1.0" },
        Credentials.ApiEndpoint);
    await site.Initialization;
    await site.LoginAsync(Credentials.UserName, Credentials.Password);

    var throttleTime = TimeSpan.FromSeconds(1);
    site.ModificationThrottler.ThrottleTime = throttleTime;

    var categoryGenerator = new CategoryMembersGenerator(site, "猫物")
    {
        MemberTypes = CategoryMemberTypes.Page,
        PaginationSize = 50
    };
    var pagesWithContent = categoryGenerator.EnumPagesAsync(PageQueryOptions.FetchContent);

    await WorkAsync(pagesWithContent);
}
/// <summary>
/// Walks the pages of category "猫物" and exports every page for which the
/// repository query on its title returns no result.
/// </summary>
/// <remarks>
/// When an export throws <c>WikiClientException</c>, the exception is printed,
/// the method waits for a key press, refreshes the page and tries again.
/// </remarks>
public async Task RunAsync()
{
    await zhWarriorsSite.Initialization;

    var categoryGenerator = new CategoryMembersGenerator(zhWarriorsSite, "猫物")
    {
        PaginationSize = 50,
        MemberTypes = CategoryMemberTypes.Page,
    };

    var processed = 0;
    using (var cursor = categoryGenerator.EnumPagesAsync(PageQueryOptions.FetchContent).GetEnumerator())
    {
        while (await cursor.MoveNext())
        {
            processed++;
            var page = cursor.Current;

            var existsQuery = CPRepository.CreateQuery(@" SELECT ?link { ?link schema:isPartOf <https://warriors.huijiwiki.com/>; schema:name @title. }");
            existsQuery.SetLiteral("title", page.Title, "zh");
            var alreadyPresent = CPRepository.ExecuteQuery(existsQuery).Any();
            if (alreadyPresent)
            {
                Logger.LogWarning("Exists {}", page);
                continue;
            }

            Logger.LogInformation("[{}] Processing {}", processed, page);

            // Retry until the export completes without a WikiClientException.
            while (true)
            {
                try
                {
                    await ExportEntityAsync(page);
                    break;
                }
                catch (WikiClientException ex)
                {
                    Console.WriteLine(ex);
                    Console.ReadKey();
                    await page.RefreshAsync(PageQueryOptions.FetchContent);
                }
            }
        }
    }
}
/// <summary>
/// Enumerates the members of "Category:Template documentation pages" on the
/// WpTest2 site and checks that the titles are distinct and that the number of
/// enumerated pages equals the <c>MembersCount</c> of the category's
/// <c>CategoryInfoPropertyGroup</c>.
/// </summary>
public async Task WpCategoryMembersGeneratorTest()
{
    var wpSite = await WpTest2SiteAsync;

    var category = new WikiPage(wpSite, "Category:Template documentation pages");
    await category.RefreshAsync();
    Output.WriteLine(category.ToString());

    var memberGenerator = new CategoryMembersGenerator(category);
    memberGenerator.PaginationSize = 50;

    var members = await memberGenerator.EnumPagesAsync().ToList();
    TracePages(members);
    AssertTitlesDistinct(members);

    var expectedCount = category.GetPropertyGroup<CategoryInfoPropertyGroup>().MembersCount;
    Assert.Equal(expectedCount, members.Count);
}
/// <summary>
/// Enumerates the members of "Category:BlogListingPage" on the Wikia test site
/// and checks that the titles are distinct and that the number of enumerated
/// pages equals the <c>MembersCount</c> of the category's
/// <c>CategoryInfoPropertyGroup</c>.
/// </summary>
public async Task WikiaCategoryMembersGeneratorTest()
{
    var wikiaSite = await WikiaTestSiteAsync;

    var category = new WikiPage(wikiaSite, "Category:BlogListingPage");
    await category.RefreshAsync();
    WriteOutput(category);

    var memberGenerator = new CategoryMembersGenerator(category);
    memberGenerator.PaginationSize = 50;

    var members = await memberGenerator.EnumPagesAsync().ToListAsync();
    TracePages(members);
    AssertTitlesDistinct(members);

    var expectedCount = category.GetPropertyGroup<CategoryInfoPropertyGroup>().MembersCount;
    Assert.Equal(expectedCount, members.Count);
}
/// <summary>
/// Demonstrates two page generators against the English Wikipedia API: prints
/// up to 1000 non-redirect pages starting at the title "Wiki", then up to 10
/// subcategories of "Category:Cats".
/// </summary>
static async Task HelloWikiGenerators()
{
    // Every result row is printed as: page, content length, last-touched time.
    const string rowFormat = "{0, -30} {1, 8}B {2}";

    // Set up the API client and the site it talks to.
    var client = new WikiClient();
    var wikipedia = new WikiSite(client, "https://en.wikipedia.org/w/api.php");
    await wikipedia.Initialization;

    // All pages from the title "Wiki" onwards, with redirect pages filtered out.
    var allPagesGenerator = new AllPagesGenerator(wikipedia)
    {
        StartTitle = "Wiki",
        RedirectsFilter = PropertyFilterOption.WithoutProperty
    };

    // Only the first 1000 results are taken.
    var firstPages = await allPagesGenerator.EnumPagesAsync().Take(1000).ToList();
    foreach (var entry in firstPages)
    {
        Console.WriteLine(rowFormat, entry, entry.ContentLength, entry.LastTouched);
    }

    // The first 10 subcategories of Category:Cats, under a "Cats" heading.
    Console.WriteLine();
    Console.WriteLine("Cats");
    var catsGenerator = new CategoryMembersGenerator(wikipedia, "Category:Cats")
    {
        MemberTypes = CategoryMemberTypes.Subcategory
    };
    var subcategories = await catsGenerator.EnumPagesAsync().Take(10).ToList();
    foreach (var entry in subcategories)
    {
        Console.WriteLine(rowFormat, entry, entry.ContentLength, entry.LastTouched);
    }
}
/// <summary>
/// Writes a Prolog facts file built from the pages of the "Characters" category.
/// </summary>
/// <remarks>
/// The file starts with a generated header and a <c>discontiguous</c> directive.
/// Each page then contributes a comment line with its title, followed by the
/// lines returned by <c>FactsFromPage</c> and a blank line. Progress is echoed
/// to the console and the writer is flushed after every page.
/// </remarks>
/// <param name="outputPath">Path of the text file to create.</param>
public async Task CollectAsync(string outputPath)
{
    var site = await WikiSite.CreateAsync(wikiClient, EndpointUrl);
    site.Logger = WikiLogger.Default;

    var characterGenerator = new CategoryMembersGenerator(site, "Characters")
    {
        PagingSize = 50,
        MemberTypes = CategoryMemberTypes.Page,
    };

    var pageNumber = 0;
    using (var output = File.CreateText(outputPath))
    {
        // File header.
        output.WriteLine("% Powered by Graywing Prolog Generator.");
        output.WriteLine("% Generated from Warriors Wiki on {0:O}.", DateTime.Now);
        output.WriteLine();
        output.WriteLine(":- discontiguous[name/2, male/1, female/1, belongsto/2, child/3, apprentice/2].");
        output.WriteLine();

        var characterPages = characterGenerator.EnumPagesAsync(PageQueryOptions.FetchContent);
        await characterPages.ForEachAsync(page =>
        {
            pageNumber++;
            Console.WriteLine("{0}: {1}", pageNumber, page.Title);

            // Title comment, then the facts for this page.
            output.Write("% ");
            output.WriteLine(page.Title);
            foreach (var fact in FactsFromPage(page))
            {
                output.WriteLine(fact);
            }

            output.WriteLine();
            output.Flush();
        });
    }
}