/// <summary>
/// Refresh a sequence of revisions by revision ID, along with their owner pages.
/// </summary>
/// <remarks>
/// <para>If <paramref name="revIds"/> contains an invalid revision ID,
/// an <see cref="ArgumentException"/> will be thrown while enumerating.</para>
/// </remarks>
public static IAsyncEnumerable<Revision> FetchRevisionsAsync(WikiSite site, IEnumerable<int> revIds,
    IWikiPageQueryProvider options, CancellationToken cancellationToken)
{
    if (revIds == null) throw new ArgumentNullException(nameof(revIds));
    var queryParams = options.EnumParameters(site.SiteInfo.Version).ToDictionary();
    // Remove any rvlimit magic word generated by RevisionsPropertyProvider.
    // We are only fetching by revision IDs.
    queryParams.Remove("rvlimit");
    var titleLimit = options.GetMaxPaginationSize(site.SiteInfo.Version,
        site.AccountInfo.HasRight(UserRights.ApiHighLimits));
    return AsyncEnumerableFactory.FromAsyncGenerator<Revision>(async sink =>
    {
        // Page ID --> Page Stub
        var stubDict = new Dictionary<int, WikiPageStub>();
        var revDict = new Dictionary<int, Revision>();
        using (site.BeginActionScope(null, (object)revIds))
        {
            foreach (var partition in revIds.Partition(titleLimit))
            {
                site.Logger.LogDebug("Fetching {Count} revisions from {Site}.", partition.Count, site);
                queryParams["revids"] = MediaWikiHelper.JoinValues(partition);
                var jobj = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams), cancellationToken);
                var jpages = (JObject)jobj["query"]["pages"];
                // Generate stubs first.
                foreach (var p in jpages)
                {
                    var jrevs = p.Value["revisions"];
                    if (jrevs == null || !jrevs.HasValues) continue;
                    var id = Convert.ToInt32(p.Key);
                    if (!stubDict.TryGetValue(id, out var stub))
                    {
                        stub = new WikiPageStub(id, (string)p.Value["title"], (int)p.Value["ns"]);
                        stubDict.Add(id, stub);
                    }
                    foreach (var jrev in jrevs)
                    {
                        var rev = jrev.ToObject<Revision>(Utility.WikiJsonSerializer);
                        rev.Page = stub;
                        revDict.Add(rev.Id, rev);
                    }
                }
                // Yield the revisions in the same order as the requested IDs.
                await sink.YieldAndWait(partition.Select(id => revDict.TryGetValue(id, out var rev) ? rev : null));
            }
        }
    });
}
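// A minimal usage sketch for FetchRevisionsAsync, assuming a reachable MediaWiki
// endpoint and this library's usual initialization pattern. The endpoint URL and the
// revision IDs below are hypothetical placeholders, not values from the source.
public static async Task FetchRevisionsUsageSketchAsync(CancellationToken cancellationToken)
{
    var client = new WikiClient();
    var site = new WikiSite(client, "https://example.org/w/api.php"); // hypothetical endpoint
    await site.Initialization;
    var options = WikiPageQueryProvider.FromOptions(PageQueryOptions.None);
    options.Properties.Add(new RevisionsPropertyProvider { FetchContent = true });
    // Revisions are yielded in the same order as the requested IDs; per the remarks
    // above, an invalid ID surfaces as an ArgumentException during enumeration.
    await foreach (var rev in FetchRevisionsAsync(site, new[] { 100, 101, 102 }, options, cancellationToken))
    {
        Console.WriteLine("{0}: {1}", rev.Id, rev.Page.Title);
    }
}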
/// <inheritdoc />
public int GetMaxPaginationSize(MediaWikiVersion version, bool apiHighLimits)
{
    return underlyingProvider.GetMaxPaginationSize(version, apiHighLimits);
}
/// <summary>
/// Refresh a sequence of pages.
/// </summary>
public static async Task RefreshPagesAsync(IEnumerable<WikiPage> pages, IWikiPageQueryProvider options,
    CancellationToken cancellationToken)
{
    if (pages == null) throw new ArgumentNullException(nameof(pages));
    // You can even fetch pages from different sites.
    foreach (var sitePages in pages.GroupBy(p => new WikiPageGroupKey(p)))
    {
        var site = sitePages.Key.Site;
        var queryParams = options.EnumParameters().ToDictionary();
        var titleLimit = options.GetMaxPaginationSize(site.AccountInfo.HasRight(UserRights.ApiHighLimits));
        using (site.BeginActionScope(sitePages, options))
        {
            foreach (var partition in sitePages.Partition(titleLimit))
            {
                if (sitePages.Key.HasTitle)
                {
                    // If a page has both title and ID information, we will use the title anyway.
                    site.Logger.LogDebug("Fetching {Count} pages by title.", partition.Count);
                    queryParams["titles"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Title));
                }
                else
                {
                    site.Logger.LogDebug("Fetching {Count} pages by ID.", partition.Count);
                    Debug.Assert(sitePages.All(p => p.PageStub.HasId));
                    queryParams["pageids"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Id));
                }
                // For single-page fetching, force fetching 1 revision only.
                if (partition.Count == 1)
                    queryParams["rvlimit"] = 1;
                else
                    queryParams.Remove("rvlimit");
                var jobj = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams), cancellationToken);
                if (sitePages.Key.HasTitle)
                {
                    // Process title normalization.
                    var normalized = jobj["query"]["normalized"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                    // Process redirects.
                    var redirects = jobj["query"]["redirects"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                    var pageInfoDict = ((JObject)jobj["query"]["pages"]).Properties()
                        .ToDictionary(p => (string)p.Value["title"]);
                    foreach (var page in partition)
                    {
                        var title = page.Title;
                        // Normalize the title first.
                        if (normalized?.ContainsKey(title) ?? false)
                            title = normalized[title];
                        // Then process the redirects.
                        var redirectTrace = new List<string>();
                        while (redirects?.ContainsKey(title) ?? false)
                        {
                            redirectTrace.Add(title); // Add the last title.
                            var next = redirects[title];
                            if (redirectTrace.Contains(next))
                                throw new InvalidOperationException(
                                    $"Cannot resolve circular redirect: {string.Join("->", redirectTrace)}.");
                            title = next;
                        }
                        // Finally, get the page.
                        var pageInfo = pageInfoDict[title];
                        if (redirectTrace.Count > 0) page.RedirectPath = redirectTrace;
                        MediaWikiHelper.PopulatePageFromJson(page, (JObject)pageInfo.Value, options);
                    }
                }
                else
                {
                    foreach (var page in partition)
                    {
                        var jPage = (JObject)jobj["query"]["pages"][page.Id.ToString()];
                        MediaWikiHelper.PopulatePageFromJson(page, jPage, options);
                    }
                }
            }
        }
    }
}
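// The normalization/redirect handling above is the subtle part of this method: the
// response keys pages by their canonical titles, so each requested title must first be
// mapped through "normalized" (a single step), then walked through "redirects" until it
// settles, with the visited list doubling as a cycle detector. The helper below isolates
// that walk as a standalone sketch of the same technique; it is an illustration, not
// part of the library's API.
static string ResolveCanonicalTitle(string title,
    IReadOnlyDictionary<string, string> normalized,
    IReadOnlyDictionary<string, string> redirects,
    out List<string> redirectTrace)
{
    // Normalization is a single-step mapping, e.g. "wikipedia: sandbox" -> "Wikipedia:Sandbox".
    if (normalized != null && normalized.TryGetValue(title, out var n))
        title = n;
    redirectTrace = new List<string>();
    // Redirects can chain, so follow them until a non-redirect title is reached.
    while (redirects != null && redirects.TryGetValue(title, out var next))
    {
        redirectTrace.Add(title);
        // A title revisited along the chain means the redirects form a loop.
        if (redirectTrace.Contains(next))
            throw new InvalidOperationException(
                $"Cannot resolve circular redirect: {string.Join("->", redirectTrace)}.");
        title = next;
    }
    return title;
}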
/// <summary>
/// Refresh a sequence of pages.
/// </summary>
public static async Task RefreshPagesAsync(IEnumerable<WikiPage> pages, IWikiPageQueryProvider options,
    CancellationToken cancellationToken)
{
    if (pages == null) throw new ArgumentNullException(nameof(pages));
    // You can even fetch pages from different sites.
    foreach (var sitePages in pages.GroupBy(p => new WikiPageGroupKey(p)))
    {
        var site = sitePages.Key.Site;
        var queryParams = options.EnumParameters(site.SiteInfo.Version).ToDictionary();
        var titleLimit = options.GetMaxPaginationSize(site.SiteInfo.Version,
            site.AccountInfo.HasRight(UserRights.ApiHighLimits));
        using (site.BeginActionScope(sitePages, options))
        {
            foreach (var partition in sitePages.Partition(titleLimit))
            {
                if (sitePages.Key.HasTitle)
                {
                    // If a page has both title and ID information, we will use the title anyway.
                    site.Logger.LogDebug("Fetching {Count} pages by title.", partition.Count);
                    queryParams["titles"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Title));
                }
                else
                {
                    site.Logger.LogDebug("Fetching {Count} pages by ID.", partition.Count);
                    Debug.Assert(sitePages.All(p => p.PageStub.HasId));
                    queryParams["pageids"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Id));
                }
                var jobj = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams), cancellationToken);
                var jquery = (JObject)jobj["query"];
                var continuationStatus = ParseContinuationParameters(jobj, queryParams, null);
                // Process continuation caused by props (e.g. langlinks) that contain a list that is too long.
                if (continuationStatus != CONTINUATION_DONE)
                {
                    var queryParams1 = new Dictionary<string, object>();
                    var continuationParams = new Dictionary<string, object>();
                    var jobj1 = jobj;
                    ParseContinuationParameters(jobj1, queryParams1, continuationParams);
                    while (continuationStatus != CONTINUATION_DONE)
                    {
                        if (continuationStatus == CONTINUATION_LOOP)
                            throw new UnexpectedDataException(Prompts.ExceptionUnexpectedContinuationLoop);
                        Debug.Assert(continuationStatus == CONTINUATION_AVAILABLE);
                        site.Logger.LogDebug("Detected query continuation. PartitionCount={PartitionCount}.", partition.Count);
                        queryParams1.Clear();
                        queryParams1.MergeFrom(queryParams);
                        queryParams1.MergeFrom(continuationParams);
                        jobj1 = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams1), cancellationToken);
                        var jquery1 = jobj1["query"];
                        if (jquery1.HasValues)
                        {
                            // Merge the continuation's partial JSON response into the main response.
                            jquery.Merge(jquery1);
                        }
                        continuationStatus = ParseContinuationParameters(jobj1, queryParams1, continuationParams);
                    }
                }
                if (sitePages.Key.HasTitle)
                {
                    // Process title normalization.
                    var normalized = jquery["normalized"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                    // Process redirects.
                    var redirects = jquery["redirects"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                    var pageInfoDict = ((JObject)jquery["pages"]).Properties()
                        .ToDictionary(p => (string)p.Value["title"]);
                    foreach (var page in partition)
                    {
                        var title = page.Title;
                        // Normalize the title first.
                        if (normalized?.ContainsKey(title) ?? false)
                            title = normalized[title];
                        // Then process the redirects.
                        var redirectTrace = new List<string>();
                        while (redirects?.ContainsKey(title) ?? false)
                        {
                            redirectTrace.Add(title); // Add the last title.
                            var next = redirects[title];
                            if (redirectTrace.Contains(next))
                                throw new InvalidOperationException(string.Format(
                                    Prompts.ExceptionWikiPageResolveCircularRedirect1,
                                    string.Join("->", redirectTrace)));
                            title = next;
                        }
                        // Finally, get the page.
                        var pageInfo = pageInfoDict[title];
                        if (redirectTrace.Count > 0) page.RedirectPath = redirectTrace;
                        MediaWikiHelper.PopulatePageFromJson(page, (JObject)pageInfo.Value, options);
                    }
                }
                else
                {
                    foreach (var page in partition)
                    {
                        var jPage = (JObject)jquery["pages"][page.Id.ToString(CultureInfo.InvariantCulture)];
                        MediaWikiHelper.PopulatePageFromJson(page, jPage, options);
                    }
                }
            }
        }
    }
}
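// A usage sketch for RefreshPagesAsync showing a request shape that can trigger the
// continuation handling above: list-valued props such as langlinks may not fit in a
// single response, in which case the method re-issues the query with the server-provided
// continuation parameters and merges the partial "query" nodes. The endpoint and page
// titles are hypothetical placeholders, and the property-provider names assume this
// library's query-provider pattern.
public static async Task RefreshPagesUsageSketchAsync(CancellationToken cancellationToken)
{
    var client = new WikiClient();
    var site = new WikiSite(client, "https://example.org/w/api.php"); // hypothetical endpoint
    await site.Initialization;
    var options = WikiPageQueryProvider.FromOptions(PageQueryOptions.FetchContent);
    options.Properties.Add(new LanguageLinksPropertyProvider()); // list-valued prop; may cause continuation
    var pages = new[] { new WikiPage(site, "Page A"), new WikiPage(site, "Page B") };
    await RefreshPagesAsync(pages, options, cancellationToken);
    foreach (var page in pages)
        Console.WriteLine("{0}: exists = {1}", page.Title, page.Exists);
}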
/// <inheritdoc />
public int GetMaxPaginationSize(bool apiHighLimits)
{
    return underlyingProvider.GetMaxPaginationSize(apiHighLimits);
}