protected internal virtual void OnLoadPageInfo(JObject jpage, IWikiPageQueryProvider options)
{
    // Initialize
    propertyGroups?.Clear();
    // Update the page stub
    PageStub = MediaWikiHelper.PageStubFromJson(jpage);
    // Load page info
    // Invalid page title (like File:)
    if (PageStub.IsInvalid) return;
    // Load property groups
    foreach (var group in options.ParsePropertyGroups(jpage))
    {
        Debug.Assert(group != null, "The sequence returned from IWikiPageQueryParameters.ParsePropertyGroups contains a null item.");
        if (propertyGroups == null) propertyGroups = new List<IWikiPagePropertyGroup>();
        propertyGroups.Add(group);
    }
    // Check whether the client has requested revision content.
    LastRevision = GetPropertyGroup<RevisionsPropertyGroup>()?.LatestRevision;
    if (LastRevision?.Content != null) Content = LastRevision.Content;
    LastFileRevision = GetPropertyGroup<FileInfoPropertyGroup>()?.LatestRevision;
    pageInfo = GetPropertyGroup<PageInfoPropertyGroup>();
    LastRevisionId = pageInfo?.LastRevisionId ?? 0;
    ContentModel = pageInfo?.ContentModel;
}
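// A hedged sketch (not from the source): since OnLoadPageInfo is virtual, a
// derived page type can override it to read extra fields from the same JSON
// node after the base class has populated the stub, revisions, and property
// groups. MyCustomPage and the "touched" access below are hypothetical.
public class MyCustomPage : WikiPage
{
    public MyCustomPage(WikiSite site, string title) : base(site, title) { }

    public DateTime? Touched { get; private set; }

    protected internal override void OnLoadPageInfo(JObject jpage, IWikiPageQueryProvider options)
    {
        // Let the base class populate the standard page information first.
        base.OnLoadPageInfo(jpage, options);
        // Then pull an additional raw field out of the query.pages.xxx node.
        Touched = (DateTime?)jpage["touched"];
    }
}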
/// <summary>
/// Asynchronously generates the sequence of pages.
/// </summary>
/// <param name="options">Options to use when querying for the pages.</param>
public virtual IAsyncEnumerable<WikiPage> EnumPagesAsync(IWikiPageQueryProvider options)
{
    var queryParams = options.EnumParameters(Site.SiteInfo.Version).ToDictionary();
    queryParams.Add("generator", GeneratorName);
    foreach (var v in EnumGeneratorParameters())
        queryParams[v.Key] = v.Value;
    return RequestHelper.QueryWithContinuation(Site, queryParams,
            () => Site.BeginActionScope(this, options),
            DistinctGeneratedPages)
        .SelectMany(jquery => WikiPage.FromJsonQueryResult(Site, jquery, options).ToAsyncEnumerable());
}
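// Usage sketch (assumption, not from the source): enumerating pages from a
// generator-derived class. AllPagesGenerator, WikiPageQueryProvider.FromOptions,
// and PageQueryOptions are assumed to be the WikiClientLibrary types of the
// same names; await foreach requires C# 8 and IAsyncEnumerable<T>.
public static async Task ListPagesAsync(WikiSite site)
{
    var generator = new AllPagesGenerator(site) { NamespaceId = 0 };
    var options = WikiPageQueryProvider.FromOptions(PageQueryOptions.None);
    var count = 0;
    await foreach (var page in generator.EnumPagesAsync(options))
    {
        Console.WriteLine(page.Title);
        if (++count >= 20) break; // Stop after a few pages for the demo.
    }
}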
/// <summary>
/// Loads page information from JSON.
/// </summary>
/// <param name="page">The page instance to populate.</param>
/// <param name="json">The <c>query.pages.xxx</c> property value.</param>
/// <param name="options">The query options used to fetch the JSON.</param>
public static void PopulatePageFromJson(WikiPage page, JObject json, IWikiPageQueryProvider options)
{
    if (page == null) throw new ArgumentNullException(nameof(page));
    if (json == null) throw new ArgumentNullException(nameof(json));
    if (options == null) throw new ArgumentNullException(nameof(options));
    page.OnLoadPageInfo(json, options);
}
/// <summary>
/// Fetch revisions by revision ID sequence.
/// </summary>
/// <param name="site">The site to fetch revisions from.</param>
/// <param name="revisionIds">The desired revision IDs.</param>
/// <param name="options">The options for fetching the revisions.</param>
/// <param name="cancellationToken">A token used to cancel the operation.</param>
/// <exception cref="ArgumentNullException">Either <paramref name="site"/> or <paramref name="revisionIds"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="revisionIds"/> contains an invalid (e.g. non-existent) revision ID.</exception>
/// <remarks>
/// <para>The returned sequence will have the SAME order as specified in <paramref name="revisionIds"/>.</para>
/// <para>The <see cref="WikiPage"/> of each returned <see cref="Revision"/> will be a valid object.
/// However, its <see cref="WikiPage.LastRevision"/> and <see cref="WikiPage.Content"/> will correspond
/// to the latest revision fetched in this invocation, and pages with the same title
/// share the same reference.</para>
/// <para>If there is an invalid revision ID in <paramref name="revisionIds"/>, an <see cref="ArgumentException"/>
/// will be thrown while enumerating.</para>
/// </remarks>
public static IAsyncEnumerable<Revision> FetchRevisionsAsync(WikiSite site, IEnumerable<int> revisionIds, IWikiPageQueryProvider options, CancellationToken cancellationToken)
{
    if (site == null) throw new ArgumentNullException(nameof(site));
    if (revisionIds == null) throw new ArgumentNullException(nameof(revisionIds));
    return RequestHelper.FetchRevisionsAsync(site, revisionIds, options, cancellationToken);
}
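// Usage sketch (assumption, not from the source): fetch two specific revisions
// in order. The revision IDs are placeholders, and this assumes the method is
// exposed as Revision.FetchRevisionsAsync, as in WikiClientLibrary.
public static async Task ShowRevisionsAsync(WikiSite site)
{
    var options = WikiPageQueryProvider.FromOptions(PageQueryOptions.FetchContent);
    var revIds = new[] { 1000, 1001 }; // Placeholder revision IDs.
    await foreach (var rev in Revision.FetchRevisionsAsync(site, revIds, options, CancellationToken.None))
        Console.WriteLine("{0}: {1}", rev.Id, rev.TimeStamp);
}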
public SealedWikiPageQueryProvider(IWikiPageQueryProvider underlyingProvider)
{
    this.underlyingProvider = underlyingProvider
        ?? throw new ArgumentNullException(nameof(underlyingProvider));
}
/// <summary>
/// Creates a list of <see cref="WikiPage"/> based on JSON query result.
/// </summary>
/// <param name="site">A <see cref="WikiSite"/> object.</param>
/// <param name="queryNode">The <c>query</c> node value object of the JSON result.</param>
/// <param name="options">The query options used to fetch the result.</param>
/// <returns>Retrieved pages.</returns>
internal static IList<WikiPage> FromJsonQueryResult(WikiSite site, JObject queryNode, IWikiPageQueryProvider options)
{
    if (site == null) throw new ArgumentNullException(nameof(site));
    if (queryNode == null) throw new ArgumentNullException(nameof(queryNode));
    var pages = (JObject)queryNode["pages"];
    if (pages == null) return EmptyPages;
    // If query.xxx.index exists, sort the pages by the given index.
    // This is specifically used with SearchGenerator, to keep the search result in order.
    // For other generators, this property simply does not exist.
    // See https://www.mediawiki.org/wiki/API_talk:Query#On_the_order_of_titles_taken_out_of_generator .
    return pages.Properties().OrderBy(page => (int?)page.Value["index"])
        .Select(page =>
        {
            var newInst = new WikiPage(site, 0);
            MediaWikiHelper.PopulatePageFromJson(newInst, (JObject)page.Value, options);
            return newInst;
        }).ToList();
}
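// Illustrative sketch (assumption, with placeholder page data): how the
// "index" property affects ordering for search results. The JSON shape below
// mimics a query.pages node produced by a search generator.
internal static void IndexOrderingExample(WikiSite site, IWikiPageQueryProvider options)
{
    var sampleQueryNode = JObject.Parse(@"{
        ""pages"": {
            ""123"": { ""pageid"": 123, ""ns"": 0, ""title"": ""Foo"", ""index"": 2 },
            ""456"": { ""pageid"": 456, ""ns"": 0, ""title"": ""Bar"", ""index"": 1 }
        }
    }");
    var pages = WikiPage.FromJsonQueryResult(site, sampleQueryNode, options);
    // pages[0].Title == "Bar" (index 1); pages[1].Title == "Foo" (index 2).
}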
/// <summary>
/// Refresh a sequence of revisions by revision ID, along with their owner pages.
/// </summary>
/// <remarks>
/// <para>If there is an invalid revision ID in <paramref name="revIds"/>, an <see cref="ArgumentException"/> will be thrown while enumerating.</para>
/// </remarks>
public static IAsyncEnumerable<Revision> FetchRevisionsAsync(WikiSite site, IEnumerable<int> revIds, IWikiPageQueryProvider options, CancellationToken cancellationToken)
{
    if (revIds == null) throw new ArgumentNullException(nameof(revIds));
    var queryParams = options.EnumParameters(site.SiteInfo.Version).ToDictionary();
    // Remove any rvlimit magic word generated by RevisionsPropertyProvider.
    // We are only fetching by revision IDs.
    queryParams.Remove("rvlimit");
    var titleLimit = options.GetMaxPaginationSize(site.SiteInfo.Version, site.AccountInfo.HasRight(UserRights.ApiHighLimits));
    return AsyncEnumerableFactory.FromAsyncGenerator<Revision>(async sink =>
    {
        // Page ID --> Page Stub
        var stubDict = new Dictionary<int, WikiPageStub>();
        var revDict = new Dictionary<int, Revision>();
        using (site.BeginActionScope(null, (object)revIds))
        {
            foreach (var partition in revIds.Partition(titleLimit))
            {
                site.Logger.LogDebug("Fetching {Count} revisions from {Site}.", partition.Count, site);
                queryParams["revids"] = MediaWikiHelper.JoinValues(partition);
                var jobj = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams), cancellationToken);
                var jpages = (JObject)jobj["query"]["pages"];
                // Generate stubs first
                foreach (var p in jpages)
                {
                    var jrevs = p.Value["revisions"];
                    if (jrevs == null || !jrevs.HasValues) continue;
                    var id = Convert.ToInt32(p.Key);
                    if (!stubDict.TryGetValue(id, out var stub))
                    {
                        stub = new WikiPageStub(id, (string)p.Value["title"], (int)p.Value["ns"]);
                        stubDict.Add(id, stub);
                    }
                    foreach (var jrev in jrevs)
                    {
                        var rev = jrev.ToObject<Revision>(Utility.WikiJsonSerializer);
                        rev.Page = stub;
                        revDict.Add(rev.Id, rev);
                    }
                }
                await sink.YieldAndWait(partition.Select(id => revDict.TryGetValue(id, out var rev) ? rev : null));
            }
        }
    });
}
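// Minimal sketch (assumption, not the library's actual code) of the Partition
// helper used above, inferred from its call sites: it splits a sequence into
// consecutive chunks of at most `size` items, so each chunk fits within the
// API's pagination limit.
internal static class PartitionSketch
{
    internal static IEnumerable<List<T>> Partition<T>(this IEnumerable<T> source, int size)
    {
        var chunk = new List<T>(size);
        foreach (var item in source)
        {
            chunk.Add(item);
            if (chunk.Count == size)
            {
                yield return chunk;
                chunk = new List<T>(size);
            }
        }
        // Emit the final, possibly shorter, chunk.
        if (chunk.Count > 0) yield return chunk;
    }
}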
/// <summary>
/// Refresh a sequence of pages.
/// </summary>
public static async Task RefreshPagesAsync(IEnumerable<WikiPage> pages, IWikiPageQueryProvider options, CancellationToken cancellationToken)
{
    if (pages == null) throw new ArgumentNullException(nameof(pages));
    // You can even fetch pages from different sites.
    foreach (var sitePages in pages.GroupBy(p => new WikiPageGroupKey(p)))
    {
        var site = sitePages.Key.Site;
        var queryParams = options.EnumParameters(site.SiteInfo.Version).ToDictionary();
        var titleLimit = options.GetMaxPaginationSize(site.SiteInfo.Version, site.AccountInfo.HasRight(UserRights.ApiHighLimits));
        using (site.BeginActionScope(sitePages, options))
        {
            foreach (var partition in sitePages.Partition(titleLimit))
            {
                if (sitePages.Key.HasTitle)
                {
                    // If a page has both title and ID information, we will use the title anyway.
                    site.Logger.LogDebug("Fetching {Count} pages by title.", partition.Count);
                    queryParams["titles"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Title));
                }
                else
                {
                    site.Logger.LogDebug("Fetching {Count} pages by ID.", partition.Count);
                    Debug.Assert(sitePages.All(p => p.PageStub.HasId));
                    queryParams["pageids"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Id));
                }
                var jobj = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams), cancellationToken);
                var jquery = (JObject)jobj["query"];
                var continuationStatus = ParseContinuationParameters(jobj, queryParams, null);
                // Process continuation caused by props (e.g. langlinks) that contain a list that is too long.
                if (continuationStatus != CONTINUATION_DONE)
                {
                    var queryParams1 = new Dictionary<string, object>();
                    var continuationParams = new Dictionary<string, object>();
                    var jobj1 = jobj;
                    ParseContinuationParameters(jobj1, queryParams1, continuationParams);
                    while (continuationStatus != CONTINUATION_DONE)
                    {
                        if (continuationStatus == CONTINUATION_LOOP)
                            throw new UnexpectedDataException(Prompts.ExceptionUnexpectedContinuationLoop);
                        Debug.Assert(continuationStatus == CONTINUATION_AVAILABLE);
                        site.Logger.LogDebug("Detected query continuation. PartitionCount={PartitionCount}.", partition.Count);
                        queryParams1.Clear();
                        queryParams1.MergeFrom(queryParams);
                        queryParams1.MergeFrom(continuationParams);
                        jobj1 = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams1), cancellationToken);
                        var jquery1 = jobj1["query"];
                        if (jquery1.HasValues)
                        {
                            // Merge JSON response
                            jquery.Merge(jquery1);
                        }
                        continuationStatus = ParseContinuationParameters(jobj1, queryParams1, continuationParams);
                    }
                }
                if (sitePages.Key.HasTitle)
                {
                    // Process title normalization.
                    var normalized = jquery["normalized"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                    // Process redirects.
                    var redirects = jquery["redirects"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                    var pageInfoDict = ((JObject)jquery["pages"]).Properties()
                        .ToDictionary(p => (string)p.Value["title"]);
                    foreach (var page in partition)
                    {
                        var title = page.Title;
                        // Normalize the title first.
                        if (normalized?.ContainsKey(title) ?? false)
                            title = normalized[title];
                        // Then resolve the redirects.
                        var redirectTrace = new List<string>();
                        while (redirects?.ContainsKey(title) ?? false)
                        {
                            redirectTrace.Add(title); // Adds the last title
                            var next = redirects[title];
                            if (redirectTrace.Contains(next))
                                throw new InvalidOperationException(string.Format(Prompts.ExceptionWikiPageResolveCircularRedirect1, string.Join("->", redirectTrace)));
                            title = next;
                        }
                        // Finally, get the page.
                        var pageInfo = pageInfoDict[title];
                        if (redirectTrace.Count > 0) page.RedirectPath = redirectTrace;
                        MediaWikiHelper.PopulatePageFromJson(page, (JObject)pageInfo.Value, options);
                    }
                }
                else
                {
                    foreach (var page in partition)
                    {
                        var jPage = (JObject)jquery["pages"][page.Id.ToString(CultureInfo.InvariantCulture)];
                        MediaWikiHelper.PopulatePageFromJson(page, jPage, options);
                    }
                }
            }
        }
    }
}
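// Hedged sketch (assumption, not the library's actual implementation) of the
// ParseContinuationParameters contract inferred from the call sites above: it
// reads the response's "continue" node, copies its members into
// continuationParams (when provided), and reports whether the query is done,
// can continue, or is looping on identical parameters. The constant values
// below are also assumptions.
private const int CONTINUATION_DONE = 0;
private const int CONTINUATION_AVAILABLE = 1;
private const int CONTINUATION_LOOP = 2;

private static int ParseContinuationParametersSketch(JToken jresult,
    IDictionary<string, object> queryParams, IDictionary<string, object> continuationParams)
{
    var jcontinue = (JObject)jresult["continue"];
    if (jcontinue == null) return CONTINUATION_DONE; // No more batches.
    var looping = true;
    foreach (var prop in jcontinue.Properties())
    {
        var value = (string)prop.Value;
        // If every continuation parameter equals what was already sent,
        // the server is making no progress: report a loop.
        if (!queryParams.TryGetValue(prop.Name, out var existing) || !Equals(existing, value))
            looping = false;
        if (continuationParams != null) continuationParams[prop.Name] = value;
    }
    return looping ? CONTINUATION_LOOP : CONTINUATION_AVAILABLE;
}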
public override IAsyncEnumerable<WikiPage> EnumPagesAsync(IWikiPageQueryProvider options)
{
    return base.EnumPagesAsync(options);
}
/// <summary>
/// Asynchronously fetch information for a sequence of pages.
/// </summary>
/// <param name="pages">A sequence of pages to be refreshed.</param>
/// <param name="options">Provides options when performing the query.</param>
/// <param name="cancellationToken">The cancellation token that will be checked prior to completing the returned task.</param>
/// <remarks>
/// It is recommended that <paramref name="pages"/> is a list, or a subset of a list,
/// that is held by the caller, because this method does not return the refreshed pages.
/// </remarks>
/// <exception cref="InvalidOperationException">Circular redirect detected when resolving redirects.</exception>
public static Task RefreshAsync(this IEnumerable<WikiPage> pages, IWikiPageQueryProvider options, CancellationToken cancellationToken)
{
    return RequestHelper.RefreshPagesAsync(pages, options, cancellationToken);
}
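// Usage sketch (assumption, not from the source): bulk-refresh a caller-owned
// list of pages, fetching their content. WikiPageQueryProvider.FromOptions and
// PageQueryOptions.FetchContent are assumed to be the library members of the
// same names.
public static async Task RefreshAllAsync(WikiSite site, IReadOnlyList<string> titles)
{
    // Keep the list on the caller's side; RefreshAsync mutates these instances.
    var pages = titles.Select(t => new WikiPage(site, t)).ToList();
    await pages.RefreshAsync(
        WikiPageQueryProvider.FromOptions(PageQueryOptions.FetchContent),
        CancellationToken.None);
    foreach (var page in pages)
        Console.WriteLine("{0}: exists = {1}", page.Title, page.Exists);
}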
/// <inheritdoc cref="RefreshAsync(IEnumerable{WikiPage},IWikiPageQueryProvider,CancellationToken)"/> public static Task RefreshAsync(this IEnumerable <WikiPage> pages, IWikiPageQueryProvider options) { return(RefreshAsync(pages, options, CancellationToken.None)); }
/// <summary>
/// Fetch information for the page.
/// </summary>
/// <param name="options">Options when querying for the pages.</param>
/// <param name="cancellationToken">The cancellation token that will be checked prior to completing the returned task.</param>
/// <remarks>
/// For fetching multiple pages at one time, see <see cref="WikiPageExtensions.RefreshAsync(IEnumerable{WikiPage}, PageQueryOptions)"/>.
/// </remarks>
/// <exception cref="InvalidOperationException">Circular redirect detected when resolving redirects.</exception>
public Task RefreshAsync(IWikiPageQueryProvider options, CancellationToken cancellationToken)
{
    return RequestHelper.RefreshPagesAsync(new[] { this }, options, cancellationToken);
}
/// <inheritdoc cref="RefreshAsync(IWikiPageQueryProvider, CancellationToken)"/> public Task RefreshAsync(IWikiPageQueryProvider options) { return(RefreshAsync(options, CancellationToken.None)); }
/// <summary>
/// Refresh a sequence of pages.
/// </summary>
public static async Task RefreshPagesAsync(IEnumerable<WikiPage> pages, IWikiPageQueryProvider options, CancellationToken cancellationToken)
{
    if (pages == null) throw new ArgumentNullException(nameof(pages));
    // You can even fetch pages from different sites.
    foreach (var sitePages in pages.GroupBy(p => new WikiPageGroupKey(p)))
    {
        var site = sitePages.Key.Site;
        var queryParams = options.EnumParameters().ToDictionary();
        var titleLimit = options.GetMaxPaginationSize(site.AccountInfo.HasRight(UserRights.ApiHighLimits));
        using (site.BeginActionScope(sitePages, options))
        {
            foreach (var partition in sitePages.Partition(titleLimit))
            {
                if (sitePages.Key.HasTitle)
                {
                    // If a page has both title and ID information, we will use the title anyway.
                    site.Logger.LogDebug("Fetching {Count} pages by title.", partition.Count);
                    queryParams["titles"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Title));
                }
                else
                {
                    site.Logger.LogDebug("Fetching {Count} pages by ID.", partition.Count);
                    Debug.Assert(sitePages.All(p => p.PageStub.HasId));
                    queryParams["pageids"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Id));
                }
                // For single-page fetching, force fetching 1 revision only.
                if (partition.Count == 1)
                    queryParams["rvlimit"] = 1;
                else
                    queryParams.Remove("rvlimit");
                var jobj = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams), cancellationToken);
                if (sitePages.Key.HasTitle)
                {
                    // Process title normalization.
                    var normalized = jobj["query"]["normalized"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                    // Process redirects.
                    var redirects = jobj["query"]["redirects"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                    var pageInfoDict = ((JObject)jobj["query"]["pages"]).Properties()
                        .ToDictionary(p => (string)p.Value["title"]);
                    foreach (var page in partition)
                    {
                        var title = page.Title;
                        // Normalize the title first.
                        if (normalized?.ContainsKey(title) ?? false)
                            title = normalized[title];
                        // Then resolve the redirects.
                        var redirectTrace = new List<string>();
                        while (redirects?.ContainsKey(title) ?? false)
                        {
                            redirectTrace.Add(title); // Adds the last title
                            var next = redirects[title];
                            if (redirectTrace.Contains(next))
                                throw new InvalidOperationException($"Cannot resolve circular redirect: {string.Join("->", redirectTrace)}.");
                            title = next;
                        }
                        // Finally, get the page.
                        var pageInfo = pageInfoDict[title];
                        if (redirectTrace.Count > 0) page.RedirectPath = redirectTrace;
                        MediaWikiHelper.PopulatePageFromJson(page, (JObject)pageInfo.Value, options);
                    }
                }
                else
                {
                    foreach (var page in partition)
                    {
                        // Use the invariant culture so the page ID matches the JSON property name.
                        var jPage = (JObject)jobj["query"]["pages"][page.Id.ToString(CultureInfo.InvariantCulture)];
                        MediaWikiHelper.PopulatePageFromJson(page, jPage, options);
                    }
                }
            }
        }
    }
}