Example #1
        /// <summary>
        /// Refresh a sequence of revisions by revid, along with their owner pages.
        /// </summary>
        /// <remarks>
        /// <para>If there is an invalid revision ID in <paramref name="revIds"/>, an <see cref="ArgumentException"/> will be thrown during enumeration.</para>
        /// </remarks>
        public static IAsyncEnumerable<Revision> FetchRevisionsAsync(WikiSite site, IEnumerable<int> revIds, IWikiPageQueryProvider options, CancellationToken cancellationToken)
        {
            if (revIds == null)
            {
                throw new ArgumentNullException(nameof(revIds));
            }
            var queryParams = options.EnumParameters(site.SiteInfo.Version).ToDictionary();

            // Remove any rvlimit magic word generated by RevisionsPropertyProvider.
            // We are only fetching by revisions.
            queryParams.Remove("rvlimit");
            var titleLimit = options.GetMaxPaginationSize(site.SiteInfo.Version, site.AccountInfo.HasRight(UserRights.ApiHighLimits));

            return AsyncEnumerableFactory.FromAsyncGenerator<Revision>(async sink =>
            {
                // Page ID --> Page Stub
                var stubDict = new Dictionary<int, WikiPageStub>();
                var revDict = new Dictionary<int, Revision>();
                using (site.BeginActionScope(null, (object)revIds))
                {
                    foreach (var partition in revIds.Partition(titleLimit))
                    {
                        site.Logger.LogDebug("Fetching {Count} revisions from {Site}.", partition.Count, site);
                        queryParams["revids"] = MediaWikiHelper.JoinValues(partition);
                        var jobj = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams), cancellationToken);
                        var jpages = (JObject)jobj["query"]["pages"];
                        // Generate stubs first
                        foreach (var p in jpages)
                        {
                            var jrevs = p.Value["revisions"];
                            if (jrevs == null || !jrevs.HasValues)
                            {
                                continue;
                            }
                            var id = Convert.ToInt32(p.Key);
                            if (!stubDict.TryGetValue(id, out var stub))
                            {
                                stub = new WikiPageStub(id, (string)p.Value["title"], (int)p.Value["ns"]);
                                stubDict.Add(id, stub);
                            }
                            foreach (var jrev in jrevs)
                            {
                                var rev = jrev.ToObject<Revision>(Utility.WikiJsonSerializer);
                                rev.Page = stub;
                                revDict.Add(rev.Id, rev);
                            }
                        }
                        await sink.YieldAndWait(partition.Select(id => revDict.TryGetValue(id, out var rev) ? rev : null));
                    }
                }
            });
        }
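A minimal usage sketch for FetchRevisionsAsync. The site URL and revision IDs are placeholders, and the provider setup via WikiPageQueryProvider.FromOptions is an assumption based on WikiClientLibrary's public API, not part of the example above:

        var client = new WikiClient();
        var site = new WikiSite(client, "https://en.wikipedia.org/w/api.php");
        await site.Initialization;
        var provider = WikiPageQueryProvider.FromOptions(PageQueryOptions.FetchContent);
        // Revision IDs the wiki does not know come back as null placeholders
        // (see the YieldAndWait call above).
        await foreach (var rev in FetchRevisionsAsync(site, new[] { 123456, 123457 }, provider, CancellationToken.None))
        {
            Console.WriteLine(rev == null ? "(missing)" : $"{rev.Id}: {rev.Page.Title}");
        }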
Example #2
 /// <inheritdoc />
 public int GetMaxPaginationSize(MediaWikiVersion version, bool apiHighLimits)
 {
     return underlyingProvider.GetMaxPaginationSize(version, apiHighLimits);
 }
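This member simply forwards to a wrapped provider, i.e. the delegating half of a decorator. A hypothetical enclosing class, sketched only to show where underlyingProvider comes from (the class name and constructor are assumptions, and the remaining IWikiPageQueryProvider members are elided):

 public class DelegatingPageQueryProvider
 {
     protected readonly IWikiPageQueryProvider underlyingProvider;

     public DelegatingPageQueryProvider(IWikiPageQueryProvider underlyingProvider)
     {
         this.underlyingProvider = underlyingProvider
             ?? throw new ArgumentNullException(nameof(underlyingProvider));
     }

     // Forward each provider member to the wrapped instance, so a derived
     // class can override only the behavior it needs to change.
     public int GetMaxPaginationSize(MediaWikiVersion version, bool apiHighLimits)
         => underlyingProvider.GetMaxPaginationSize(version, apiHighLimits);
 }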
Example #3
        /// <summary>
        /// Refresh a sequence of pages.
        /// </summary>
        public static async Task RefreshPagesAsync(IEnumerable<WikiPage> pages, IWikiPageQueryProvider options, CancellationToken cancellationToken)
        {
            if (pages == null)
            {
                throw new ArgumentNullException(nameof(pages));
            }
            // You can even fetch pages from different sites.
            foreach (var sitePages in pages.GroupBy(p => new WikiPageGroupKey(p)))
            {
                var site        = sitePages.Key.Site;
                var queryParams = options.EnumParameters().ToDictionary();
                var titleLimit  = options.GetMaxPaginationSize(site.AccountInfo.HasRight(UserRights.ApiHighLimits));
                using (site.BeginActionScope(sitePages, options))
                {
                    foreach (var partition in sitePages.Partition(titleLimit))
                    {
                        if (sitePages.Key.HasTitle)
                        {
                            // If a page has both title and ID information,
                            // we will use title anyway.
                            site.Logger.LogDebug("Fetching {Count} pages by title.", partition.Count);
                            queryParams["titles"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Title));
                        }
                        else
                        {
                            site.Logger.LogDebug("Fetching {Count} pages by ID.", partition.Count);
                            Debug.Assert(sitePages.All(p => p.PageStub.HasId));
                            queryParams["pageids"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Id));
                        }
                        // For single-page fetching, force fetching 1 revision only.
                        if (partition.Count == 1)
                        {
                            queryParams["rvlimit"] = 1;
                        }
                        else
                        {
                            queryParams.Remove("rvlimit");
                        }
                        var jobj = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams), cancellationToken);

                        if (sitePages.Key.HasTitle)
                        {
                            // Process title normalization.
                            var normalized = jobj["query"]["normalized"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                            // Process redirects.
                            var redirects    = jobj["query"]["redirects"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                            var pageInfoDict = ((JObject)jobj["query"]["pages"]).Properties()
                                               .ToDictionary(p => (string)p.Value["title"]);
                            foreach (var page in partition)
                            {
                                var title = page.Title;
                                // Normalize the title first.
                                if (normalized?.ContainsKey(title) ?? false)
                                {
                                    title = normalized[title];
                                }
                                // Then process the redirects.
                                var redirectTrace = new List<string>();
                                while (redirects?.ContainsKey(title) ?? false)
                                {
                                    redirectTrace.Add(title); // Record the current title before following the redirect.
                                    var next = redirects[title];
                                    if (redirectTrace.Contains(next))
                                    {
                                        throw new InvalidOperationException($"Cannot resolve circular redirect: {string.Join("->", redirectTrace)}.");
                                    }
                                    title = next;
                                }
                                // Finally, get the page.
                                var pageInfo = pageInfoDict[title];
                                if (redirectTrace.Count > 0)
                                {
                                    page.RedirectPath = redirectTrace;
                                }
                                MediaWikiHelper.PopulatePageFromJson(page, (JObject)pageInfo.Value, options);
                            }
                        }
                        else
                        {
                            foreach (var page in partition)
                            {
                                var jPage = (JObject)jobj["query"]["pages"][page.Id.ToString()];
                                MediaWikiHelper.PopulatePageFromJson(page, jPage, options);
                            }
                        }
                    }
                }
            }
        }
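A hedged usage sketch for the method above. The titles are placeholders; PageQueryOptions.ResolveRedirects exists in WikiClientLibrary, but the exact provider setup for this snapshot of the method is an assumption:

        var pages = new[]
        {
            new WikiPage(site, "Main Page"),
            new WikiPage(site, "Wikipedia:Sandbox"),
        };
        var provider = WikiPageQueryProvider.FromOptions(PageQueryOptions.FetchContent | PageQueryOptions.ResolveRedirects);
        await RefreshPagesAsync(pages, provider, CancellationToken.None);
        // Each WikiPage is populated in place; RedirectPath is set on pages
        // that were reached through one or more redirects.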
Example #4
        /// <summary>
        /// Refresh a sequence of pages.
        /// </summary>
        public static async Task RefreshPagesAsync(IEnumerable<WikiPage> pages, IWikiPageQueryProvider options, CancellationToken cancellationToken)
        {
            if (pages == null)
            {
                throw new ArgumentNullException(nameof(pages));
            }
            // You can even fetch pages from different sites.
            foreach (var sitePages in pages.GroupBy(p => new WikiPageGroupKey(p)))
            {
                var site        = sitePages.Key.Site;
                var queryParams = options.EnumParameters(site.SiteInfo.Version).ToDictionary();
                var titleLimit  = options.GetMaxPaginationSize(site.SiteInfo.Version, site.AccountInfo.HasRight(UserRights.ApiHighLimits));
                using (site.BeginActionScope(sitePages, options))
                {
                    foreach (var partition in sitePages.Partition(titleLimit))
                    {
                        if (sitePages.Key.HasTitle)
                        {
                            // If a page has both title and ID information,
                            // we will use title anyway.
                            site.Logger.LogDebug("Fetching {Count} pages by title.", partition.Count);
                            queryParams["titles"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Title));
                        }
                        else
                        {
                            site.Logger.LogDebug("Fetching {Count} pages by ID.", partition.Count);
                            Debug.Assert(sitePages.All(p => p.PageStub.HasId));
                            queryParams["pageids"] = MediaWikiHelper.JoinValues(partition.Select(p => p.Id));
                        }
                        var jobj = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams), cancellationToken);

                        var jquery             = (JObject)jobj["query"];
                        var continuationStatus = ParseContinuationParameters(jobj, queryParams, null);
                        // Process continuation caused by props (e.g. langlinks) that contain a list that is too long.
                        if (continuationStatus != CONTINUATION_DONE)
                        {
                            var queryParams1       = new Dictionary<string, object>();
                            var continuationParams = new Dictionary<string, object>();
                            var jobj1 = jobj;
                            ParseContinuationParameters(jobj1, queryParams1, continuationParams);
                            while (continuationStatus != CONTINUATION_DONE)
                            {
                                if (continuationStatus == CONTINUATION_LOOP)
                                {
                                    throw new UnexpectedDataException(Prompts.ExceptionUnexpectedContinuationLoop);
                                }
                                Debug.Assert(continuationStatus == CONTINUATION_AVAILABLE);
                                site.Logger.LogDebug("Detected query continuation. PartitionCount={PartitionCount}.", partition.Count);
                                queryParams1.Clear();
                                queryParams1.MergeFrom(queryParams);
                                queryParams1.MergeFrom(continuationParams);
                                jobj1 = await site.InvokeMediaWikiApiAsync(new MediaWikiFormRequestMessage(queryParams1), cancellationToken);

                                var jquery1 = jobj1["query"];
                                if (jquery1.HasValues)
                                {
                                    // Merge JSON response
                                    jquery.Merge(jquery1);
                                }
                                continuationStatus = ParseContinuationParameters(jobj1, queryParams1, continuationParams);
                            }
                        }
                        if (sitePages.Key.HasTitle)
                        {
                            // Process title normalization.
                            var normalized = jquery["normalized"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                            // Process redirects.
                            var redirects    = jquery["redirects"]?.ToDictionary(n => (string)n["from"], n => (string)n["to"]);
                            var pageInfoDict = ((JObject)jquery["pages"]).Properties()
                                               .ToDictionary(p => (string)p.Value["title"]);
                            foreach (var page in partition)
                            {
                                var title = page.Title;
                                // Normalize the title first.
                                if (normalized?.ContainsKey(title) ?? false)
                                {
                                    title = normalized[title];
                                }
                                // Then process the redirects.
                                var redirectTrace = new List<string>();
                                while (redirects?.ContainsKey(title) ?? false)
                                {
                                    redirectTrace.Add(title); // Record the current title before following the redirect.
                                    var next = redirects[title];
                                    if (redirectTrace.Contains(next))
                                    {
                                        throw new InvalidOperationException(string.Format(Prompts.ExceptionWikiPageResolveCircularRedirect1, string.Join("->", redirectTrace)));
                                    }
                                    title = next;
                                }
                                // Finally, get the page.
                                var pageInfo = pageInfoDict[title];
                                if (redirectTrace.Count > 0)
                                {
                                    page.RedirectPath = redirectTrace;
                                }
                                MediaWikiHelper.PopulatePageFromJson(page, (JObject)pageInfo.Value, options);
                            }
                        }
                        else
                        {
                            foreach (var page in partition)
                            {
                                var jPage = (JObject)jquery["pages"][page.Id.ToString(CultureInfo.InvariantCulture)];
                                MediaWikiHelper.PopulatePageFromJson(page, jPage, options);
                            }
                        }
                    }
                }
            }
        }
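Compared with Example #3, this version also drains query continuations, which MediaWiki emits when a prop list (e.g. langlinks) is too long for a single response. ParseContinuationParameters is internal to the library; as a rough sketch of the contract it wraps, the modern API puts a top-level "continue" object in the response, and the client echoes its members back as request parameters until the object disappears (the helper name below is hypothetical):

        static bool TryGetContinuation(JObject response, IDictionary<string, object> continuationParams)
        {
            continuationParams.Clear();
            // No "continue" member means the query is complete (CONTINUATION_DONE above).
            if (response["continue"] is not JObject jcontinue)
                return false;
            // Echo every member (e.g. "llcontinue", "continue") into the next request.
            foreach (var prop in jcontinue.Properties())
                continuationParams[prop.Name] = (string)prop.Value;
            return true;
        }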
Example #5
 /// <inheritdoc />
 public int GetMaxPaginationSize(bool apiHighLimits)
 {
     return underlyingProvider.GetMaxPaginationSize(apiHighLimits);
 }