Пример #1
0
        // Checks language-link retrieval for a page on the lzh site: first with autonyms requested for a
        // single page, then again through a multi-page refresh that uses the default language-link options.
        public async Task WpLzhPageLanguageLinksTest()
        {
            var site = await WpLzhSiteAsync;
            var page = new WikiPage(site, "莎拉伯恩哈特");
            var autonymQuery = new WikiPageQueryProvider
            {
                Properties = { new LanguageLinksPropertyProvider(LanguageLinkProperties.Autonym) }
            };
            await page.RefreshAsync(autonymQuery);

            var langLinks = page.GetPropertyGroup<LanguageLinksPropertyGroup>()?.LanguageLinks;
            ShallowTrace(langLinks);
            Assert.NotNull(langLinks);
            Assert.True(langLinks.Count > 120);

            var englishLink = langLinks.FirstOrDefault(link => link.Language == "en");
            Assert.NotNull(englishLink);
            Assert.Equal("Sarah Bernhardt", englishLink.Title);
            Assert.Equal("English", englishLink.Autonym);
            // The URL was not requested, so every link is expected to have none.
            Assert.All(langLinks, link => Assert.Null(link.Url));

            // Make sure complete property values are still fetched when the property results get paginated.
            var pages = new[] { "挪威", "坤輿", "維基共享" }
                        .Select(title => new WikiPage(site, title))
                        .Append(page)
                        .ToList();
            var defaultQuery = new WikiPageQueryProvider
            {
                Properties = { new LanguageLinksPropertyProvider() }
            };
            await pages.RefreshAsync(defaultQuery);

            Output.WriteLine("Language links ----");
            foreach (var refreshed in pages)
            {
                var linkCount = refreshed.GetPropertyGroup<LanguageLinksPropertyGroup>()?.LanguageLinks.Count;
                Output.WriteLine("{0}: {1}", refreshed, linkCount);
            }
            Assert.All(pages, refreshed => Assert.True(refreshed.GetPropertyGroup<LanguageLinksPropertyGroup>().LanguageLinks.Count > 50));

            // The links captured before the batch refresh must match the ones loaded by it.
            var titlesBefore = langLinks.ToDictionary(link => link.Language, link => link.Title);
            var titlesAfter  = page.GetPropertyGroup<LanguageLinksPropertyGroup>().LanguageLinks
                               .ToDictionary(link => link.Language, link => link.Title);
            Assert.Equal(titlesBefore, titlesAfter);
        }
Пример #2
0
        /// <summary>
        /// Refreshes <paramref name="page"/> with its extract, original image, primary coordinate and latest
        /// revision metadata, then converts it into a point <see cref="Feature"/>.
        /// </summary>
        /// <param name="page">The wiki page to load and convert.</param>
        /// <param name="language">Language key; used to look up the site in <c>_wikiSites</c> and as a suffix on attribute names.</param>
        /// <returns>
        /// The feature, or <c>null</c> when the page does not exist or has no primary coordinate.
        /// </returns>
        private async Task <Feature> ConvertPageToFeature(WikiPage page, string language)
        {
            await page.RefreshAsync(new WikiPageQueryProvider
            {
                Properties =
                {
                    new ExtractsPropertyProvider       {
                        AsPlainText = true, IntroductionOnly = true, MaxSentences = 1
                    },
                    new PageImagesPropertyProvider     {
                        QueryOriginalImage = true
                    },
                    new GeoCoordinatesPropertyProvider {
                        QueryPrimaryCoordinate = true
                    },
                    new RevisionsPropertyProvider      {
                        FetchContent = false
                    }
                }
            });

            if (page.Exists == false)
            {
                return(null);
            }

            // A missing coordinate group is treated like an empty coordinate instead of
            // failing with a NullReferenceException.
            var geoGroup = page.GetPropertyGroup <GeoCoordinatesPropertyGroup>();

            if (geoGroup == null || geoGroup.PrimaryCoordinate.IsEmpty)
            {
                return(null);
            }
            var geoCoordinate = geoGroup.PrimaryCoordinate;
            var coordinate    = new CoordinateZ(geoCoordinate.Longitude, geoCoordinate.Latitude, double.NaN);
            var attributes    = GetAttributes(coordinate, page.Title, page.Id.ToString(), language);

            // Missing extract / image groups fall back to an empty string, same as a null value inside them.
            attributes.Add(FeatureAttributes.DESCRIPTION + ":" + language, page.GetPropertyGroup <ExtractsPropertyGroup>()?.Extract ?? string.Empty);
            var imageUrl = page.GetPropertyGroup <PageImagesPropertyGroup>()?.OriginalImage.Url ?? string.Empty;

            // Only raster formats are published as the image URL; anything else (e.g. SVG) is left out.
            if (!string.IsNullOrWhiteSpace(imageUrl) &&
                (imageUrl.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) ||
                 imageUrl.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase) ||
                 imageUrl.EndsWith(".png", StringComparison.OrdinalIgnoreCase) ||
                 imageUrl.EndsWith(".bmp", StringComparison.OrdinalIgnoreCase)))
            {
                attributes.Add(FeatureAttributes.IMAGE_URL, imageUrl);
            }

            // NOTE(review): Uri.EscapeUriString is flagged obsolete on newer .NET versions; kept as-is because
            // the escaping done inside MakeArticleUrl is not visible from here.
            var lastRevision = page.LastRevision;

            attributes.Add(FeatureAttributes.POI_USER_NAME + ":" + language, lastRevision.UserName);
            attributes.Add(FeatureAttributes.POI_USER_ADDRESS + ":" + language, _wikiSites[language].SiteInfo.MakeArticleUrl($"User:{Uri.EscapeUriString(lastRevision.UserName)}"));
            attributes.Add(FeatureAttributes.POI_LAST_MODIFIED + ":" + language, lastRevision.TimeStamp.ToString("o"));
            var feature = new Feature(new Point(coordinate), attributes);

            feature.SetTitles();
            feature.SetId();
            return(feature);
        }
Пример #3
0
        /// <summary>
        /// Refreshes <paramref name="page"/> with its extract, original image, primary coordinate and latest
        /// revision metadata, then wraps the resulting point feature in a <see cref="FeatureCollection"/>.
        /// </summary>
        /// <param name="page">The wiki page to load and convert.</param>
        /// <param name="language">Language key; used to look up the site in <c>_wikiSites</c> and as a suffix on the description attribute.</param>
        /// <returns>
        /// A collection holding a single feature, or <c>null</c> when the page does not exist or has no
        /// primary coordinate.
        /// </returns>
        private async Task <FeatureCollection> ConvertPageToFeatureCollection(WikiPage page, string language)
        {
            await page.RefreshAsync(new WikiPageQueryProvider
            {
                Properties =
                {
                    new ExtractsPropertyProvider       {
                        AsPlainText = true, IntroductionOnly = true, MaxSentences = 1
                    },
                    new PageImagesPropertyProvider     {
                        QueryOriginalImage = true
                    },
                    new GeoCoordinatesPropertyProvider {
                        QueryPrimaryCoordinate = true
                    },
                    new RevisionsPropertyProvider      {
                        FetchContent = false
                    }
                }
            });

            if (page.Exists == false)
            {
                return(null);
            }

            // A missing coordinate group is treated like an empty coordinate instead of
            // failing with a NullReferenceException.
            var geoGroup = page.GetPropertyGroup <GeoCoordinatesPropertyGroup>();

            if (geoGroup == null || geoGroup.PrimaryCoordinate.IsEmpty)
            {
                return(null);
            }
            var geoCoordinate = geoGroup.PrimaryCoordinate;
            var coordinate    = new Coordinate(geoCoordinate.Longitude, geoCoordinate.Latitude);
            var attributes    = GetAttributes(coordinate, page.Title, page.Id.ToString(), language);

            // Missing extract / image groups fall back to an empty string, same as a null value inside them.
            attributes.Add(FeatureAttributes.DESCRIPTION + ":" + language, page.GetPropertyGroup <ExtractsPropertyGroup>()?.Extract ?? string.Empty);
            var imageUrl = page.GetPropertyGroup <PageImagesPropertyGroup>()?.OriginalImage.Url ?? string.Empty;

            // SVG images are published as an empty URL. The suffix check is ordinal and case-insensitive so that
            // it does not depend on the current culture and also catches ".SVG".
            var isSvg = imageUrl.EndsWith(".svg", StringComparison.OrdinalIgnoreCase);

            attributes.Add(FeatureAttributes.IMAGE_URL, isSvg ? string.Empty : imageUrl);

            var lastRevision = page.LastRevision;

            attributes.Add(FeatureAttributes.POI_USER_NAME, lastRevision.UserName);
            attributes.Add(FeatureAttributes.POI_USER_ADDRESS, _wikiSites[language].SiteInfo.MakeArticleUrl($"User:{Uri.EscapeUriString(lastRevision.UserName)}"));
            attributes.Add(FeatureAttributes.POI_LAST_MODIFIED, lastRevision.TimeStamp.ToString("o"));
            return(new FeatureCollection(new Collection <IFeature> {
                new Feature(new Point(coordinate), attributes)
            }));
        }
Пример #4
0
        /// <summary>
        /// Loads the wiki page identified by <paramref name="id"/> and returns it as a single-feature collection.
        /// </summary>
        /// <param name="id">
        /// Identifier split on <c>'_'</c>: the first segment selects the site in <c>_wikiSites</c>,
        /// the last segment is parsed as the numeric page id.
        /// </param>
        /// <returns>A collection with one point feature carrying description, image URL and website attributes.</returns>
        public async Task <FeatureCollection> GetById(string id)
        {
            var idSegments = id.Split('_');
            var language   = idSegments.First();
            var pageIdText = idSegments.Last();
            var site       = _wikiSites[language];

            // Resolve the numeric page id to a title first; the page itself is then loaded by title.
            var stub = await WikiPageStub.FromPageIds(site, new[] { int.Parse(pageIdText) }).First();
            var page = new WikiPage(site, stub.Title);

            var query = new WikiPageQueryProvider
            {
                Properties =
                {
                    new ExtractsPropertyProvider { AsPlainText = true, IntroductionOnly = true, MaxSentences = 1 },
                    new PageImagesPropertyProvider { QueryOriginalImage = true },
                    new GeoCoordinatesPropertyProvider { QueryPrimaryCoordinate = true }
                }
            };
            await page.RefreshAsync(query);

            var primary    = page.GetPropertyGroup<GeoCoordinatesPropertyGroup>().PrimaryCoordinate;
            var location   = new Coordinate(primary.Longitude, primary.Latitude);
            var attributes = GetAttributes(location, page.Title, id, language);

            var description = page.GetPropertyGroup<ExtractsPropertyGroup>().Extract ?? string.Empty;
            attributes.Add(FeatureAttributes.DESCRIPTION, description);

            var imageUrl = page.GetPropertyGroup<PageImagesPropertyGroup>().OriginalImage.Url;
            attributes.Add(FeatureAttributes.IMAGE_URL, imageUrl);

            attributes.Add(FeatureAttributes.WEBSITE, $"https://{language}.wikipedia.org/?curid={page.Id}");

            var features = new Collection<IFeature>
            {
                new Feature(new Point(location), attributes)
            };
            return new FeatureCollection(features);
        }
Пример #5
0
        // Enumerates all members of a category on the Wikia test site and checks that the pages are
        // distinct and that their number matches the member count reported for the category.
        public async Task WikiaCategoryMembersGeneratorTest()
        {
            var site = await WikiaTestSiteAsync;

            // The original literal ended with invisible right-to-left / left-to-right marks (U+200F, U+200E,
            // twice). They are spelled as escapes so the exact title is visible in source; the runtime value
            // is unchanged. NOTE(review): these look like copy-paste residue; confirm whether they are needed.
            var cat = new WikiPage(site, "Category:BlogListingPage\u200F\u200E\u200F\u200E");
            await cat.RefreshAsync();

            Output.WriteLine(cat.ToString());
            var generator = new CategoryMembersGenerator(cat)
            {
                PaginationSize = 50
            };
            var pages = await generator.EnumPagesAsync().ToList();

            TracePages(pages);
            AssertTitlesDistinct(pages);
            var catInfo = cat.GetPropertyGroup <CategoryInfoPropertyGroup>();

            // Fail with a clear assertion instead of a NullReferenceException when no category info was loaded.
            Assert.NotNull(catInfo);
            Assert.Equal(catInfo.MembersCount, pages.Count);
        }
Пример #6
0
        // Enumerates all members of a category on the test2 Wikipedia site and checks that the pages are
        // distinct and that their number matches the member count reported for the category.
        public async Task WpCategoryMembersGeneratorTest()
        {
            var site = await WpTest2SiteAsync;

            // The original literal ended with an invisible right-to-left mark followed by a left-to-right mark
            // (U+200F, U+200E). They are spelled as escapes so the exact title is visible in source; the runtime
            // value is unchanged. NOTE(review): these look like copy-paste residue; confirm whether they are needed.
            var cat = new WikiPage(site, "Category:Template documentation pages\u200F\u200E");
            await cat.RefreshAsync();

            WriteOutput(cat);
            var generator = new CategoryMembersGenerator(cat)
            {
                PaginationSize = 50
            };
            var pages = await generator.EnumPagesAsync().ToListAsync();

            TracePages(pages);
            AssertTitlesDistinct(pages);
            var catInfo = cat.GetPropertyGroup <CategoryInfoPropertyGroup>();

            // Fail with a clear assertion instead of a NullReferenceException when no category info was loaded.
            Assert.NotNull(catInfo);
            Assert.Equal(catInfo.MembersCount, pages.Count);
        }
Пример #7
0
        // Requests a one-sentence, plain-text introduction extract for a page on the lzh site
        // and compares it with the expected text.
        public async Task WpLzhPageExtractTest()
        {
            var site = await WpLzhSiteAsync;
            var page = new WikiPage(site, "莎拉伯恩哈特");

            var query = new WikiPageQueryProvider
            {
                Properties =
                {
                    new ExtractsPropertyProvider { AsPlainText = true, IntroductionOnly = true, MaxSentences = 1 }
                }
            };
            await page.RefreshAsync(query);

            ShallowTrace(page);
            var extract = page.GetPropertyGroup<ExtractsPropertyGroup>().Extract;
            Assert.Equal("莎拉·伯恩哈特,一八四四年生,法國巴黎人也。", extract);
        }
Пример #8
0
        // Loads the geo-coordinate property group of the "France" page on the test2 site and
        // checks the primary coordinate's latitude, longitude and globe.
        public async Task WpTest2PageGeoCoordinateTest()
        {
            var site = await WpTest2SiteAsync;
            var page = new WikiPage(site, "France");

            var query = new WikiPageQueryProvider
            {
                Properties = { new GeoCoordinatesPropertyProvider() }
            };
            await page.RefreshAsync(query);

            ShallowTrace(page);
            var geoGroup = page.GetPropertyGroup<GeoCoordinatesPropertyGroup>();
            var primary  = geoGroup.PrimaryCoordinate;

            Assert.False(primary.IsEmpty);
            Assert.Equal(47, primary.Latitude, 12);
            Assert.Equal(2, primary.Longitude, 12);
            Assert.Equal(GeoCoordinate.Earth, primary.Globe);
        }
Пример #9
0
        // Loads the geo-coordinate property group of the "Paris" page on the English site and
        // checks the primary coordinate to five decimal places.
        public async Task WpEnPageGeoCoordinateTest()
        {
            var site = await WpEnSiteAsync;
            var page = new WikiPage(site, "Paris");

            var query = new WikiPageQueryProvider
            {
                Properties = { new GeoCoordinatesPropertyProvider() }
            };
            await page.RefreshAsync(query);

            ShallowTrace(page);
            var geoGroup = page.GetPropertyGroup<GeoCoordinatesPropertyGroup>();

            Assert.NotNull(geoGroup);
            ShallowTrace(geoGroup);

            var primary = geoGroup.PrimaryCoordinate;

            Assert.False(primary.IsEmpty);
            Assert.Equal(48.856613, primary.Latitude, 5);
            Assert.Equal(2.352222, primary.Longitude, 5);
            Assert.Equal(GeoCoordinate.Earth, primary.Globe);
        }
Пример #10
0
        // Requests the page image of a page on the lzh site, both as original and as a thumbnail of size 100,
        // and checks the image title, both URLs and the thumbnail's larger dimension.
        public async Task WpLzhPageImagesTest()
        {
            var site = await WpLzhSiteAsync;
            var page = new WikiPage(site, "挪威");

            var query = new WikiPageQueryProvider
            {
                Properties =
                {
                    new PageImagesPropertyProvider { QueryOriginalImage = true, ThumbnailSize = 100 }
                }
            };
            await page.RefreshAsync(query);

            var imageGroup = page.GetPropertyGroup<PageImagesPropertyGroup>();
            ShallowTrace(imageGroup);

            Assert.Equal("Flag_of_Norway.svg", imageGroup.ImageTitle);
            Assert.Equal("https://upload.wikimedia.org/wikipedia/commons/d/d9/Flag_of_Norway.svg", imageGroup.OriginalImage.Url);
            Assert.Equal("https://upload.wikimedia.org/wikipedia/commons/thumb/d/d9/Flag_of_Norway.svg/100px-Flag_of_Norway.svg.png", imageGroup.ThumbnailImage.Url);

            // The requested thumbnail size is expected to bound the larger of the two dimensions.
            var longestSide = Math.Max(imageGroup.ThumbnailImage.Width, imageGroup.ThumbnailImage.Height);
            Assert.Equal(100, longestSide);
        }
Пример #11
0
        /// <summary>
        /// Builds a <see cref="WikipediaArticle"/> for the page titled <paramref name="name"/>: title, URL and a
        /// plain-text introduction come from the wiki page, and an image URL is derived from the claims of the
        /// matching Wikidata entity when one is available.
        /// </summary>
        /// <param name="language">Language code; used for the placeholder URL and the Wikidata <c>sites</c> parameter.</param>
        /// <param name="name">Title of the page to look up.</param>
        /// <returns>
        /// The populated article, or <c>null</c> when the rate limiter rejects the call. <c>ImageUrl</c> is left
        /// unset when no usable image claim is found.
        /// </returns>
        public async Task <WikipediaArticle> GetArticleAsync(string language, string name)
        {
            if (rateLimiter.IsRatelimited())
            {
                return(null);
            }

            var article = new WikipediaArticle
            {
                Url = $"https://{language}.wikipedia.org/wiki/{name}"
            };

            var page = new WikiPage(wikipediaSite, name);

            await page.RefreshAsync(new WikiPageQueryProvider
            {
                Properties =
                {
                    new ExtractsPropertyProvider
                    {
                        MaxCharacters    = 1024,
                        AsPlainText      = true,
                        IntroductionOnly = true
                    }
                }
            });

            article.Name = page.Title;
            article.Url  = WikiLink.Parse(wikipediaSite, name).TargetUrl;

            // A missing extract group or extract text becomes an empty description instead of
            // failing on the length check below.
            var description = page.GetPropertyGroup <ExtractsPropertyGroup>()?.Extract ?? string.Empty;

            if (description.Length >= 1024)
            {
                // Extract hit the character limit: keep only the first four ". "-separated sentences.
                description = string.Join(". ", description.Split(". ").Take(4)) + ".";
            }
            article.Description = description;

            // Escape the user-supplied parts so that titles containing '&', '#', '+' etc. do not corrupt the query string.
            var wikidataUrl = "https://www.wikidata.org/w/api.php?action=wbgetentities&format=json"
                              + $"&sites={System.Uri.EscapeDataString(language ?? string.Empty)}wiki&props=claims"
                              + $"&titles={System.Uri.EscapeDataString(name)}";
            var response = await HttpWebClient.ReturnStringAsync(new System.Uri(wikidataUrl));

            // Only the first entity of the response is inspected. Anything that is not shaped as expected
            // (no entity, no claims object) simply results in no image.
            var entities = JObject.Parse(response)["entities"] as JObject;
            var entity   = entities?.Properties().FirstOrDefault()?.Value as JObject;
            var claims   = entity?["claims"] as JObject;

            // Wikidata property ids tried in priority order; the first one present wins.
            // (On Wikidata these are image-like properties such as image/logo/map/signature; verify on wikidata.org.)
            JToken imageClaim = null;

            if (claims is not null)
            {
                foreach (var propertyId in new[] { "P18", "P154", "P242", "P109", "P1621" })
                {
                    imageClaim = claims[propertyId];
                    if (imageClaim is not null)
                    {
                        break;
                    }
                }
            }

            // The main snak may carry no datavalue at all, hence the null-conditional chain.
            var fileName = imageClaim?.First?["mainsnak"]?["datavalue"]?["value"]?.ToObject <string>();

            if (!string.IsNullOrEmpty(fileName))
            {
                fileName = fileName.Replace(" ", "_");

                // The upload path uses the first one and first two characters of the MD5 of the underscore-normalized file name.
                var md5 = fileName.CreateMD5(true);

                article.ImageUrl = $"https://upload.wikimedia.org/wikipedia/commons/{md5[0]}/{md5[0]}{md5[1]}/{fileName}";
            }

            return(article);
        }