/// <summary>
/// Verifies language-link retrieval for a single page (autonym only) and then
/// for a batch of pages queried together.
/// </summary>
public async Task WpLzhPageLanguageLinksTest()
{
    var site = await WpLzhSiteAsync;
    var page = new WikiPage(site, "莎拉伯恩哈特");

    // First pass: request language links with the autonym only.
    var autonymQuery = new WikiPageQueryProvider
    {
        Properties = { new LanguageLinksPropertyProvider(LanguageLinkProperties.Autonym) }
    };
    await page.RefreshAsync(autonymQuery);

    var langLinks = page.GetPropertyGroup<LanguageLinksPropertyGroup>()?.LanguageLinks;
    ShallowTrace(langLinks);
    Assert.NotNull(langLinks);
    Assert.True(langLinks.Count > 120);

    var englishLink = langLinks.FirstOrDefault(link => link.Language == "en");
    Assert.NotNull(englishLink);
    Assert.Equal("Sarah Bernhardt", englishLink.Title);
    Assert.Equal("English", englishLink.Autonym);

    // The URL was not requested, so every link must report it as null.
    Assert.All(langLinks, link => Assert.Null(link.Url));

    // Second pass: check that complete property values are still fetched
    // when the property results are paginated across several pages.
    var pages = new[] { "挪威", "坤輿", "維基共享" }
        .Select(title => new WikiPage(site, title))
        .Append(page)
        .ToList();
    var defaultQuery = new WikiPageQueryProvider
    {
        Properties = { new LanguageLinksPropertyProvider() }
    };
    await pages.RefreshAsync(defaultQuery);

    Output.WriteLine("Language links ----");
    foreach (var current in pages)
    {
        Output.WriteLine("{0}: {1}", current, current.GetPropertyGroup<LanguageLinksPropertyGroup>()?.LanguageLinks.Count);
    }

    Assert.All(pages, current => Assert.True(current.GetPropertyGroup<LanguageLinksPropertyGroup>().LanguageLinks.Count > 50));

    // Titles captured in the first pass must match the ones from the batch query.
    var expectedTitles = langLinks.ToDictionary(link => link.Language, link => link.Title);
    var actualTitles = page.GetPropertyGroup<LanguageLinksPropertyGroup>().LanguageLinks
        .ToDictionary(link => link.Language, link => link.Title);
    Assert.Equal(expectedTitles, actualTitles);
}
/// <summary>
/// Refreshes <paramref name="page"/> with its extract, page image, primary
/// coordinate and last-revision metadata, then converts it into a point feature.
/// </summary>
/// <param name="page">The wiki page to convert; it is refreshed in place.</param>
/// <param name="language">
/// Language code used to suffix the language-specific attribute keys and to
/// select the site from <c>_wikiSites</c>.
/// </param>
/// <returns>
/// The feature, or <c>null</c> when the page does not exist or has no primary coordinate.
/// </returns>
private async Task<Feature> ConvertPageToFeature(WikiPage page, string language)
{
    await page.RefreshAsync(new WikiPageQueryProvider
    {
        Properties =
        {
            new ExtractsPropertyProvider { AsPlainText = true, IntroductionOnly = true, MaxSentences = 1 },
            new PageImagesPropertyProvider { QueryOriginalImage = true },
            new GeoCoordinatesPropertyProvider { QueryPrimaryCoordinate = true },
            new RevisionsPropertyProvider { FetchContent = false }
        }
    });
    if (page.Exists == false)
    {
        return null;
    }

    // GetPropertyGroup may return null when the page carries no such property
    // group; treat a missing geo group the same as an empty coordinate.
    var geoGroup = page.GetPropertyGroup<GeoCoordinatesPropertyGroup>();
    if (geoGroup == null || geoGroup.PrimaryCoordinate.IsEmpty)
    {
        return null;
    }
    var geoCoordinate = geoGroup.PrimaryCoordinate;

    var coordinate = new CoordinateZ(geoCoordinate.Longitude, geoCoordinate.Latitude, double.NaN);
    var attributes = GetAttributes(coordinate, page.Title, page.Id.ToString(), language);

    // A missing extract group falls back to an empty description.
    attributes.Add(FeatureAttributes.DESCRIPTION + ":" + language,
        page.GetPropertyGroup<ExtractsPropertyGroup>()?.Extract ?? string.Empty);

    // Only raster image formats are published as the feature image; a missing
    // image group yields an empty URL and therefore no image attribute.
    var imageUrl = page.GetPropertyGroup<PageImagesPropertyGroup>()?.OriginalImage.Url ?? string.Empty;
    if (!string.IsNullOrWhiteSpace(imageUrl) &&
        (imageUrl.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) ||
         imageUrl.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase) ||
         imageUrl.EndsWith(".png", StringComparison.OrdinalIgnoreCase) ||
         imageUrl.EndsWith(".bmp", StringComparison.OrdinalIgnoreCase)))
    {
        attributes.Add(FeatureAttributes.IMAGE_URL, imageUrl);
    }

    attributes.Add(FeatureAttributes.POI_USER_NAME + ":" + language, page.LastRevision.UserName);
    attributes.Add(FeatureAttributes.POI_USER_ADDRESS + ":" + language,
        _wikiSites[language].SiteInfo.MakeArticleUrl($"User:{Uri.EscapeUriString(page.LastRevision.UserName)}"));
    attributes.Add(FeatureAttributes.POI_LAST_MODIFIED + ":" + language, page.LastRevision.TimeStamp.ToString("o"));

    var feature = new Feature(new Point(coordinate), attributes);
    feature.SetTitles();
    feature.SetId();
    return feature;
}
/// <summary>
/// Refreshes <paramref name="page"/> with its extract, page image, primary
/// coordinate and last-revision metadata, then wraps it as a single-feature collection.
/// </summary>
/// <param name="page">The wiki page to convert; it is refreshed in place.</param>
/// <param name="language">
/// Language code used to suffix the description attribute key and to select
/// the site from <c>_wikiSites</c>.
/// </param>
/// <returns>
/// A collection holding one point feature, or <c>null</c> when the page does
/// not exist or has no primary coordinate.
/// </returns>
private async Task<FeatureCollection> ConvertPageToFeatureCollection(WikiPage page, string language)
{
    await page.RefreshAsync(new WikiPageQueryProvider
    {
        Properties =
        {
            new ExtractsPropertyProvider { AsPlainText = true, IntroductionOnly = true, MaxSentences = 1 },
            new PageImagesPropertyProvider { QueryOriginalImage = true },
            new GeoCoordinatesPropertyProvider { QueryPrimaryCoordinate = true },
            new RevisionsPropertyProvider { FetchContent = false }
        }
    });
    if (page.Exists == false)
    {
        return null;
    }

    // GetPropertyGroup may return null when the page carries no such property
    // group; treat a missing geo group the same as an empty coordinate.
    var geoGroup = page.GetPropertyGroup<GeoCoordinatesPropertyGroup>();
    if (geoGroup == null || geoGroup.PrimaryCoordinate.IsEmpty)
    {
        return null;
    }
    var geoCoordinate = geoGroup.PrimaryCoordinate;

    var coordinate = new Coordinate(geoCoordinate.Longitude, geoCoordinate.Latitude);
    var attributes = GetAttributes(coordinate, page.Title, page.Id.ToString(), language);

    // A missing extract group falls back to an empty description.
    attributes.Add(FeatureAttributes.DESCRIPTION + ":" + language,
        page.GetPropertyGroup<ExtractsPropertyGroup>()?.Extract ?? string.Empty);

    // SVG images are replaced by an empty URL. The suffix check is ordinal and
    // case-insensitive so that ".SVG" is filtered as well.
    var imageUrl = page.GetPropertyGroup<PageImagesPropertyGroup>()?.OriginalImage.Url ?? string.Empty;
    attributes.Add(FeatureAttributes.IMAGE_URL,
        imageUrl.EndsWith(".svg", StringComparison.OrdinalIgnoreCase) ? string.Empty : imageUrl);

    attributes.Add(FeatureAttributes.POI_USER_NAME, page.LastRevision.UserName);
    attributes.Add(FeatureAttributes.POI_USER_ADDRESS,
        _wikiSites[language].SiteInfo.MakeArticleUrl($"User:{Uri.EscapeUriString(page.LastRevision.UserName)}"));
    attributes.Add(FeatureAttributes.POI_LAST_MODIFIED, page.LastRevision.TimeStamp.ToString("o"));

    return new FeatureCollection(new Collection<IFeature> { new Feature(new Point(coordinate), attributes) });
}
/// <summary>
/// Loads the wiki page identified by <paramref name="id"/> and converts it
/// into a single-feature collection.
/// </summary>
/// <param name="id">
/// Identifier split on <c>'_'</c>: the first segment is the language key into
/// <c>_wikiSites</c>, the last segment is the numeric page id.
/// </param>
/// <returns>
/// A collection holding one point feature, or <c>null</c> when the page has
/// no geo-coordinates property group.
/// </returns>
public async Task<FeatureCollection> GetById(string id)
{
    // Split once; first segment is the language, last segment is the page id.
    var idParts = id.Split('_');
    var language = idParts[0];
    var pageId = idParts[idParts.Length - 1];

    var site = _wikiSites[language];
    // Page ids are machine-readable, so parse them culture-independently.
    var numericPageId = int.Parse(pageId, System.Globalization.CultureInfo.InvariantCulture);
    var stub = await WikiPageStub.FromPageIds(site, new[] { numericPageId }).First();
    var page = new WikiPage(site, stub.Title);
    await page.RefreshAsync(new WikiPageQueryProvider
    {
        Properties =
        {
            new ExtractsPropertyProvider { AsPlainText = true, IntroductionOnly = true, MaxSentences = 1 },
            new PageImagesPropertyProvider { QueryOriginalImage = true },
            new GeoCoordinatesPropertyProvider { QueryPrimaryCoordinate = true }
        }
    });

    // GetPropertyGroup may return null when the page carries no such property
    // group; without coordinates there is no point to build, so return null
    // instead of failing with a NullReferenceException.
    var geoGroup = page.GetPropertyGroup<GeoCoordinatesPropertyGroup>();
    if (geoGroup == null)
    {
        return null;
    }
    var geoCoordinate = geoGroup.PrimaryCoordinate;

    var coordinate = new Coordinate(geoCoordinate.Longitude, geoCoordinate.Latitude);
    var attributes = GetAttributes(coordinate, page.Title, id, language);
    attributes.Add(FeatureAttributes.DESCRIPTION,
        page.GetPropertyGroup<ExtractsPropertyGroup>()?.Extract ?? string.Empty);
    // The image URL stays null when the page has no image information.
    attributes.Add(FeatureAttributes.IMAGE_URL,
        page.GetPropertyGroup<PageImagesPropertyGroup>()?.OriginalImage.Url);
    attributes.Add(FeatureAttributes.WEBSITE, $"https://{language}.wikipedia.org/?curid={page.Id}");

    return new FeatureCollection(new Collection<IFeature> { new Feature(new Point(coordinate), attributes) });
}
/// <summary>
/// Enumerates the members of a category on the Wikia test site and checks
/// that the titles are distinct and that the count matches the category info.
/// </summary>
public async Task WikiaCategoryMembersGeneratorTest()
{
    var site = await WikiaTestSiteAsync;
    var category = new WikiPage(site, "Category:BlogListingPage");
    await category.RefreshAsync();
    Output.WriteLine(category.ToString());

    var membersGenerator = new CategoryMembersGenerator(category) { PaginationSize = 50 };
    var members = await membersGenerator.EnumPagesAsync().ToList();
    TracePages(members);
    AssertTitlesDistinct(members);

    // The number of enumerated pages must equal the member count reported by the category.
    Assert.Equal(category.GetPropertyGroup<CategoryInfoPropertyGroup>().MembersCount, members.Count);
}
/// <summary>
/// Enumerates the members of a category on the Test2 Wikipedia site and checks
/// that the titles are distinct and that the count matches the category info.
/// </summary>
public async Task WpCategoryMembersGeneratorTest()
{
    var site = await WpTest2SiteAsync;
    var category = new WikiPage(site, "Category:Template documentation pages");
    await category.RefreshAsync();
    WriteOutput(category);

    var membersGenerator = new CategoryMembersGenerator(category) { PaginationSize = 50 };
    var members = await membersGenerator.EnumPagesAsync().ToListAsync();
    TracePages(members);
    AssertTitlesDistinct(members);

    // The number of enumerated pages must equal the member count reported by the category.
    Assert.Equal(category.GetPropertyGroup<CategoryInfoPropertyGroup>().MembersCount, members.Count);
}
/// <summary>
/// Requests a one-sentence plain-text introduction extract for a page and
/// compares it with the expected text.
/// </summary>
public async Task WpLzhPageExtractTest()
{
    var site = await WpLzhSiteAsync;
    var page = new WikiPage(site, "莎拉伯恩哈特");

    var extractProvider = new ExtractsPropertyProvider
    {
        AsPlainText = true,
        IntroductionOnly = true,
        MaxSentences = 1
    };
    await page.RefreshAsync(new WikiPageQueryProvider { Properties = { extractProvider } });
    ShallowTrace(page);

    var actualExtract = page.GetPropertyGroup<ExtractsPropertyGroup>().Extract;
    Assert.Equal("莎拉·伯恩哈特,一八四四年生,法國巴黎人也。", actualExtract);
}
/// <summary>
/// Fetches the geo-coordinate property group of the "France" page on the
/// Test2 Wikipedia site and checks its primary coordinate.
/// </summary>
public async Task WpTest2PageGeoCoordinateTest()
{
    var site = await WpTest2SiteAsync;
    var page = new WikiPage(site, "France");
    var query = new WikiPageQueryProvider
    {
        Properties = { new GeoCoordinatesPropertyProvider() }
    };
    await page.RefreshAsync(query);
    ShallowTrace(page);

    var geoGroup = page.GetPropertyGroup<GeoCoordinatesPropertyGroup>();
    Assert.False(geoGroup.PrimaryCoordinate.IsEmpty);
    // Latitude and longitude are compared with 12 decimal places of precision.
    Assert.Equal(47, geoGroup.PrimaryCoordinate.Latitude, 12);
    Assert.Equal(2, geoGroup.PrimaryCoordinate.Longitude, 12);
    Assert.Equal(GeoCoordinate.Earth, geoGroup.PrimaryCoordinate.Globe);
}
/// <summary>
/// Fetches the geo-coordinate property group of the "Paris" page on the
/// English Wikipedia site and checks its primary coordinate.
/// </summary>
public async Task WpEnPageGeoCoordinateTest()
{
    var site = await WpEnSiteAsync;
    var page = new WikiPage(site, "Paris");
    var query = new WikiPageQueryProvider
    {
        Properties = { new GeoCoordinatesPropertyProvider() }
    };
    await page.RefreshAsync(query);
    ShallowTrace(page);

    var geoGroup = page.GetPropertyGroup<GeoCoordinatesPropertyGroup>();
    Assert.NotNull(geoGroup);
    ShallowTrace(geoGroup);
    Assert.False(geoGroup.PrimaryCoordinate.IsEmpty);
    // Latitude and longitude are compared with 5 decimal places of precision.
    Assert.Equal(48.856613, geoGroup.PrimaryCoordinate.Latitude, 5);
    Assert.Equal(2.352222, geoGroup.PrimaryCoordinate.Longitude, 5);
    Assert.Equal(GeoCoordinate.Earth, geoGroup.PrimaryCoordinate.Globe);
}
/// <summary>
/// Requests the page image (original plus a 100-pixel thumbnail) for a page
/// and checks the image title, both URLs and the thumbnail's larger dimension.
/// </summary>
public async Task WpLzhPageImagesTest()
{
    var site = await WpLzhSiteAsync;
    var page = new WikiPage(site, "挪威");

    var imagesProvider = new PageImagesPropertyProvider
    {
        QueryOriginalImage = true,
        ThumbnailSize = 100
    };
    await page.RefreshAsync(new WikiPageQueryProvider { Properties = { imagesProvider } });

    var imageGroup = page.GetPropertyGroup<PageImagesPropertyGroup>();
    ShallowTrace(imageGroup);

    Assert.Equal("Flag_of_Norway.svg", imageGroup.ImageTitle);
    Assert.Equal(
        "https://upload.wikimedia.org/wikipedia/commons/d/d9/Flag_of_Norway.svg",
        imageGroup.OriginalImage.Url);
    Assert.Equal(
        "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d9/Flag_of_Norway.svg/100px-Flag_of_Norway.svg.png",
        imageGroup.ThumbnailImage.Url);

    // The larger side of the thumbnail must equal the requested size.
    Assert.Equal(100, Math.Max(imageGroup.ThumbnailImage.Width, imageGroup.ThumbnailImage.Height));
}
/// <summary>
/// Builds a <see cref="WikipediaArticle"/> for the given page: title, URL,
/// a short plain-text description and, when Wikidata offers one, an image URL.
/// </summary>
/// <param name="language">Language code used to form the Wikidata site id (<c>{language}wiki</c>).</param>
/// <param name="name">Title of the page to look up.</param>
/// <returns>
/// The populated article, or <c>null</c> when the rate limiter reports that
/// the caller is currently rate limited.
/// </returns>
public async Task<WikipediaArticle> GetArticleAsync(string language, string name)
{
    if (rateLimiter.IsRatelimited())
    {
        return null;
    }

    // NOTE(review): the page is always loaded from wikipediaSite, regardless of
    // the language argument; confirm this is intended.
    var page = new WikiPage(wikipediaSite, name);
    await page.RefreshAsync(new WikiPageQueryProvider
    {
        Properties =
        {
            new ExtractsPropertyProvider { MaxCharacters = 1024, AsPlainText = true, IntroductionOnly = true }
        }
    });

    // The extract group (or its text) can be absent; fall back to an empty
    // description instead of failing on a null reference.
    var description = page.GetPropertyGroup<ExtractsPropertyGroup>()?.Extract ?? string.Empty;
    if (description.Length >= 1024)
    {
        // Long extracts are cut down to their first four ". "-separated parts.
        var sentences = description.Split(". ");
        description = string.Join(". ", sentences.Take(4)) + ".";
    }

    var article = new WikipediaArticle
    {
        Name = page.Title,
        Url = WikiLink.Parse(wikipediaSite, name).TargetUrl,
        Description = description
    };

    // Escape both values so that titles containing '&', '#', '+' and similar
    // characters do not corrupt the query string.
    var requestUri = new System.Uri(
        "https://www.wikidata.org/w/api.php?action=wbgetentities&format=json" +
        $"&sites={System.Uri.EscapeDataString(language)}wiki&props=claims&titles={System.Uri.EscapeDataString(name)}");
    var response = await HttpWebClient.ReturnStringAsync(requestUri);
    var jsonresp = JObject.Parse(response);

    // The response may contain no entities (error response) or an entity
    // without claims (no matching Wikidata item); both mean "no image".
    var entity = (jsonresp["entities"] as JObject)?.Properties().FirstOrDefault()?.Value as JObject;
    var claims = entity?["claims"] as JObject;
    if (claims is null)
    {
        return article;
    }

    // Candidate claim properties, checked in this priority order.
    var imagePropertyIds = new[] { "P18", "P154", "P242", "P109", "P1621" };
    JToken snak = null;
    foreach (var propertyId in imagePropertyIds)
    {
        if (claims[propertyId] is not null)
        {
            snak = claims[propertyId];
            break;
        }
    }

    // The first statement may carry no data value; skip the image in that case.
    var val = snak?.First?["mainsnak"]?["datavalue"]?["value"]?.ToObject<string>();
    if (!string.IsNullOrEmpty(val))
    {
        val = val.Replace(" ", "_");
        // The upload path is built from the first two characters of the file name's MD5 hash.
        var md5 = val.CreateMD5(true);
        article.ImageUrl = $"https://upload.wikimedia.org/wikipedia/commons/{md5[0]}/{md5[0]}{md5[1]}/{val}";
    }

    return article;
}