/// <summary>
        /// Parses a Wikipedia HTML document and, when a usable paragraph is found,
        /// sets the request's IRC title to a shortened version of that paragraph.
        /// </summary>
        /// <param name="req">Titling request; its <c>Url</c> is inspected for a "#" anchor and its <c>IrcTitle</c> receives the summary.</param>
        /// <param name="htmlDoc">HTML of the article, handed to <c>WikipediaTools.Parse</c>.</param>
        /// <returns>The value of <c>req.CreateResult(true)</c>, whether or not a summary was set.</returns>
        public TitlingResult WikipediaSummarize(TitlingRequest req, string htmlDoc)
        {
            WikipediaArticle parsed = WikipediaTools.Parse(htmlDoc);

            // Text that will be shortened into the title; remains null until a source is found.
            string paragraph = null;

            // A "#" followed by at least one character points at a specific section of the article.
            int hashPos = req.Url.IndexOf("#", StringComparison.OrdinalIgnoreCase);
            bool hasAnchor = hashPos >= 0 && hashPos + 1 < req.Url.Length;

            if (hasAnchor)
            {
                paragraph = parsed.GetFirstParagraph(req.Url.Substring(hashPos + 1));
            }

            // Without an anchor, or when the anchor lookup yielded null,
            // use the first summary paragraph of the article instead.
            if (paragraph == null && parsed.SummaryParagraphs.Length > 0)
            {
                paragraph = parsed.SummaryParagraphs[0];
            }

            // Nothing worth showing: report the result without touching the title.
            if (string.IsNullOrWhiteSpace(paragraph))
            {
                return req.CreateResult(true);
            }

            req.IrcTitle.SetFormat("[ {0} ]", Format.Shorten(paragraph, MaxCharacters, ContinuationSymbol));

            return req.CreateResult(true);
        }
Beispiel #2
0
        /// <summary>
        /// Retrieves an article's title, intro extract and original page image from the
        /// MediaWiki query API of the given Wikipedia language edition.
        /// </summary>
        /// <param name="language">Value placed in front of <c>.wikipedia.org</c> to form the host name.</param>
        /// <param name="name">
        /// Article title. It is URL-escaped before being used as the <c>titles</c> query
        /// parameter so that characters such as '&amp;' or '#' cannot break the query string.
        /// NOTE(review): assumes callers pass an unescaped title — confirm.
        /// </param>
        /// <returns>
        /// The populated article, or <c>null</c> when the rate limiter reports that the call is rate limited.
        /// When the response carries no extract the description is left empty; when it carries no
        /// title, <paramref name="name"/> is used as the article name.
        /// </returns>
        public async Task<WikipediaArticle> GetArticleAsync(string language, string name)
        {
            if (rateLimiter.IsRatelimited())
            {
                return null;
            }

            var article = new WikipediaArticle
            {
                Url = $"https://{language}.wikipedia.org/wiki/{name}"
            };

            string apiBase = $"https://{language}.wikipedia.org/w/api.php?format=json&action=query";
            string escapedTitle = Uri.EscapeDataString(name);

            {
                var pageInfo = await HttpWebClient.ReturnStringAsync(new Uri($"{apiBase}&prop=extracts&exintro=&explaintext=&titles={escapedTitle}")).ConfigureAwait(false);

                // "pages" is keyed by page id, so take the value of its first property.
                // Every step is null-tolerant because error responses may omit these nodes.
                JToken page = JObject.Parse(pageInfo)["query"]?["pages"]?.First?.First;

                string desc = page?.Value<string>("extract") ?? string.Empty;

                article.Name = page?.Value<string>("title") ?? name;

                if (desc.Length >= 1024)
                {
                    // Long extracts are cut down to their first four ". "-separated sentences.
                    string shortened = string.Join(". ", desc.Split(". ").Take(4));

                    // Splitting strips the terminator of every sentence but the last one,
                    // so only add a full stop when the text does not already end with one.
                    if (!shortened.EndsWith('.'))
                    {
                        shortened += ".";
                    }

                    article.Description = shortened;
                }
                else
                {
                    article.Description = desc;
                }
            }

            {
                var pageImage = await HttpWebClient.ReturnStringAsync(new Uri($"{apiBase}&prop=pageimages&piprop=original&titles={escapedTitle}")).ConfigureAwait(false);

                // "original" is absent when the page has no image; Original then stays unset.
                JToken original = JObject.Parse(pageImage)["query"]?["pages"]?.First?.First?["original"];

                if (original != null)
                {
                    article.Original = new WikipediaImage
                    {
                        Source = original.Value<string>("source"),
                        Width  = Convert.ToInt32(original["width"]),
                        Height = Convert.ToInt32(original["height"])
                    };
                }
            }

            return article;
        }
Beispiel #3
0
        /// <summary>
        /// Loads an article's title, URL and intro extract through the wiki client site, then
        /// looks up an image file name in the Wikidata claims for the same title and derives
        /// an upload.wikimedia.org URL from it.
        /// </summary>
        /// <param name="language">Site prefix used to build the Wikidata <c>sites</c> parameter (<c>{language}wiki</c>).</param>
        /// <param name="name">Article title; URL-escaped before being sent as the Wikidata <c>titles</c> parameter.</param>
        /// <returns>
        /// The populated article, or <c>null</c> when the rate limiter reports that the call is rate limited.
        /// <c>ImageUrl</c> is only set when one of the inspected claims yields a string value.
        /// </returns>
        public async Task<WikipediaArticle> GetArticleAsync(string language, string name)
        {
            if (rateLimiter.IsRatelimited())
            {
                return null;
            }

            var page = new WikiPage(wikipediaSite, name);

            await page.RefreshAsync(new WikiPageQueryProvider
            {
                Properties =
                {
                    new ExtractsPropertyProvider
                    {
                        MaxCharacters    = 1024,
                        AsPlainText      = true,
                        IntroductionOnly = true
                    }
                }
            }).ConfigureAwait(false);

            var extractGroup = page.GetPropertyGroup<ExtractsPropertyGroup>();

            // The property group or its extract may be unavailable (presumably for pages
            // that do not exist — confirm); fall back to an empty description instead of throwing.
            string description = extractGroup?.Extract ?? string.Empty;

            if (description.Length >= 1024)
            {
                // Long extracts are cut down to their first four ". "-separated sentences.
                description = string.Join(". ", description.Split(". ").Take(4));

                // Only add a full stop when the shortened text does not already end with one.
                if (!description.EndsWith('.'))
                {
                    description += ".";
                }
            }

            var article = new WikipediaArticle
            {
                Name        = page.Title,
                Url         = WikiLink.Parse(wikipediaSite, name).TargetUrl,
                Description = description
            };

            var response = await HttpWebClient.ReturnStringAsync(new System.Uri($"https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&sites={language}wiki&props=claims&titles={System.Uri.EscapeDataString(name)}")).ConfigureAwait(false);

            // "entities" is keyed by entity id; take the first entry. The claims node is only
            // used when it is a JSON object, so a missing entity or an unexpected shape
            // simply results in no image.
            var entities = JObject.Parse(response)["entities"] as JObject;
            var entity   = entities?.Properties().FirstOrDefault()?.Value as JObject;
            var claims   = entity?["claims"] as JObject;

            // Claims inspected in priority order. Presumably these are the Wikidata properties
            // for image, logo, locator map, signature and detail map — verify against Wikidata.
            string[] imagePropertyIds = { "P18", "P154", "P242", "P109", "P1621" };

            string fileName = null;

            if (claims is not null)
            {
                foreach (var propertyId in imagePropertyIds)
                {
                    // First statement of the claim; statements without a data value are skipped.
                    var value = claims[propertyId]?.First?["mainsnak"]?["datavalue"]?["value"];

                    if (value is not null && value.Type == JTokenType.String)
                    {
                        fileName = (string)value;
                        break;
                    }
                }
            }

            if (!string.IsNullOrEmpty(fileName))
            {
                fileName = fileName.Replace(" ", "_");

                // The URL path uses the first, then the first two, characters of the
                // MD5 of the underscore-normalized file name as directory names.
                var md5 = fileName.CreateMD5(true);

                article.ImageUrl = $"https://upload.wikimedia.org/wikipedia/commons/{md5[0]}/{md5[0]}{md5[1]}/{fileName}";
            }

            return article;
        }