Пример #1
0
        /// <summary>
        /// Asks the Wikimedia API (at <c>API_BASE</c>) for one random page
        /// (<c>generator=random</c>, <c>grnnamespace=6</c>) together with its
        /// image info, categories and global usage, and returns the image
        /// when it passes <see cref="IsValidImage"/>.
        /// </summary>
        /// <returns>
        /// The first <c>ImageInfo</c> entry of the returned page when the page
        /// is accepted by <see cref="IsValidImage"/>; otherwise <c>null</c>.
        /// <c>null</c> is also returned when the request or the parsing of the
        /// response throws: the exception is logged, not propagated.
        /// </returns>
        public async Task <WikiImage> GetImage()
        {
            string reqUri = API_BASE +
                            "?action=query" +
                            "&prop=imageinfo|categories|globalusage" +
                            "&iiprop=url|size|extmetadata|mediatype" +
                            "&iiurlheight=1080" +
                            "&format=json" +
                            "&generator=random" +
                            "&grnnamespace=6";

            var productValue = new ProductInfoHeaderValue("ImageDescBot", "1.0");
            var commentValue = new ProductInfoHeaderValue("(+https://github.com/smurfpandey/ImageDescribeBot)");

            // HttpClient is IDisposable; the using statement releases the
            // underlying handler on every exit path (return, exception, fall-through).
            using (HttpClient client = new HttpClient())
            {
                try
                {
                    // Identify the bot to the API through the User-Agent header.
                    client.DefaultRequestHeaders.UserAgent.Add(productValue);
                    client.DefaultRequestHeaders.UserAgent.Add(commentValue);

                    // make api request to wikimedia
                    string response = await client.GetStringAsync(reqUri);

                    // "pages" is navigated as First.First: the first child of
                    // "pages", then that child's first child (the page data).
                    JObject jObj      = JsonConvert.DeserializeObject <JObject>(response);
                    JToken  imageData = jObj["query"]["pages"].First.First;

                    WikimediaResponse objWImage = imageData.ToObject <WikimediaResponse>();

                    if (IsValidImage(objWImage))
                    {
                        return(objWImage.ImageInfo[0]);
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: any failure (network, JSON shape, validation)
                    // is logged and reported to the caller as "no image".
                    _logger.Error("error getting image from wikipedia", ex);
                }
            }

            return(null);
        }
Пример #2
0
        /// <summary>
        /// Decides whether the page returned by the API describes a picture
        /// that may be used. The first <c>ImageInfo</c> entry is checked for
        /// media type, minimum size and file extension, and then the page
        /// title, picture title, restrictions, description, categories and
        /// global usage are run through <c>objCensor</c>.
        /// </summary>
        /// <param name="objWImage">Deserialized page data from the API response.</param>
        /// <returns>
        /// <c>true</c> when every check passes; <c>false</c> when any check
        /// fails or when data required for a check is missing.
        /// </returns>
        private bool IsValidImage(WikimediaResponse objWImage)
        {
            // Without image info there is nothing to validate; reject instead
            // of failing with a NullReferenceException.
            if (objWImage == null || objWImage.ImageInfo == null)
            {
                return(false);
            }

            WikiImage objIInfo = objWImage.ImageInfo[0];

            if (objIInfo == null)
            {
                return(false);
            }

            // check that the file is actually a picture
            if (objIInfo.MediaType != "BITMAP")
            {
                return(false);
            }

            // Make sure the picture is big enough
            if (objIInfo.Width <= 50 || objIInfo.Height <= 50)
            {
                return(false);
            }

            // Make sure the format is supported
            // NOTE(review): this comparison is case-sensitive; confirm that
            // FORMATS covers the extension casings that appear in image URLs.
            string thisExtn = Path.GetExtension(objIInfo.Url);

            if (Array.IndexOf(FORMATS, thisExtn) < 0)
            {
                return(false);
            }

            // We got a picture, now let's verify we can use it.

            //Check file name for bad words
            if (objCensor.IsBlacklisted(objWImage.Title))
            {
                _logger.InfoFormat("Image discarded, {0}. badword in page title: {1}", objIInfo.DescriptionUrl, objWImage.Title);
                return(false);
            }

            // The remaining checks read extmetadata. If the entries they rely
            // on are absent the picture cannot be vetted, so it is discarded
            // (fail closed) with an explicit log entry.
            if (objIInfo.Metadata == null)
            {
                _logger.InfoFormat("Image discarded, {0}. missing metadata", objIInfo.DescriptionUrl);
                return(false);
            }

            if (objIInfo.Metadata.ObjectName == null ||
                objIInfo.Metadata.Restrictions == null ||
                objIInfo.Metadata.Categories == null)
            {
                _logger.InfoFormat("Image discarded, {0}. incomplete metadata", objIInfo.DescriptionUrl);
                return(false);
            }

            // Check picture title for bad words
            string pictureTitle = objIInfo.Metadata.ObjectName.value.ToString();
            if (objCensor.IsBlacklisted(pictureTitle))
            {
                _logger.InfoFormat("Image discarded, {0}. badword in picture title: {1}", objIInfo.DescriptionUrl, pictureTitle);
                return(false);
            }

            // Check restrictions for more bad words
            string restrictions = objIInfo.Metadata.Restrictions.value.ToString();
            if (objCensor.IsBlacklisted(restrictions))
            {
                _logger.InfoFormat("Image discarded, {0}. badword in restrictions: {1}", objIInfo.DescriptionUrl, restrictions);
                return(false);
            }

            // Check file description for bad words (the description is optional)
            if (objIInfo.Metadata.ImageDescription != null)
            {
                string cleanedDescription = Utility.GetTextFromHtml(objIInfo.Metadata.ImageDescription.value.ToString());
                if (objCensor.IsBlacklisted(cleanedDescription))
                {
                    _logger.InfoFormat("Image discarded, {0}. badword in image description: {1}", objIInfo.DescriptionUrl, cleanedDescription);
                    return(false);
                }

                if (objCensor.ShouldFilterForPhrase(cleanedDescription))
                {
                    _logger.InfoFormat("Image discarded, {0}. blacklisted phrase in image description: {1}", objIInfo.DescriptionUrl, cleanedDescription);
                    return(false);
                }
            }

            // The mediawiki API is awful, there's another list of categories which
            // is not the same as the one requested by asking for "categories".
            // Fortunately it's still in the API response, under extmetadata.
            List <string> lstCateg = new List <string>();

            lstCateg.Add(objIInfo.Metadata.Categories.value.ToString());

            // A page without a category list is treated as having none.
            if (objWImage.Category != null)
            {
                foreach (dynamic objCateg in objWImage.Category)
                {
                    lstCateg.Add(objCateg.title.ToString());
                }
            }

            if (objCensor.ShouldFilterForCategory(lstCateg))
            {
                _logger.InfoFormat("Image discarded, {0}. blacklisted phrase in categories", objIInfo.DescriptionUrl);
                return(false);
            }

            // #TODO:: check parent categories for each category in metadata,
            // and compare them against the blacklist too. This will require
            // extra API calls

            // if the picture is used in any wikipage with unwanted themes, we probably
            // don't want to use it.
            if (objWImage.GlobalUsage != null)
            {
                foreach (dynamic wikipage in objWImage.GlobalUsage)
                {
                    // Stays dynamically typed so the censor calls below are
                    // bound at run time exactly as before.
                    var pageTitle = wikipage.title.ToString();

                    if (objCensor.IsBlacklisted(pageTitle))
                    {
                        _logger.InfoFormat("Image discarded, {0}. page usage {1}", objIInfo.DescriptionUrl, pageTitle);
                        return(false);
                    }
                    // NOTE(review): called with a single title here, but with a
                    // list of categories above - confirm the intended overload.
                    if (objCensor.ShouldFilterForCategory(pageTitle))
                    {
                        _logger.InfoFormat("Image discarded, {0}. page usage {1}", objIInfo.DescriptionUrl, pageTitle);
                        return(false);
                    }
                }
            }
            return(true);
        }