Esempio n. 1
0
        /// <summary>
        /// Looks for a Picasa Web Albums URL in the page text, searches the referenced user's
        /// albums for the photo that corresponds to the wiki image, and calls
        /// <c>UpdateSource</c> for the first photo that either is byte-identical to the wiki
        /// image or for which <c>UploadOriginalVersion</c> succeeds.
        /// </summary>
        /// <remarks>
        /// Side effects visible in this method: the wiki image is downloaded to
        /// <c>temp_wiki_image</c>, candidate photos are cached on disk as <c>photo&lt;id&gt;</c>,
        /// newly seen photos are recorded in <c>idToPhotoInfo</c>, and progress is written to the
        /// console. The method returns silently when no Picasa URL is found or when the user
        /// feed cannot be retrieved.
        /// </remarks>
        /// <param name="page">The wiki file page whose text and title are inspected.</param>
        private static void UpdateSourceField(Page page)
        {
            // Prefer the album URL form; fall back to the user-only URL form.
            Match m = picasaAlbumUrlRegex.Match(page.text);
            if (!m.Success)
            {
                m = picasaUserUrlRegex.Match(page.text);
            }
            if (!m.Success)
            {
                // No recognizable Picasa URL in the page text. (When debugging, this is the place
                // to break in order to inspect pages whose URL could not be parsed.)
                return;
            }

            page.DownloadImage("temp_wiki_image");

            string userId = m.Groups[2].Value;

            // Groups[3] only carries an album name when the matching regex defines a third group.
            // Indexing a missing group yields an unsuccessful, empty group rather than throwing,
            // so test Success instead of Groups.Count (which also counts group 0).
            Group albumGroup = m.Groups[3];
            string chosenAlbumName = (albumGroup.Success && albumGroup.Length > 0) ? albumGroup.Value : null;

            // A common existing error
            if (chosenAlbumName == "Eakins")
            {
                chosenAlbumName = "EakinsThomas";
            }

            string userApiUrl = "http://picasaweb.google.com/data/feed/api/user/" + userId;
            string userApiPage = WgetToString(userApiUrl);
            if (userApiPage == null)
            {
                return;
            }

            Console.WriteLine("Searching for image URL. Retrieving info about candidate images...");
            List<string> photoIds = new List<string>();
            foreach (Match albumMatch in userPageAlbumIdRegex.Matches(userApiPage))
            {
                string albumName = albumMatch.Groups[2].Value;

                // With no album named in the URL, every album of the user is a candidate.
                if (chosenAlbumName != null && albumName != chosenAlbumName)
                {
                    continue;
                }

                string albumId = albumMatch.Groups[1].Value;
                string albumApiUrl = "http://picasaweb.google.com/data/feed/api/user/" + userId + "/albumid/" + albumId;
                string albumApiPage = WgetToString(albumApiUrl);
                if (albumApiPage == null)
                {
                    // The album feed could not be retrieved; skip it instead of passing null to
                    // Regex.Matches, which would throw.
                    continue;
                }

                foreach (Match photoMatch in albumPageImageIdRegex.Matches(albumApiPage))
                {
                    string photoId = photoMatch.Groups[1].Value;

                    if (!idToPhotoInfo.ContainsKey(photoId))
                    {
                        string licenseName = null;
                        PhotoInfo info = new PhotoInfo();
                        info.albumId = albumId;
                        info.albumName = albumName;

                        if (!GetImageInfo(userId, albumId, photoId, out licenseName, out info.mediaUrl, out info.summary))
                        {
                            // No usable info for this photo: do not treat it as a candidate.
                            continue;
                        }
                        idToPhotoInfo.Add(photoId, info);
                    }

                    photoIds.Add(photoId);
                }
            }

            // Sort by the longest common substring between the summary and filename. The right
            // operand is compared against the left, i.e. the ordering is descending by whatever
            // LongestCommonSubstring returns.
            if (photoIds.Count > 1)
            {
                int prefixLength = "File:".Length;
                string titleNoPrefix = page.title.Length >= prefixLength
                    ? page.title.Substring(prefixLength)
                    : page.title;

                photoIds.Sort(new Comparison<string>(delegate(string leftId, string rightId)
                    {
                        return LongestCommonSubstring(idToPhotoInfo[rightId].summary, titleNoPrefix)
                              .CompareTo(LongestCommonSubstring(idToPhotoInfo[leftId].summary, titleNoPrefix));
                    }));
            }

            // Bound the download retries so a permanently failing URL cannot hang the run.
            const int maxFetchAttempts = 3;

            Console.WriteLine("Doing image comparisons...");
            foreach (string photoId in photoIds)
            {
                PhotoInfo info = idToPhotoInfo[photoId];
                string photoCachedFilename = "photo" + photoId;

                // A cached file only counts when it exists and is non-empty.
                bool haveFile = File.Exists(photoCachedFilename) && new FileInfo(photoCachedFilename).Length > 0;
                if (!haveFile)
                {
                    // Insert a "/d/" path segment before the file name of the media URL.
                    // NOTE(review): presumably this requests the full-size download variant - confirm.
                    string mediaUrlFull = Regex.Replace(info.mediaUrl, "^(.*)/([^/]*)$", "${1}/d/${2}");
                    for (int attempt = 0; !haveFile && attempt < maxFetchAttempts; attempt++)
                    {
                        Console.WriteLine("Fetching photo with ID " + photoId + "...");
                        WgetToFile(mediaUrlFull, photoCachedFilename);
                        haveFile = File.Exists(photoCachedFilename) && new FileInfo(photoCachedFilename).Length > 0;
                    }
                }

                if (!haveFile)
                {
                    Console.WriteLine("Could not fetch photo with ID " + photoId + ", skipping it");
                    continue;
                }

                if (FilesAreIdentical(photoCachedFilename, "temp_wiki_image"))
                {
                    UpdateSource(page, userId, info.albumName, photoId);
                    return;
                }

                string failureReason;
                if (UploadOriginalVersion(out failureReason, page, info.mediaUrl, "temp_wiki_image", photoCachedFilename, /*fetchThumbnailVersion*/false, /*allowWikiBigger*/true))
                {
                    UpdateSource(page, userId, info.albumName, photoId);
                    return;
                }
            }

            Console.WriteLine("Image not found");
        }