/// <summary>
/// Looks for a Picasa Web album or user URL in the page text and, if one is
/// found, searches that user's albums for the photo matching the wiki image,
/// then calls <c>UpdateSource</c> with the user, album name and photo ID.
/// </summary>
/// <remarks>
/// Candidates are gathered from the Picasa data feed, ordered so that photos
/// whose summary shares the most text with the file title are tried first,
/// and then checked one at a time: first with <c>FilesAreIdentical</c>, then,
/// if that fails, with <c>UploadOriginalVersion</c>. The first candidate that
/// passes either check wins. Photo metadata is kept in <c>idToPhotoInfo</c>,
/// and downloaded photos are kept on disk as <c>photo&lt;id&gt;</c>, so neither
/// is fetched again when already present.
/// </remarks>
/// <param name="page">
/// The wiki file page to process. Its <c>text</c> is scanned for the Picasa
/// URL and its <c>title</c> is assumed to start with "File:".
/// </param>
private static void UpdateSourceField(Page page) {
    const string wikiImageFilename = "temp_wiki_image";
    // Upper bound on download attempts per candidate, so that a permanently
    // failing download cannot hang the whole run.
    const int maxFetchAttempts = 5;

    // Prefer the album URL; fall back to the user URL. Short-circuiting
    // leaves m holding whichever pattern matched.
    Match m;
    if (!(m = picasaAlbumUrlRegex.Match(page.text)).Success &&
        !(m = picasaUserUrlRegex.Match(page.text)).Success) {
        if (!page.text.Contains("waldemar")) {
            // For debugging, catch ones where we couldn't figure out the Picasa URL
            //Debugger.Break();
        }
        return;
    }

    page.DownloadImage(wikiImageFilename);

    string userId = m.Groups[2].Value;

    // Groups[3] yields an unsuccessful, empty group when the pattern has no
    // third group (or it did not participate in the match), so test Success
    // rather than Groups.Count, which also counts group 0. A null album name
    // means "search every album of this user".
    Group albumGroup = m.Groups[3];
    string chosenAlbumName = albumGroup.Success ? albumGroup.Value : null;

    // A common existing error
    if (chosenAlbumName == "Eakins") {
        chosenAlbumName = "EakinsThomas";
    }

    string userApiUrl = "http://picasaweb.google.com/data/feed/api/user/" + userId;
    string userApiPage = WgetToString(userApiUrl);
    if (userApiPage == null) {
        return;
    }

    Console.WriteLine("Searching for image URL. Retrieving info about candidate images...");
    List<string> photoIds = new List<string>();
    foreach (Match albumMatch in userPageAlbumIdRegex.Matches(userApiPage)) {
        string albumName = albumMatch.Groups[2].Value;
        if (chosenAlbumName != null && albumName != chosenAlbumName) {
            continue;
        }

        string albumId = albumMatch.Groups[1].Value;
        string albumApiUrl = "http://picasaweb.google.com/data/feed/api/user/" + userId + "/albumid/" + albumId;
        string albumApiPage = WgetToString(albumApiUrl);
        if (albumApiPage == null) {
            // The user feed fetch above treats null as failure; do the same
            // here and skip the album instead of passing null to the regex.
            continue;
        }

        foreach (Match photoMatch in albumPageImageIdRegex.Matches(albumApiPage)) {
            string photoId = photoMatch.Groups[1].Value;

            // Only query the photo's metadata if it is not cached already.
            if (!idToPhotoInfo.ContainsKey(photoId)) {
                PhotoInfo info = new PhotoInfo();
                info.albumId = albumId;
                info.albumName = albumName;

                // The license name is required by the signature but unused here.
                string licenseName;
                if (!GetImageInfo(userId, albumId, photoId, out licenseName, out info.mediaUrl, out info.summary)) {
                    continue;
                }
                idToPhotoInfo.Add(photoId, info);
            }

            // Register each candidate once, and only after its info is known
            // to exist, so the later lookups in idToPhotoInfo cannot miss.
            if (!photoIds.Contains(photoId)) {
                photoIds.Add(photoId);
            }
        }
    }

    // Sort by the longest common substring between the summary and filename
    // (descending: the right operand is compared against the left).
    string titleNoPrefix = page.title.Substring("File:".Length);
    photoIds.Sort(new Comparison<string>(delegate(string leftId, string rightId) {
        return LongestCommonSubstring(idToPhotoInfo[rightId].summary, titleNoPrefix)
            .CompareTo(LongestCommonSubstring(idToPhotoInfo[leftId].summary, titleNoPrefix));
    }));

    Console.WriteLine("Doing image comparisons...");
    // Inserts a "/d" path segment before the last segment of the media URL.
    // Presumably this selects the full-size download; verify against the
    // Picasa URL scheme.
    Regex lastPathSegmentRegex = new Regex("^(.*)/([^/]*)$");
    foreach (string photoId in photoIds) {
        PhotoInfo info = idToPhotoInfo[photoId];
        string photoCachedFilename = "photo" + photoId;

        // A missing or zero-length file counts as "not downloaded yet".
        bool haveLocalCopy = File.Exists(photoCachedFilename) && new FileInfo(photoCachedFilename).Length > 0;
        for (int attempt = 1; !haveLocalCopy && attempt <= maxFetchAttempts; attempt++) {
            string mediaUrlFull = lastPathSegmentRegex.Replace(info.mediaUrl, "${1}/d/${2}");
            Console.WriteLine("Fetching photo with ID " + photoId + "...");
            WgetToFile(mediaUrlFull, photoCachedFilename);
            haveLocalCopy = File.Exists(photoCachedFilename) && new FileInfo(photoCachedFilename).Length > 0;
        }
        if (!haveLocalCopy) {
            Console.WriteLine("Giving up on photo with ID " + photoId + " after " + maxFetchAttempts + " failed download attempts.");
            continue;
        }

        // Accept the candidate if the files are identical; otherwise fall
        // back to UploadOriginalVersion, which is only invoked when the
        // identity check fails. Its failure reason is not reported because a
        // non-matching candidate is the expected case.
        string failureReason;
        if (FilesAreIdentical(photoCachedFilename, wikiImageFilename) ||
            UploadOriginalVersion(out failureReason, page, info.mediaUrl, wikiImageFilename, photoCachedFilename, /*fetchThumbnailVersion*/false, /*allowWikiBigger*/true)) {
            UpdateSource(page, userId, info.albumName, photoId);
            return;
        }
    }

    Console.WriteLine("Image not found");
}