/// <summary>
/// Searches wallbase.cc with the settings stored in the search provider's
/// configuration and returns the pictures that were found.
/// </summary>
/// <param name="ps">
/// The search request. Supplies the provider configuration, the page to
/// retrieve, the maximum picture count, the banned URL list and the
/// preview-only flag.
/// </param>
/// <returns>
/// The list produced by <c>FetchPictures</c>, truncated to the maximum
/// picture count reported by the search settings.
/// </returns>
public PictureList GetPictures(PictureSearch ps)
{
    // Fall back to default settings when the provider has no stored configuration.
    string providerConfig = ps.SearchProvider.ProviderConfig;
    WallbaseImageSearchSettings settings;
    if (string.IsNullOrEmpty(providerConfig))
    {
        settings = new WallbaseImageSearchSettings();
    }
    else
    {
        settings = WallbaseImageSearchSettings.LoadFromXML(providerConfig);
    }

    // The settings object decides the effective limit
    // (presumably "no maximum" when the requested count is 0 - confirm in GetMaxImageCount).
    var pictureLimit = settings.GetMaxImageCount(ps.MaxPictureCount);
    int picturesPerPage = settings.GetPageSize();

    // Start from the requested page when one is specified.
    int currentPage = ps.PageToRetrieve;

    // Log in first so the requests below carry the session cookies.
    Authenticate(settings.Username, settings.Password);

    var collected = new List<Picture>();
    string urlTemplate = settings.BuildURL();

    bool keepPaging = true;
    while (keepPaging)
    {
        // Only templates containing "{0}" are paged; the placeholder receives the
        // item offset (page index * page size). Templates without it (random
        // searches, per the original author's note) are simply requested again.
        string pageUrl = urlTemplate;
        if (urlTemplate.Contains("{0}"))
        {
            pageUrl = string.Format(urlTemplate, (currentPage * picturesPerPage).ToString());
        }

        string html = string.Empty;
        using (var client = new HttpUtility.CookieAwareWebClient(_cookies))
        {
            try
            {
                html = client.DownloadString(pageUrl);
            }
            catch (Exception ex)
            {
                // A failed download is logged and ends paging via the empty-content check below.
                Log.Logger.Write(string.Format("Failed to download search results from wallbase.cc, error: {0}", ex.ToString()), Log.LoggerLevels.Warnings);
            }
        }

        if (string.IsNullOrEmpty(html))
        {
            break;
        }

        // Count what the page returned before filtering, so that a page made up
        // entirely of banned images does not look like the end of the results.
        var pagePictures = ParsePictures(html);
        int foundOnPage = pagePictures.Count();

        // Banned images are removed here, in the provider, so they do not count
        // towards the maximum.
        bool hasBanList = ps.BannedURLs != null && ps.BannedURLs.Count > 0;
        if (hasBanList)
        {
            pagePictures = pagePictures.Where(p => !ps.BannedURLs.Contains(p.Url)).ToList();
        }

        collected.AddRange(pagePictures);

        // Move on to the next set of images, if any.
        currentPage++;

        // Keep going only while pages still return images, the limit has not
        // been reached, and the caller did not ask for one specific page.
        keepPaging = foundOnPage > 0
            && collected.Count < pictureLimit
            && ps.PageToRetrieve == 0;
    }

    PictureList result = FetchPictures(collected, ps.PreviewOnly);
    result.Pictures = result.Pictures.Take(pictureLimit).ToList();
    return result;
}
/// <summary>
/// Downloads the page at <paramref name="pageUrl"/> and extracts the image URL
/// from the first <c>img</c> tag whose <c>src</c> matches the wallpaper pattern
/// (a value containing "wallpaper" and ending in ".jpg" or ".png").
/// </summary>
/// <param name="pageUrl">Address of the page to download.</param>
/// <returns>
/// The captured <c>src</c> value, or an empty string when the page is empty
/// or contains no matching image tag.
/// </returns>
private string GetDirectPictureUrl(string pageUrl)
{
    string html;
    using (var client = new HttpUtility.CookieAwareWebClient(_cookies))
    {
        html = client.DownloadString(pageUrl);
    }

    // Nothing to parse.
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    Match match = Regex.Match(html, @"<img.*src=""(?<img>.*(wallpaper.*\.(jpg|png)))""");
    Group imageGroup = match.Groups["img"];

    bool hasImage = imageGroup.Success && !string.IsNullOrEmpty(imageGroup.Value);
    return hasImage ? imageGroup.Value : string.Empty;
}
/// <summary>
/// Searches Google Images with the query and filters stored in the search
/// provider's configuration and returns the pictures that were found.
/// </summary>
/// <param name="ps">
/// The search request. Supplies the provider configuration, the page to
/// retrieve, the maximum picture count and the banned URL list.
/// </param>
/// <returns>
/// A <c>PictureList</c> stamped with the current time. It is empty when the
/// configured query is empty, and otherwise holds at most
/// <c>ps.MaxPictureCount</c> pictures (no limit when that value is 0 or less).
/// </returns>
public PictureList GetPictures(PictureSearch ps)
{
    // Offset step used for paging; each request asks for the next 20 images.
    const int resultsPerPage = 20;

    var result = new PictureList() { FetchDate = DateTime.Now };

    // Load provider search settings, falling back to defaults when none could be loaded.
    GoogleImageSearchSettings giss = GoogleImageSearchSettings.LoadFromXML(ps.SearchProvider.ProviderConfig) ?? new GoogleImageSearchSettings();

    // Without a query there is nothing to search for.
    if (string.IsNullOrEmpty(giss.Query))
    {
        return result;
    }

    // Start from the requested page when one is specified.
    var pageIndex = ps.PageToRetrieve;
    var imgFoundCount = 0;

    // A max picture count of 0 (or less) means "no maximum".
    var maxPictureCount = ps.MaxPictureCount > 0 ? ps.MaxPictureCount : int.MaxValue;

    // Build the "tbs" filter string, e.g. isz:ex,iszw:{width},iszh:{height}
    var tbs = "";

    // Exact-size filter, only when both dimensions are configured.
    if (giss.ImageHeight > 0 && giss.ImageWidth > 0)
    {
        tbs += string.Format("isz:ex,iszw:{0},iszh:{1},", giss.ImageWidth, giss.ImageHeight);
    }

    // Color filter. Single() throws when the configured color is not exactly
    // one of the known colors.
    if (!string.IsNullOrEmpty(giss.Color))
    {
        var selectedColor = GoogleImageSearchSettings.GoogleImageColors.GetColors()
            .Where(c => c.Value == giss.Color)
            .Single();
        tbs += GoogleImageSearchSettings.GoogleImageColors.GetColorSearchString(selectedColor) + ",";
    }

    // Prefix the filter with its query-string key and drop the trailing comma.
    if (!string.IsNullOrEmpty(tbs))
    {
        tbs = ("&tbs=" + tbs).TrimEnd(',');
    }

    // Safe search setup (off/strict/moderate) is part of the session and tracked
    // via cookies; it is currently disabled.
    //SetSafeSearchSetting(giss.GoogleSafeSearchOption);

    // Evaluate the ban-list check once instead of on every page.
    bool hasBanList = ps.BannedURLs != null && ps.BannedURLs.Count > 0;

    do
    {
        // Build URL from query, filters and result offset.
        // NOTE(review): giss.Query is inserted as-is - confirm it is already URL-encoded.
        var url = string.Format(baseURL, giss.Query, tbs, (pageIndex * resultsPerPage));

        string response;
        using (var client = new HttpUtility.CookieAwareWebClient(_cookies))
        {
            response = client.DownloadString(url);
        }

        var images = _imagesRegex2.Matches(response);

        // Track the raw number of images on this page for paging purposes
        // (counted before the ban list is applied).
        imgFoundCount = images.Count;

        // Convert the images found into picture entries.
        foreach (Match item in images)
        {
            var purl = item.Groups["imgurlgrp"].Value;
            var referrer = item.Groups["imgrefgrp"].Value;
            var thumbnail = item.Groups["thumbURL"].Value;

            // Use the file name as id, trimmed to 50 characters because some
            // file names are extremely long.
            var id = System.IO.Path.GetFileNameWithoutExtension(purl);
            if (id.Length > 50)
            {
                id = id.Substring(0, 50);
            }

            // Google images come from many sites, so duplicate file names are
            // common; the URL hash makes the id unique.
            // NOTE(review): string.GetHashCode is not guaranteed to be stable across
            // runtimes/processes - confirm ids do not need to survive restarts.
            id = string.Format("{0}_{1}", id, purl.GetHashCode());

            var p = new Picture() { Url = purl, Id = id };

            // Skip banned images while adding, rather than re-filtering the whole
            // accumulated list after every page. Doing this in the provider (instead
            // of the picture manager) keeps banned images from counting towards the max.
            if (hasBanList && ps.BannedURLs.Contains(p.Url))
            {
                continue;
            }

            p.Properties.Add(Picture.StandardProperties.Thumbnail, thumbnail);
            p.Properties.Add(Picture.StandardProperties.Referrer, referrer);

            result.Pictures.Add(p);
        }

        // Move on to the next page of images, if any.
        pageIndex++;

        // Continue while pages still return images, the maximum has not been
        // reached, and the caller did not ask for one specific page.
    } while (imgFoundCount > 0 && result.Pictures.Count < maxPictureCount && ps.PageToRetrieve == 0);

    result.Pictures = result.Pictures.Take(maxPictureCount).ToList();

    return result;
}
/// <summary>
/// Logs in to wallbase.cc with the given credentials using a client that shares
/// the provider's cookie container. Does nothing when either credential is
/// missing, or when the home page already shows the user as logged in.
/// </summary>
/// <param name="username">Wallbase user name; may be null or empty.</param>
/// <param name="password">Wallbase password; may be null or empty.</param>
/// <exception cref="WallbaseAccessDeniedException">
/// Thrown when any step of the login request fails; the original exception is
/// attached as the inner exception.
/// </exception>
private void Authenticate(string username, string password)
{
    // Without both a username and a password there is nothing to authenticate with.
    if (string.IsNullOrEmpty(username) || string.IsNullOrEmpty(password))
    {
        return;
    }

    using (HttpUtility.CookieAwareWebClient client = new HttpUtility.CookieAwareWebClient(_cookies))
    {
        // Check whether the user is already logged in: the home page then
        // contains the user name inside a <span class="name"> element.
        try
        {
            // The username is arbitrary user input, so it must be escaped before being
            // embedded in a pattern; otherwise characters such as '.', '+' or '('
            // would be interpreted as regex syntax.
            var loginReg = @"<span class=""name"".*?" + Regex.Escape(username) + "</span>";
            string homepage = client.DownloadString("http://wallbase.cc");

            if (Regex.Match(homepage, loginReg, RegexOptions.IgnoreCase).Success)
            {
                return;
            }
        }
        catch (Exception ex)
        {
            // Best effort only: a failed check just means we attempt a normal login below.
            Log.Logger.Write(string.Format("There was an error trying to check for a pre-existing wallbase auth, ignoring it. Exception details: {0}", ex.ToString()), Log.LoggerLevels.Errors);
        }

        try
        {
            // The login form carries a cross-site request forgery token and a "ref"
            // value in hidden inputs; both are posted back with the credentials.
            // [^"]* stops at the closing quote of the attribute, so the capture cannot
            // run on into later markup on the same line.
            var csrfRegex = new Regex(@"<input type=""hidden"" name=""csrf"" value=""(?<csrf>[^""]*)"">");
            var refWallbase64Regex = new Regex(@"<input type=""hidden"" name=""ref"" value=""(?<ref>[^""]*)"">");

            string loginPage = client.DownloadString("http://wallbase.cc/user/login");
            Match lpM = csrfRegex.Match(loginPage);
            Match lpWallbaseInbase64 = refWallbase64Regex.Match(loginPage);

            // No token on the page means the login form cannot be submitted.
            if (!lpM.Success)
            {
                return;
            }

            var loginData = new NameValueCollection();
            loginData.Add("csrf", lpM.Groups["csrf"].Value);
            loginData.Add("ref", lpWallbaseInbase64.Groups["ref"].Value);
            loginData.Add("username", username);
            loginData.Add("password", password);

            client.Referrer = "http://wallbase.cc/user/login";
            client.Headers.Add(HttpRequestHeader.ContentType, "application/x-www-form-urlencoded");

            // The response body is not needed; per the original author's note,
            // only the cookies matter.
            client.UploadValues(@"http://wallbase.cc/user/do_login", "POST", loginData);
        }
        catch (Exception ex)
        {
            throw new WallbaseAccessDeniedException("Wallbase authentication failed. Please verify your username and password.", ex);
        }
    }
}
/// <summary>
/// Applies the given safe-search option by requesting Google's preferences page
/// and then the "setprefs" URL, using a client that shares the provider's
/// cookie container.
/// </summary>
/// <param name="gsso">
/// The safe-search option to apply. <c>Off</c> and <c>On</c> map to "off" and
/// "on"; any other value sends an empty <c>safeui</c> parameter.
/// </param>
private void SetSafeSearchSetting(GoogleImageSearchSettings.GoogleSafeSearchOptions gsso)
{
    using (var client = new HttpUtility.CookieAwareWebClient(_cookies))
    {
        // The preferences page contains a hidden "sig" input that must be sent
        // back when saving preferences.
        var response = client.DownloadString("http://images.google.com/preferences?hl=en");

        // Parse out the signature (empty when the input is not found).
        var specialID = Regex.Match(response, "<input type=\"hidden\" name=\"sig\" value=\"(?<sig>.*?)\">").Groups["sig"].Value;

        // Options are "on", "images", "off"; only "on" and "off" are mapped here.
        string safeUIOption;
        switch (gsso)
        {
            case GoogleImageSearchSettings.GoogleSafeSearchOptions.Off:
                safeUIOption = "off";
                break;
            case GoogleImageSearchSettings.GoogleSafeSearchOptions.On:
                safeUIOption = "on";
                break;
            default:
                safeUIOption = "";
                break;
        }

        // Percent-encode the whole signature, not just '=', so that any other
        // reserved character ('+', '/', '&', ...) cannot corrupt the query string.
        string url = string.Format("http://images.google.com/setprefs?sig={0}&hl=en&lr=lang_en&uulo=1&muul=4_20&luul=&safeui={1}&suggon=1&newwindow=0&q=", Uri.EscapeDataString(specialID), safeUIOption);

        // The response body is not used; the request is made for its effect on the session.
        client.DownloadString(url);
    }
}