/// <summary>
/// Runs the image search described by the form's input controls and adds one thumbnail
/// PictureBox to the form for every result returned by SearchService.SearchImages.
/// </summary>
/// <remarks>
/// All control access is marshalled through Invoke, so this method is presumably meant to be
/// started on a worker thread rather than on the UI thread (confirm against the caller).
/// It blocks until every thumbnail worker has finished, then re-enables the search buttons.
/// </remarks>
private void GetPictures()
{
    try
    {
        string query = string.Empty;
        int startPosition = 0;
        int resultsRequested = 0;
        bool filterSimilarResults = false;
        SafeSearchFiltering safeSearch = SafeSearchFiltering.Moderate;

        // Read every input control in a single marshalled call. The filter check box is read
        // here as well, so that no control is touched from the calling thread.
        txtQuery.Invoke(
            new MethodInvoker(
                delegate
                {
                    // Collapse each run of whitespace into a single '+' for the query string.
                    query = Regex.Replace(txtQuery.Text, @"\s{1,}", "+");
                    startPosition = Decimal.ToInt32(nudStartPosition.Value);
                    resultsRequested = Decimal.ToInt32(nudNumOfResults.Value);
                    filterSimilarResults = chkFilter.Checked;
                    safeSearch = (SafeSearchFiltering)Enum.Parse(typeof(SafeSearchFiltering), (string)cmbSafeSearch.SelectedItem);
                }));

        SearchResponse response = SearchService.SearchImages(query, startPosition, resultsRequested, filterSimilarResults, safeSearch);
        if (response.Results.Length == 0)
        {
            MessageBox.Show("No results available");
        }

        // Note: This is not a good implementation - it creates one thread per result, which becomes
        // a problem with a large number of requested results. Using a pool (such as the ThreadPool)
        // would be better, but it would need some waiting mechanism.
        // After all, it's just a sample application to show how to work with the API.
        Thread[] threads = new Thread[response.Results.Length];
        for (int i = 0; i < response.Results.Length; i++)
        {
            // Copy the loop counter so that each anonymous method captures its own value.
            int index = i;
            threads[index] = new Thread(
                new ThreadStart(
                    delegate
                    {
                        try
                        {
                            Image img = getImage(response.Results[index].ThumbnailUrl);
                            Invoke(
                                new MethodInvoker(
                                    delegate
                                    {
                                        PictureBox pic = new PictureBox();
                                        pic.BorderStyle = BorderStyle.Fixed3D;
                                        pic.Size = imageSize;
                                        // Lay the thumbnails out in a grid of four columns.
                                        pic.Location = new Point(imageSize.Width * (index % 4), (index / 4) * imageSize.Height + startHeight);
                                        pic.SizeMode = PictureBoxSizeMode.CenterImage;
                                        pic.Image = img;
                                        pics.Add(pic);
                                        pic.Tag = response.Results[index];
                                        pic.DoubleClick += new EventHandler(pic_DoubleClick);

                                        // Adjust for scrolled location
                                        if (AutoScrollPosition.Y != 0)
                                        {
                                            pic.Location = new Point(pic.Location.X, pic.Location.Y + AutoScrollPosition.Y);
                                        }

                                        Controls.Add(pic);
                                    }));
                        }
                        catch (Exception workerEx)
                        {
                            // An exception escaping a thread's entry point terminates the whole
                            // process, so a single failed thumbnail is logged and skipped instead.
                            System.Diagnostics.Trace.WriteLine("Loading thumbnail " + index + " failed: " + workerEx.Message);
                        }
                    }));
            threads[index].IsBackground = true;
            threads[index].Start();
        }

        // Wait for all the thumbnail workers before reporting completion.
        foreach (Thread thread in threads)
        {
            thread.Join();
        }

        MessageBox.Show("Done!");
    }
    catch (Exception ex)
    {
        Invoke(
            new MethodInvoker(
                delegate
                {
                    MessageBox.Show(
                        string.Format("An exception occurred while running the query: {0}{1}{2}", ex.Message, Environment.NewLine, ex.StackTrace),
                        "Query Aborted!",
                        MessageBoxButtons.OK,
                        MessageBoxIcon.Error);
                }));
    }
    finally
    {
        // Re-enable the buttons whether the search succeeded or failed.
        Invoke(
            new MethodInvoker(
                delegate
                {
                    btnSearch.Enabled = true;
                    btnLoadRegex.Enabled = true;
                }));
    }
}
/// <summary>
/// Runs the given query against Google Image Search and returns a SearchResponse object with details
/// for each returned image.
/// </summary>
/// <param name="query">The query to be sent. It is placed into the request URL without any escaping.</param>
/// <param name="startPosition">The index of the first item to be retrieved (must not be negative).</param>
/// <param name="resultsRequested">The number of results to be retrieved (must be between 1 and (MAX_RESULTS - startPosition)).</param>
/// <param name="filterSimilarResults">Set to 'true' if you want Google to automatically omit similar entries. Set to 'false' if you want to retrieve every matching image.</param>
/// <param name="safeSearch">Indicates what level of SafeSearch to use.</param>
/// <returns>A SearchResponse object with details for each returned image.</returns>
/// <exception cref="ArgumentOutOfRangeException">resultsRequested is smaller than 1, startPosition is negative, or their sum exceeds MAX_RESULTS.</exception>
/// <exception cref="FormatException">A numeric value, or the total number of results, could not be extracted from a result page.</exception>
/// <exception cref="Exception">The image and data expressions matched a different number of entries in a result page.</exception>
public static SearchResponse SearchImages(string query, int startPosition, int resultsRequested, bool filterSimilarResults, SafeSearchFiltering safeSearch)
{
    // Check preconditions
    if (resultsRequested < 1)
    {
        throw new ArgumentOutOfRangeException("resultsRequested", "Value must be positive");
    }
    else if (startPosition < 0)
    {
        throw new ArgumentOutOfRangeException("startPosition", "Value must be non-negative");
    }
    else if (resultsRequested + startPosition > MAX_RESULTS)
    {
        throw new ArgumentOutOfRangeException("resultsRequested", "Sorry, Google does not serve more than 1000 results for any query");
    }

    // The enum name becomes part of a URL, so lower-case it independently of the current culture.
    string safeSearchStr = safeSearch.ToString().ToLowerInvariant();
    SearchResponse response = new SearchResponse();
    ArrayList results = new ArrayList();

    // Here's the parsing of the images' details. If the html format changes, that's most probably
    // where we will have to update the code.
    // There are two types of regions in the HTML we have to parse in order to gather all the
    // information about the images:
    // 1. The image's url, thumbnail url and thumbnail width&height are extracted by imagesRegex.
    // 2. The image's width&height&size are extracted by dataRegex.
    // totalResultsRegex extracts the index of the last result on the page and the total number of
    // results available. The expressions do not change between pages, so they are built once.
    Regex imagesRegex = new Regex(@"(\x3Ca\s+href=/imgres\x3Fimgurl=)(?<imgurl>http[^&>]*)([>&]{1})([^>]*)(>{1})(<img\ssrc\x3D)(""{0,1})(?<images>/images[^""\s>]*)([\s])+(width=)(?<width>[0-9,]*)\s+(height=)(?<height>[0-9,]*)");
    Regex dataRegex = new Regex(@"([^>]*)(>)\s{0,1}(<br>){0,1}\s{0,1}(?<width>[0-9,]*)\s+x\s+(?<height>[0-9,]*)\s+pixels\s+-\s+(?<size>[0-9,]*)(k)");
    Regex totalResultsRegex = new Regex(@"(?<lastResult>[0-9,]*)(\s*</b>\s*)(of)(\s)+(about){0,1}(\s*<b>\s*)(?<totalResultsAvailable>[0-9,]*)");

    // Since Google returns 20 results at a time, we have to run the query over and over again (each
    // time with a different starting position) until we get the requested number of results.
    for (int i = 0; i < resultsRequested; i += RESULTS_PER_QUERY)
    {
        string requestUri = string.Format("http://images.google.com/images?q={0}&start={1}&filter={2}&safe={3}",
            query,
            (startPosition + i).ToString(),
            filterSimilarResults ? "1" : "0",
            safeSearchStr);

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(requestUri);
        string resultPage = string.Empty;
        using (HttpWebResponse httpWebResponse = (HttpWebResponse)request.GetResponse())
        {
            using (Stream responseStream = httpWebResponse.GetResponseStream())
            {
                using (StreamReader reader = new StreamReader(responseStream))
                {
                    resultPage = reader.ReadToEnd();
                }
            }
        }

        MatchCollection imagesMatches = imagesRegex.Matches(resultPage);
        MatchCollection dataMatches = dataRegex.Matches(resultPage);
        if ((imagesMatches == null) || (imagesMatches.Count == 0) || (dataMatches == null) || (dataMatches.Count == 0))
        {
            Trace.WriteLine("Parsing of query " + query + " failed - collections count mismatch");
            break;
        }

        // The two MatchCollections should include an entry for each returned image. Therefore,
        // if they don't have the same number of items, then the parsing has failed and we
        // stop the query (this is just a provision, in reality using these expressions
        // for many thousands of queries it never broke here :-)
        if (imagesMatches.Count != dataMatches.Count)
        {
            throw new Exception("Parsing of the response failed for url: " + requestUri);
        }

        // Build a SearchResult object for each image
        for (int j = 0; j < imagesMatches.Count && (i + j) < resultsRequested; j++)
        {
            Match imageMatch = imagesMatches[j];
            Match dataMatch = dataMatches[j];
            SearchResult result = new SearchResult();
            result.ImageUrl = imageMatch.Groups["imgurl"].Value;
            result.ThumbnailUrl = imageMatch.Groups["images"].Value;

            // The numeric groups are captured with [0-9,]*, so thousands separators have to be
            // removed before int.Parse can accept the value.
            result.ThumbnailWidth = int.Parse(imageMatch.Groups["width"].Value.Replace(",", ""));
            result.ThumbnailHeight = int.Parse(imageMatch.Groups["height"].Value.Replace(",", ""));
            result.ImageWidth = int.Parse(dataMatch.Groups["width"].Value.Replace(",", ""));
            result.ImageHeight = int.Parse(dataMatch.Groups["height"].Value.Replace(",", ""));

            // Since the value in the HTML is in kb, this is only an approximation to the number of bytes
            result.ImageSize = int.Parse(dataMatch.Groups["size"].Value.Replace(",", "")) * 1000;
            results.Add(result);
        }

        // Extract the total number of results available and make sure we didn't reach the end of the results
        Match totalResultsMatch = totalResultsRegex.Match(resultPage);
        if (!totalResultsMatch.Success)
        {
            // Parsing the empty groups of a failed match would raise a FormatException anyway;
            // raise the same type with a message that identifies the failing page.
            throw new FormatException("Could not find the total number of results in the response for url: " + requestUri);
        }

        string totalResultsRaw = totalResultsMatch.Groups["totalResultsAvailable"].Value;
        response.TotalResultsAvailable = int.Parse(totalResultsRaw.Replace("\"", "").Replace(",", ""));
        int lastResult = int.Parse(totalResultsMatch.Groups["lastResult"].Value.Replace("\"", "").Replace(",", ""));
        if (lastResult >= response.TotalResultsAvailable)
        {
            break;
        }
    }

    response.Results = (SearchResult[])results.ToArray(typeof(SearchResult));
    return response;
}
/// <summary>
/// Runs the given query against Google Image Search and returns a SearchResponse object with details
/// for each returned image.
/// </summary>
/// <param name="query">The query to be sent. It is placed into the request URL without any escaping.</param>
/// <param name="startPosition">The index of the first item to be retrieved (must not be negative).</param>
/// <param name="resultsRequested">The number of results to be retrieved (must be between 1 and (MAX_RESULTS - startPosition)).</param>
/// <param name="filterSimilarResults">Set to 'true' if you want Google to automatically omit similar entries. Set to 'false' if you want to retrieve every matching image.</param>
/// <param name="safeSearch">Indicates what level of SafeSearch to use.</param>
/// <returns>A SearchResponse object with details for each returned image.</returns>
/// <exception cref="ArgumentOutOfRangeException">resultsRequested is smaller than 1, startPosition is negative, or their sum exceeds MAX_RESULTS.</exception>
/// <exception cref="FormatException">A numeric value, or the total number of results, could not be extracted from a result page.</exception>
/// <exception cref="Exception">The image and data expressions matched a different number of entries in a result page.</exception>
public static SearchResponse SearchImages(string query, int startPosition, int resultsRequested, bool filterSimilarResults, SafeSearchFiltering safeSearch)
{
    // Check preconditions
    if (resultsRequested < 1)
    {
        throw new ArgumentOutOfRangeException("resultsRequested", "Value must be positive");
    }
    else if (startPosition < 0)
    {
        throw new ArgumentOutOfRangeException("startPosition", "Value must be non-negative");
    }
    else if (resultsRequested + startPosition > MAX_RESULTS)
    {
        throw new ArgumentOutOfRangeException("resultsRequested", "Sorry, Google does not serve more than 1000 results for any query");
    }

    // The enum name becomes part of a URL, so lower-case it independently of the current culture.
    string safeSearchStr = safeSearch.ToString().ToLowerInvariant();
    SearchResponse response = new SearchResponse();
    ArrayList results = new ArrayList();

    // Here's the parsing of the images' details. If the html format changes, that's most probably
    // where we will have to update the code.
    // There are two types of regions in the HTML we have to parse in order to gather all the
    // information about the images:
    // 1. The image's url, thumbnail code and thumbnail width&height are extracted by imagesRegex.
    // 2. The image's width&height&size are extracted by dataRegex.
    // totalResultsRegex extracts the index of the last result on the page and the total number of
    // results available. The expressions do not change between pages, so they are built once.
    Regex imagesRegex = new Regex(imagesRegexStr);
    Regex dataRegex = new Regex(dataRegexStr);
    Regex totalResultsRegex = new Regex(totalResultsRegexStr);

    // Since Google returns 20 results at a time, we have to run the query over and over again (each
    // time with a different starting position) until we get the requested number of results.
    // Note: During changes to the Google Image search from around January 2007 they
    // stopped returning a fixed number of 20 images for each query. Instead, they try to return
    // the amount of images that would fit the browser window. It seems that the "&ndsp" parameter
    // indicates the number of images per query, so I'm using it manually.
    // This whole mechanism doesn't seem to work very accurately - in some cases I receive more images
    // when my browser window is smaller rather than larger. Also, they seem to not always retrieve the
    // requested number of results (usually I get 21 results when querying programmatically).
    // I'm leaving the flag on anyway, in case they decide to start respecting it correctly...
    for (int i = 0; i < resultsRequested; i += RESULTS_PER_QUERY)
    {
        string requestUri = string.Format("http://images.google.com/images?q={0}&ndsp={1}&start={2}&filter={3}&safe={4}",
            query,
            RESULTS_PER_QUERY.ToString(),
            (startPosition + i).ToString(),
            filterSimilarResults ? "1" : "0",
            safeSearchStr);

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(requestUri);
        string resultPage = string.Empty;
        using (HttpWebResponse httpWebResponse = (HttpWebResponse)request.GetResponse())
        {
            using (Stream responseStream = httpWebResponse.GetResponseStream())
            {
                using (StreamReader reader = new StreamReader(responseStream))
                {
                    resultPage = reader.ReadToEnd();
                }
            }
        }

        MatchCollection imagesMatches = imagesRegex.Matches(resultPage);
        MatchCollection dataMatches = dataRegex.Matches(resultPage);
        if ((imagesMatches == null) || (imagesMatches.Count == 0) || (dataMatches == null) || (dataMatches.Count == 0))
        {
            Trace.WriteLine("Parsing of query " + query + " failed - collections count mismatch");
            break;
        }

        // The two MatchCollections should include an entry for each returned image. Therefore,
        // if they don't have the same number of items, then the parsing has failed and we
        // stop the query (this is just a provision, in reality using these expressions
        // for many thousands of queries it never broke here :-)
        if (imagesMatches.Count != dataMatches.Count)
        {
            throw new Exception("Parsing of the response failed for url: " + requestUri);
        }

        // Build a SearchResult object for each image
        for (int j = 0; j < imagesMatches.Count && (i + j) < resultsRequested; j++)
        {
            Match imageMatch = imagesMatches[j];
            Match dataMatch = dataMatches[j];
            SearchResult result = new SearchResult();
            result.ImageUrl = imageMatch.Groups["imgurl"].Value;
            result.ThumbnailCode = imageMatch.Groups["code"].Value;

            // Thousands separators are removed before parsing, the same way the totals below are
            // handled, because int.Parse rejects them by default.
            result.ThumbnailWidth = int.Parse(imageMatch.Groups["width"].Value.Replace(",", ""));
            result.ThumbnailHeight = int.Parse(imageMatch.Groups["height"].Value.Replace(",", ""));
            result.ImageWidth = int.Parse(dataMatch.Groups["width"].Value.Replace(",", ""));
            result.ImageHeight = int.Parse(dataMatch.Groups["height"].Value.Replace(",", ""));

            // Since the value in the HTML is in kb, this is only an approximation to the number of bytes
            result.ImageSize = int.Parse(dataMatch.Groups["size"].Value.Replace(",", "")) * 1000;
            results.Add(result);
        }

        // Extract the total number of results available and make sure we didn't reach the end of the results
        Match totalResultsMatch = totalResultsRegex.Match(resultPage);
        if (!totalResultsMatch.Success)
        {
            // Parsing the empty groups of a failed match would raise a FormatException anyway;
            // raise the same type with a message that identifies the failing page.
            throw new FormatException("Could not find the total number of results in the response for url: " + requestUri);
        }

        string totalResultsRaw = totalResultsMatch.Groups["totalResultsAvailable"].Value;
        response.TotalResultsAvailable = int.Parse(totalResultsRaw.Replace("\"", "").Replace(",", ""));
        int lastResult = int.Parse(totalResultsMatch.Groups["lastResult"].Value.Replace("\"", "").Replace(",", ""));
        if (lastResult >= response.TotalResultsAvailable)
        {
            break;
        }
    }

    response.Results = (SearchResult[])results.ToArray(typeof(SearchResult));
    return response;
}
/// <summary>
/// Runs the given query against Google Image Search and returns a SearchResponse object with details
/// for each returned image.
/// </summary>
/// <param name="query">The query to be sent. It is placed into the request URL without any escaping.</param>
/// <param name="startPosition">The index of the first item to be retrieved (must not be negative).</param>
/// <param name="resultsRequested">The number of results to be retrieved (must be between 1 and (MAX_RESULTS - startPosition)).</param>
/// <param name="filterSimilarResults">Set to 'true' if you want Google to automatically omit similar entries. Set to 'false' if you want to retrieve every matching image.</param>
/// <param name="safeSearch">Indicates what level of SafeSearch to use.</param>
/// <returns>A SearchResponse object with details for each returned image.</returns>
/// <exception cref="ArgumentOutOfRangeException">resultsRequested is smaller than 1, startPosition is negative, or their sum exceeds MAX_RESULTS.</exception>
/// <exception cref="FormatException">A numeric value, or the total number of results, could not be extracted from a result page.</exception>
/// <exception cref="Exception">The image and data expressions matched a different number of entries in a result page.</exception>
public static SearchResponse SearchImages(string query, int startPosition, int resultsRequested, bool filterSimilarResults, SafeSearchFiltering safeSearch)
{
    // Check preconditions
    if (resultsRequested < 1)
    {
        throw new ArgumentOutOfRangeException("resultsRequested", "Value must be positive");
    }
    else if (startPosition < 0)
    {
        throw new ArgumentOutOfRangeException("startPosition", "Value must be non-negative");
    }
    else if (resultsRequested + startPosition > MAX_RESULTS)
    {
        throw new ArgumentOutOfRangeException("resultsRequested", "Sorry, Google does not serve more than 1000 results for any query");
    }

    // The enum name becomes part of a URL, so lower-case it independently of the current culture.
    string safeSearchStr = safeSearch.ToString().ToLowerInvariant();
    SearchResponse response = new SearchResponse();
    ArrayList results = new ArrayList();

    // Here's the parsing of the images' details. If the html format changes, that's most probably
    // where we will have to update the code (the patterns are held in the *RegexStr members).
    // There are two types of regions in the HTML we have to parse in order to gather all the
    // information about the images:
    // 1. The image's url, thumbnail url and thumbnail width&height are extracted by imagesRegex.
    // 2. The image's width&height&size are extracted by dataRegex.
    // totalResultsRegex extracts the index of the last result on the page and the total number of
    // results available. The expressions do not change between pages, so they are built once.
    Regex imagesRegex = new Regex(imagesRegexStr);
    Regex dataRegex = new Regex(dataRegexStr);
    Regex totalResultsRegex = new Regex(totalResultsRegexStr);

    // Since Google returns 20 results at a time, we have to run the query over and over again (each
    // time with a different starting position) until we get the requested number of results.
    for (int i = 0; i < resultsRequested; i += RESULTS_PER_QUERY)
    {
        string requestUri = string.Format("http://images.google.com/images?q={0}&start={1}&filter={2}&safe={3}",
            query,
            (startPosition + i).ToString(),
            filterSimilarResults ? "1" : "0",
            safeSearchStr);

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(requestUri);
        string resultPage = string.Empty;
        using (HttpWebResponse httpWebResponse = (HttpWebResponse)request.GetResponse())
        {
            using (Stream responseStream = httpWebResponse.GetResponseStream())
            {
                using (StreamReader reader = new StreamReader(responseStream))
                {
                    resultPage = reader.ReadToEnd();
                }
            }
        }

        MatchCollection imagesMatches = imagesRegex.Matches(resultPage);
        MatchCollection dataMatches = dataRegex.Matches(resultPage);
        if ((imagesMatches == null) || (imagesMatches.Count == 0) || (dataMatches == null) || (dataMatches.Count == 0))
        {
            Trace.WriteLine("Parsing of query " + query + " failed - collections count mismatch");
            break;
        }

        // The two MatchCollections should include an entry for each returned image. Therefore,
        // if they don't have the same number of items, then the parsing has failed and we
        // stop the query (this is just a provision, in reality using these expressions
        // for many thousands of queries it never broke here :-)
        if (imagesMatches.Count != dataMatches.Count)
        {
            throw new Exception("Parsing of the response failed for url: " + requestUri);
        }

        // Build a SearchResult object for each image
        for (int j = 0; j < imagesMatches.Count && (i + j) < resultsRequested; j++)
        {
            Match imageMatch = imagesMatches[j];
            Match dataMatch = dataMatches[j];
            SearchResult result = new SearchResult();
            result.ImageUrl = imageMatch.Groups["imgurl"].Value;
            result.ThumbnailUrl = imageMatch.Groups["images"].Value;

            // Thousands separators are removed before parsing, the same way the totals below are
            // handled, because int.Parse rejects them by default.
            result.ThumbnailWidth = int.Parse(imageMatch.Groups["width"].Value.Replace(",", ""));
            result.ThumbnailHeight = int.Parse(imageMatch.Groups["height"].Value.Replace(",", ""));
            result.ImageWidth = int.Parse(dataMatch.Groups["width"].Value.Replace(",", ""));
            result.ImageHeight = int.Parse(dataMatch.Groups["height"].Value.Replace(",", ""));

            // Since the value in the HTML is in kb, this is only an approximation to the number of bytes
            result.ImageSize = int.Parse(dataMatch.Groups["size"].Value.Replace(",", "")) * 1000;
            results.Add(result);
        }

        // Extract the total number of results available and make sure we didn't reach the end of the results
        Match totalResultsMatch = totalResultsRegex.Match(resultPage);
        if (!totalResultsMatch.Success)
        {
            // Parsing the empty groups of a failed match would raise a FormatException anyway;
            // raise the same type with a message that identifies the failing page.
            throw new FormatException("Could not find the total number of results in the response for url: " + requestUri);
        }

        string totalResultsRaw = totalResultsMatch.Groups["totalResultsAvailable"].Value;
        response.TotalResultsAvailable = int.Parse(totalResultsRaw.Replace("\"", "").Replace(",", ""));
        int lastResult = int.Parse(totalResultsMatch.Groups["lastResult"].Value.Replace("\"", "").Replace(",", ""));
        if (lastResult >= response.TotalResultsAvailable)
        {
            break;
        }
    }

    response.Results = (SearchResult[])results.ToArray(typeof(SearchResult));
    return response;
}