Beispiel #1
0
        /// <summary>
        /// Requests the Google image-search page for <paramref name="question"/> and collects
        /// the image URLs embedded in the metadata of each result tile.
        /// </summary>
        /// <param name="question">
        /// Search text. Commas are removed and the remainder is URL-escaped before it is
        /// appended to the <c>q=</c> query parameter.
        /// </param>
        /// <param name="userAgent">User-Agent value passed to the NSoup connection.</param>
        /// <returns>
        /// The extracted image URLs, in the order the result tiles are iterated. Never null.
        /// If an exception occurs, it is stored in <c>Error</c> and whatever was collected up to
        /// that point (possibly nothing) is returned.
        /// </returns>
        public List <String> FindImages(String question, String userAgent)
        {
            List<String> imagesList = new List<String>();

            try
            {
                // Escape the search text so characters such as '&', '#', '+' or '%' stay part of
                // the q= value instead of terminating or corrupting the query string.
                String searchTerm = Uri.EscapeDataString(question.Replace(",", ""));
                String googleUrl  = "https://www.google.com/search?tbm=isch&q=" + searchTerm;

                NSoup.Nodes.Document htmlDoc = NSoupClient.Connect(googleUrl).UserAgent(userAgent).Timeout(10 * 1000).Get();
                // Handling correctly auto redirects (htmlDoc is passed by ref and may be replaced).
                checkForRedirectsOnHTMLDocument(ref htmlDoc, userAgent);

                // Everything between "ou&quot;:&quot;" and the next "&quot;" in the tile's markup.
                // Built once: the pattern does not change between iterations.
                Regex originalUrlRegex = new Regex("ou&quot;:&quot;(.*?)&quot;");

                // div with class="y yi" containing div with class="rg_di rg_bx rg_el ivg-i"
                NSoup.Select.Elements div_with_images = htmlDoc.Select("div.y.yi div.rg_di.rg_bx.rg_el.ivg-i");
                foreach (NSoup.Nodes.Element div_with_image in div_with_images)
                {
                    // A tile without a div.rg_meta child is skipped; First() would throw here and
                    // the outer catch would discard every tile that follows.
                    NSoup.Nodes.Element rg_meta_div = div_with_image.Select("div.rg_meta").FirstOrDefault();
                    if (rg_meta_div == null)
                    {
                        continue;
                    }

                    Match match = originalUrlRegex.Match(rg_meta_div.ToString());
                    if (!match.Success)
                    {
                        continue;
                    }

                    String imgURL = match.Groups[1].Value;
                    if (imgURL != String.Empty)
                    {
                        imagesList.Add(imgURL);
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort: remember the failure for the caller and return what was gathered.
                this.Error = ex;
            }

            return imagesList;
        }