Ejemplo n.º 1
0
        /// <summary>
        /// Searches Bing Images for <paramref name="command"/>, downloads every result page found in the
        /// search response and collects the image links on those pages that pass the link and image checks.
        /// </summary>
        /// <param name="command">Search text. It is URL-escaped before being appended to the Bing query string
        /// and is also handed to <c>Scrubber.CheckLink</c> to decide which image tags are considered.</param>
        /// <param name="limit">Maximum number of image links to return. Zero or less returns an empty list
        /// without performing the search.</param>
        /// <returns>Links that start with "ht" or "//" and were accepted by <c>CheckForImage</c>;
        /// never more than <paramref name="limit"/> entries.</returns>
        /// <remarks>
        /// A result page that raises a <see cref="WebException"/> is logged and skipped. A failure while
        /// downloading the Bing response itself is not caught here. Malformed fragments and image tags
        /// (no href/src, link too short) are skipped instead of raising index/range exceptions.
        /// </remarks>
        public static List <string> Scrub(string command, int limit = 30)
        {
            List <string> images = new List <string>();

            // Nothing can be collected for a non-positive limit; skip the network round trips entirely.
            if (limit <= 0)
            {
                return(images);
            }

            string binguri = "http://www.bing.com/images/search?q=";

            Console.WriteLine("Searching for URLs...");
            // Escape the query so characters such as '&', '#' or spaces cannot corrupt the request URL.
            string        query  = Uri.EscapeDataString(command ?? string.Empty);
            string        html   = Scrubber.GetPageContent(binguri + query);
            string        start  = ";\"><img";
            string        end    = "class=\"tit\"";
            List <string> source = Scrubber.GetList(html, start, end);

            // Reduce every fragment to the bare page link: drop everything up to and including
            // href= plus its opening quote, then drop the last two characters of the fragment.
            const string  hrefMarker = "href=";
            List <string> pages      = new List <string>();
            foreach (string fragment in source)
            {
                int hrefAt = fragment.IndexOf(hrefMarker, StringComparison.Ordinal);
                if (hrefAt < 0)
                {
                    continue; // no link in this fragment
                }

                int linkStart  = hrefAt + hrefMarker.Length + 1;
                int linkLength = fragment.Length - linkStart - 2;
                if (linkLength <= 0)
                {
                    continue; // fragment is too short to contain a link
                }

                string found = fragment.Substring(linkStart, linkLength);
                Console.WriteLine("URL Found: " + found);
                pages.Add(found);
            }

            foreach (string page in pages)
            {
                try {
                    Console.WriteLine("Scrubbing Page: " + page);
                    string code = Scrubber.GetPageContent(page);
                    Console.WriteLine("Getting Source Images");
                    List <string> sourceImgs = Scrubber.GetList(code, "<img", ">");
                    foreach (string sourceImg in sourceImgs)
                    {
                        // Only consider tags that CheckLink accepts for the original command,
                        // attempting to avoid collecting unrelated photos.
                        if (!Scrubber.CheckLink(sourceImg, command))
                        {
                            continue;
                        }

                        Console.WriteLine("Source of scrub " + sourceImg);

                        // Find where the link starts: prefer an absolute "http" link, otherwise
                        // fall back to the value of the src attribute.
                        int position = sourceImg.IndexOf("http", StringComparison.Ordinal);
                        if (position < 0)
                        {
                            int srcAt = sourceImg.IndexOf("src=", StringComparison.Ordinal);
                            if (srcAt < 0)
                            {
                                continue; // neither an absolute link nor a src attribute
                            }

                            position = srcAt + 5; // skip src= and the opening quote
                            if (position > sourceImg.Length)
                            {
                                continue; // tag ends right after src=
                            }
                        }

                        // The character just before the link is taken as its closing delimiter;
                        // when the link starts the tag there is none, so assume a double quote.
                        char endQuote = position > 0 ? sourceImg[position - 1] : '"';

                        string src = sourceImg.Substring(position);
                        Console.WriteLine("src: " + src);

                        // Cut off everything after the link; keep the whole remainder when the
                        // delimiter does not occur again.
                        int linkEnd = src.IndexOf(endQuote);
                        if (linkEnd >= 0)
                        {
                            src = src.Substring(0, linkEnd);
                        }

                        // Only full-path links (http..., //...) are kept; StartsWith is safe for
                        // links shorter than two characters.
                        bool isFullPath = src.StartsWith("ht", StringComparison.Ordinal) ||
                                          src.StartsWith("//", StringComparison.Ordinal);
                        if (isFullPath && CheckForImage(src))
                        {
                            images.Add(src);
                            Console.WriteLine("Adding Image: " + src);
                        }

                        Console.WriteLine("Image COunt: " + images.Count);
                        if (images.Count >= limit)
                        {
                            return(images);
                        }
                    }
                } catch (WebException ex) {
                    // Best effort: one unreachable page must not abort the whole scrub.
                    Console.WriteLine("Skipping page " + page + ": " + ex.Message);
                }
            }
            return(images);
        } // end func scrub
Ejemplo n.º 2
0
 /// <summary>
 /// Fetches images for this instance by passing the result of <c>GetName()</c> to <c>Scrubber.Search</c>.
 /// </summary>
 /// <param name="limit">Forwarded unchanged as the second argument of <c>Scrubber.Search</c>.</param>
 /// <returns>The list produced by <c>Scrubber.Search</c>.</returns>
 public List <string> GetImages(int limit = 6)
 {
     var searchName = GetName();
     var found      = Scrubber.Search(searchName, limit);

     return(found);
 }