Exemple #1
0
        /// <summary>
        /// Program entry point. Builds three hard-coded example queries, then calls
        /// <c>SMTPTools.TrySMTP</c>, <c>LoadExploredFromAppData</c> and <c>RunMainLoop</c> in that order.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Example query: apartments with 1 bedroom and with pictures.
            Query q1 = new Query
            {
                query = "http://chicago.craigslist.org/search/chc/apa?maxAsk=1500&bedrooms=1&hasPic=1",
                // Latitude/longitude rectangle that bounds the search area.
                topLatN = 41.920114,
                bottomLatN = 41.8948,
                rightLonE = -87.6000,
                leftLonE = -87.6446,
                // Recipient of the e-mail notifications for this search.
                emailRecipient = "*****@*****.**",
                // Address used to deliver a text message (e-mail-to-SMS feature offered by T-Mobile to its users).
                textRecipient = "*****@*****.**",
                // Unique ID of this example query.
                Id = new Guid("1d9beafd-4290-465f-adc8-2a2d83b43f33")
            };

            // Second query, for another recipient; created disabled.
            Query q2 = new Query
            {
                query = "http://chicago.craigslist.org/search/apa?maxAsk=2800&bedrooms=2&hasPic=1",
                topLatN = 41.9125,
                bottomLatN = 41.8948,
                rightLonE = -87.6000,
                leftLonE = -87.6446,
                emailRecipient = "*****@*****.**",
                Id = new Guid("abb15e58-49ce-4df3-aef9-4218a636cc2d"),
                enabled = false
            };

            // Third query, for another recipient; created disabled.
            Query q3 = new Query
            {
                query = "http://chicago.craigslist.org/search/chc/roo?maxAsk=800&hasPic=1",
                topLatN = 41.926331,
                bottomLatN = 41.891632,
                rightLonE = -87.6000,
                leftLonE = -87.719356,
                emailRecipient = "*****@*****.**",
                Id = new Guid("170E2366-0739-48F7-A314-92F79B48E1E6".ToLower()),
                enabled = false
            };

            List<Query> queries = new List<Query>();
            queries.Add(q1);
            queries.Add(q2);
            queries.Add(q3);

            SMTPTools.TrySMTP();
            LoadExploredFromAppData(queries);
            RunMainLoop(queries);
        }
Exemple #2
0
        /// <summary>
        /// Executes one query on craigslist.
        /// Downloads the search page at <c>q.query</c>, extracts post links with <c>regexPost</c> and
        /// processes every link that is not yet in <c>q.exploredPosts</c>: the post is downloaded,
        /// located, compared against previously recorded picture hashes and, when it qualifies,
        /// reported by e-mail / text message.
        /// Scanning stops at the first link that was already explored by a previous query run.
        /// </summary>
        /// <param name="q">The query to run. Its explored posts, explored image hashes and counters are updated in place.</param>
        static void ExecuteQuery(Query q)
        {
            // Post links matched on the search page are appended to this host.
            const string siteRoot = "http://chicago.craigslist.org";

            LogFile.Write("Quering:" + q.query);

            // In case DownloadPage returns null, treat it as an empty page: no link is matched
            // and the summary line at the end is still printed.
            string searchResults = DownloadPage(q.query) ?? "";

            int countNew = 0;
            int countTotal = 0;
            HashSet<string> exploredOnPage = new HashSet<string>();

            for (Match m = regexPost.Match(searchResults); m.Success; m = m.NextMatch())
            {
                string postPath = m.Groups[0].Value;
                string postUrl = siteRoot + postPath;
                LogFile.Write("Found Link: " + postUrl);

                // The same link can be matched several times on one result page; handle it once.
                if (!exploredOnPage.Add(postPath))
                {
                    continue;
                }

                if (q.exploredPosts.Contains(postPath))
                {
                    // Stop at the first already-known post; the remaining links are not examined.
                    LogFile.Write("Link already explored during a previous query");
                    break;
                }

                q.newLinks++;
                countNew++;
                q.exploredPosts.Add(postPath);

                Console.WriteLine(countNew + ") Found new URL for " + q.emailRecipient + ", parsing for location...");
                ProcessNewPost(q, postUrl);
                countTotal++;
            }

            Console.WriteLine(DateTime.Now.ToString() + " Total Links: " + countTotal + " New:" + countNew);
        }

        /// <summary>
        /// Downloads a single post, checks that its coordinates lie strictly inside the query rectangle,
        /// checks how many of its pictures were already recorded, and reports the post when at most
        /// half of its pictures are already known.
        /// </summary>
        /// <param name="q">The query the post was found for; its counters are updated.</param>
        /// <param name="postUrl">Absolute URL of the post.</param>
        private static void ProcessNewPost(Query q, string postUrl)
        {
            // Regex.Match throws on null input, so a null page is handled like a page without coordinates.
            string postPage = DownloadPage(postUrl) ?? "";

            Match coordMatch = regexCoord.Match(postPage);
            if (!coordMatch.Success)
            {
                LogFile.Write("No coordinates found");
                Console.WriteLine("No coordinates found");
                q.noLocation++;
                return;
            }

            // Coordinates embedded in the page use '.' as decimal separator, so parse them
            // independently of the machine's regional settings.
            double lat = Convert.ToDouble(coordMatch.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture);
            double lon = Convert.ToDouble(coordMatch.Groups[2].Value, System.Globalization.CultureInfo.InvariantCulture);

            bool insideArea = lat > q.bottomLatN
                              && lat < q.topLatN
                              && lon > q.leftLonE
                              && lon < q.rightLonE;
            if (!insideArea)
            {
                LogFile.Write("Located outside of desired area");
                Console.WriteLine("Located outside of desired area");
                q.outsideArea++;
                return;
            }

            int totalPics;
            int matchedPics;
            CountKnownImages(q, postPage, out totalPics, out matchedPics);

            // More than 50% of the pictures were already seen => probably a re-post.
            // Written as an integer comparison: it avoids the truncating integer division and is
            // well defined (not a duplicate) when the post has no picture at all.
            if (matchedPics * 2 > totalPics)
            {
                LogFile.Write("Found identical images, probably duplicated post");
                Console.WriteLine("Math has identical images, probably a duplicated post: " + postUrl);
                q.duplicateImage++;
                return;
            }

            if (q.enabled)
            {
                SendMatchNotifications(q, postUrl, postPage);
            }

            LogFile.Write("There is a new match:" + postUrl);
            Console.WriteLine("There is a new match:");
            Console.WriteLine(postUrl);
            q.validatedAndSent++;
        }

        /// <summary>
        /// Downloads every distinct small picture referenced by a post, hashes it and counts how many
        /// hashes are already present in <c>q.exploredImages</c>. Unknown hashes are recorded in the
        /// query and persisted in the application settings.
        /// </summary>
        /// <param name="q">The query whose explored image hashes are read and extended.</param>
        /// <param name="postPage">HTML of the post.</param>
        /// <param name="totalPics">Number of distinct pictures found in the post.</param>
        /// <param name="matchedPics">Number of those pictures whose hash was already recorded.</param>
        private static void CountKnownImages(Query q, string postPage, out int totalPics, out int matchedPics)
        {
            // Pictures are reduced to imgSize x imgSize pixels before hashing.
            const int imgSize = 16;

            totalPics = 0;
            matchedPics = 0;
            HashSet<string> imagesOfThisPost = new HashSet<string>();

            using (WebClient client = new WebClient())
            {
                for (Match mi = regexSmallPicture.Match(postPage); mi.Success; mi = mi.NextMatch())
                {
                    string imageUrl = mi.Groups[0].Value;
                    string localFilename = imageUrl.Replace("http://images.craigslist.org/", "");
                    if (!imagesOfThisPost.Add(localFilename))
                    {
                        continue;
                    }

                    totalPics++;
                    client.DownloadFile(imageUrl, localFilename);
                    string hash = ComputeImageHash(localFilename, imgSize);

                    if (IsKnownImageHash(q, hash))
                    {
                        LogFile.Write("Picture match previous post:" + localFilename);
                        matchedPics++;
                    }
                    else
                    {
                        q.exploredImages.Add(hash);
                        CraigslistWatcher2.Settings.Default.exploredImages += hash + ",";
                        CraigslistWatcher2.Settings.Default.Save();
                    }
                    //File.Delete(localFilename);
                }
            }
        }

        /// <summary>
        /// Computes an average-brightness hash of an image file: each pixel contributes one bit that is
        /// set when the pixel's grey level is above the mean grey level of the whole image.
        /// </summary>
        /// <param name="imageFile">Path of the downloaded image.</param>
        /// <param name="imgSize">Edge length requested from <c>FromFile</c>; the hash holds imgSize * imgSize bits.</param>
        /// <returns>The hash as an upper-case hexadecimal string.</returns>
        private static string ComputeImageHash(string imageFile, int imgSize)
        {
            // NOTE(review): assumes FromFile returns an image of exactly imgSize x imgSize pixels - confirm.
            byte[] grey = new byte[imgSize * imgSize];
            int count = 0;
            int sum = 0;

            // Dispose the bitmap as soon as the pixels are read so its GDI resources are released.
            using (Bitmap img = (Bitmap)FromFile(imageFile, new Size(imgSize, imgSize)))
            {
                for (int i = 0; i < img.Width; i++)
                {
                    for (int j = 0; j < img.Height; j++)
                    {
                        Color c = img.GetPixel(i, j);
                        grey[count] = (byte)((c.R + c.G + c.B) / 3);
                        sum += grey[count];
                        count++;
                    }
                }
            }

            int avg = sum / count;

            bool[] aboveAverage = Array.ConvertAll(grey, g => g > avg);
            BitArray bits = new BitArray(aboveAverage);
            byte[] packed = new byte[bits.Length / 8];
            bits.CopyTo(packed, 0);

            return string.Join(string.Empty, Array.ConvertAll(packed, b => b.ToString("X2")));
        }

        /// <summary>
        /// Tells whether a hash is already recorded in <c>q.exploredImages</c>.
        /// Only an exact (zero-distance) match counts; hexadecimal case is ignored. Comparing the
        /// hexadecimal strings directly keeps the check safe for stored entries of a different length,
        /// such as an empty entry.
        /// </summary>
        /// <param name="q">The query whose recorded hashes are searched.</param>
        /// <param name="hash">Hexadecimal hash to look for.</param>
        /// <returns><c>true</c> when an identical hash is already recorded.</returns>
        private static bool IsKnownImageHash(Query q, string hash)
        {
            foreach (String knownHash in q.exploredImages)
            {
                if (string.Equals(knownHash, hash, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Sends the HTML e-mail for a new match and, when the query has a text recipient,
        /// a plain-text message containing only the post URL.
        /// </summary>
        /// <param name="q">The query that matched; provides recipients and the query id.</param>
        /// <param name="postUrl">Absolute URL of the matching post.</param>
        /// <param name="postPage">HTML of the post, used to extract the title and the large pictures.</param>
        private static void SendMatchNotifications(Query q, string postUrl, string postPage)
        {
            string title = "";
            Match titleMatch = regexTitle.Match(postPage);
            if (titleMatch.Success)
            {
                title = titleMatch.Groups[1].Value;
            }

            // Collect the distinct large pictures in the order they appear in the page.
            List<string> largeImages = new List<string>();
            HashSet<string> seenLargeImages = new HashSet<string>();
            for (Match li = regexLargePicture.Match(postPage); li.Success; li = li.NextMatch())
            {
                if (seenLargeImages.Add(li.Groups[0].Value))
                {
                    largeImages.Add(li.Groups[0].Value);
                }
            }

            string body = "I found a new match in the desired area."
                          + "See <a href=\"" + postUrl + "\">" + postUrl + "</a><br>"
                          + "<br><br><br>";

            foreach (string src in largeImages)
            {
                body += "<img src=\"" + src + "\" alt=\"craigslist image\" style=\"width:300px\"><br>";
            }

            body += "queryId=" + q.Id;
            SMTPTools.SendMail(q.emailRecipient, "MATCH! " + title, body, true);
            LogFile.Write("Mail sent");

            if (!String.IsNullOrEmpty(q.textRecipient))
            {
                SMTPTools.SendMail(q.textRecipient, "MATCH! " + title, postUrl, false);
                LogFile.Write("Text message sent");
            }
        }