예제 #1
0
 /// <summary>
 /// Loads the feed list for <paramref name="IP"/> from the database, ordered by timestamp
 /// (oldest first), and appends one entry per feed to <c>FeedQueues[IP]</c>.
 /// </summary>
 /// <remarks>
 /// Exceptions raised while loading are caught and written to the event log. When the
 /// exception message contains "Rerun the transaction" the failure is logged as a warning and
 /// the load is attempted again; any other failure is logged as an error and the method returns.
 /// </remarks>
 /// <param name="IP">Key into the per-IP collections (FeedQueues, SiteSections, Cities, SubCities).</param>
 /// <param name="DataContext">Data context whose <c>GetFeedList</c> supplies the feed rows.</param>
 public void BuildFeedQueue(string IP, DataAccessDataContext DataContext)
 {
     // Retry iteratively instead of recursing so that a long run of transient
     // failures cannot grow the call stack without bound.
     while (true)
     {
         try
         {
             foreach (var item in DataContext.GetFeedList(IP).OrderBy(x => x.Timestamp))
             {
                 // First() throws if the feed references an unknown site section or city;
                 // a missing sub-city is allowed and stored as null.
                 var siteSection = SiteSections[IP].First(x => x.Name == item.SiteSection);
                 var city        = Cities[IP].First(x => x.Name == item.City);
                 var subCity     = SubCities[IP].FirstOrDefault(x => x.SubCity == item.SubCity);

                 // Item3 (the int) starts at 0 for every freshly queued feed.
                 FeedQueues[IP].AddLast(new Tuple<CLSiteSection, CLCity, int, DateTime, CLSubCity>(siteSection, city, 0, item.Timestamp, subCity));
             }

             return;
         }
         catch (Exception ex)
         {
             // NOTE(review): the original tested this same substring twice; a second,
             // different message may have been intended - confirm.
             bool shouldRetry = ex.Message.Contains("Rerun the transaction");

             lock (EventLogKey)
             {
                 EventLog.WriteEntry("An error occurred while attempting to retrieve database records.\n\n" + PrintException(ex), shouldRetry ? EventLogEntryType.Warning : EventLogEntryType.Error);
             }

             if (!shouldRetry)
             {
                 return;
             }
         }
     }
 }
예제 #2
0
        /// <summary>
        /// Starts the next asynchronous HTTP request for <paramref name="IP"/>.
        /// </summary>
        /// <remarks>
        /// While holding <c>KeyChain[IP]</c>: if a listing is queued, its page is requested
        /// (callback <c>ParseListingInfo</c>) and the listing is moved from the queue to
        /// <c>ProcessingListings[IP]</c>. Otherwise the next feed is taken from
        /// <c>FeedQueues[IP]</c> (rebuilding that queue first when it is empty) and its search
        /// page is requested (callback <c>ParseFeed</c>).
        /// </remarks>
        /// <param name="IP">Key into the per-IP collections.</param>
        /// <param name="proxy">Proxy assigned to the request; may be null.</param>
        /// <param name="DataContext">Data context carried along in the request state.</param>
        /// <exception cref="InvalidOperationException">
        /// No listing is queued and the feed queue is still empty after it was rebuilt.
        /// </exception>
        public void FetchNextWhatchamacallit(string IP, IWebProxy proxy, DataAccessDataContext DataContext)
        {
            Queue<Listing> ListingQueue = ListingQueues[IP];

            lock (KeyChain[IP])
            {
                if (ListingQueue.Count > 0)
                {
                    Listing nextListing = ListingQueue.Peek();
                    string listingSubCity = nextListing.CLSubCity == null ? "" : (nextListing.CLSubCity.SubCity + "/");
                    string listingUrl = "http://" + nextListing.CLCity.Name + ".craigslist.org/" + listingSubCity + nextListing.CLSiteSection.Name + "/" + nextListing.Id.ToString() + ".html";

                    BeginCrawlRequest(listingUrl, nextListing, new AsyncCallback(ParseListingInfo), IP, proxy, DataContext);

                    // Dequeue only after the request has been started, as before.
                    ProcessingListings[IP].Add(ListingQueue.Dequeue());
                    return;
                }

                var feedQueue = FeedQueues[IP];
                if (feedQueue.Count == 0)
                {
                    BuildFeedQueue(IP, DataContext);
                }

                // LinkedList.First is null for an empty list; fail with a descriptive
                // error instead of a NullReferenceException on First.Value.
                if (feedQueue.Count == 0)
                {
                    throw new InvalidOperationException("No feeds are available for IP '" + IP + "' after rebuilding the feed queue.");
                }

                var selectedItem = feedQueue.First.Value;
                feedQueue.RemoveFirst();

                string feedSubCity = selectedItem.Item5 == null ? "" : (selectedItem.Item5.SubCity + "/");
                string feedUrl = "http://" + selectedItem.Item2.Name + ".craigslist.org/search/" + feedSubCity + selectedItem.Item1.Name + "?s=" + selectedItem.Item3.ToString();

                BeginCrawlRequest(feedUrl, selectedItem, new AsyncCallback(ParseFeed), IP, proxy, DataContext);
            }
        }

        /// <summary>
        /// Creates and configures an HTTP request for <paramref name="url"/>, wraps it in an
        /// <c>AsyncRequestStruct</c> with a running stopwatch, and starts it asynchronously.
        /// </summary>
        private void BeginCrawlRequest(string url, object requestParameters, AsyncCallback callback, string IP, IWebProxy proxy, DataAccessDataContext DataContext)
        {
            HttpWebRequest hwr = (HttpWebRequest)WebRequest.Create(url);
            hwr.Proxy = proxy;
            hwr.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
            hwr.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
            hwr.KeepAlive = false;
            hwr.UserAgent = Properties.Settings.Default.UserAgent;

            AsyncRequestStruct ars = new AsyncRequestStruct()
            {
                request = hwr, parameters = requestParameters, IP = IP, proxy = proxy, DataContext = DataContext
            };

            // The stopwatch is started just before the request is issued.
            ars.stopwatch = new Stopwatch();
            ars.stopwatch.Start();

            hwr.BeginGetResponse(callback, ars);
        }
예제 #3
0
        /// <summary>
        /// Submits the pending changes of <paramref name="DataContext"/> once the number of
        /// pending <c>Listing</c> inserts reaches
        /// <c>Properties.Settings.Default.MinSubmissionBundleSize</c>.
        /// </summary>
        /// <remarks>
        /// The check and the submit run under <c>MasterKey</c>. If they throw, the failure is
        /// written to the event log together with diagnostics: listing/attribute ids that occur
        /// more than once in the database, and pending inserts whose id already exists there.
        /// The exception is not rethrown.
        /// </remarks>
        /// <param name="DataContext">Data context holding the pending inserts.</param>
        private void SubmitData(DataAccessDataContext DataContext)
        {
            try
            {
                lock (MasterKey)
                {
                    if (DataContext.GetChangeSet().Inserts.OfType<Listing>().Count() >= Properties.Settings.Default.MinSubmissionBundleSize)
                    {
                        DataContext.SubmitChanges();
                    }
                }
            }
            catch (Exception ex)
            {
                StringBuilder debugmsg = new StringBuilder();

                // The diagnostics query the database again. If that fails too, the
                // original submit failure must still be logged, so guard them.
                try
                {
                    var DuplicateListings   = DataContext.Listings.GroupBy(x => x.Id).Where(x => x.Count() > 1).Select(x => x.Key).ToArray();
                    var DuplicateAttributes = DataContext.ListingAttributes.GroupBy(x => x.AttributeID).Where(x => x.Count() > 1).Select(x => x.Key).ToArray();

                    debugmsg.AppendLine("Duplicate Listings:");
                    foreach (long listingid in DuplicateListings)
                    {
                        debugmsg.AppendLine("[" + listingid + "]");
                    }

                    debugmsg.AppendLine("Duplicate Attributes:");
                    foreach (int attrid in DuplicateAttributes)
                    {
                        debugmsg.AppendLine("[" + attrid + "]");
                    }

                    debugmsg.AppendLine("Existing Listings:");
                    foreach (Listing li in DataContext.GetChangeSet().Inserts.OfType<Listing>())
                    {
                        if (DataContext.Listings.Any(x => x.Id == li.Id))
                        {
                            debugmsg.AppendLine("[" + li.Id + "]");
                        }
                    }

                    debugmsg.AppendLine("Existing Attributes:");
                    foreach (ListingAttribute li in DataContext.GetChangeSet().Inserts.OfType<ListingAttribute>())
                    {
                        if (DataContext.ListingAttributes.Any(x => x.AttributeID == li.AttributeID))
                        {
                            debugmsg.AppendLine("[" + li.AttributeID + "]");
                        }
                    }
                }
                catch (Exception diagnosticsEx)
                {
                    debugmsg.AppendLine("Failed to collect duplicate diagnostics: " + diagnosticsEx.Message);
                }

                // EventLog.WriteEntry rejects over-long messages; the documented limit is
                // 31,839, which is below short.MaxValue (32,767).
                const int MaxEventLogMessageLength = 31839;

                string wholemessage = "Failed to submit new listings to database.\n\n" + ex.Message + "\n\n" + debugmsg.ToString();
                if (wholemessage.Length > MaxEventLogMessageLength)
                {
                    wholemessage = wholemessage.Substring(0, MaxEventLogMessageLength);
                }

                lock (EventLogKey)
                {
                    EventLog.WriteEntry(wholemessage, EventLogEntryType.Error);
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Configures the event log source and <c>ServicePointManager</c>, loads the per-IP
        /// state from the database, and starts the first fetch for every IP/proxy pair.
        /// </summary>
        /// <remarks>
        /// When <c>EnableConcurrentIPLimits</c> is set, one state bucket is created per distinct
        /// IP of the enabled cities; otherwise a single bucket keyed "0" covers all enabled
        /// cities. Any exception during setup is written to the event log and aborts
        /// initialization; on success an "Initialization Success!" entry is written.
        /// </remarks>
        public void init()
        {
            EventLog.Source = "Craigslist Crawler";
            ServicePointManager.DefaultConnectionLimit         = int.MaxValue;
            ServicePointManager.Expect100Continue              = false;
            ServicePointManager.CheckCertificateRevocationList = false;

            ConnectionCooldown = Properties.Settings.Default.ConnectionCooldown;
            try
            {
                // This context only bootstraps the IP and proxy lists (both materialized
                // with ToArray), so it is disposed when setup finishes.
                using (DataAccessDataContext dadc = new DataAccessDataContext())
                {
                    if (Properties.Settings.Default.EnableConcurrentIPLimits)
                    {
                        IPs = dadc.CLCities.Where(x => x.Enabled).Select(x => x.IP).Distinct().ToArray();
                    }
                    else
                    {
                        IPs = new string[] { "0" };
                    }

                    var proxies = dadc.Proxies.Where(x => x.Enabled).ToArray();

                    foreach (string ip in IPs)
                    {
                        // Each IP gets its own context. It is passed on to the fetch calls
                        // below and therefore is not disposed here.
                        DataAccessDataContext datacontext = new DataAccessDataContext();
                        FeedQueues.Add(ip, new LinkedList<Tuple<CLSiteSection, CLCity, int, DateTime, CLSubCity>>());
                        ListingQueues.Add(ip, new Queue<Listing>());
                        ListingFailures.Add(ip, new Dictionary<Listing, int>());
                        ProcessingListings.Add(ip, new List<Listing>());
                        PreQueueListings.Add(ip, new List<Listing>());

                        if (Properties.Settings.Default.EnableConcurrentIPLimits)
                        {
                            Cities.Add(ip, datacontext.CLCities.Where(x => x.Enabled && x.IP == ip).ToArray());
                            CompletedListingIds.Add(ip, new HashSet<long>(datacontext.Listings.Where(x => x.CLCity.IP == ip).Select(x => x.Id)));
                        }
                        else
                        {
                            Cities.Add(ip, datacontext.CLCities.Where(x => x.Enabled).ToArray());
                            CompletedListingIds.Add(ip, new HashSet<long>(datacontext.Listings.Select(x => x.Id)));
                        }

                        SubCities.Add(ip, datacontext.CLSubCities.ToArray());
                        SiteSections.Add(ip, datacontext.CLSiteSections.Where(x => x.Enabled).ToArray());

                        KeyChain.Add(ip, new object());

                        BuildFeedQueue(ip, datacontext);

                        // One initial fetch per enabled proxy, or a single direct fetch
                        // when no proxy is enabled.
                        if (proxies.Length > 0)
                        {
                            foreach (Proxy prox in proxies)
                            {
                                FetchNextWhatchamacallit(ip, new WebProxy(prox.IP, prox.Port), datacontext);
                            }
                        }
                        else
                        {
                            FetchNextWhatchamacallit(ip, null, datacontext);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("An error occurred while attempting to retrieve database records.\n\n" + ex.Message, EventLogEntryType.Error);
                }
                return;
            }

            lock (EventLogKey)
            {
                EventLog.WriteEntry("Initialization Success!");
            }
        }