/// <summary>
/// Loads the feed list for <paramref name="IP"/> from the database and appends one
/// entry per feed, ordered by ascending timestamp, to <c>FeedQueues[IP]</c>.
/// </summary>
/// <param name="IP">Key selecting the per-IP queues and lookup tables.</param>
/// <param name="DataContext">Data context used to call <c>GetFeedList</c>.</param>
/// <remarks>
/// Each queued tuple is (site section, city, 0, feed timestamp, sub-city or null).
/// Failures are written to the event log and never propagate to the caller. A failure
/// whose message contains "Rerun the transaction" is treated as transient and retried.
/// </remarks>
public void BuildFeedQueue(string IP, DataAccessDataContext DataContext)
{
    // Retry in a bounded loop instead of recursing: the previous self-recursive retry
    // had no limit and could overflow the stack if the failure persisted.
    const int MaxAttempts = 5;

    for (int attempt = 1; attempt <= MaxAttempts; attempt++)
    {
        try
        {
            foreach (var item in DataContext.GetFeedList(IP).OrderBy(x => x.Timestamp))
            {
                // First() throws if the feed names an unknown section/city; the sub-city
                // is optional, hence FirstOrDefault().
                CLSiteSection section = SiteSections[IP].First(x => x.Name == item.SiteSection);
                CLCity city = Cities[IP].First(x => x.Name == item.City);
                CLSubCity subCity = SubCities[IP].FirstOrDefault(x => x.SubCity == item.SubCity);

                FeedQueues[IP].AddLast(
                    new Tuple<CLSiteSection, CLCity, int, DateTime, CLSubCity>(section, city, 0, item.Timestamp, subCity));
            }

            return;
        }
        catch (Exception ex)
        {
            // NOTE(review): "Rerun the transaction" is presumably SQL Server's
            // deadlock-victim message; confirm against the database in use.
            bool willRetry = ex.Message.Contains("Rerun the transaction") && attempt < MaxAttempts;

            lock (EventLogKey)
            {
                EventLog.WriteEntry(
                    "An error occurred while attempting to retrieve database records.\n\n" + PrintException(ex),
                    willRetry ? EventLogEntryType.Warning : EventLogEntryType.Error);
            }

            if (!willRetry)
            {
                return;
            }
        }
    }
}
/// <summary>
/// Starts the next asynchronous HTTP request for <paramref name="IP"/>: a queued
/// listing page if <c>ListingQueues[IP]</c> is non-empty, otherwise the next search
/// feed from <c>FeedQueues[IP]</c> (rebuilding that queue first when it is empty).
/// </summary>
/// <param name="IP">Key selecting the per-IP queues and lock.</param>
/// <param name="proxy">Proxy to route the request through; may be null.</param>
/// <param name="DataContext">Data context carried along to the response callback.</param>
/// <exception cref="InvalidOperationException">
/// The feed queue is still empty after <c>BuildFeedQueue</c> ran, so there is nothing to fetch.
/// </exception>
public void FetchNextWhatchamacallit(string IP, IWebProxy proxy, DataAccessDataContext DataContext)
{
    Queue<Listing> listingQueue = ListingQueues[IP];

    lock (KeyChain[IP])
    {
        if (listingQueue.Count > 0)
        {
            Listing listing = listingQueue.Peek();
            string listingSubCity = listing.CLSubCity == null ? "" : (listing.CLSubCity.SubCity + "/");
            string listingUrl = "http://" + listing.CLCity.Name + ".craigslist.org/" + listingSubCity
                + listing.CLSiteSection.Name + "/" + listing.Id.ToString() + ".html";

            BeginCrawlRequest(listingUrl, listing, new AsyncCallback(ParseListingInfo), IP, proxy, DataContext);

            // Only dequeue once the request has been started, so a failure above
            // leaves the listing in the queue.
            ProcessingListings[IP].Add(listingQueue.Dequeue());
            return;
        }

        var feedQueue = FeedQueues[IP];
        if (feedQueue.Count == 0)
        {
            BuildFeedQueue(IP, DataContext);
        }

        // BuildFeedQueue logs and swallows its own errors, so the queue can still be
        // empty here. Fail with a clear message instead of a NullReferenceException
        // from dereferencing First on an empty list.
        if (feedQueue.Count == 0)
        {
            throw new InvalidOperationException(
                "No feeds are available for IP '" + IP + "'; the feed queue is empty after rebuilding.");
        }

        var selectedItem = feedQueue.First.Value;
        feedQueue.RemoveFirst();

        string feedSubCity = selectedItem.Item5 == null ? "" : (selectedItem.Item5.SubCity + "/");
        string feedUrl = "http://" + selectedItem.Item2.Name + ".craigslist.org/search/" + feedSubCity
            + selectedItem.Item1.Name + "?s=" + selectedItem.Item3.ToString();

        BeginCrawlRequest(feedUrl, selectedItem, new AsyncCallback(ParseFeed), IP, proxy, DataContext);
    }
}

/// <summary>
/// Creates an HTTP request for <paramref name="url"/> with the crawler's standard
/// settings and starts it asynchronously, passing an <c>AsyncRequestStruct</c>
/// (with a running stopwatch) to <paramref name="callback"/> as state.
/// </summary>
private void BeginCrawlRequest(string url, object parameters, AsyncCallback callback, string IP, IWebProxy proxy, DataAccessDataContext DataContext)
{
    HttpWebRequest hwr = (HttpWebRequest)WebRequest.Create(url);
    hwr.Proxy = proxy;
    hwr.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    hwr.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
    hwr.KeepAlive = false;
    hwr.UserAgent = Properties.Settings.Default.UserAgent;

    AsyncRequestStruct ars = new AsyncRequestStruct()
    {
        request = hwr,
        parameters = parameters,
        IP = IP,
        proxy = proxy,
        DataContext = DataContext
    };
    ars.stopwatch = new Stopwatch();
    ars.stopwatch.Start();

    hwr.BeginGetResponse(callback, ars);
}
/// <summary>
/// Submits pending changes on <paramref name="DataContext"/> once the number of
/// pending <c>Listing</c> inserts reaches <c>MinSubmissionBundleSize</c>.
/// </summary>
/// <param name="DataContext">Data context whose change set is inspected and submitted.</param>
/// <remarks>
/// On failure nothing is rethrown; an error entry is written to the event log containing
/// the exception message plus diagnostics: ids duplicated in the Listings and
/// ListingAttributes tables, and pending inserts whose id already exists in the database.
/// </remarks>
private void SubmitData(DataAccessDataContext DataContext)
{
    try
    {
        lock (MasterKey)
        {
            int pendingListings = DataContext.GetChangeSet().Inserts.OfType<Listing>().Count();
            if (pendingListings >= Properties.Settings.Default.MinSubmissionBundleSize)
            {
                DataContext.SubmitChanges();
            }
        }
    }
    catch (Exception ex)
    {
        // EventLog.WriteEntry rejects messages above this documented limit, so
        // truncating to short.MaxValue (32767) could make the logging call itself throw.
        const int MaxEventLogMessageLength = 31839;

        StringBuilder debugmsg = new StringBuilder();
        try
        {
            var DuplicateListings = DataContext.Listings.GroupBy(x => x.Id).Where(x => x.Count() > 1).Select(x => x.Key).ToArray();
            var DuplicateAttributes = DataContext.ListingAttributes.GroupBy(x => x.AttributeID).Where(x => x.Count() > 1).Select(x => x.Key).ToArray();

            debugmsg.AppendLine("Duplicate Listings:");
            foreach (long listingid in DuplicateListings)
            {
                debugmsg.AppendLine("[" + listingid + "]");
            }

            debugmsg.AppendLine("Duplicate Attributes:");
            foreach (int attrid in DuplicateAttributes)
            {
                debugmsg.AppendLine("[" + attrid + "]");
            }

            // Compute the change set once; it is reused for both insert scans below.
            var pendingInserts = DataContext.GetChangeSet().Inserts;

            debugmsg.AppendLine("Existing Listings:");
            foreach (Listing li in pendingInserts.OfType<Listing>())
            {
                if (DataContext.Listings.Any(x => x.Id == li.Id))
                {
                    debugmsg.AppendLine("[" + li.Id + "]");
                }
            }

            debugmsg.AppendLine("Existing Attributes:");
            foreach (ListingAttribute li in pendingInserts.OfType<ListingAttribute>())
            {
                if (DataContext.ListingAttributes.Any(x => x.AttributeID == li.AttributeID))
                {
                    debugmsg.AppendLine("[" + li.AttributeID + "]");
                }
            }
        }
        catch (Exception diagnosticsEx)
        {
            // The diagnostics query the database again and may fail for the same reason
            // the submit did; never let that hide the original failure.
            debugmsg.AppendLine("Diagnostics could not be completed: " + diagnosticsEx.Message);
        }

        string wholemessage = "Failed to submit new listings to database.\n\n" + ex.Message + "\n\n" + debugmsg.ToString();
        if (wholemessage.Length > MaxEventLogMessageLength)
        {
            wholemessage = wholemessage.Substring(0, MaxEventLogMessageLength);
        }

        lock (EventLogKey)
        {
            EventLog.WriteEntry(wholemessage, EventLogEntryType.Error);
        }
    }
}
/// <summary>
/// Configures the event log source and connection settings, loads the per-IP lookup
/// tables and queues from the database, and starts the first fetch for every IP
/// (once per enabled proxy, or once without a proxy when none are enabled).
/// </summary>
/// <remarks>
/// When <c>EnableConcurrentIPLimits</c> is off, a single pseudo-IP key "0" is used and
/// all enabled cities and all listings are loaded under it. Any exception during
/// loading is logged as an error and initialization stops; otherwise
/// "Initialization Success!" is logged.
/// </remarks>
public void init()
{
    EventLog.Source = "Craigslist Crawler";
    ServicePointManager.DefaultConnectionLimit = int.MaxValue;
    ServicePointManager.Expect100Continue = false;
    ServicePointManager.CheckCertificateRevocationList = false;
    ConnectionCooldown = Properties.Settings.Default.ConnectionCooldown;

    try
    {
        bool limitPerIP = Properties.Settings.Default.EnableConcurrentIPLimits;
        Proxy[] proxies;

        // This context is only needed for the two bootstrap queries; both results are
        // materialised with ToArray() so it can be disposed straight away.
        using (DataAccessDataContext dadc = new DataAccessDataContext())
        {
            if (limitPerIP)
            {
                IPs = dadc.CLCities.Where(x => x.Enabled).Select(x => x.IP).Distinct().ToArray();
            }
            else
            {
                IPs = new string[] { "0" };
            }

            proxies = dadc.Proxies.Where(x => x.Enabled).ToArray();
        }

        foreach (string ip in IPs)
        {
            // Deliberately not disposed: this context is handed to the asynchronous
            // requests started below and outlives this method.
            DataAccessDataContext datacontext = new DataAccessDataContext();

            FeedQueues.Add(ip, new LinkedList<Tuple<CLSiteSection, CLCity, int, DateTime, CLSubCity>>());
            ListingQueues.Add(ip, new Queue<Listing>());
            ListingFailures.Add(ip, new Dictionary<Listing, int>());
            ProcessingListings.Add(ip, new List<Listing>());
            PreQueueListings.Add(ip, new List<Listing>());

            if (limitPerIP)
            {
                Cities.Add(ip, datacontext.CLCities.Where(x => x.Enabled && x.IP == ip).ToArray());
                CompletedListingIds.Add(ip, new HashSet<long>(datacontext.Listings.Where(x => x.CLCity.IP == ip).Select(x => x.Id)));
            }
            else
            {
                Cities.Add(ip, datacontext.CLCities.Where(x => x.Enabled).ToArray());
                CompletedListingIds.Add(ip, new HashSet<long>(datacontext.Listings.Select(x => x.Id)));
            }

            SubCities.Add(ip, datacontext.CLSubCities.ToArray());
            SiteSections.Add(ip, datacontext.CLSiteSections.Where(x => x.Enabled).ToArray());
            KeyChain.Add(ip, new object());

            BuildFeedQueue(ip, datacontext);

            if (proxies.Length > 0)
            {
                foreach (Proxy prox in proxies)
                {
                    FetchNextWhatchamacallit(ip, new WebProxy(prox.IP, prox.Port), datacontext);
                }
            }
            else
            {
                FetchNextWhatchamacallit(ip, null, datacontext);
            }
        }
    }
    catch (Exception ex)
    {
        lock (EventLogKey)
        {
            EventLog.WriteEntry("An error occurred while attempting to retrieve database records.\n\n" + ex.Message, EventLogEntryType.Error);
        }

        return;
    }

    lock (EventLogKey)
    {
        EventLog.WriteEntry("Initialization Success!");
    }
}