// Partial method declarations (no bodies) for the delete, insert and update of a CLCity instance.
// As with any C# partial method, if no implementing part is supplied elsewhere the compiler
// removes the declaration together with every call to it.
// NOTE(review): these look like designer-generated LINQ to SQL extensibility hooks — confirm
// against the enclosing data-context class, which is not visible here.

// Declaration of the CLCity delete hook; receives the instance concerned.
partial void DeleteCLCity(CLCity instance);
// Declaration of the CLCity insert hook; receives the instance concerned.
partial void InsertCLCity(CLCity instance);
// Declaration of the CLCity update hook; receives the instance concerned.
partial void UpdateCLCity(CLCity instance);
/// <summary>
/// Completion callback for an asynchronous feed-page request. Reads the response into an
/// HTML document, walks every listing row (<c>p</c> elements with class <c>row</c> and a
/// <c>data-pid</c> attribute), queues listings that are not already known for the request's IP,
/// and, when the whole page was consumed without hitting a stop condition, pushes the next
/// page of the same feed onto the front of the feed queue.
/// </summary>
/// <remarks>
/// Every failure is written to the event log rather than thrown. Regardless of outcome the
/// method updates the processing statistics, sleeps for the remainder of
/// <c>ConnectionCooldown</c> (if any) and then calls <c>FetchNextWhatchamacallit</c> —
/// except after an HTTP 403, in which case no further fetch is started.
/// </remarks>
/// <param name="AsyncResult">
/// Result handle of the pending request; its <c>AsyncState</c> must be the
/// <c>AsyncRequestStruct</c> that describes the request.
/// </param>
private void ParseFeed(IAsyncResult AsyncResult)
{
    // Amount added to the feed depth (Item3) when queueing the following page.
    // NOTE(review): presumably the number of listings per feed page — confirm.
    const int NextPageDepthStep = 100;

    bool banned403 = false;
    Stopwatch sw = new Stopwatch();
    sw.Start();
    AsyncRequestStruct ars = (AsyncRequestStruct)AsyncResult.AsyncState;
    try
    {
        // Connection statistics: the request's own stopwatch is stopped here, on completion.
        ars.stopwatch.Stop();
        parseFeedConnectionCount++;
        parseFeedConnectionTime += ars.stopwatch.Elapsed;

        Tuple<CLSiteSection, CLCity, int, DateTime, CLSubCity> feedResource =
            (Tuple<CLSiteSection, CLCity, int, DateTime, CLSubCity>)ars.parameters;
        CLSiteSection SiteSection = feedResource.Item1;
        CLCity City = feedResource.Item2;
        int depth = feedResource.Item3;
        DateTime lastStamp = feedResource.Item4;
        CLSubCity subCity = feedResource.Item5;

        HtmlDocument response = new HtmlDocument();
        try
        {
            using (WebResponse wr = ars.request.EndGetResponse(AsyncResult))
            using (Stream stream = wr.GetResponseStream())
            using (StreamReader sr = new StreamReader(stream))
            {
                response.LoadHtml(sr.ReadToEnd());
            }
        }
        catch (Exception ex)
        {
            if (IsForbiddenResponse(ex))
            {
                // If a 403 occurs, remove the proxy from operation.
                // The flag is set before touching the database so that a failure while
                // disabling the proxy cannot resume fetching through a banned proxy.
                banned403 = true;
                DisableBannedProxy(ars);
                return;
            }

            lock (EventLogKey)
            {
                EventLog.WriteEntry("An error has occurred while retreiving a feed:\n\n" + ex.Message + "\n\n" + (ex.InnerException == null ? "" : ex.InnerException.Message) + "\n\n" + SiteSection.Name + "\n" + City.Name + "\n" + depth.ToString(), EventLogEntryType.Warning);
            }
            return;
        }

        HtmlNodeCollection rows = response.DocumentNode.SelectNodes("//p[@class='row' and @data-pid]");
        if (rows == null)
        {
            // Deliberately not logged: an empty page is not necessarily an error. It's possible
            // that the end of the feed has been reached and there are no more listings.
            return;
        }

        bool NoUpdateTimesPosted = false;
        foreach (HtmlNode row in rows)
        {
            // A row without an anchor/href cannot be classified as a "nearby area" link, so it
            // falls through and is validated like any other listing instead of aborting the
            // whole page with a NullReferenceException.
            HtmlNode link = row.SelectSingleNode("a");
            string href = null;
            if (link != null && link.Attributes["href"] != null)
            {
                href = link.Attributes["href"].Value;
            }
            if (href != null && href.Contains("craigslist."))
            {
                // This condition deals with "Nearby Areas". Links within the locale are relative
                // -> /<sectionCode>/<Id>.html, whereas links out of the locale are global
                // -> http://<city.Name>/craigslist.org/<sectionCode>/<Id>.html
                return;
            }

            Listing listingSource = new Listing()
            {
                CLSiteSection = SiteSection,
                CLCity = City,
                CLSubCity = subCity,
                Timestamp = DateTime.Now
            };

            // LASTUPDATED: stop as soon as a listing is not newer than the last stamp.
            try
            {
                HtmlNode timeNode = row.SelectSingleNode("span[@class='txt']/span[@class='pl']/time");
                if (timeNode == null)
                {
                    // Without update times there is no stop condition, so no next page is queued.
                    NoUpdateTimesPosted = true;
                }
                else if (DateTime.Parse(timeNode.Attributes["datetime"].Value) <= lastStamp)
                {
                    return;
                }
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Failure to parse listing update time.\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Error);
                }
                return;
            }

            // ID
            try
            {
                listingSource.Id = long.Parse(row.Attributes["data-pid"].Value);
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Listing has an invalid ID:\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                }
                continue;
            }

            // Skip listings already completed, queued, being processed or being parsed for
            // this IP; otherwise reserve the ID in the pre-queue while the row is parsed.
            lock (KeyChain[ars.IP])
            {
                if (CompletedListingIds[ars.IP].Contains(listingSource.Id)
                    || ListingQueues[ars.IP].Any(x => x.Id == listingSource.Id)
                    || ProcessingListings[ars.IP].Any(x => x.Id == listingSource.Id)
                    || PreQueueListings[ars.IP].Any(x => x.Id == listingSource.Id))
                {
                    continue;
                }
                PreQueueListings[ars.IP].Add(listingSource);
            }

            // PRICE (optional)
            HtmlNode priceNode = row.SelectSingleNode("span[@class='txt']/span[@class='l2']/span[@class='price']");
            if (priceNode != null)
            {
                string priceText = priceNode.InnerText.Replace("$", "");
                try
                {
                    // Round-trip through long so only plain integer prices are stored.
                    string value = long.Parse(priceText).ToString();
                    listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "price", Value = value });
                }
                catch (Exception ex)
                {
                    lock (EventLogKey)
                    {
                        EventLog.WriteEntry("Error parsing price listing data: " + priceText + "\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                    }
                }
            }

            // LOCALE (optional)
            HtmlNode localeNode = row.SelectSingleNode("span[@class='txt']/span[@class='l2']/span[@class='pnr']/small");
            if (localeNode != null)
            {
                string localeText = localeNode.InnerText.Replace("(", "").Replace(")", "").Trim();
                try
                {
                    listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Locale", Value = localeText });
                }
                catch (Exception ex)
                {
                    lock (EventLogKey)
                    {
                        EventLog.WriteEntry("Error parsing listing locale data: " + localeText + "\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                    }
                }
            }

            // HAS PICTURE, HAS MAP (optional)
            try
            {
                HtmlNode mediaNode = row.SelectSingleNode("span[@class='txt']/span[@class='l2']/span[@class='pnr']/span[@class='px']/span[@class='p']");
                if (mediaNode != null)
                {
                    if (mediaNode.InnerText.Contains("pic"))
                    {
                        listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Has Picture", Value = "True" });
                    }
                    if (mediaNode.InnerText.Contains("map"))
                    {
                        listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Has Map", Value = "True" });
                    }
                }
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Error while parsing pic and map presence:\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                }
            }

            // TITLE (mandatory): a listing without a title is dropped and its reservation released.
            try
            {
                listingSource.Title = row.SelectSingleNode("span[@class='txt']/span[@class='pl']/a[@class='hdrlnk']").InnerText;
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Listing skipped. Error parsing title from listing.\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                }
                lock (KeyChain[ars.IP])
                {
                    PreQueueListings[ars.IP].Remove(listingSource);
                }
                continue;
            }

            // Promote the fully parsed listing from the pre-queue to the listing queue.
            lock (KeyChain[ars.IP])
            {
                ListingQueues[ars.IP].Enqueue(listingSource);
                ProcessingListings[ars.IP].Remove(listingSource);
                PreQueueListings[ars.IP].Remove(listingSource);
            }
        }

        // If the loop completes without a return, the next page needs to be looked at.
        if (NoUpdateTimesPosted == false)
        {
            lock (KeyChain[ars.IP])
            {
                FeedQueues[ars.IP].AddFirst(new Tuple<CLSiteSection, CLCity, int, DateTime, CLSubCity>(
                    feedResource.Item1,
                    feedResource.Item2,
                    feedResource.Item3 + NextPageDepthStep,
                    feedResource.Item4,
                    feedResource.Item5));
            }
        }
    }
    catch (Exception ex)
    {
        lock (EventLogKey)
        {
            EventLog.WriteEntry("An error has occurred while parsing the feed:\n\n" + PrintException(ex), EventLogEntryType.Error);
        }
    }
    finally
    {
        sw.Stop();
        parseFeedProcessingTime += sw.Elapsed;
        parseFeedProcessingCount++;

        // Pad the time spent here up to the configured cooldown before starting the next request.
        if (ConnectionCooldown > 0 && sw.Elapsed < TimeSpan.FromMilliseconds(ConnectionCooldown))
        {
            Thread.Sleep(TimeSpan.FromMilliseconds(ConnectionCooldown) - sw.Elapsed);
        }

        try
        {
            if (banned403 == false)
            {
                FetchNextWhatchamacallit(ars.IP, ars.proxy, ars.DataContext);
            }
        }
        catch (Exception ex)
        {
            lock (EventLogKey)
            {
                // Logged with explicit Error severity; without a type the entry is recorded as Information.
                EventLog.WriteEntry("Error fetching requests from next queue.\n\n" + PrintException(ex), EventLogEntryType.Error);
            }
        }
    }
}

/// <summary>
/// Tells whether a failed request was rejected with HTTP 403. The status code of the
/// response attached to a <see cref="WebException"/> is checked first; the "(403)" marker
/// in the exception message is kept as a fallback.
/// </summary>
private static bool IsForbiddenResponse(Exception ex)
{
    WebException webException = ex as WebException;
    if (webException != null)
    {
        HttpWebResponse httpResponse = webException.Response as HttpWebResponse;
        if (httpResponse != null && httpResponse.StatusCode == HttpStatusCode.Forbidden)
        {
            return true;
        }
    }
    return ex.Message.Contains("(403)");
}

/// <summary>
/// When the <c>DisabledBannedProxies</c> setting is on, marks the stored proxy that matches
/// the request's proxy host and port as disabled and submits the change. Does nothing when
/// the request has no <see cref="WebProxy"/> with an address, or no matching record exists.
/// </summary>
private void DisableBannedProxy(AsyncRequestStruct ars)
{
    if (!Properties.Settings.Default.DisabledBannedProxies)
    {
        return;
    }

    WebProxy wp = ars.request.Proxy as WebProxy;
    if (wp == null || wp.Address == null)
    {
        return;
    }

    Proxy selectedProxy = ars.DataContext.Proxies.FirstOrDefault(x => x.IP == wp.Address.Host && x.Port == wp.Address.Port);
    if (selectedProxy == null)
    {
        return;
    }

    selectedProxy.Enabled = false;
    lock (KeyChain[ars.IP])
    {
        lock (MasterKey)
        {
            ars.DataContext.SubmitChanges();
        }
    }
}