Ejemplo n.º 1
0
 // Partial-method declaration that receives a CLSubCity instance. It has no body here,
 // so any implementation must be supplied by another part of the enclosing partial type.
 // NOTE(review): the name suggests a hook invoked when a CLSubCity is deleted — confirm against the implementing part.
 partial void DeleteCLSubCity(CLSubCity instance);
Ejemplo n.º 2
0
 // Partial-method declaration that receives a CLSubCity instance. It has no body here,
 // so any implementation must be supplied by another part of the enclosing partial type.
 // NOTE(review): the name suggests a hook invoked when a CLSubCity is inserted — confirm against the implementing part.
 partial void InsertCLSubCity(CLSubCity instance);
Ejemplo n.º 3
0
 // Partial-method declaration that receives a CLSubCity instance. It has no body here,
 // so any implementation must be supplied by another part of the enclosing partial type.
 // NOTE(review): the name suggests a hook invoked when a CLSubCity is updated — confirm against the implementing part.
 partial void UpdateCLSubCity(CLSubCity instance);
Ejemplo n.º 4
0
        /// <summary>
        /// Asynchronous completion callback for a feed request.  Reads the HTML response,
        /// walks every <c>p.row</c> element that carries a <c>data-pid</c> attribute, builds a
        /// <c>Listing</c> for each row not already known for this IP and enqueues it on
        /// <c>ListingQueues</c>.  If every row was processed without an early exit, the next
        /// page of the same feed (depth + 100) is pushed to the front of <c>FeedQueues</c>.
        /// </summary>
        /// <param name="AsyncResult">
        /// Result handle of the pending request; its <c>AsyncState</c> must be the
        /// <c>AsyncRequestStruct</c> describing the request.
        /// </param>
        /// <remarks>
        /// Regardless of outcome, the <c>finally</c> block updates the timing counters, sleeps out any
        /// remaining <c>ConnectionCooldown</c>, and then requests the next unit of work, unless the
        /// request was answered with HTTP 403, in which case nothing further is fetched from here.
        /// </remarks>
        private void ParseFeed(IAsyncResult AsyncResult)
        {
            bool banned403 = false;

            Stopwatch sw = new Stopwatch();

            sw.Start();

            AsyncRequestStruct ars = (AsyncRequestStruct)AsyncResult.AsyncState;

            try
            {
                // The request's own stopwatch measures connection time; fold it into the running totals.
                ars.stopwatch.Stop();
                parseFeedConnectionCount++;
                parseFeedConnectionTime += ars.stopwatch.Elapsed;

                Tuple <CLSiteSection, CLCity, int, DateTime, CLSubCity> feedResource = (Tuple <CLSiteSection, CLCity, int, DateTime, CLSubCity>)ars.parameters;
                CLSiteSection SiteSection = feedResource.Item1;
                CLCity        City        = feedResource.Item2;
                int           depth       = feedResource.Item3;
                DateTime      lastStamp   = feedResource.Item4;
                CLSubCity     subCity     = feedResource.Item5;

                HtmlDocument response = new HtmlDocument();
                try
                {
                    using (WebResponse wr = ars.request.EndGetResponse(AsyncResult))
                    {
                        using (Stream stream = wr.GetResponseStream())
                        {
                            using (StreamReader sr = new StreamReader(stream))
                            {
                                response.LoadHtml(sr.ReadToEnd());
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Prefer the real HTTP status code; the message check is kept only as a
                    // fallback for failures that do not carry an HttpWebResponse.
                    WebException    webEx         = ex as WebException;
                    HttpWebResponse errorResponse = webEx == null ? null : webEx.Response as HttpWebResponse;
                    bool            forbidden     = (errorResponse != null && errorResponse.StatusCode == HttpStatusCode.Forbidden) ||
                                                    ex.Message.Contains("(403)");

                    // Record the ban before doing anything that can throw, so the finally block
                    // never schedules more work for this request after a 403.
                    banned403 = forbidden;

                    // The response attached to a WebException holds the connection until it is closed.
                    if (webEx != null && webEx.Response != null)
                    {
                        webEx.Response.Close();
                    }

                    if (forbidden)    //If a 403 occurs, remove the proxy from operation.
                    {
                        if (Properties.Settings.Default.DisabledBannedProxies)
                        {
                            // The request may not have been configured with a WebProxy; skip disabling in that case.
                            WebProxy wp = ars.request.Proxy as WebProxy;

                            if (wp != null && wp.Address != null)
                            {
                                Proxy selectedProxy = ars.DataContext.Proxies.FirstOrDefault(x => x.IP == wp.Address.Host && x.Port == wp.Address.Port);
                                if (selectedProxy != null)
                                {
                                    selectedProxy.Enabled = false;
                                    lock (KeyChain[ars.IP])
                                    {
                                        lock (MasterKey)
                                        {
                                            ars.DataContext.SubmitChanges();
                                        }
                                    }
                                }
                            }
                        }
                        return;
                    }

                    lock (EventLogKey)
                    {
                        EventLog.WriteEntry("An error has occurred while retreiving a feed:\n\n" + ex.Message + "\n\n" + (ex.InnerException == null ? "" : ex.InnerException.Message)
                                            + "\n\n" + SiteSection.Name + "\n" + City.Name + "\n" + depth.ToString(), EventLogEntryType.Warning);
                    }
                    return;
                }

                HtmlNodeCollection rows = response.DocumentNode.SelectNodes("//p[@class='row' and @data-pid]");

                if (rows == null)
                {
                    //The eventlog below is not necessarily true.  It's possible that the end of the feeds has been reached and there are no more listings.
                    //EventLog.WriteEntry("Error while parsing feed.  No connection errors thrown, but no results returned either.\n" + SiteSection.Name + "\n" + City.Name + "\n" + (subCity==null?"":("\n"+subCity.SubCity)) + "\n" + depth.ToString());
                    return;
                }
                bool NoUpdateTimesPosted = false;

                foreach (HtmlNode row in rows)
                {
                    if (row.SelectSingleNode("a").Attributes["href"].Value.Contains("craigslist."))
                    {       //This condition deals with "Nearby Areas".  Links within the locale are relative -> /<sectionCode>/<Id>.html, whereas out of the locale are global -> http://<city.Name>/craigslist.org/<sectionCode>/<Id>.html
                        return;
                    }

                    Listing listingSource = new Listing()
                    {
                        CLSiteSection = SiteSection, CLCity = City, CLSubCity = subCity, Timestamp = DateTime.Now
                    };

                    //LASTUPDATED
                    try
                    {
                        HtmlNode hn = row.SelectSingleNode("span[@class='txt']/span[@class='pl']/time");
                        if (hn != null && DateTime.Parse(hn.Attributes["datetime"].Value) <= lastStamp)
                        {
                            // Reached a row no newer than the last recorded stamp: stop processing this feed.
                            return;
                        }
                        else if (hn == null)
                        {
                            // Without a time element there is nothing to compare against lastStamp,
                            // so the next page will not be queued after the loop.
                            NoUpdateTimesPosted = true;
                        }
                    }
                    catch (Exception ex)
                    {
                        lock (EventLogKey)
                        {
                            EventLog.WriteEntry("Failure to parse listing update time.\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Error);
                        }
                        return;
                    }

                    //ID
                    try
                    {
                        listingSource.Id = long.Parse(row.Attributes["data-pid"].Value);
                    }
                    catch (Exception ex)
                    {
                        lock (EventLogKey)
                        {
                            EventLog.WriteEntry("Listing has an invalid ID:\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                        }
                        continue;
                    }

                    // Skip listings already completed, queued, being processed, or mid-parse for this IP;
                    // otherwise reserve the ID in the pre-queue while the rest of the row is parsed.
                    lock (KeyChain[ars.IP])
                    {
                        if (CompletedListingIds[ars.IP].Contains(listingSource.Id) ||
                            ListingQueues[ars.IP].Any(x => x.Id == listingSource.Id) ||
                            ProcessingListings[ars.IP].Any(x => x.Id == listingSource.Id) ||
                            PreQueueListings[ars.IP].Any(x => x.Id == listingSource.Id)
                            )
                        {
                            continue;
                        }
                        PreQueueListings[ars.IP].Add(listingSource);
                    }
                    //PRICE
                    {
                        HtmlNode hn = row.SelectSingleNode("span[@class='txt']/span[@class='l2']/span[@class='price']");
                        if (hn != null)
                        {
                            try
                            {
                                string value = long.Parse(hn.InnerText.Replace("$", "").Replace(@"&#x0024;", "")).ToString();
                                listingSource.ListingAttributes.Add(
                                    new ListingAttribute()
                                {
                                    Name  = "price",
                                    Value = value
                                }
                                    );
                            }
                            catch (Exception ex)
                            {
                                lock (EventLogKey)
                                {
                                    EventLog.WriteEntry("Error parsing price listing data:  " + hn.InnerText.Replace("$", "").Replace(@"&#x0024;", "") + "\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                                }
                            }
                        }
                    }

                    //LOCALE
                    {
                        HtmlNode hn = row.SelectSingleNode("span[@class='txt']/span[@class='l2']/span[@class='pnr']/small");
                        if (hn != null)
                        {
                            try
                            {
                                listingSource.ListingAttributes.Add(new ListingAttribute()
                                {
                                    Name = "Locale", Value = hn.InnerText.Replace("(", "").Replace(")", "").Trim()
                                });
                            }
                            catch (Exception ex)
                            {
                                lock (EventLogKey)
                                {
                                    EventLog.WriteEntry("Error parsing listing locale data:  " + hn.InnerText.Replace("(", "").Replace(")", "").Trim() + "\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                                }
                            }
                        }
                    }

                    //HAS PICTURE, HAS MAP
                    try
                    {
                        HtmlNode hn = row.SelectSingleNode("span[@class='txt']/span[@class='l2']/span[@class='pnr']/span[@class='px']/span[@class='p']");
                        if (hn != null)
                        {
                            if (hn.InnerText.Contains("pic"))
                            {
                                listingSource.ListingAttributes.Add(new ListingAttribute()
                                {
                                    Name = "Has Picture", Value = "True"
                                });
                            }
                            if (hn.InnerText.Contains("map"))
                            {
                                listingSource.ListingAttributes.Add(new ListingAttribute()
                                {
                                    Name = "Has Map", Value = "True"
                                });
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        lock (EventLogKey)
                        {
                            EventLog.WriteEntry("Error while parsing pic and map presence:\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                        }
                    }

                    //TITLE
                    try
                    {
                        listingSource.Title = row.SelectSingleNode("span[@class='txt']/span[@class='pl']/a[@class='hdrlnk']").InnerText;
                    }
                    catch (Exception ex)
                    {
                        lock (EventLogKey)
                        {
                            EventLog.WriteEntry("Listing skipped.  Error parsing title from listing.\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                        }
                        // A listing without a title is dropped; release its pre-queue reservation.
                        lock (KeyChain[ars.IP])
                        {
                            PreQueueListings[ars.IP].Remove(listingSource);
                        }
                        continue;
                    }

                    // Row fully parsed: hand the listing to the queue and clear its reservations.
                    lock (KeyChain[ars.IP])
                    {
                        ListingQueues[ars.IP].Enqueue(listingSource);
                        ProcessingListings[ars.IP].Remove(listingSource);
                        PreQueueListings[ars.IP].Remove(listingSource);
                    }
                }

                //If the for loop completes without a return, the next page needs to be looked at
                if (NoUpdateTimesPosted == false)
                {
                    lock (KeyChain[ars.IP])
                    {
                        FeedQueues[ars.IP].AddFirst(new Tuple <CLSiteSection, CLCity, int, DateTime, CLSubCity>(feedResource.Item1, feedResource.Item2, feedResource.Item3 + 100, feedResource.Item4, feedResource.Item5));
                    }
                }
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("An error has occurred while parsing the feed:\n\n" + PrintException(ex), EventLogEntryType.Error);
                }
            }
            finally
            {
                sw.Stop();
                parseFeedProcessingTime += sw.Elapsed;
                parseFeedProcessingCount++;

                // Pad out the remainder of the cooldown window before asking for more work.
                if (ConnectionCooldown > 0 && sw.Elapsed < TimeSpan.FromMilliseconds(ConnectionCooldown))
                {
                    Thread.Sleep(TimeSpan.FromMilliseconds(ConnectionCooldown) - sw.Elapsed);
                }

                try
                {
                    if (banned403 == false)
                    {
                        FetchNextWhatchamacallit(ars.IP, ars.proxy, ars.DataContext);
                    }
                }
                catch (Exception ex)
                {
                    lock (EventLogKey)
                    {
                        // Logged as an error, matching the other failure paths in this method.
                        EventLog.WriteEntry("Error fetching requests from next queue.\n\n" + PrintException(ex), EventLogEntryType.Error);
                    }
                }
            }
        }