/// <summary>
/// Clears every queued request and aborts every request that is still in flight.
/// </summary>
/// <remarks>
/// This method does not invoke any request callback itself. Failures are reported through
/// Debugger.UF_Exception and never propagate to the caller.
/// </remarks>
public void UF_Clear()
{
    try
    {
        // Drop requests that were queued but have not been started yet.
        lock (m_ListAsyncRequestOpera)
        {
            m_ListAsyncRequestOpera.Clear();
        }

        // Abort all requests that are currently in progress.
        lock (m_ListAsyncHandle)
        {
            for (int k = 0; k < m_ListAsyncHandle.Count; k++)
            {
                AsyncRequestStruct ars = m_ListAsyncHandle[k];

                // UF_AsynRequest / UF_UploadFile add the entry to this list before the
                // HttpWebRequest is created, so the request can legitimately be null here.
                if (ars.request == null)
                {
                    continue;
                }

                try
                {
                    ars.request.Abort();
                }
                catch (System.Exception e)
                {
                    // One failing Abort must not prevent the remaining requests from being aborted.
                    Debugger.UF_Exception(e);
                }
            }
            m_ListAsyncHandle.Clear();
        }
    }
    catch (System.Exception ex)
    {
        Debugger.UF_Exception(ex);
    }
}
/// <summary>
/// Starts an asynchronous HTTP request and registers it in the list of in-flight handles.
/// </summary>
/// <remarks>
/// The original note on this method warns that the callback may execute on a different thread,
/// so callers should avoid thread-related exceptions inside it.
/// </remarks>
/// <param name="stype">Request type, forwarded unchanged to UF_CreateRequest.</param>
/// <param name="url">Target URL, forwarded to UF_CreateRequest.</param>
/// <param name="param">Request parameters, forwarded to UF_CreateRequest.</param>
/// <param name="headinfo">Header information, forwarded to UF_CreateRequest.</param>
/// <param name="timeOut">Value assigned to the request's Timeout property.</param>
/// <param name="callback">Delegate that receives the result of the request.</param>
/// <returns>
/// true when the request was started; false when creating or starting it threw, in which case
/// the callback is notified with WebRequestRetcode.EXCEPTION_ON_REQUEST through UF_InvokCallback.
/// </returns>
public bool UF_AsynRequest(int stype, string url, string param, string headinfo, int timeOut, DelegateResponse callback)
{
    HttpWebRequest webRequest = null;
    AsyncRequestStruct handle = new AsyncRequestStruct();
    int handleId = GID;

    try
    {
        handle.id = handleId;
        handle.stamp = System.Environment.TickCount;
        handle.callback = callback;

        // Register the handle first; the request itself is attached once it has been created.
        lock (m_ListAsyncHandle)
        {
            m_ListAsyncHandle.Add(handle);
        }

        webRequest = UF_CreateRequest(url, stype, param, headinfo, timeOut);
        handle.request = webRequest;
        handle.request.Timeout = timeOut;
        handle.request.BeginGetResponse(new AsyncCallback(UF_CallbackAsynRequest), handle);
        return true;
    }
    catch (Exception error)
    {
        // Undo the registration and release the half-built request before reporting the failure.
        UF_RemoveAsyncHandle(handleId);
        if (webRequest != null)
        {
            webRequest.Abort();
        }
        Debugger.UF_Error("Http error<AsynRequest>:" + error.Message);
        UF_InvokCallback(callback, WebRequestRetcode.EXCEPTION_ON_REQUEST, null);
        return false;
    }
}
/// <summary>
/// Polling entry point: starts at most one queued request and aborts at most one in-flight
/// request whose timeout has elapsed.
/// </summary>
/// <remarks>
/// The Count reads outside the locks are only cheap pre-checks; the collections are examined
/// again once the corresponding lock is held.
/// </remarks>
public void UF_Update()
{
    if (m_ListAsyncRequestOpera.Count > 0)
    {
        lock (m_ListAsyncRequestOpera)
        {
            // Re-check under the lock: UF_Clear may have emptied the queue between the
            // unlocked pre-check above and the moment the lock was acquired.
            if (m_ListAsyncRequestOpera.Count > 0)
            {
                AsyncRequestOpera opera = m_ListAsyncRequestOpera[0];
                m_ListAsyncRequestOpera.RemoveAt(0);
                UF_AsynRequest(opera.stype, opera.url, opera.param, opera.headinfo, opera.timeOut, opera.callback);
            }
        }
    }

    if (m_ListAsyncHandle.Count > 0)
    {
        lock (m_ListAsyncHandle)
        {
            for (int k = 0; k < m_ListAsyncHandle.Count; k++)
            {
                AsyncRequestStruct ars = m_ListAsyncHandle[k];

                // Milliseconds elapsed since the request was registered.
                int timestamp = Math.Abs(System.Environment.TickCount - ars.stamp);

                // Entries whose request has not been created yet (null) are skipped.
                if (ars.request != null && ars.request.Timeout < timestamp)
                {
                    Debugger.UF_Error(string.Format("Request TimeOut: Current:{0} Stamp: {1} ", System.Environment.TickCount, ars.stamp));
                    m_ListAsyncHandle.RemoveAt(k);
                    try
                    {
                        ars.request.Abort();
                    }
                    catch (System.Exception e)
                    {
                        Debugger.UF_Exception(e);
                    }

                    // Report the timeout through UF_InvokCallback (the original note says this
                    // returns the call to the main thread).
                    // NOTE(review): Abort() may also complete the pending BeginGetResponse callback,
                    // which reports its own result code - confirm callers tolerate two notifications.
                    UF_InvokCallback(ars.callback, WebRequestRetcode.TIME_OUT, null);

                    // Only one timed-out request is handled per call; the list was just modified.
                    break;
                }
            }
        }
    }
}
/// <summary>
/// Starts the next asynchronous craigslist request for the given IP while holding KeyChain[IP].
/// A queued listing takes priority and is answered by ParseListingInfo; when no listing is queued,
/// the first entry of the feed queue is requested instead and answered by ParseFeed.
/// </summary>
/// <param name="IP">Key used to select the per-IP queues and lock object.</param>
/// <param name="proxy">Proxy assigned to the outgoing request and carried in the request state.</param>
/// <param name="DataContext">Data context carried in the request state and passed to BuildFeedQueue.</param>
public void FetchNextWhatchamacallit(string IP, IWebProxy proxy, DataAccessDataContext DataContext)
{
    Queue<Listing> pendingListings = ListingQueues[IP];
    lock (KeyChain[IP])
    {
        if (pendingListings.Count == 0)
        {
            // No listing is waiting: fall back to the feed queue, rebuilding it when it is empty.
            if (FeedQueues[IP].Count == 0)
            {
                BuildFeedQueue(IP, DataContext);
            }

            var feed = FeedQueues[IP].First.Value;
            FeedQueues[IP].RemoveFirst();

            // Item3 of the feed tuple is sent as the "s" query parameter.
            string feedUrl = "http://" + feed.Item2.Name + ".craigslist.org/search/"
                + (feed.Item5 == null ? "" : (feed.Item5.SubCity + "/"))
                + feed.Item1.Name + "?s=" + feed.Item3.ToString();

            HttpWebRequest feedRequest = (HttpWebRequest)WebRequest.Create(feedUrl);
            feedRequest.Proxy = proxy;
            feedRequest.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
            feedRequest.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
            feedRequest.KeepAlive = false;
            feedRequest.UserAgent = Properties.Settings.Default.UserAgent;

            AsyncRequestStruct feedState = new AsyncRequestStruct()
            {
                request = feedRequest,
                parameters = feed,
                IP = IP,
                proxy = proxy,
                DataContext = DataContext
            };
            // The stopwatch measures the connection time; the callback stops it.
            feedState.stopwatch = new Stopwatch();
            feedState.stopwatch.Start();
            feedRequest.BeginGetResponse(new AsyncCallback(ParseFeed), feedState);
            return;
        }

        // A listing is waiting: request its detail page.
        Listing next = pendingListings.Peek();
        string listingUrl = "http://" + next.CLCity.Name + ".craigslist.org/"
            + (next.CLSubCity == null ? "" : (next.CLSubCity.SubCity + "/"))
            + next.CLSiteSection.Name + "/" + next.Id.ToString() + ".html";

        HttpWebRequest listingRequest = (HttpWebRequest)WebRequest.Create(listingUrl);
        listingRequest.Proxy = proxy;
        listingRequest.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
        listingRequest.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
        listingRequest.KeepAlive = false;
        listingRequest.UserAgent = Properties.Settings.Default.UserAgent;

        AsyncRequestStruct listingState = new AsyncRequestStruct()
        {
            request = listingRequest,
            parameters = next,
            IP = IP,
            proxy = proxy,
            DataContext = DataContext
        };
        listingState.stopwatch = new Stopwatch();
        listingState.stopwatch.Start();
        listingRequest.BeginGetResponse(new AsyncCallback(ParseListingInfo), listingState);

        // Only after the request has been started does the listing move from "queued" to "processing".
        ProcessingListings[IP].Add(pendingListings.Dequeue());
    }
}
/// <summary>
/// Completion callback passed to BeginGetResponse. Reads the whole response body, maps the
/// outcome to a WebRequestRetcode value and forwards both to the request's callback.
/// </summary>
/// <param name="result">
/// Async result whose AsyncState is the AsyncRequestStruct that was supplied when the request
/// was started.
/// </param>
private void UF_CallbackAsynRequest(IAsyncResult result)
{
    AsyncRequestStruct ars = (AsyncRequestStruct)result.AsyncState;
    DelegateResponse call = ars.callback;
    HttpWebRequest request = ars.request;
    int retcode = WebRequestRetcode.SUCCESS;
    byte[] bdata = null;
    HttpWebResponse response = null;

    // Remove the entry from the handle list before anything else.
    UF_RemoveAsyncHandle(ars.id);

    try
    {
        response = (HttpWebResponse)request.EndGetResponse(result);

        // Copy the body in bulk; the using blocks close the streams even when reading throws.
        using (Stream body = response.GetResponseStream())
        using (MemoryStream buffer = new MemoryStream())
        {
            body.CopyTo(buffer);
            bdata = buffer.ToArray();
        }

        // Status codes of 300 and above are treated as abnormal.
        int statusCode = (int)response.StatusCode;
        if (statusCode < 300)
        {
            retcode = WebRequestRetcode.SUCCESS;
        }
        else
        {
            Debugger.UF_Error("Http Response Abnormal: " + statusCode);
            retcode = WebRequestRetcode.ABNORMAL;
        }
    }
    catch (Exception e)
    {
        Debugger.UF_Error("Http error<CallbackAsynRequest>:" + e.Message);
        retcode = WebRequestRetcode.EXCEPTION_ON_RESPONSE;
    }

    if (response != null)
    {
        response.Close();
    }
    if (request != null)
    {
        request.Abort();
    }

    // Deliver the result through UF_InvokCallback (the original note says this returns the
    // call to the main thread).
    UF_InvokCallback(call, retcode, bdata);
}
/// <summary>
/// Uploads a file: its contents are written as the body of a POST request and the response is
/// awaited asynchronously through UF_CallbackAsynRequest.
/// </summary>
/// <param name="url">Target URL of the POST request.</param>
/// <param name="filePath">Path of the file whose bytes are sent as the request body.</param>
/// <param name="headinfo">When not null or empty, sent as the Authorization header.</param>
/// <param name="timeOut">Value assigned to the request's Timeout property.</param>
/// <param name="callback">
/// Receives the result. On the two failure paths in this method it is invoked directly with
/// FILE_NOT_EXIST or EXCEPTION_ON_REQUEST and null data.
/// </param>
/// <returns>
/// true when the request was started; false when the file does not exist or when preparing or
/// starting the request threw.
/// </returns>
public bool UF_UploadFile(string url, string filePath, string headinfo, int timeOut, DelegateResponse callback)
{
    if (!File.Exists(filePath))
    {
        // The file does not exist.
        Debugger.UF_Error("UploadFile Not Exist: " + filePath);
        if (callback != null)
        {
            callback(WebRequestRetcode.FILE_NOT_EXIST, null);
        }
        return false;
    }

    AsyncRequestStruct ars = new AsyncRequestStruct();
    int uid = GID;
    HttpWebRequest request = null;
    try
    {
        ars.id = uid;
        ars.stamp = System.Environment.TickCount;
        ars.callback = callback;
        lock (m_ListAsyncHandle)
        {
            m_ListAsyncHandle.Add(ars);
        }

        // ReadAllBytes reads the complete file and closes it even when reading fails; a single
        // Stream.Read call is not guaranteed to fill the buffer.
        byte[] buffer = File.ReadAllBytes(filePath);

        request = System.Net.WebRequest.CreateHttp(url);
        if (!string.IsNullOrEmpty(headinfo))
        {
            request.Headers.Add(HttpRequestHeader.Authorization, headinfo);
        }
        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";
        request.ContentLength = buffer.Length;

        // The using block closes the request stream even when the write throws.
        using (Stream postStream = request.GetRequestStream())
        {
            postStream.Write(buffer, 0, buffer.Length);
        }

        ars.request = request;
        ars.request.Timeout = timeOut;
        ars.request.BeginGetResponse(new AsyncCallback(UF_CallbackAsynRequest), ars);
    }
    catch (Exception e)
    {
        // Undo the registration and release the request before reporting the failure.
        UF_RemoveAsyncHandle(uid);
        if (request != null)
        {
            request.Abort();
        }
        Debugger.UF_Error("Http error<UploadFile>:" + e.Message);
        if (callback != null)
        {
            callback(WebRequestRetcode.EXCEPTION_ON_REQUEST, null);
        }
        return false;
    }
    return true;
}
/// <summary>
/// Completion callback for a listing-page request started by FetchNextWhatchamacallit.
/// Loads the returned HTML, fills in the listing's post date, optional map coordinates, address,
/// attribute spans and body, then inserts the listing and records its id as completed.
/// </summary>
/// <remarks>
/// On a connection error the listing is re-queued (or dropped from the failure table once
/// MaxConnectionRetries is exceeded); an error whose message contains "(403)" additionally
/// disables the proxy when DisabledBannedProxies is set and stops this fetch chain.
/// A page without attribute spans is not an error: the attribute step is skipped and the
/// listing is still stored.
/// The finally block always updates the timing counters, waits out the remainder of
/// ConnectionCooldown (milliseconds) and, unless a 403 was seen, starts the next request.
/// </remarks>
/// <param name="AsyncResult">Async result whose AsyncState is the AsyncRequestStruct of the request.</param>
public void ParseListingInfo(IAsyncResult AsyncResult)
{
    bool banned403 = false;
    Stopwatch sw = new Stopwatch();
    sw.Start();
    AsyncRequestStruct ars = (AsyncRequestStruct)AsyncResult.AsyncState;
    try
    {
        // Connection statistics: the stopwatch was started when the request was issued.
        ars.stopwatch.Stop();
        parseInfoConnectionCount++;
        parseInfoConnectionTime += ars.stopwatch.Elapsed;

        Listing listingSource = (Listing)ars.parameters;
        HtmlDocument response = new HtmlDocument();

        try
        {
            using (WebResponse wr = ars.request.EndGetResponse(AsyncResult))
            using (Stream stream = wr.GetResponseStream())
            using (StreamReader sr = new StreamReader(stream))
            {
                response.LoadHtml(sr.ReadToEnd());
            }
        }
        catch (Exception ex)
        {
            if (ex.Message.Contains("(403)"))
            {
                // If a 403 occurs, remove the proxy from operation.
                if (Properties.Settings.Default.DisabledBannedProxies)
                {
                    WebProxy wp = (WebProxy)ars.request.Proxy;
                    lock (KeyChain[ars.IP])
                    {
                        Proxy selectedProxy = ars.DataContext.Proxies.FirstOrDefault(x => x.IP == wp.Address.Host && x.Port == wp.Address.Port);
                        if (selectedProxy != null)
                        {
                            selectedProxy.Enabled = false;
                            lock (MasterKey)
                            {
                                ars.DataContext.SubmitChanges();
                            }
                        }
                    }
                }

                // Put the listing back so it can be fetched again later.
                lock (KeyChain[ars.IP])
                {
                    ListingQueues[ars.IP].Enqueue(listingSource);
                    ProcessingListings[ars.IP].Remove(listingSource);
                }
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Proxy has been banned." + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                }
                banned403 = true;
            }
            else
            {
                lock (KeyChain[ars.IP])
                {
                    if (ListingFailures[ars.IP].Keys.Contains(listingSource))
                    {
                        ListingFailures[ars.IP][listingSource]++;
                        if (ListingFailures[ars.IP][listingSource] > Properties.Settings.Default.MaxConnectionRetries)
                        {
                            // Retry budget exhausted: forget the failure counter and do not re-queue.
                            // NOTE(review): the listing is left in ProcessingListings on this path,
                            // unlike the other exits - confirm whether that is intended.
                            ListingFailures[ars.IP].Remove(listingSource);
                        }
                        else
                        {
                            ListingQueues[ars.IP].Enqueue(listingSource);
                            ProcessingListings[ars.IP].Remove(listingSource);
                        }
                    }
                    else
                    {
                        // First failure for this listing.
                        ListingFailures[ars.IP].Add(listingSource, 1);
                        ListingQueues[ars.IP].Enqueue(listingSource);
                        ProcessingListings[ars.IP].Remove(listingSource);
                    }
                }
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("An error has occurred while retrieving the listing info:\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                }
            }
            return;
        }

        // LISTING WAS DELETED
        HtmlNode removedNode = response.DocumentNode.SelectSingleNode("//div[@class='removed']");
        if (removedNode != null)
        {
            lock (KeyChain[ars.IP])
            {
                ProcessingListings[ars.IP].Remove(listingSource);
            }
            return;
        }

        // POSTDATE (mandatory: the listing is skipped when it cannot be parsed)
        try
        {
            listingSource.PostDate = DateTime.Parse(response.DocumentNode.SelectSingleNode("//p[@id='display-date']/time").Attributes["datetime"].Value);
        }
        catch (Exception ex)
        {
            lock (EventLogKey)
            {
                EventLog.WriteEntry("Listing skipped. Post date could not be identified or parsed.\n\n" + PrintException(ex) + PrintListing(listingSource) + ex.Message, EventLogEntryType.Warning);
            }
            lock (KeyChain[ars.IP])
            {
                ProcessingListings[ars.IP].Remove(listingSource);
            }
            return;
        }

        // GPS COORDINATES (optional: failures are logged and the listing is kept)
        if (listingSource.ListingAttributes.Any(x => x.Name == "Has Map"))
        {
            try
            {
                HtmlNode mapNode = response.DocumentNode.SelectSingleNode("//div[@id='map' and @data-latitude and @data-longitude and @data-accuracy]");
                listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Latitude", Value = mapNode.Attributes["data-latitude"].Value });
                listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Longitude", Value = mapNode.Attributes["data-longitude"].Value });
                listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Location Accuracy", Value = mapNode.Attributes["data-accuracy"].Value });
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Error parsing GPS coordinates:\n\n" + PrintException(ex) + PrintListing(listingSource));
                }
            }
        }

        // ADDRESS (optional)
        HtmlNode addressNode = response.DocumentNode.SelectSingleNode("//div[@class='mapaddress']");
        if (addressNode != null)
        {
            try
            {
                listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Address", Value = addressNode.InnerText });
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Error parsing map address: \n\n" + ex.Message);
                }
            }
        }

        // ATTRIBUTES (optional). SelectNodes returns null when no node matches; that only means
        // there is nothing to iterate, so processing continues with the body.
        HtmlNodeCollection hnc = response.DocumentNode.SelectNodes("//p[@class='attrgroup']/span");
        if (hnc != null)
        {
            foreach (HtmlNode hn in hnc)
            {
                string[] parts = hn.InnerText.Split(':');
                if (parts.Length == 2)
                {
                    listingSource.ListingAttributes.Add(new ListingAttribute() { Name = parts[0].Trim(), Value = parts[1].Trim() });
                }
                else if (parts.Length == 1)
                {
                    // "More ads by this user" is styled like an attribute, but is actually just a
                    // link. It provides no info besides userid.
                    listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Unspecified", Value = parts[0].Trim() });
                }
            }
        }

        // BODY (mandatory: the listing is skipped when it cannot be read)
        try
        {
            HtmlNode bodyNode = response.DocumentNode.SelectSingleNode("//section[@id='postingbody']");
            listingSource.Body = bodyNode.InnerText;
        }
        catch (Exception ex)
        {
            lock (KeyChain[ars.IP])
            {
                ProcessingListings[ars.IP].Remove(listingSource);
            }
            lock (EventLogKey)
            {
                EventLog.WriteEntry("An error has occurred while parsing the listing body: \n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
            }
            return;
        }

        // Store the finished listing and mark it as completed.
        lock (KeyChain[ars.IP])
        {
            ars.DataContext.Listings.InsertOnSubmit(listingSource);
            CompletedListingIds[ars.IP].Add(listingSource.Id);
            ProcessingListings[ars.IP].Remove(listingSource);
            SubmitData(ars.DataContext);
        }
    }
    finally
    {
        sw.Stop();
        parseInfoProcessingTime += sw.Elapsed;
        parseInfoProcessingCount++;

        // Wait out whatever is left of the cooldown before issuing the next request.
        if (ConnectionCooldown > 0 && sw.Elapsed < TimeSpan.FromMilliseconds(ConnectionCooldown))
        {
            Thread.Sleep(TimeSpan.FromMilliseconds(ConnectionCooldown) - sw.Elapsed);
        }

        try
        {
            // After a 403 this chain of requests is not continued.
            if (banned403 == false)
            {
                FetchNextWhatchamacallit(ars.IP, ars.proxy, ars.DataContext);
            }
        }
        catch (Exception ex)
        {
            lock (EventLogKey)
            {
                EventLog.WriteEntry("Error fetching requests from next queue.\n\n" + ex.Message);
            }
        }
    }
}
/// <summary>
/// Completion callback for a feed (search page) request started by FetchNextWhatchamacallit.
/// Loads the returned HTML, turns each result row into a Listing and enqueues the ones that are
/// not already known for this IP.
/// </summary>
/// <remarks>
/// Row processing stops (method returns) at the first row that links to "craigslist." or whose
/// time is not newer than the feed's last stamp. When the loop finishes without returning and
/// every row carried a time node, the same feed is pushed to the front of the feed queue with
/// Item3 increased by 100. The finally block always updates the timing counters, waits out the
/// remainder of ConnectionCooldown (milliseconds) and, unless a 403 was seen, starts the next request.
/// </remarks>
/// <param name="AsyncResult">Async result whose AsyncState is the AsyncRequestStruct of the request.</param>
private void ParseFeed(IAsyncResult AsyncResult)
{
    bool banned403 = false;
    Stopwatch sw = new Stopwatch();
    sw.Start();
    AsyncRequestStruct ars = (AsyncRequestStruct)AsyncResult.AsyncState;
    try
    {
        // Connection statistics: the stopwatch was started when the request was issued.
        ars.stopwatch.Stop();
        parseFeedConnectionCount++;
        parseFeedConnectionTime += ars.stopwatch.Elapsed;
        // Unpack the feed descriptor that was attached to the request.
        Tuple <CLSiteSection, CLCity, int, DateTime, CLSubCity> feedResource = (Tuple <CLSiteSection, CLCity, int, DateTime, CLSubCity>)ars.parameters;
        CLSiteSection SiteSection = feedResource.Item1;
        CLCity City = feedResource.Item2;
        int depth = feedResource.Item3;
        DateTime lastStamp = feedResource.Item4;
        CLSubCity subCity = feedResource.Item5;
        HtmlDocument response = new HtmlDocument();
        try
        {
            using (WebResponse wr = ars.request.EndGetResponse(AsyncResult))
            {
                using (Stream stream = wr.GetResponseStream())
                {
                    using (StreamReader sr = new StreamReader(stream))
                    {
                        response.LoadHtml(sr.ReadToEnd());
                    }
                }
            }
        }
        catch (Exception ex)
        {
            if (ex.Message.Contains("(403)")) // If a 403 occurs, remove the proxy from operation.
            {
                if (Properties.Settings.Default.DisabledBannedProxies)
                {
                    WebProxy wp = (WebProxy)ars.request.Proxy;
                    Proxy selectedProxy = ars.DataContext.Proxies.FirstOrDefault(x => x.IP == wp.Address.Host && x.Port == wp.Address.Port);
                    if (selectedProxy != null)
                    {
                        selectedProxy.Enabled = false;
                        lock (KeyChain[ars.IP])
                        {
                            lock (MasterKey)
                            {
                                ars.DataContext.SubmitChanges();
                            }
                        }
                    }
                }
                // The flag makes the finally block skip fetching the next request.
                banned403 = true;
                return;
            }
            // Any other connection error is only logged.
            lock (EventLogKey)
            {
                EventLog.WriteEntry("An error has occurred while retreiving a feed:\n\n" + ex.Message + "\n\n" + (ex.InnerException == null ? "" : ex.InnerException.Message) + "\n\n" + SiteSection.Name + "\n" + City.Name + "\n" + depth.ToString(), EventLogEntryType.Warning);
            }
            return;
        }
        HtmlNodeCollection rows = response.DocumentNode.SelectNodes("//p[@class='row' and @data-pid]");
        if (rows == null)
        {
            // No rows is not necessarily an error: it's possible that the end of the feeds has
            // been reached and there are no more listings, so nothing is logged here.
            return;
        }
        // Set when at least one row has no time node; suppresses queuing the next page below.
        bool NoUpdateTimesPosted = false;
        foreach (HtmlNode row in rows)
        {
            if (row.SelectSingleNode("a").Attributes["href"].Value.Contains("craigslist."))
            {
                // This condition deals with "Nearby Areas". Links within the locale are relative
                // -> /<sectionCode>/<Id>.html, whereas links out of the locale are global
                // -> http://<city.Name>/craigslist.org/<sectionCode>/<Id>.html
                return;
            }
            Listing listingSource = new Listing() { CLSiteSection = SiteSection, CLCity = City, CLSubCity = subCity, Timestamp = DateTime.Now };
            // LASTUPDATED: stop at the first row that is not newer than the feed's last stamp.
            try
            {
                HtmlNode hn = row.SelectSingleNode("span[@class='txt']/span[@class='pl']/time");
                if (hn != null && DateTime.Parse(hn.Attributes["datetime"].Value) <= lastStamp)
                {
                    return;
                }
                else if (hn == null)
                {
                    NoUpdateTimesPosted = true;
                }
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Failure to parse listing update time.\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Error);
                }
                return;
            }
            // ID: a row with an unparsable id is skipped.
            try
            {
                listingSource.Id = long.Parse(row.Attributes["data-pid"].Value);
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Listing has an invalid ID:\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                }
                continue;
            }
            // Skip ids that are already completed, queued, being processed or pre-queued;
            // otherwise reserve the id in the pre-queue while the row is parsed.
            lock (KeyChain[ars.IP])
            {
                if (CompletedListingIds[ars.IP].Contains(listingSource.Id) ||
                    ListingQueues[ars.IP].Any(x => x.Id == listingSource.Id) ||
                    ProcessingListings[ars.IP].Any(x => x.Id == listingSource.Id) ||
                    PreQueueListings[ars.IP].Any(x => x.Id == listingSource.Id) )
                {
                    continue;
                }
                PreQueueListings[ars.IP].Add(listingSource);
            }
            // PRICE (optional: a parse failure is logged and the listing is kept)
            {
                HtmlNode hn = row.SelectSingleNode("span[@class='txt']/span[@class='l2']/span[@class='price']");
                if (hn != null)
                {
                    try
                    {
                        string value = long.Parse(hn.InnerText.Replace("$", "").Replace(@"$", "")).ToString();
                        listingSource.ListingAttributes.Add( new ListingAttribute() { Name = "price", Value = value } );
                    }
                    catch (Exception ex)
                    {
                        lock (EventLogKey)
                        {
                            EventLog.WriteEntry("Error parsing price listing data: " + hn.InnerText.Replace("$", "").Replace(@"$", "") + "\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                        }
                    }
                }
            }
            // LOCALE (optional): the text is stored without its surrounding parentheses.
            {
                HtmlNode hn = row.SelectSingleNode("span[@class='txt']/span[@class='l2']/span[@class='pnr']/small");
                if (hn != null)
                {
                    try
                    {
                        listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Locale", Value = hn.InnerText.Replace("(", "").Replace(")", "").Trim() });
                    }
                    catch (Exception ex)
                    {
                        lock (EventLogKey)
                        {
                            EventLog.WriteEntry("Error parsing listing locale data: " + hn.InnerText.Replace("(", "").Replace(")", "").Trim() + "\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                        }
                    }
                }
            }
            // HAS PICTURE, HAS MAP (optional). ParseListingInfo checks the "Has Map" attribute
            // before it looks for map coordinates.
            try
            {
                HtmlNode hn = row.SelectSingleNode("span[@class='txt']/span[@class='l2']/span[@class='pnr']/span[@class='px']/span[@class='p']");
                if (hn != null)
                {
                    if (hn.InnerText.Contains("pic"))
                    {
                        listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Has Picture", Value = "True" });
                    }
                    if (hn.InnerText.Contains("map"))
                    {
                        listingSource.ListingAttributes.Add(new ListingAttribute() { Name = "Has Map", Value = "True" });
                    }
                }
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Error while parsing pic and map presence:\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                }
            }
            // TITLE (mandatory: the row is skipped and its pre-queue reservation released on failure)
            try
            {
                listingSource.Title = row.SelectSingleNode("span[@class='txt']/span[@class='pl']/a[@class='hdrlnk']").InnerText;
            }
            catch (Exception ex)
            {
                lock (EventLogKey)
                {
                    EventLog.WriteEntry("Listing skipped. Error parsing title from listing.\n\n" + PrintException(ex) + PrintListing(listingSource), EventLogEntryType.Warning);
                }
                lock (KeyChain[ars.IP])
                {
                    PreQueueListings[ars.IP].Remove(listingSource);
                }
                continue;
            }
            // Row parsed: move the listing from the pre-queue into the listing queue.
            lock (KeyChain[ars.IP])
            {
                ListingQueues[ars.IP].Enqueue(listingSource);
                ProcessingListings[ars.IP].Remove(listingSource);
                PreQueueListings[ars.IP].Remove(listingSource);
            }
        }
        // If the loop completes without a return, the next page needs to be looked at.
        if (NoUpdateTimesPosted == false)
        {
            lock (KeyChain[ars.IP])
            {
                FeedQueues[ars.IP].AddFirst(new Tuple <CLSiteSection, CLCity, int, DateTime, CLSubCity>(feedResource.Item1, feedResource.Item2, feedResource.Item3 + 100, feedResource.Item4, feedResource.Item5));
            }
        }
    }
    catch (Exception ex)
    {
        // Anything not handled above is logged here and not rethrown.
        lock (EventLogKey)
        {
            EventLog.WriteEntry("An error has occurred while parsing the feed:\n\n" + PrintException(ex), EventLogEntryType.Error);
        }
    }
    finally
    {
        sw.Stop();
        parseFeedProcessingTime += sw.Elapsed;
        parseFeedProcessingCount++;
        // Wait out whatever is left of the cooldown before issuing the next request.
        if (ConnectionCooldown > 0 && sw.Elapsed < TimeSpan.FromMilliseconds(ConnectionCooldown))
        {
            Thread.Sleep(TimeSpan.FromMilliseconds(ConnectionCooldown) - sw.Elapsed);
        }
        try
        {
            // After a 403 this chain of requests is not continued.
            if (banned403 == false)
            {
                FetchNextWhatchamacallit(ars.IP, ars.proxy, ars.DataContext);
            }
        }
        catch (Exception ex)
        {
            lock (EventLogKey)
            {
                EventLog.WriteEntry("Error fetching requests from next queue.\n\n" + PrintException(ex));
            }
        }
    }
}