/// <summary>
/// Appends <paramref name="item"/> to <c>CompletedUrls</c> while holding the
/// lock exposed by this instance's <see cref="ICollection.SyncRoot"/>.
/// </summary>
/// <param name="item">The URL item to record in <c>CompletedUrls</c>.</param>
public void Add(cGatherUrlBaseItem item)
{
    object gate = ((ICollection)this).SyncRoot;
    lock (gate)
    {
        CompletedUrls.Add(item);
    }
}
/// <summary>
/// Removes and returns the item at the head of <c>FileUrls</c>.
/// </summary>
/// <returns>
/// The dequeued item, or <c>null</c> when <c>FileUrls</c> is empty.
/// </returns>
public cGatherUrlBaseItem Dequeue()
{
    // One lock is sufficient: the count check and the dequeue must be atomic
    // with respect to each other, and re-acquiring the same monitor inside
    // the critical section adds nothing because monitors are reentrant.
    lock (((ICollection)FileUrls).SyncRoot)
    {
        return this.FileUrls.Count > 0 ? FileUrls.Dequeue() : null;
    }
}
/// <summary>
/// Worker loop of a gather thread. Sets this worker's state to Run and keeps
/// iterating while both the task manager and this worker are in the Run state.
/// Each iteration handles, in priority order:
/// 1. one item from this thread's web-URL queue,
/// 2. otherwise one item from this thread's file-URL queue,
/// 3. otherwise raises <c>e_OnGatherNotityCompleted</c> with this thread's name.
/// Any exception thrown during an iteration is logged and the loop continues.
/// </summary>
/// <remarks>
/// NOTE(review): the web-URL queue lock is held for the whole processing of the
/// dequeued item (including the page download), exactly as before.
/// </remarks>
internal void ThreadGetUrlsWork()
{
    this.ThreadState = EnumGloabParas.EnumThreadState.Run;

    while (m_GatherTaskManage.ThreadState == EnumGloabParas.EnumThreadState.Run
        && this.ThreadState == EnumGloabParas.EnumThreadState.Run)
    {
        try
        {
            if (m_GatherTaskManage.runWebUrls[this.ThreadIndex].Count > 0)
            {
                // Web page queue.
                lock ((m_GatherTaskManage.runWebUrls[this.ThreadIndex]).SyncRoot)
                {
                    // Re-check under the lock: the unlocked check above is only a cheap pre-test.
                    if (m_GatherTaskManage.runWebUrls[this.ThreadIndex].Count > 0)
                    {
                        RemoveFromCompleteThreadList();

                        cGatherUrlItem gatherUrlItem = m_GatherTaskManage.runWebUrls[this.ThreadIndex].Dequeue();
                        bool exitFlag;
                        if (gatherUrlItem.GaterherFlag == EnumGloabParas.EnumUrlGaterherState.Run)
                        {
                            // The item is a page to gather data from.
                            exitFlag = false;
                            string errMsg = string.Empty;
                            GatherWebUrl(gatherUrlItem, ref exitFlag, ref errMsg);
                        }
                        else
                        {
                            // The item is a navigation page: look for lower-level pages.
                            exitFlag = FindLowerLevelPages(gatherUrlItem);
                        }

                        if (exitFlag)
                        {
                            break;
                        }
                    }
                }
            }
            else if (m_GatherTaskManage.runFileUrls[this.ThreadIndex].Count > 0)
            {
                // File (image) queue.
                lock ((m_GatherTaskManage.runFileUrls[this.ThreadIndex]).SyncRoot)
                {
                    if (m_GatherTaskManage.runFileUrls[this.ThreadIndex].Count > 0)
                    {
                        // Same locked removal as the web branch, so the shared list is
                        // never mutated without its SyncRoot being held.
                        RemoveFromCompleteThreadList();

                        cGatherUrlBaseItem gatherFileItem = m_GatherTaskManage.runFileUrls[this.ThreadIndex].Dequeue();
                        this.DownFileQueue(gatherFileItem.Url);
                    }
                }
            }
            else
            {
                // Nothing queued for this thread.
                if (e_OnGatherNotityCompleted != null)
                {
                    e_OnGatherNotityCompleted(ThreadName);
                }
            }
        }
        catch (Exception ex)
        {
            string errMsg = "error:" + ex.Message + ex.Source + ex.StackTrace;
            m_GatherTaskManage.OnLog(new cGatherEventArgs()
            {
                Message = errMsg,
                MessageType = EnumGloabParas.EnumMessageType.ERROR,
                ThreadName = ThreadName,
            });
            DMSFrame.Loggers.LoggerManager.FileLogger.LogWithTime(errMsg);
        }
    }
}

/// <summary>
/// Removes this thread's name from the task manager's <c>completeThreadList</c>
/// (if present) while holding that list's SyncRoot lock.
/// </summary>
private void RemoveFromCompleteThreadList()
{
    lock (((ICollection)m_GatherTaskManage.completeThreadList).SyncRoot)
    {
        if (m_GatherTaskManage.completeThreadList.Contains(this.ThreadName))
        {
            m_GatherTaskManage.completeThreadList.Remove(this.ThreadName);
        }
    }
}

/// <summary>
/// Downloads a navigation page and queues the URLs found on it: the "next page"
/// link (when the item has a next-page marker) and the links of the next level.
/// When the item has no further level configured, it is gathered as a data page.
/// </summary>
/// <param name="gatherUrlItem">The dequeued navigation item.</param>
/// <returns>
/// <c>true</c> when <c>GatherWebUrl</c> set its exit flag and the worker loop
/// must stop; otherwise <c>false</c>.
/// </returns>
private bool FindLowerLevelPages(cGatherUrlItem gatherUrlItem)
{
    m_GatherTaskManage.OnLog(new cGatherEventArgs()
    {
        Message = gatherUrlItem.Url + "查找下级网页中.",
        MessageType = EnumGloabParas.EnumMessageType.NOTIFY,
        ThreadName = ThreadName
    });

    string html;
    if (!TryGetHtmlWithRetry(gatherUrlItem, out html))
    {
        m_GatherTaskManage.OnLog(new cGatherEventArgs()
        {
            Message = gatherUrlItem.Url + "查找下级网页失败.",
            MessageType = EnumGloabParas.EnumMessageType.ERROR,
            ThreadName = ThreadName
        });
        if (e_CompleteCount != null)
        {
            e_CompleteCount(new cGatherCompleteCountEventArgs()
            {
                CompleteType = EnumGloabParas.EnumThreadCompleteType.Error,
                StartPos = gatherUrlItem.StartPos,
                EndPos = gatherUrlItem.EndPos,
                GaterherFlag = EnumGloabParas.EnumUrlGaterherState.FirstError,
                Level = gatherUrlItem.Level,
                LevelUrlList = gatherUrlItem.LevelUrlList,
                NextPageText = gatherUrlItem.NextPageText,
                Url = gatherUrlItem.Url,
            });
        }
        return false;
    }

    if (!string.IsNullOrEmpty(gatherUrlItem.NextPageText))
    {
        EnqueueNextPage(gatherUrlItem, html);
    }

    if (gatherUrlItem.LevelUrlList.Count > gatherUrlItem.Level)
    {
        EnqueueNextLevelUrls(gatherUrlItem, html);

        if (e_CompleteCount != null)
        {
            e_CompleteCount(new cGatherCompleteCountEventArgs()
            {
                CompleteType = EnumGloabParas.EnumThreadCompleteType.Success,
                StartPos = gatherUrlItem.StartPos,
                EndPos = gatherUrlItem.EndPos,
                GaterherFlag = EnumGloabParas.EnumUrlGaterherState.Completed,
                Level = gatherUrlItem.Level,
                LevelUrlList = gatherUrlItem.LevelUrlList,
                NextPageText = gatherUrlItem.NextPageText,
                Url = gatherUrlItem.Url,
            });
        }
        return false;
    }

    // No further level is configured for this item: gather it as a data page.
    bool exitFlag = false;
    string errMsg = string.Empty;
    GatherWebUrl(gatherUrlItem, ref exitFlag, ref errMsg);
    return exitFlag;
}

/// <summary>
/// Downloads the page of <paramref name="gatherUrlItem"/>; after a failure it
/// retries up to <c>m_GatherTaskManage.TryAgainCount</c> more times. Every
/// failed attempt is logged.
/// </summary>
/// <param name="gatherUrlItem">The item whose URL is downloaded.</param>
/// <param name="html">The downloaded text, or an empty string on failure.</param>
/// <returns><c>true</c> when a download attempt succeeded; otherwise <c>false</c>.</returns>
private bool TryGetHtmlWithRetry(cGatherUrlItem gatherUrlItem, out string html)
{
    html = string.Empty;
    int tryCount = m_GatherTaskManage.TryAgainCount;

    while (true)
    {
        try
        {
            html = this.GetHtml(gatherUrlItem.Url, m_GatherTaskManage.WebCookie, m_GatherTaskManage.WebEncode, gatherUrlItem.StartPos, gatherUrlItem.EndPos, m_GatherTaskManage.IsAjax);
            return true;
        }
        catch (Exception ex)
        {
            m_GatherTaskManage.OnLog(new cGatherEventArgs()
            {
                Message = DateTime.Now.Ticks + " " + gatherUrlItem.Url + "采集出现错误,重试中." + ex.Message,
                MessageType = EnumGloabParas.EnumMessageType.ERROR,
                ThreadName = ThreadName
            });
            if (tryCount > 0)
            {
                tryCount--;
                continue;
            }
            return false;
        }
    }
}

/// <summary>
/// Looks up the "next page" link in <paramref name="html"/> and, when one is
/// found that the task manager does not already contain, queues it with the
/// same settings as <paramref name="gatherUrlItem"/>.
/// </summary>
/// <param name="gatherUrlItem">The navigation item the page belongs to.</param>
/// <param name="html">The downloaded page text.</param>
private void EnqueueNextPage(cGatherUrlItem gatherUrlItem, string html)
{
    string nextUrl = _getTempData.GetNextPage(gatherUrlItem.Url, html, gatherUrlItem.NextPageText);
    if (string.IsNullOrEmpty(nextUrl))
    {
        // A missing next page is expected on the last page, so it is logged
        // but deliberately not counted in ErrorCount.
        m_GatherTaskManage.OnLog(new cGatherEventArgs()
        {
            Message = gatherUrlItem.Url + "获取下一页导航失败.",
            MessageType = EnumGloabParas.EnumMessageType.ERROR,
            ThreadName = ThreadName
        });
        return;
    }

    nextUrl = this.GetNextUrl(nextUrl, gatherUrlItem.Url);
    if (m_GatherTaskManage.ContainsUrl(nextUrl))
    {
        return;
    }

    // Original author's note: always add to the first process here so usable
    // URLs are collected ahead of time; when no URLs remain, thread 0 gathers
    // the Run pages.
    m_GatherTaskManage.runWebUrls[m_GatherTaskManage.ThreadPos].Enqueue(new cGatherUrlItem()
    {
        GaterherFlag = gatherUrlItem.GaterherFlag,
        LevelUrlList = gatherUrlItem.LevelUrlList,
        NextPageText = gatherUrlItem.NextPageText,
        Url = nextUrl,
        Level = gatherUrlItem.Level,
        StartPos = gatherUrlItem.StartPos,
        EndPos = gatherUrlItem.EndPos,
    });
    if (e_TotalCount != null)
    {
        e_TotalCount(new cGatherCompletedEventArgs()
        {
            ErrorCount = 0,
            TotalCount = 1,
            TrueCount = 0,
        });
    }
    m_GatherTaskManage.OnLog(new cGatherEventArgs()
    {
        Message = gatherUrlItem.Url + "获取下一页导航成功.",
        MessageType = EnumGloabParas.EnumMessageType.NOTIFY,
        ThreadName = ThreadName
    });
}

/// <summary>
/// Extracts the next-level URLs from <paramref name="html"/> using the
/// <c>LevelUrlList</c> entry at the item's current level, and queues every URL
/// the task manager does not already contain.
/// </summary>
/// <param name="gatherUrlItem">The navigation item the page belongs to.</param>
/// <param name="html">The downloaded page text.</param>
private void EnqueueNextLevelUrls(cGatherUrlItem gatherUrlItem, string html)
{
    string levelEntry = gatherUrlItem.LevelUrlList[gatherUrlItem.Level];
    List<string> levelUrls = GetNextLevelUrl(gatherUrlItem.Url, html, levelEntry);

    m_GatherTaskManage.OnLog(new cGatherEventArgs()
    {
        Message = gatherUrlItem.Url + "获取新的网页个数" + levelUrls.Count + ".",
        MessageType = EnumGloabParas.EnumMessageType.NOTIFY,
        ThreadName = ThreadName
    });
    if (levelUrls.Count == 0)
    {
        // Nothing matched: log the raw page text.
        m_GatherTaskManage.OnLog(new cGatherEventArgs()
        {
            Message = html,
            MessageType = EnumGloabParas.EnumMessageType.INFO,
            ThreadName = ThreadName
        });
    }

    foreach (var levelUrl in levelUrls)
    {
        if (m_GatherTaskManage.ContainsUrl(levelUrl))
        {
            continue;
        }

        if (gatherUrlItem.LevelUrlList.Count > gatherUrlItem.Level + 1)
        {
            // More levels remain, so the new URL is another navigation page.
            // Original author's note: always add to the first process here so
            // usable URLs are collected ahead of time; when no URLs remain,
            // thread 0 gathers the Run pages.
            m_GatherTaskManage.runWebUrls[m_GatherTaskManage.ThreadPos].Enqueue(new cGatherUrlItem()
            {
                GaterherFlag = EnumGloabParas.EnumUrlGaterherState.First,
                LevelUrlList = gatherUrlItem.LevelUrlList,
                NextPageText = "", // the next-page marker only applies to first-level pages
                Url = levelUrl,
                Level = gatherUrlItem.Level + 1,
                StartPos = "",
                EndPos = "",
            });
        }
        else
        {
            // Last level: the new URL is a data page.
            // Original author's note: distribute URLs evenly across the threads.
            m_GatherTaskManage.runWebUrls[m_GatherTaskManage.ThreadPos].Enqueue(new cGatherUrlItem()
            {
                GaterherFlag = EnumGloabParas.EnumUrlGaterherState.Run,
                LevelUrlList = gatherUrlItem.LevelUrlList,
                NextPageText = gatherUrlItem.NextPageText,
                Url = levelUrl,
                Level = gatherUrlItem.Level + 1,
                StartPos = m_GatherTaskManage.LastStartPos,
                EndPos = m_GatherTaskManage.LastEndPos,
            });
            if (e_TotalCount != null)
            {
                e_TotalCount(new cGatherCompletedEventArgs()
                {
                    ErrorCount = 0,
                    TotalCount = 1,
                    TrueCount = 0,
                });
            }
        }
    }
}
/// <summary>
/// Adds <paramref name="item"/> to the tail of <c>FileUrls</c>.
/// </summary>
/// <param name="item">The item to queue.</param>
public void Enqueue(cGatherUrlBaseItem item)
{
    // Take the same SyncRoot lock that Dequeue uses, so a concurrent
    // Enqueue cannot run while Dequeue is checking Count and removing an item.
    lock (((ICollection)FileUrls).SyncRoot)
    {
        this.FileUrls.Enqueue(item);
    }
}