/// <summary>
/// Removes and returns the next URL item, taking from <c>MainUrls</c> first and
/// falling back to <c>RunUrls</c> only when <c>MainUrls</c> is empty.
/// </summary>
/// <returns>The dequeued item, or <c>null</c> when both queues are empty.</returns>
public cGatherUrlItem Dequeue()
{
    // The Count check and the removal happen under a single acquisition of the
    // queue's SyncRoot. Re-locking the same SyncRoot inside the guarded region
    // is redundant because the monitor is already held by this thread.
    lock (((ICollection)MainUrls).SyncRoot)
    {
        if (MainUrls.Count > 0)
        {
            return MainUrls.Dequeue();
        }
    }

    lock (((ICollection)RunUrls).SyncRoot)
    {
        if (RunUrls.Count > 0)
        {
            return RunUrls.Dequeue();
        }
    }

    return null;
}
/// <summary>
/// Adds <paramref name="item"/> to <c>RunUrls</c> when its <c>GaterherFlag</c> is
/// <c>Run</c>, otherwise to <c>MainUrls</c>.
/// </summary>
/// <param name="item">The URL item to enqueue.</param>
public void Enqueue(cGatherUrlItem item)
{
    // Dequeue() guards each queue with its ICollection.SyncRoot; take the same
    // lock here so an enqueue cannot interleave with a concurrent dequeue on
    // the same queue.
    if (item.GaterherFlag == EnumGloabParas.EnumUrlGaterherState.Run)
    {
        lock (((System.Collections.ICollection)RunUrls).SyncRoot)
        {
            RunUrls.Enqueue(item);
        }
    }
    else
    {
        lock (((System.Collections.ICollection)MainUrls).SyncRoot)
        {
            MainUrls.Enqueue(item);
        }
    }
}
/// <summary>
/// Walks every row of <paramref name="tempData"/> and, for each task column whose
/// <c>DataFileType</c> is not <c>Text</c>, turns the cell value into a URL via
/// <c>GetNextUrl</c> and either enqueues it for later download or downloads it
/// immediately, depending on <c>m_GatherTaskManage.DownFileQueue</c>.
/// </summary>
/// <param name="item">The gathered page; its <c>Url</c> is passed to <c>GetNextUrl</c>.</param>
/// <param name="tempData">Rows extracted from the page.</param>
private void DownFileQueue(cGatherUrlItem item, DataTable tempData)
{
    List<TaskColumnItem> fileColumns = m_GatherTaskManage.ColumnItemList
        .Where(c => c.DataFileType != EnumGloabParas.EnumDataFileType.Text)
        .ToList();

    foreach (DataRow row in tempData.Rows)
    {
        foreach (TaskColumnItem fileColumn in fileColumns)
        {
            string cellText = row[fileColumn.DataTextType.Value].ToString();
            string fileUrl = this.GetNextUrl(cellText, item.Url);

            if (!m_GatherTaskManage.DownFileQueue)
            {
                // Direct mode: report one more pending item, then download the file right away.
                if (e_TotalCount != null)
                {
                    e_TotalCount(new cGatherCompletedEventArgs()
                    {
                        ErrorCount = 0,
                        GaterherState = EnumGloabParas.EnumThreadState.Run,
                        TotalCount = 1,
                        TrueCount = 0,
                    });
                }

                this.DownFileQueue(fileUrl);
                continue;
            }

            // Queue mode: skip URLs the task manager already knows about.
            if (m_GatherTaskManage.ContainsDownFileUrl(fileUrl))
            {
                continue;
            }

            m_GatherTaskManage.runFileUrls[m_GatherTaskManage.ThreadPos].Enqueue(new cGatherUrlBaseItem()
            {
                Url = fileUrl,
            });

            if (e_TotalCount != null)
            {
                e_TotalCount(new cGatherCompletedEventArgs()
                {
                    ErrorCount = 0,
                    TotalCount = 1,
                    TrueCount = 0,
                });
            }
        }
    }
}
/// <summary>
/// Fetches the HTML for <paramref name="item"/> via <c>GetHtml</c> and passes it to the
/// HTML-based <c>GetDataTable</c> overload to fill <paramref name="dataTable"/>.
/// </summary>
/// <param name="dataTable">Table handed to the HTML-based overload; returned unchanged if an exception is caught.</param>
/// <param name="columnItemList">Column definitions forwarded to the HTML-based overload.</param>
/// <param name="item">Supplies the URL and the start/end positions passed to <c>GetHtml</c>.</param>
/// <param name="webCookie">Cookie string forwarded to <c>GetHtml</c>.</param>
/// <param name="webEncode">Encoding selector forwarded to <c>GetHtml</c>.</param>
/// <param name="isAjax">Ajax flag forwarded to <c>GetHtml</c>.</param>
/// <param name="errMsg">Receives a description when the page is empty or an exception is caught.</param>
/// <returns>
/// The overload's result on success; <c>null</c> when the fetched HTML is null or empty;
/// the incoming <paramref name="dataTable"/> when an exception was caught.
/// </returns>
public DataTable GetDataTable(DataTable dataTable, List<TaskColumnItem> columnItemList, cGatherUrlItem item, string webCookie, EnumGloabParas.EnumEncodeType webEncode, bool isAjax, ref string errMsg)
{
    try
    {
        string pageHtml = this.GetHtml(item.Url, webCookie, webEncode, item.StartPos, item.EndPos, isAjax);

        if (!string.IsNullOrEmpty(pageHtml))
        {
            return this.GetDataTable(dataTable, columnItemList, pageHtml, ref errMsg);
        }

        errMsg = "网页获取为空,有可能是采集范围设置错误或网络错误";
        return null;
    }
    catch (Exception ex)
    {
        // Failures are reported through errMsg rather than propagated to the caller.
        errMsg = ex.Message;
        return dataTable;
    }
}
/// <summary>
/// Appends <paramref name="item"/> to the <c>CompletedWebUrls</c> collection.
/// </summary>
/// <param name="item">The URL item to record.</param>
public void Add(cGatherUrlItem item)
{
    CompletedWebUrls.Add(item);
}
/// <summary>
/// Gathers a single URL: fetches the page with retries, extracts rows into a clone of
/// <c>m_GatherData</c>, optionally hands file URLs to <c>DownFileQueue</c>, merges the rows
/// into <c>m_GatherData</c>, and reports progress through <c>OnLog</c>,
/// <c>e_OnGatherDataCompleted</c> and <c>e_CompleteCount</c>.
/// </summary>
/// <param name="gatherUrlItem">The URL item to gather.</param>
/// <param name="exitFlag">Always set to <c>false</c> by this method.</param>
/// <param name="errMsg">Passed by reference to <c>_getTempData.GetDataTable</c>; a non-empty value afterwards is treated as failure.</param>
private void GatherWebUrl(cGatherUrlItem gatherUrlItem, ref bool exitFlag, ref string errMsg)
{
    exitFlag = false;

    m_GatherTaskManage.OnLog(new cGatherEventArgs()
    {
        Message = DateTime.Now.Ticks + " " + gatherUrlItem.Url + "正在采集中,请稍候...",
        MessageType = EnumGloabParas.EnumMessageType.INFO,
        ThreadName = ThreadName
    });

    // Fetch with up to TryAgainCount retries. After the retries are used up the
    // method carries on regardless.
    // NOTE(review): the fetched HTML is discarded here; _getTempData.GetDataTable
    // below appears to fetch the page again — confirm whether this pre-fetch is needed.
    int tryCount = m_GatherTaskManage.TryAgainCount;
    while (true)
    {
        try
        {
            this.GetHtml(gatherUrlItem.Url, m_GatherTaskManage.WebCookie, m_GatherTaskManage.WebEncode, gatherUrlItem.StartPos, gatherUrlItem.EndPos, m_GatherTaskManage.IsAjax);
        }
        catch (Exception ex)
        {
            m_GatherTaskManage.OnLog(new cGatherEventArgs()
            {
                Message = DateTime.Now.Ticks + " " + gatherUrlItem.Url + "采集出现错误,重试中..." + ex.Message,
                MessageType = EnumGloabParas.EnumMessageType.ERROR,
                ThreadName = ThreadName
            });

            if (tryCount > 0)
            {
                tryCount--;
                continue;
            }
        }

        break;
    }

    m_GatherTaskManage.OnLog(new cGatherEventArgs()
    {
        Message = DateTime.Now.Ticks + " " + gatherUrlItem.Url + "正在采集中,请稍候...",
        MessageType = EnumGloabParas.EnumMessageType.INFO,
        ThreadName = ThreadName
    });

    DataTable tempData = m_GatherData.Clone();
    tempData = _getTempData.GetDataTable(tempData, m_GatherTaskManage.ColumnItemList, gatherUrlItem, m_GatherTaskManage.WebCookie, m_GatherTaskManage.WebEncode, m_GatherTaskManage.IsAjax, ref errMsg);

    if (tempData == null || tempData.Rows.Count == 0 || !string.IsNullOrEmpty(errMsg))
    {
        m_GatherTaskManage.OnLog(new cGatherEventArgs()
        {
            Message = gatherUrlItem.Url + "没有数据,也有可能是垃圾数据导致...",
            MessageType = EnumGloabParas.EnumMessageType.ERROR,
            ThreadName = ThreadName
        });

        RaiseCompleteCount(gatherUrlItem, EnumGloabParas.EnumThreadCompleteType.Error, EnumGloabParas.EnumUrlGaterherState.Error);
        return;
    }

    // When DownFileFlag is off nothing is done here (the direct image-download
    // branch is empty in this method).
    if (m_GatherTaskManage.DownFileFlag)
    {
        DownFileQueue(gatherUrlItem, tempData);
    }

    // Lock on the table being written. Locking on the freshly created local
    // table cannot exclude anyone, because no other thread holds a reference to it.
    lock (m_GatherData)
    {
        m_GatherData.Merge(tempData);
    }

    // Raised outside the lock so subscriber code never runs while the table lock is held.
    if (e_OnGatherDataCompleted != null)
    {
        e_OnGatherDataCompleted(new cGatherDataEventArgs()
        {
            TaskID = m_GatherTaskManage.TaskID,
            dataTable = tempData,
            ErrorCount = m_GatherTaskManage.ErrorCount,
            TotalCount = m_GatherTaskManage.TotalCount,
            TrueCount = m_GatherTaskManage.TrueCount,
        });
    }

    m_GatherTaskManage.OnLog(new cGatherEventArgs()
    {
        Message = DateTime.Now.Ticks + " " + gatherUrlItem.Url + "采集完成...",
        MessageType = EnumGloabParas.EnumMessageType.INFO,
        ThreadName = ThreadName
    });

    RaiseCompleteCount(gatherUrlItem, EnumGloabParas.EnumThreadCompleteType.Success, EnumGloabParas.EnumUrlGaterherState.Completed);
}

/// <summary>
/// Raises <c>e_CompleteCount</c> (when it has subscribers) for <paramref name="gatherUrlItem"/>,
/// copying the item's position, level, next-page and URL fields into the event arguments.
/// </summary>
private void RaiseCompleteCount(cGatherUrlItem gatherUrlItem, EnumGloabParas.EnumThreadCompleteType completeType, EnumGloabParas.EnumUrlGaterherState gatherFlag)
{
    if (e_CompleteCount != null)
    {
        e_CompleteCount(new cGatherCompleteCountEventArgs()
        {
            CompleteType = completeType,
            StartPos = gatherUrlItem.StartPos,
            EndPos = gatherUrlItem.EndPos,
            GaterherFlag = gatherFlag,
            Level = gatherUrlItem.Level,
            LevelUrlList = gatherUrlItem.LevelUrlList,
            NextPageText = gatherUrlItem.NextPageText,
            Url = gatherUrlItem.Url,
        });
    }
}