/// <summary>
/// Removes and returns the next URL item, taking from <c>MainUrls</c> first and
/// falling back to <c>RunUrls</c> only when <c>MainUrls</c> is empty.
/// </summary>
/// <returns>The dequeued item, or <c>null</c> when both queues are empty.</returns>
public cGatherUrlItem Dequeue()
        {
            // The Count check and the Dequeue must happen under the same lock so another
            // thread cannot drain the queue in between. A single lock per queue is enough:
            // the monitor is re-entrant, so a nested lock on the same SyncRoot adds nothing.
            lock (((ICollection)MainUrls).SyncRoot)
            {
                if (this.MainUrls.Count > 0)
                {
                    return MainUrls.Dequeue();
                }
            }

            lock (((ICollection)RunUrls).SyncRoot)
            {
                if (this.RunUrls.Count > 0)
                {
                    return RunUrls.Dequeue();
                }
            }

            // Both queues were empty at the time they were inspected.
            return null;
        }
 /// <summary>
 /// Adds <paramref name="item"/> to <c>RunUrls</c> when its <c>GaterherFlag</c> is
 /// <c>EnumUrlGaterherState.Run</c>, otherwise to <c>MainUrls</c>.
 /// </summary>
 /// <param name="item">The URL item to queue.</param>
 public void Enqueue(cGatherUrlItem item)
 {
     // Dequeue() guards both queues with their ICollection.SyncRoot; enqueueing must take
     // the same lock, otherwise a concurrent Enqueue/Dequeue pair can corrupt the queue.
     // NOTE(review): assumes RunUrls/MainUrls are the same queue instances Dequeue() uses - confirm.
     if (item.GaterherFlag == EnumGloabParas.EnumUrlGaterherState.Run)
     {
         lock (((System.Collections.ICollection)this.RunUrls).SyncRoot)
         {
             this.RunUrls.Enqueue(item);
         }
     }
     else
     {
         lock (((System.Collections.ICollection)this.MainUrls).SyncRoot)
         {
             this.MainUrls.Enqueue(item);
         }
     }
 }
        /// <summary>
        /// Walks every row of <paramref name="tempData"/> and, for each task column whose
        /// <c>DataFileType</c> is not <c>Text</c>, turns the cell value into a URL and either
        /// queues it for download or downloads it immediately.
        /// </summary>
        /// <param name="item">The gathered page; its <c>Url</c> is passed to <c>GetNextUrl</c> together with the cell value.</param>
        /// <param name="tempData">Rows extracted from the page.</param>
        private void DownFileQueue(cGatherUrlItem item, DataTable tempData)
        {
            // Only non-text columns are processed here.
            List<TaskColumnItem> fileColumns = m_GatherTaskManage.ColumnItemList
                .Where(q => q.DataFileType != EnumGloabParas.EnumDataFileType.Text)
                .ToList();

            foreach (DataRow dr in tempData.Rows)
            {
                foreach (TaskColumnItem column in fileColumns)
                {
                    string value = dr[column.DataTextType.Value].ToString();
                    // presumably resolves the cell value against the page URL - see GetNextUrl
                    string url = this.GetNextUrl(value, item.Url);

                    if (!m_GatherTaskManage.DownFileQueue)
                    {
                        // Download the file directly instead of queueing it.
                        // Snapshot the delegate so a subscriber detaching on another thread
                        // between the null check and the call cannot cause a NullReferenceException.
                        var directHandler = e_TotalCount;
                        if (directHandler != null)
                        {
                            directHandler(new cGatherCompletedEventArgs()
                            {
                                ErrorCount    = 0,
                                GaterherState = EnumGloabParas.EnumThreadState.Run,
                                TotalCount    = 1,
                                TrueCount     = 0,
                            });
                        }
                        this.DownFileQueue(url);
                        continue;
                    }

                    // Queue mode: skip URLs the task manager already knows about.
                    if (m_GatherTaskManage.ContainsDownFileUrl(url))
                    {
                        continue;
                    }

                    m_GatherTaskManage.runFileUrls[m_GatherTaskManage.ThreadPos].Enqueue(new cGatherUrlBaseItem()
                    {
                        Url = url,
                    });

                    var queuedHandler = e_TotalCount;
                    if (queuedHandler != null)
                    {
                        queuedHandler(new cGatherCompletedEventArgs()
                        {
                            ErrorCount = 0,
                            TotalCount = 1,
                            TrueCount  = 0,
                        });
                    }
                }
            }
        }
 /// <summary>
 /// Fetches the page for <paramref name="item"/> and passes its HTML to the HTML-based
 /// <c>GetDataTable</c> overload to fill <paramref name="dataTable"/>.
 /// </summary>
 /// <param name="dataTable">Table handed to the HTML-based overload; also the value returned when an exception is caught.</param>
 /// <param name="columnItemList">Column definitions forwarded to the HTML-based overload.</param>
 /// <param name="item">Supplies the URL and the start/end positions passed to <c>GetHtml</c>.</param>
 /// <param name="webCookie">Cookie string forwarded to <c>GetHtml</c>.</param>
 /// <param name="webEncode">Encoding forwarded to <c>GetHtml</c>.</param>
 /// <param name="isAjax">Ajax flag forwarded to <c>GetHtml</c>.</param>
 /// <param name="errMsg">Set to a description when the page is empty or an exception is caught.</param>
 /// <returns>
 /// The table produced by the HTML-based overload; <c>null</c> when the fetched HTML is null or empty;
 /// the incoming <paramref name="dataTable"/> when an exception was caught.
 /// </returns>
 public DataTable GetDataTable(DataTable dataTable, List <TaskColumnItem> columnItemList, cGatherUrlItem item, string webCookie, EnumGloabParas.EnumEncodeType webEncode, bool isAjax, ref string errMsg)
 {
     // Starts as the caller's table so the exception path returns it unchanged.
     DataTable result = dataTable;

     try
     {
         string pageSource = this.GetHtml(item.Url, webCookie, webEncode, item.StartPos, item.EndPos, isAjax);

         if (!string.IsNullOrEmpty(pageSource))
         {
             result = this.GetDataTable(dataTable, columnItemList, pageSource, ref errMsg);
         }
         else
         {
             errMsg = "网页获取为空,有可能是采集范围设置错误或网络错误";
             result = null;
         }
     }
     catch (Exception ex)
     {
         // Failures are reported through errMsg rather than propagated.
         errMsg = ex.Message;
     }

     return result;
 }
Exemple #5
0
 /// <summary>
 /// Appends <paramref name="item"/> to the <c>CompletedWebUrls</c> collection.
 /// </summary>
 /// <param name="item">The URL item to record.</param>
 public void Add(cGatherUrlItem item)
 {
     CompletedWebUrls.Add(item);
 }
        /// <summary>
        /// Gathers a single URL: fetches the page (retrying on exceptions), extracts its rows,
        /// optionally hands file columns to <c>DownFileQueue</c>, merges the rows into
        /// <c>m_GatherData</c> and reports the outcome through log entries and events.
        /// </summary>
        /// <param name="gatherUrlItem">The URL item to gather; its fields are echoed back in the completion event.</param>
        /// <param name="exitFlag">Always reset to <c>false</c> by this method.</param>
        /// <param name="errMsg">
        /// Passed by reference to the extraction call; a non-empty value afterwards is treated as a failure.
        /// </param>
        private void GatherWebUrl(cGatherUrlItem gatherUrlItem, ref bool exitFlag, ref string errMsg)
        {
            exitFlag = false;

            LogGatherMessage(DateTime.Now.Ticks + " " + gatherUrlItem.Url + "正在采集中,请稍候...", EnumGloabParas.EnumMessageType.INFO);

            // Fetch the page, retrying up to TryAgainCount additional times when GetHtml throws.
            // NOTE(review): the HTML returned here is not used below; the extraction call receives
            // the URL item and presumably fetches the page again - confirm whether this pre-fetch is needed.
            int retriesLeft = m_GatherTaskManage.TryAgainCount;
            while (true)
            {
                try
                {
                    this.GetHtml(gatherUrlItem.Url, m_GatherTaskManage.WebCookie, m_GatherTaskManage.WebEncode, gatherUrlItem.StartPos, gatherUrlItem.EndPos, m_GatherTaskManage.IsAjax);
                    break;
                }
                catch (Exception ex)
                {
                    LogGatherMessage(DateTime.Now.Ticks + " " + gatherUrlItem.Url + "采集出现错误,重试中..." + ex.Message, EnumGloabParas.EnumMessageType.ERROR);
                    if (retriesLeft <= 0)
                    {
                        // Retries exhausted: carry on and let the extraction step report the failure.
                        break;
                    }
                    retriesLeft--;
                }
            }

            LogGatherMessage(DateTime.Now.Ticks + " " + gatherUrlItem.Url + "正在采集中,请稍候...", EnumGloabParas.EnumMessageType.INFO);

            // Extract into an empty table that has the same schema as the accumulated data.
            DataTable tempData = m_GatherData.Clone();
            tempData = _getTempData.GetDataTable(tempData, m_GatherTaskManage.ColumnItemList, gatherUrlItem, m_GatherTaskManage.WebCookie, m_GatherTaskManage.WebEncode, m_GatherTaskManage.IsAjax, ref errMsg);

            bool extractionFailed = tempData == null || tempData.Rows.Count == 0 || !string.IsNullOrEmpty(errMsg);
            if (extractionFailed)
            {
                LogGatherMessage(gatherUrlItem.Url + "没有数据,也有可能是垃圾数据导致...", EnumGloabParas.EnumMessageType.ERROR);
                NotifyUrlCompleteCount(gatherUrlItem, EnumGloabParas.EnumThreadCompleteType.Error, EnumGloabParas.EnumUrlGaterherState.Error);
                return;
            }

            // When DownFileFlag is off nothing is done here (the direct image download branch was empty).
            if (m_GatherTaskManage.DownFileFlag)
            {
                DownFileQueue(gatherUrlItem, tempData);
            }

            // NOTE(review): tempData is local to this call, so this lock does not serialize
            // concurrent Merge calls on m_GatherData - confirm whether a shared lock object is intended.
            lock (tempData)
            {
                m_GatherData.Merge(tempData);

                // Snapshot the delegate so a concurrent unsubscribe cannot null it between check and call.
                var dataCompletedHandler = e_OnGatherDataCompleted;
                if (dataCompletedHandler != null)
                {
                    dataCompletedHandler(new cGatherDataEventArgs()
                    {
                        TaskID     = m_GatherTaskManage.TaskID,
                        dataTable  = tempData,
                        ErrorCount = m_GatherTaskManage.ErrorCount,
                        TotalCount = m_GatherTaskManage.TotalCount,
                        TrueCount  = m_GatherTaskManage.TrueCount,
                    });
                }
            }

            LogGatherMessage(DateTime.Now.Ticks + " " + gatherUrlItem.Url + "采集完成...", EnumGloabParas.EnumMessageType.INFO);
            NotifyUrlCompleteCount(gatherUrlItem, EnumGloabParas.EnumThreadCompleteType.Success, EnumGloabParas.EnumUrlGaterherState.Completed);
        }

        /// <summary>
        /// Sends one log entry tagged with <c>ThreadName</c> through <c>m_GatherTaskManage.OnLog</c>.
        /// </summary>
        private void LogGatherMessage(string message, EnumGloabParas.EnumMessageType messageType)
        {
            m_GatherTaskManage.OnLog(new cGatherEventArgs()
            {
                Message     = message,
                MessageType = messageType,
                ThreadName  = ThreadName
            });
        }

        /// <summary>
        /// Raises <c>e_CompleteCount</c> (if anyone is subscribed) with the URL item's fields and the given outcome.
        /// </summary>
        private void NotifyUrlCompleteCount(cGatherUrlItem gatherUrlItem, EnumGloabParas.EnumThreadCompleteType completeType, EnumGloabParas.EnumUrlGaterherState gaterherFlag)
        {
            // Snapshot the delegate so a concurrent unsubscribe cannot null it between check and call.
            var completeHandler = e_CompleteCount;
            if (completeHandler == null)
            {
                return;
            }

            completeHandler(new cGatherCompleteCountEventArgs()
            {
                CompleteType = completeType,
                StartPos     = gatherUrlItem.StartPos,
                EndPos       = gatherUrlItem.EndPos,
                GaterherFlag = gaterherFlag,
                Level        = gatherUrlItem.Level,
                LevelUrlList = gatherUrlItem.LevelUrlList,
                NextPageText = gatherUrlItem.NextPageText,
                Url          = gatherUrlItem.Url,
            });
        }