/// <summary>
/// Appends <paramref name="item"/> to <c>CompletedUrls</c> while holding the
/// lock on this instance's <see cref="ICollection.SyncRoot"/>.
/// </summary>
/// <param name="item">The item to append to the completed list.</param>
public void Add(cGatherUrlBaseItem item)
 {
     // Resolve the sync root once; the lock statement below guards the append.
     object syncRoot = ((ICollection)this).SyncRoot;

     lock (syncRoot)
     {
         this.CompletedUrls.Add(item);
     }
 }
Beispiel #2
0
        /// <summary>
        /// Removes and returns the item at the head of <c>FileUrls</c>.
        /// </summary>
        /// <returns>
        /// The dequeued item, or <c>null</c> when <c>FileUrls</c> is empty.
        /// </returns>
        public cGatherUrlBaseItem Dequeue()
        {
            // A single lock is enough: the count check and the dequeue must be
            // atomic together. The original re-acquired the same SyncRoot a
            // second time inside this lock, which is a redundant re-entrant lock.
            lock (((ICollection)FileUrls).SyncRoot)
            {
                if (this.FileUrls.Count > 0)
                {
                    return FileUrls.Dequeue();
                }
            }

            return null;
        }
        /// <summary>
        /// Worker loop of one gather thread. Sets this worker's state to Run and keeps
        /// polling while both the task manager and this worker remain in the Run state:
        /// <list type="bullet">
        /// <item>if this thread's web-URL queue has entries, one entry is dequeued and processed;</item>
        /// <item>otherwise, if this thread's file-URL queue has entries, one entry is dequeued and passed to <c>DownFileQueue</c>;</item>
        /// <item>otherwise <c>e_OnGatherNotityCompleted</c> is raised with this thread's name.</item>
        /// </list>
        /// Any exception thrown during one iteration is logged and the loop continues.
        /// The loop ends early when <c>GatherWebUrl</c> reports an exit request.
        /// </summary>
        internal void ThreadGetUrlsWork()
        {
            this.ThreadState = EnumGloabParas.EnumThreadState.Run;

            while (m_GatherTaskManage.ThreadState == EnumGloabParas.EnumThreadState.Run &&
                   this.ThreadState == EnumGloabParas.EnumThreadState.Run)
            {
                try
                {
                    if (m_GatherTaskManage.runWebUrls[this.ThreadIndex].Count > 0)
                    {
                        bool stopRequested = false;

                        // NOTE(review): the queue lock is held for the whole processing of the
                        // item (including page downloads), exactly as in the original code.
                        lock ((m_GatherTaskManage.runWebUrls[this.ThreadIndex]).SyncRoot)
                        {
                            // Re-check under the lock; the test above ran without it.
                            if (m_GatherTaskManage.runWebUrls[this.ThreadIndex].Count > 0)
                            {
                                RemoveSelfFromCompleteThreadList();

                                cGatherUrlItem gatherUrlItem = m_GatherTaskManage.runWebUrls[this.ThreadIndex].Dequeue();
                                stopRequested = ProcessGatherUrlItem(gatherUrlItem);
                            }
                        }

                        if (stopRequested)
                        {
                            break;
                        }
                    }
                    else if (m_GatherTaskManage.runFileUrls[this.ThreadIndex].Count > 0)
                    {
                        lock ((m_GatherTaskManage.runFileUrls[this.ThreadIndex]).SyncRoot)
                        {
                            if (m_GatherTaskManage.runFileUrls[this.ThreadIndex].Count > 0)
                            {
                                // The original touched completeThreadList here without its lock,
                                // unlike the web-URL branch; both branches now share one locked helper.
                                RemoveSelfFromCompleteThreadList();

                                cGatherUrlBaseItem gatherFileItem = m_GatherTaskManage.runFileUrls[this.ThreadIndex].Dequeue();
                                this.DownFileQueue(gatherFileItem.Url);
                            }
                        }
                    }
                    else
                    {
                        // Copy the delegate first so a concurrent unsubscribe cannot null it
                        // between the check and the call.
                        var notifyCompletedHandler = e_OnGatherNotityCompleted;
                        if (notifyCompletedHandler != null)
                        {
                            notifyCompletedHandler(ThreadName);
                        }
                    }
                }
                catch (Exception ex)
                {
                    string errMsg = "error:" + ex.Message + ex.Source + ex.StackTrace;
                    LogGatherMessage(errMsg, EnumGloabParas.EnumMessageType.ERROR);
                    DMSFrame.Loggers.LoggerManager.FileLogger.LogWithTime(errMsg);
                }
            }
        }

        /// <summary>
        /// Removes this thread's name from the task manager's <c>completeThreadList</c>
        /// (if present) while holding that list's SyncRoot lock.
        /// </summary>
        private void RemoveSelfFromCompleteThreadList()
        {
            lock (((ICollection)m_GatherTaskManage.completeThreadList).SyncRoot)
            {
                if (m_GatherTaskManage.completeThreadList.Contains(this.ThreadName))
                {
                    m_GatherTaskManage.completeThreadList.Remove(this.ThreadName);
                }
            }
        }

        /// <summary>
        /// Sends one message to the task manager's log, tagged with this thread's name.
        /// </summary>
        /// <param name="message">Text to log.</param>
        /// <param name="messageType">Severity/category of the message.</param>
        private void LogGatherMessage(string message, EnumGloabParas.EnumMessageType messageType)
        {
            m_GatherTaskManage.OnLog(new cGatherEventArgs()
            {
                Message     = message,
                MessageType = messageType,
                ThreadName  = ThreadName
            });
        }

        /// <summary>
        /// Raises <c>e_TotalCount</c> (when subscribed) reporting one newly queued URL.
        /// </summary>
        private void RaiseOneUrlQueued()
        {
            var totalCountHandler = e_TotalCount;
            if (totalCountHandler != null)
            {
                totalCountHandler(new cGatherCompletedEventArgs()
                {
                    ErrorCount = 0,
                    TotalCount = 1,
                    TrueCount  = 0,
                });
            }
        }

        /// <summary>
        /// Raises <c>e_CompleteCount</c> (when subscribed) for <paramref name="gatherUrlItem"/>
        /// with the given completion type and gather state.
        /// </summary>
        /// <param name="gatherUrlItem">Item whose position, level and URL data are reported.</param>
        /// <param name="completeType">Completion type placed in the event args.</param>
        /// <param name="gaterherFlag">Gather state placed in the event args.</param>
        private void RaiseItemCompleteCount(cGatherUrlItem gatherUrlItem,
                                            EnumGloabParas.EnumThreadCompleteType completeType,
                                            EnumGloabParas.EnumUrlGaterherState gaterherFlag)
        {
            var completeCountHandler = e_CompleteCount;
            if (completeCountHandler != null)
            {
                completeCountHandler(new cGatherCompleteCountEventArgs()
                {
                    CompleteType = completeType,
                    StartPos     = gatherUrlItem.StartPos,
                    EndPos       = gatherUrlItem.EndPos,
                    GaterherFlag = gaterherFlag,
                    Level        = gatherUrlItem.Level,
                    LevelUrlList = gatherUrlItem.LevelUrlList,
                    NextPageText = gatherUrlItem.NextPageText,
                    Url          = gatherUrlItem.Url,
                });
            }
        }

        /// <summary>
        /// Calls <c>GatherWebUrl</c> for the item and returns the exit flag it reports.
        /// </summary>
        /// <param name="gatherUrlItem">Item to gather.</param>
        /// <returns><c>true</c> when <c>GatherWebUrl</c> set its exit flag.</returns>
        private bool GatherWebUrlAndGetExitFlag(cGatherUrlItem gatherUrlItem)
        {
            bool   exitFlag = false;
            string errMsg   = string.Empty;
            GatherWebUrl(gatherUrlItem, ref exitFlag, ref errMsg);
            return exitFlag;
        }

        /// <summary>
        /// Downloads the item's page via <c>GetHtml</c>, retrying after an exception up to
        /// <c>m_GatherTaskManage.TryAgainCount</c> additional times. Every failed attempt is logged.
        /// </summary>
        /// <param name="gatherUrlItem">Item whose URL is downloaded.</param>
        /// <param name="html">Downloaded text on success; <see cref="string.Empty"/> on failure.</param>
        /// <returns><c>true</c> when a download attempt completed without throwing.</returns>
        private bool TryGetHtmlWithRetry(cGatherUrlItem gatherUrlItem, out string html)
        {
            html = string.Empty;
            int retriesLeft = m_GatherTaskManage.TryAgainCount;

            while (true)
            {
                try
                {
                    html = this.GetHtml(gatherUrlItem.Url, m_GatherTaskManage.WebCookie, m_GatherTaskManage.WebEncode, gatherUrlItem.StartPos, gatherUrlItem.EndPos, m_GatherTaskManage.IsAjax);
                    return true;
                }
                catch (Exception ex)
                {
                    LogGatherMessage(DateTime.Now.Ticks + " " + gatherUrlItem.Url + "采集出现错误,重试中." + ex.Message,
                                     EnumGloabParas.EnumMessageType.ERROR);

                    if (retriesLeft <= 0)
                    {
                        return false;
                    }
                    retriesLeft--;
                }
            }
        }

        /// <summary>
        /// Processes one dequeued web-URL item. Items already in the Run state are gathered
        /// directly. For other items the page is downloaded, an optional "next page" link and
        /// the next-level links are queued, or, when no further level rule exists, the item
        /// itself is gathered.
        /// </summary>
        /// <param name="gatherUrlItem">The dequeued item.</param>
        /// <returns><c>true</c> when the worker loop must stop (exit flag set by <c>GatherWebUrl</c>).</returns>
        private bool ProcessGatherUrlItem(cGatherUrlItem gatherUrlItem)
        {
            if (gatherUrlItem.GaterherFlag == EnumGloabParas.EnumUrlGaterherState.Run)
            {
                return GatherWebUrlAndGetExitFlag(gatherUrlItem);
            }

            // Look for lower-level pages.
            LogGatherMessage(gatherUrlItem.Url + "查找下级网页中.", EnumGloabParas.EnumMessageType.NOTIFY);

            string html;
            if (!TryGetHtmlWithRetry(gatherUrlItem, out html))
            {
                LogGatherMessage(gatherUrlItem.Url + "查找下级网页失败.", EnumGloabParas.EnumMessageType.ERROR);
                RaiseItemCompleteCount(gatherUrlItem,
                                       EnumGloabParas.EnumThreadCompleteType.Error,
                                       EnumGloabParas.EnumUrlGaterherState.FirstError);
                return false;
            }

            if (!string.IsNullOrEmpty(gatherUrlItem.NextPageText))
            {
                QueueNextPageUrl(gatherUrlItem, html);
            }

            if (gatherUrlItem.LevelUrlList.Count > gatherUrlItem.Level)
            {
                QueueNextLevelUrls(gatherUrlItem, html);
                RaiseItemCompleteCount(gatherUrlItem,
                                       EnumGloabParas.EnumThreadCompleteType.Success,
                                       EnumGloabParas.EnumUrlGaterherState.Completed);
                return false;
            }

            // No level rule left for this item: gather the page itself.
            return GatherWebUrlAndGetExitFlag(gatherUrlItem);
        }

        /// <summary>
        /// Extracts the "next page" URL from <paramref name="html"/> and, when it is found and
        /// not already known to the task manager, queues it with the same settings as the
        /// current item.
        /// </summary>
        /// <param name="gatherUrlItem">Item whose page was downloaded.</param>
        /// <param name="html">Downloaded page text.</param>
        private void QueueNextPageUrl(cGatherUrlItem gatherUrlItem, string html)
        {
            string nextUrl = _getTempData.GetNextPage(gatherUrlItem.Url, html, gatherUrlItem.NextPageText);
            if (string.IsNullOrEmpty(nextUrl))
            {
                // A missing next page may simply mean there is none, so this is logged
                // but not counted towards ErrorCount.
                LogGatherMessage(gatherUrlItem.Url + "获取下一页导航失败.", EnumGloabParas.EnumMessageType.ERROR);
                return;
            }

            nextUrl = this.GetNextUrl(nextUrl, gatherUrlItem.Url);
            if (m_GatherTaskManage.ContainsUrl(nextUrl))
            {
                return;
            }

            // Original note: "always add to the first thread so usable URLs are collected early;
            // when no URLs remain, thread 0 gathers the Run pages".
            // NOTE(review): the code targets runWebUrls[ThreadPos], not index 0 - confirm intent.
            m_GatherTaskManage.runWebUrls[m_GatherTaskManage.ThreadPos].Enqueue(new cGatherUrlItem()
            {
                GaterherFlag = gatherUrlItem.GaterherFlag,
                LevelUrlList = gatherUrlItem.LevelUrlList,
                NextPageText = gatherUrlItem.NextPageText,
                Url          = nextUrl,
                Level        = gatherUrlItem.Level,
                StartPos     = gatherUrlItem.StartPos,
                EndPos       = gatherUrlItem.EndPos,
            });
            RaiseOneUrlQueued();
            LogGatherMessage(gatherUrlItem.Url + "获取下一页导航成功.", EnumGloabParas.EnumMessageType.NOTIFY);
        }

        /// <summary>
        /// Extracts the next-level URLs from <paramref name="html"/> using the level rule at
        /// index <c>gatherUrlItem.Level</c> and queues every URL the task manager does not
        /// already contain. URLs with further level rules are queued in the First state;
        /// URLs on the last level are queued in the Run state and counted via <c>e_TotalCount</c>.
        /// </summary>
        /// <param name="gatherUrlItem">Item whose page was downloaded.</param>
        /// <param name="html">Downloaded page text.</param>
        private void QueueNextLevelUrls(cGatherUrlItem gatherUrlItem, string html)
        {
            string       levelRule = gatherUrlItem.LevelUrlList[gatherUrlItem.Level];
            List<string> levelUrls = GetNextLevelUrl(gatherUrlItem.Url, html, levelRule);

            LogGatherMessage(gatherUrlItem.Url + "获取新的网页个数" + levelUrls.Count + ".",
                             EnumGloabParas.EnumMessageType.NOTIFY);

            if (levelUrls.Count == 0)
            {
                // Nothing matched: log the raw page text at INFO level.
                LogGatherMessage(html, EnumGloabParas.EnumMessageType.INFO);
            }

            foreach (var levelUrl in levelUrls)
            {
                if (m_GatherTaskManage.ContainsUrl(levelUrl))
                {
                    continue;
                }

                if (gatherUrlItem.LevelUrlList.Count > gatherUrlItem.Level + 1)
                {
                    // More level rules remain, so the new URL is an intermediate page.
                    m_GatherTaskManage.runWebUrls[m_GatherTaskManage.ThreadPos].Enqueue(new cGatherUrlItem()
                    {
                        GaterherFlag = EnumGloabParas.EnumUrlGaterherState.First,
                        LevelUrlList = gatherUrlItem.LevelUrlList,
                        NextPageText = "",// the next-page marker only applies to first-level pages
                        Url          = levelUrl,
                        Level        = gatherUrlItem.Level + 1,
                        StartPos     = "",
                        EndPos       = "",
                    });
                }
                else
                {
                    // Last level: queue the URL for actual gathering.
                    // (Original note: URLs are distributed evenly across threads -
                    // presumably via ThreadPos; verify against the task manager.)
                    m_GatherTaskManage.runWebUrls[m_GatherTaskManage.ThreadPos].Enqueue(new cGatherUrlItem()
                    {
                        GaterherFlag = EnumGloabParas.EnumUrlGaterherState.Run,
                        LevelUrlList = gatherUrlItem.LevelUrlList,
                        NextPageText = gatherUrlItem.NextPageText,
                        Url          = levelUrl,
                        Level        = gatherUrlItem.Level + 1,
                        StartPos     = m_GatherTaskManage.LastStartPos,
                        EndPos       = m_GatherTaskManage.LastEndPos,
                    });
                    RaiseOneUrlQueued();
                }
            }
        }
Beispiel #4
0
 /// <summary>
 /// Appends <paramref name="item"/> to the tail of <c>FileUrls</c>.
 /// </summary>
 /// <param name="item">The item to enqueue.</param>
 public void Enqueue(cGatherUrlBaseItem item)
 {
     // Dequeue() guards FileUrls with its ICollection.SyncRoot; take the same lock
     // here so a concurrent enqueue cannot interleave with the count check/dequeue.
     // NOTE(review): assumes this method lives in the same class as that Dequeue().
     lock (((System.Collections.ICollection)this.FileUrls).SyncRoot)
     {
         this.FileUrls.Enqueue(item);
     }
 }