Esempio n. 1
0
        /// <summary>
        /// Handles the error event raised by a split gather task.
        /// </summary>
        /// <param name="sender">The object that raised the error; it is cast to <c>cGatherTaskSplit</c>.</param>
        /// <param name="e">Event data; its <c>Error</c> is forwarded to the task-error subscribers.</param>
        private void TaskThreadError(object sender, TaskThreadErrorEventArgs e)
        {
            // After a gather error, first check whether the machine is still online.
            // Without an Internet connection the task is stopped and marked as failed.
            if (!cTool.IsLinkInternet())
            {
                Stop();
                m_State = cGlobalParas.TaskState.Failed;

                if (e_TaskFailed != null)
                {
                    e_TaskFailed(this, new cTaskEventArgs(TaskID, TaskName, false));
                }

                return;
            }

            cGatherTaskSplit failedSplit = (cGatherTaskSplit)sender;

            // An error in a single thread must not stop the whole task, even when
            // every thread has reported an error; the error is only passed on to
            // the subscribers of the task-error event.
            if (e_TaskError == null)
            {
                return;
            }

            e_TaskError(this, new TaskErrorEventArgs(failedSplit, e.Error));
        }
Esempio n. 2
0
        /// <summary>
        /// Handles the "completed" event of a split gather task. Every worker reports
        /// here when it finishes; the task-level completion routine is invoked once the
        /// reporting split's URL count equals its gathered plus failed URL counts.
        /// </summary>
        /// <param name="sender">The object that raised the event; it is cast to <c>cGatherTaskSplit</c>.</param>
        /// <param name="e">Event data (not used by this handler).</param>
        private void TaskWorkThreadCompleted(object sender, cTaskEventArgs e)
        {
            cGatherTaskSplit finishedSplit = (cGatherTaskSplit)sender;

            // Some URLs of this split are still unaccounted for: nothing to do yet.
            if (finishedSplit.UrlCount != finishedSplit.GatherErrUrlCount + finishedSplit.GatheredUrlCount)
            {
                return;
            }

            // Every URL was either gathered or failed: signal task completion.
            onTaskCompleted();
        }
Esempio n. 3
0
 /// <summary>
 /// Subscribes this task's handlers to the events of a split gather task.
 /// </summary>
 /// <param name="dtc">The split task to wire up; nothing happens when its <c>IsInitialized</c> flag is already set.</param>
 private void TaskEventInit(cGatherTaskSplit dtc)
 {
     // Already wired up: subscribing again would register every handler twice.
     if (dtc.IsInitialized)
     {
         return;
     }

     // Bind the initialization, completion, URL-count, log, data and error events.
     dtc.TaskInit  += TaskWorkThreadInit;
     dtc.Completed += TaskWorkThreadCompleted;
     dtc.GUrlCount += onGUrlCount;
     dtc.Log       += onLog;
     dtc.GData     += onGData;
     dtc.Error     += TaskThreadError;

     dtc.IsInitialized = true;
 }
Esempio n. 4
0
        /// <summary>
        /// Handles the initialization event raised by a split gather task: records the
        /// task ID from the event data and, when a handler is attached, queues the
        /// task-level "thread initialized" event on the task manager's event proxy.
        /// </summary>
        /// <param name="sender">The object that raised the event; it is cast to <c>cGatherTaskSplit</c>.</param>
        /// <param name="e">Event data carrying the task ID.</param>
        private void TaskWorkThreadInit(object sender, TaskInitializedEventArgs e)
        {
            // The cast result is not used afterwards; it is kept so that an unexpected
            // sender type still fails here exactly as before.
            cGatherTaskSplit sourceSplit = (cGatherTaskSplit)sender;

            m_TaskData.TaskID = e.TaskID;

            if (e_TaskThreadInitialized == null)
            {
                return;
            }

            // Raise the task-initialized event through the event proxy.
            // NOTE(review): the handler field and m_TaskData.TaskID are read again when
            // the proxy runs this delegate - confirm neither can change in between.
            m_TaskManage.EventProxy.AddEvent(delegate()
            {
                e_TaskThreadInitialized(this, new TaskInitializedEventArgs(m_TaskData.TaskID));
            });
        }
 /// <summary>
 /// Removes the given split task from <c>m_WaitingWorkThread</c> and then calls its <c>ReStart()</c>.
 /// </summary>
 /// <param name="dtc">The split gather task to take out of the waiting collection and restart.</param>
 internal void ReStartWaitingWorkThread(cGatherTaskSplit dtc)
 {
     m_WaitingWorkThread.Remove(dtc);
     dtc.ReStart();
 }
 /// <summary>
 /// Sets the split task's <c>Waittime</c> from <c>cGatherTaskList.Waittime</c> and adds the task
 /// to <c>m_WaitingWorkThread</c>.
 /// </summary>
 /// <param name="dtc">The split gather task to place in the waiting collection.</param>
 internal void AddWaitingWorkThread(cGatherTaskSplit dtc)
 {
     dtc.Waittime = cGatherTaskList.Waittime;
     m_WaitingWorkThread.Add(dtc);
 }
 /// <summary>
 /// Initializes the event data with the split gather task and the exception it reports.
 /// </summary>
 /// <param name="dtc">The split gather task associated with the error; stored in <c>m_ErrorThread</c>.</param>
 /// <param name="error">The exception being reported; stored in <c>m_Error</c>.</param>
 public TaskErrorEventArgs(cGatherTaskSplit dtc, Exception error)
 {
     m_Error = error;
         m_ErrorThread = dtc;
 }
Esempio n. 8
0
        /// <summary>
        /// Initializes the split gather tasks (worker objects) of this task. Runs only
        /// once: the work is skipped when <c>m_IsDataInitialized</c> is already set.
        /// </summary>
        /// <remarks>
        /// Two situations are covered: a task that has never been started, and a task
        /// that was started earlier but did not finish. In the second case the stored
        /// split data is re-attached to newly created split objects.
        /// </remarks>
        private void TaskInit()
        {
            string sPath = m_TaskData.SavePath + "\\" + m_TaskData.TaskName + "_file";

            if (m_IsDataInitialized)
            {
                return;
            }

            // Stop and discard any split tasks left over from an earlier run.
            if (m_list_GatherTaskSplit.Count > 0)
            {
                foreach (cGatherTaskSplit stale in m_list_GatherTaskSplit)
                {
                    stale.Stop();
                }

                m_list_GatherTaskSplit.Clear();
            }

            if (IsCompleted)
            {
                // The task is already finished: only the state is updated here,
                // no completion event is raised from this method.
                m_State = cGlobalParas.TaskState.Completed;
                m_IsDataInitialized = true;
                return;
            }

            if (m_TaskData.TaskSplitData.Count > 0)
            {
                // One split object per stored split-data entry.
                foreach (cTaskSplitData configData in m_TaskData.TaskSplitData)
                {
                    cGatherTaskSplit split = new cGatherTaskSplit();

                    split.TaskManage  = m_TaskManage;
                    split.TaskID      = m_TaskData.TaskID;
                    split.WebCode     = m_TaskData.WebCode;
                    split.IsUrlEncode = m_TaskData.IsUrlEncode;
                    split.UrlEncode   = m_TaskData.UrlEncode;
                    split.Cookie      = m_TaskData.Cookie;
                    split.StartPos    = m_TaskData.StartPos;
                    split.EndPos      = m_TaskData.EndPos;
                    split.SavePath    = sPath;
                    split.AgainNumber = m_TaskData.GatherAgainNumber;
                    split.Ignore404   = m_TaskData.IsIgnore404;
                    split.IsErrorLog  = m_TaskData.IsErrorLog;

                    split.TaskSplitData = configData;

                    m_list_GatherTaskSplit.Add(split);
                }
            }
            else
            {
                // No stored split data: create a single split object for the new task.
                cGatherTaskSplit split = new cGatherTaskSplit();

                split.TaskManage  = m_TaskManage;
                split.TaskID      = m_TaskData.TaskID;
                split.WebCode     = m_TaskData.WebCode;
                split.IsUrlEncode = m_TaskData.IsUrlEncode;
                split.UrlEncode   = m_TaskData.UrlEncode;
                split.Cookie      = m_TaskData.Cookie;
                split.StartPos    = m_TaskData.StartPos;
                split.EndPos      = m_TaskData.EndPos;
                split.SavePath    = sPath;
                split.AgainNumber = m_TaskData.GatherAgainNumber;
                split.Ignore404   = m_TaskData.IsIgnore404;
                split.IsErrorLog  = m_TaskData.IsErrorLog;

                m_list_GatherTaskSplit.Add(split);
            }

            // Hook this task's handlers onto every split object.
            foreach (cGatherTaskSplit pending in m_list_GatherTaskSplit)
            {
                TaskEventInit(pending);
            }

            m_IsDataInitialized = true;
        }
Esempio n. 9
0
        /// <summary>
        /// Loads the task identified by <c>m_TaskData.TaskID</c>, copies its settings into
        /// <c>m_TaskData</c>, expands parameterized URLs, and splits the resulting URL list
        /// into per-thread partitions whose split data is appended to
        /// <c>m_TaskData.TaskSplitData</c>.
        /// </summary>
        /// <remarks>
        /// When the task cannot be loaded, <c>m_TaskData</c> is filled with empty values
        /// and the method returns without creating any split data.
        /// </remarks>
        private void SplitTask()
        {
            Task.cTask t = new Task.cTask();

            // Load the task definition for the current TaskID.
            try
            {
                t.LoadTask(Int64.Parse(m_TaskData.TaskID.ToString()));
            }
            catch (System.Exception)
            {
                // Loading the task file failed, possibly because the file is missing.
                // An empty record is still filled in so that the UI can display the
                // lost task and the user can delete it from there. This is the
                // deliberate handling for missing task files.
                m_TaskData.SavePath    = "";
                m_TaskData.TaskDemo    = "";
                m_TaskData.StartPos    = "";
                m_TaskData.EndPos      = "";
                m_TaskData.Cookie      = "";
                m_TaskData.WebCode     = cGlobalParas.WebCode.auto;
                m_TaskData.IsLogin     = false;
                m_TaskData.LoginUrl    = "";
                m_TaskData.PublishType = cGlobalParas.PublishType.NoPublish;
                m_TaskData.IsUrlEncode = false;
                m_TaskData.UrlEncode   = "";
                m_TaskData.Weblink     = null;
                m_TaskData.CutFlag     = null;

                return;
            }

            // The values below (start/end positions, gather rules, URLs, ...) live in
            // the task's own file rather than in the run index that m_TaskData was
            // originally loaded from, so they are copied over from the loaded task.
            m_TaskData.SavePath    = t.SavePath;
            m_TaskData.TaskDemo    = t.TaskDemo;
            m_TaskData.StartPos    = t.StartPos;
            m_TaskData.EndPos      = t.EndPos;
            m_TaskData.Cookie      = t.Cookie;
            m_TaskData.WebCode     = (cGlobalParas.WebCode) int.Parse(t.WebCode);
            m_TaskData.IsLogin     = t.IsLogin;
            m_TaskData.LoginUrl    = t.LoginUrl;
            m_TaskData.PublishType = (cGlobalParas.PublishType) int.Parse(t.ExportType);
            m_TaskData.IsUrlEncode = t.IsUrlEncode;
            m_TaskData.UrlEncode   = t.UrlEncode;

            m_TaskData.GatherAgainNumber = t.GatherAgainNumber;
            m_TaskData.IsIgnore404       = t.IsIgnore404;
            m_TaskData.IsErrorLog        = t.IsErrorLog;
            m_TaskData.IsDelRepRow       = t.IsDelRepRow;
            m_TaskData.IsTrigger         = t.IsTrigger;
            if (t.IsTrigger == true)
            {
                m_TaskData.TriggerType = t.TriggerType;
                m_TaskData.TriggerTask = t.TriggerTask;
            }

            // Load the URLs. A URL containing a "{...}" parameter is expanded into the
            // concrete URLs it stands for, so the number of URLs may change here.
            Task.cUrlAnalyze u = new Task.cUrlAnalyze();

            for (int i = 0; i < t.WebpageLink.Count; i++)
            {
                if (!Regex.IsMatch(t.WebpageLink[i].Weblink.ToString(), "{.*}"))
                {
                    // Plain URL: take it over unchanged.
                    m_TaskData.Weblink.Add(t.WebpageLink[i]);
                    continue;
                }

                // NOTE(review): the original code branched on m_TaskData.IsUrlEncode
                // here, but both branches made this exact same call, so the flag had
                // no effect. Confirm whether an encoding-aware call was intended.
                List<string> Urls = u.SplitWebUrl(t.WebpageLink[i].Weblink.ToString());

                // Create one link entry per expanded URL, copying the source link's flags.
                foreach (string url in Urls)
                {
                    Task.cWebLink w = new Task.cWebLink();
                    w.IsGathered   = t.WebpageLink[i].IsGathered;
                    w.IsNavigation = t.WebpageLink[i].IsNavigation;
                    w.IsNextpage   = t.WebpageLink[i].IsNextpage;
                    w.NextPageRule = t.WebpageLink[i].NextPageRule;
                    w.Weblink      = url;

                    // Navigation rules are only carried over for navigation links.
                    if (t.WebpageLink[i].IsNavigation == true)
                    {
                        w.NavigRules = t.WebpageLink[i].NavigRules;
                    }

                    m_TaskData.Weblink.Add(w);
                }
            }

            m_TaskData.CutFlag = t.WebpageCutFlag;

            string sPath = m_TaskData.SavePath + "\\" + m_TaskData.TaskName + "_file";

            // Partition the URLs only when there are more URLs than threads and more
            // than one thread; otherwise a single split covers everything.
            if (m_TaskData.UrlCount > m_TaskData.ThreadCount && m_TaskData.ThreadCount > 1)
            {
                int SplitUrlCount = (int)Math.Ceiling((decimal)m_TaskData.UrlCount / (decimal)m_TaskData.ThreadCount);

                // Start (inclusive) and end (exclusive) URL index of the current partition.
                int StartIndex = 0;
                int EndIndex   = 0;

                for (int i = 1; i <= m_TaskData.ThreadCount; i++)
                {
                    StartIndex = EndIndex;
                    if (i == m_TaskData.ThreadCount)
                    {
                        // The last partition takes whatever is left.
                        EndIndex = m_TaskData.Weblink.Count;
                    }
                    else
                    {
                        // Clamp to the real list size. Because the partition size is
                        // rounded up, i * SplitUrlCount can run past the end of the
                        // list (e.g. 5 URLs on 4 threads gives [4,6) for the third
                        // partition), which previously made the copy loop below index
                        // out of range. A clamped partition may be empty.
                        EndIndex = Math.Min(i * SplitUrlCount, m_TaskData.Weblink.Count);
                    }

                    cGatherTaskSplit dtc = CreateConfiguredTaskSplit(sPath);

                    List<Task.cWebLink> tWeblink = new List<Task.cWebLink>();

                    for (int j = StartIndex; j < EndIndex; j++)
                    {
                        tWeblink.Add(m_TaskData.Weblink[j]);
                    }

                    // Initialize the split's data and register it with the task.
                    dtc.SetSplitData(StartIndex, EndIndex - 1, tWeblink, m_TaskData.CutFlag);

                    m_TaskData.TaskSplitData.Add(dtc.TaskSplitData);
                }
            }
            else
            {
                cGatherTaskSplit dtc = CreateConfiguredTaskSplit(sPath);

                dtc.SetSplitData(0, m_TaskData.UrlCount - 1, m_TaskData.Weblink, m_TaskData.CutFlag);
                m_TaskData.TaskSplitData.Add(dtc.TaskSplitData);
            }
        }

        /// <summary>
        /// Creates a split gather task carrying the settings that every partition of
        /// the current task shares.
        /// </summary>
        /// <param name="savePath">Value assigned to the split's <c>SavePath</c>.</param>
        /// <returns>The newly created split; its split data is not set yet.</returns>
        private cGatherTaskSplit CreateConfiguredTaskSplit(string savePath)
        {
            cGatherTaskSplit dtc = new cGatherTaskSplit();

            dtc.TaskManage  = m_TaskManage;
            dtc.TaskID      = m_TaskData.TaskID;
            dtc.WebCode     = m_TaskData.WebCode;
            dtc.IsUrlEncode = m_TaskData.IsUrlEncode;
            dtc.UrlEncode   = m_TaskData.UrlEncode;
            dtc.Cookie      = m_TaskData.Cookie;
            dtc.StartPos    = m_TaskData.StartPos;
            dtc.EndPos      = m_TaskData.EndPos;
            dtc.SavePath    = savePath;
            dtc.AgainNumber = m_TaskData.GatherAgainNumber;
            dtc.Ignore404   = m_TaskData.IsIgnore404;
            dtc.IsErrorLog  = m_TaskData.IsErrorLog;

            return dtc;
        }
Esempio n. 10
0
 /// <summary>
 /// Initializes the event data with the split gather task and the exception it reports.
 /// </summary>
 /// <param name="dtc">The split block in which the error occurred; stored in <c>m_ErrorThread</c>.</param>
 /// <param name="error">The exception that was caught; stored in <c>m_Error</c>.</param>
 public TaskErrorEventArgs(cGatherTaskSplit dtc, Exception error)
 {
     m_Error       = error;
     m_ErrorThread = dtc;
 }
Esempio n. 11
0
 /// <summary>
 /// Removes the given split task from <c>m_WaitingWorkThread</c> and then calls its <c>ReStart()</c>.
 /// </summary>
 /// <param name="dtc">The split gather task to take out of the waiting collection and restart.</param>
 internal void ReStartWaitingWorkThread(cGatherTaskSplit dtc)
 {
     m_WaitingWorkThread.Remove(dtc);
     dtc.ReStart();
 }
Esempio n. 12
0
 /// <summary>
 /// Sets the split task's <c>Waittime</c> from <c>cGatherTaskList.Waittime</c> and adds the task
 /// to <c>m_WaitingWorkThread</c>.
 /// </summary>
 /// <param name="dtc">The split gather task to place in the waiting collection.</param>
 internal void AddWaitingWorkThread(cGatherTaskSplit dtc)
 {
     dtc.Waittime = cGatherTaskList.Waittime;
     m_WaitingWorkThread.Add(dtc);
 }