Пример #1
0
        //根据提供的taskid加载任务信息
        //数据不应该是传进来,是读取文件的,但现在不支持事务处理,所以传进来
        /// <summary>
        /// Fills <c>m_pTaskData</c> from the run-area task file of the given task and
        /// attaches the caller-supplied table as the data to publish.
        /// </summary>
        /// <param name="TaskID">Id used to build the task file name "tasks\run\task{TaskID}.xml" below the project path.</param>
        /// <param name="dData">Table stored as the publish data; its TableName is overwritten with "{TaskName}-{TaskID}.xml".</param>
        private void LoadTaskInfo(Int64 TaskID, System.Data.DataTable dData)
        {
            Task.cTask taskDef = new Task.cTask();
            taskDef.LoadTask(Program.getPrjPath() + "tasks\\run\\task" + TaskID + ".xml");

            // The result file and the published table share the same "<name>-<id>.xml" name.
            string dataName = taskDef.TaskName + "-" + taskDef.TaskID + ".xml";
            string resultFile = taskDef.SavePath + "\\" + dataName;

            // Identity and export target.
            m_pTaskData.TaskID     = taskDef.TaskID;
            m_pTaskData.TaskName   = taskDef.TaskName;
            m_pTaskData.ExportFile = taskDef.ExportFile;
            m_pTaskData.DataSource = taskDef.DataSource;
            m_pTaskData.FileName   = resultFile;

            // The data to save/export is still handed in by the caller (rather than read
            // from the result file) because temporary data has to be kept as well; the
            // original author planned to separate temporary and published data later.
            m_pTaskData.PublishData = dData;
            m_pTaskData.PublishData.TableName = dataName;

            // Publishing settings. ExportType is stored as text holding the numeric enum value.
            m_pTaskData.PublishType   = (cGlobalParas.PublishType)(int.Parse(taskDef.ExportType));
            m_pTaskData.DataTableName = taskDef.DataTableName;
            m_pTaskData.InsertSql     = taskDef.InsertSql;
            m_pTaskData.ExportUrl     = taskDef.ExportUrl;
            m_pTaskData.ExportUrlCode = taskDef.ExportUrlCode;
            m_pTaskData.ExportCookie  = taskDef.ExportCookie;

            // Flags.
            m_pTaskData.IsErrorLog     = taskDef.IsErrorLog;
            m_pTaskData.IsTrigger      = taskDef.IsTrigger;
            m_pTaskData.IsExportHeader = taskDef.IsExportHeader;

            // Trigger details are only copied when the task actually has a trigger.
            if (taskDef.IsTrigger == true)
            {
                m_pTaskData.TriggerType = taskDef.TriggerType;
                m_pTaskData.TriggerTask = taskDef.TriggerTask;
            }
        }
Пример #2
0
        /// <summary>
        /// Registers a new run instance of a task: appends a summary record through
        /// <c>xmlConfig</c> and copies the task file into the run directory under the
        /// newly allocated id.
        /// </summary>
        /// <param name="Path">Directory that contains the task definition file.</param>
        /// <param name="File">File name of the task definition inside <paramref name="Path"/>.</param>
        /// <returns>The id allocated for the new run instance.</returns>
        /// <remarks>
        /// Free-text values (task name, export file, save path) are XML-escaped before they
        /// are embedded in the record, so characters such as '&amp;' or '&lt;' in a task
        /// name or path no longer produce malformed markup.
        /// </remarks>
        public Int64 InsertTaskRun(string Path, string File)
        {
            string sourceFile = Path + "\\" + File;

            // Make sure the directory holding running tasks exists. It is a fixed
            // location below the project path.
            string RunPath = Program.getPrjPath() + "Tasks\\run";

            if (!System.IO.Directory.Exists(RunPath))
            {
                System.IO.Directory.CreateDirectory(RunPath);
            }

            // Load the task so that its summary can be written to the run record
            // (TaskRun.xml according to the original comments).
            Task.cTask t = new Task.cTask();
            t.LoadTask(sourceFile);

            // Start building the content of the new XML node.
            LoadTaskRunData();
            Int64 maxID = GetNewID();

            string tRunxml = "<TaskID>" + maxID + "</TaskID>";
            tRunxml += "<TaskName>" + System.Security.SecurityElement.Escape(t.TaskName) + "</TaskName>";
            tRunxml += "<TaskState>" + (int)cGlobalParas.TaskState.UnStart + "</TaskState>";
            tRunxml += "<TaskType>" + t.TaskType + "</TaskType>";
            tRunxml += "<RunType>" + t.RunType + "</RunType>";
            tRunxml += "<ExportFile>" + System.Security.SecurityElement.Escape(t.ExportFile) + "</ExportFile>";
            tRunxml += "<tempFile>"
                       + System.Security.SecurityElement.Escape(t.SavePath + "\\" + t.TaskName + "-" + maxID + ".xml")
                       + "</tempFile>";
            tRunxml += "<StartDate>" + DateTime.Now + "</StartDate>";
            tRunxml += "<EndDate></EndDate>";
            tRunxml += "<ThreadCount>" + t.ThreadCount + "</ThreadCount>";
            tRunxml += "<UrlCount>" + t.UrlCount + "</UrlCount>";

            // TrueUrlCount: when the URLs to gather include navigation URLs, the real
            // number of URLs cannot be calculated up front; it is recomputed while the
            // task runs, so it is recorded separately to track progress. UrlCount itself
            // must not be modified because task splitting relies on it. At initialisation
            // both values are equal; TrueUrlCount is maintained while the task runs.
            tRunxml += "<TrueUrlCount>" + t.UrlCount + "</TrueUrlCount>";

            tRunxml += "<GatheredUrlCount>0</GatheredUrlCount>";
            tRunxml += "<GatheredTrueUrlCount>0</GatheredTrueUrlCount>";
            tRunxml += "<ErrUrlCount>0</ErrUrlCount>";
            tRunxml += "<TrueErrUrlCount>0</TrueErrUrlCount>";

            tRunxml += "<IsLogin>" + t.IsLogin + "</IsLogin>";
            tRunxml += "<PublishType>" + t.ExportType + "</PublishType>";

            xmlConfig.InsertElement("Tasks", "Task", tRunxml);
            xmlConfig.Save();
            xmlConfig = null;

            // The task file in the run area has the same format as the original task
            // file but is named "Task" + run id. This allows several run instances of
            // the same task: the user may modify a task while it is running and start
            // another instance.
            string runFile = RunPath + "\\" + "Task" + maxID + ".xml";
            System.IO.File.Copy(sourceFile, runFile, true);

            // After copying, the ID stored inside the file must be changed to the run id
            // so that it matches the run record; the id is used as the unique index and
            // loading the file would fail otherwise.
            cXmlIO xmlFile = new cXmlIO(runFile);
            string tID = xmlFile.GetNodeValue("Task/BaseInfo/ID");
            xmlFile.EditNode("ID", tID, maxID.ToString());
            xmlFile.Save();

            return maxID;
        }
Пример #3
0
        /// <summary>
        /// Registers a new run instance of a task: appends a summary record through
        /// <c>xmlConfig</c> and copies the task file into the run directory under the
        /// newly allocated id.
        /// </summary>
        /// <param name="Path">Directory that contains the task definition file.</param>
        /// <param name="File">File name of the task definition inside <paramref name="Path"/>.</param>
        /// <returns>The id allocated for the new run instance.</returns>
        /// <remarks>
        /// Free-text values (task name, export file, save path) are XML-escaped before they
        /// are embedded in the record, so characters such as '&amp;' or '&lt;' in a task
        /// name or path no longer produce malformed markup.
        /// </remarks>
        public Int64 InsertTaskRun(string Path, string File)
        {
            string sourceFile = Path + "\\" + File;

            // Make sure the directory holding running tasks exists. It is a fixed
            // location below the project path.
            string RunPath = Program.getPrjPath() + "Tasks\\run";

            if (!System.IO.Directory.Exists(RunPath))
            {
                System.IO.Directory.CreateDirectory(RunPath);
            }

            // Load the task so that its summary can be written to the run record
            // (TaskRun.xml according to the original comments).
            Task.cTask t = new Task.cTask();
            t.LoadTask(sourceFile);

            // Start building the content of the new XML node.
            LoadTaskRunData();
            Int64 maxID = GetNewID();

            string tRunxml = "<TaskID>" + maxID + "</TaskID>";
            tRunxml += "<TaskName>" + System.Security.SecurityElement.Escape(t.TaskName) + "</TaskName>";
            tRunxml += "<TaskState>" + (int)cGlobalParas.TaskState.UnStart + "</TaskState>";
            tRunxml += "<TaskType>" + t.TaskType + "</TaskType>";
            tRunxml += "<RunType>" + t.RunType + "</RunType>";
            tRunxml += "<ExportFile>" + System.Security.SecurityElement.Escape(t.ExportFile) + "</ExportFile>";
            tRunxml += "<tempFile>"
                       + System.Security.SecurityElement.Escape(t.SavePath + "\\" + t.TaskName + "-" + maxID + ".xml")
                       + "</tempFile>";
            tRunxml += "<StartDate>" + DateTime.Now + "</StartDate>";
            tRunxml += "<EndDate></EndDate>";
            tRunxml += "<ThreadCount>" + t.ThreadCount + "</ThreadCount>";
            tRunxml += "<UrlCount>" + t.UrlCount + "</UrlCount>";

            // TrueUrlCount: when the URLs to gather include navigation URLs, the real
            // number of URLs cannot be calculated up front; it is recomputed while the
            // task runs, so it is recorded separately to track progress. UrlCount itself
            // must not be modified because task splitting relies on it. At initialisation
            // both values are equal; TrueUrlCount is maintained while the task runs.
            tRunxml += "<TrueUrlCount>" + t.UrlCount + "</TrueUrlCount>";

            tRunxml += "<GatheredUrlCount>0</GatheredUrlCount>";
            tRunxml += "<GatheredTrueUrlCount>0</GatheredTrueUrlCount>";
            tRunxml += "<ErrUrlCount>0</ErrUrlCount>";
            tRunxml += "<TrueErrUrlCount>0</TrueErrUrlCount>";

            tRunxml += "<IsLogin>" + t.IsLogin + "</IsLogin>";
            tRunxml += "<PublishType>" + t.ExportType + "</PublishType>";

            xmlConfig.InsertElement("Tasks", "Task", tRunxml);
            xmlConfig.Save();
            xmlConfig = null;

            // The task file in the run area has the same format as the original task
            // file but is named "Task" + run id. This allows several run instances of
            // the same task: the user may modify a task while it is running and start
            // another instance.
            string runFile = RunPath + "\\" + "Task" + maxID + ".xml";
            System.IO.File.Copy(sourceFile, runFile, true);

            // After copying, the ID stored inside the file must be changed to the run id
            // so that it matches the run record; the id is used as the unique index and
            // loading the file would fail otherwise.
            cXmlIO xmlFile = new cXmlIO(runFile);
            string tID = xmlFile.GetNodeValue("Task/BaseInfo/ID");
            xmlFile.EditNode("ID", tID, maxID.ToString());
            xmlFile.Save();

            return maxID;
        }
Пример #4
0
        //根据指定的任务ID对当前的任务进行分解,如果有导航页,也需要在此进行
        //分解
        //并初始化此任务的关键数据
        /// <summary>
        /// Loads the definition of the current task, copies its settings into
        /// <c>m_TaskData</c>, expands parameterised URLs and partitions the URL list into
        /// sub-tasks that are appended to <c>m_TaskData.TaskSplitData</c>.
        /// </summary>
        /// <remarks>
        /// When the task file cannot be loaded, the task-file backed fields are reset to
        /// empty placeholders and no sub-tasks are created. Per the original author's
        /// note this lets the UI still list a task whose file is missing so that the user
        /// can delete it.
        /// </remarks>
        private void SplitTask()
        {
            Task.cTask t = new Task.cTask();

            // Load the task definition addressed by the current task id.
            try
            {
                t.LoadTask(Int64.Parse(m_TaskData.TaskID.ToString()));
            }
            catch (System.Exception)
            {
                // Deliberate best effort: the task file could not be loaded (possibly
                // because it is missing). Fill in empty values instead of failing.
                m_TaskData.SavePath    = "";
                m_TaskData.TaskDemo    = "";
                m_TaskData.StartPos    = "";
                m_TaskData.EndPos      = "";
                m_TaskData.Cookie      = "";
                m_TaskData.WebCode     = cGlobalParas.WebCode.auto;
                m_TaskData.IsLogin     = false;
                m_TaskData.LoginUrl    = "";
                m_TaskData.PublishType = cGlobalParas.PublishType.NoPublish;
                m_TaskData.IsUrlEncode = false;
                m_TaskData.UrlEncode   = "";
                m_TaskData.Weblink     = null;
                m_TaskData.CutFlag     = null;

                return;
            }

            // Per the original author's note: start/end positions, gathering rules and
            // URLs are stored in the task file rather than in the task-run record that
            // m_TaskData was initially filled from, so they are copied here.
            m_TaskData.SavePath    = t.SavePath;
            m_TaskData.TaskDemo    = t.TaskDemo;
            m_TaskData.StartPos    = t.StartPos;
            m_TaskData.EndPos      = t.EndPos;
            m_TaskData.Cookie      = t.Cookie;
            m_TaskData.WebCode     = (cGlobalParas.WebCode) int.Parse(t.WebCode);
            m_TaskData.IsLogin     = t.IsLogin;
            m_TaskData.LoginUrl    = t.LoginUrl;
            m_TaskData.PublishType = (cGlobalParas.PublishType) int.Parse(t.ExportType);
            m_TaskData.IsUrlEncode = t.IsUrlEncode;
            m_TaskData.UrlEncode   = t.UrlEncode;

            m_TaskData.GatherAgainNumber = t.GatherAgainNumber;
            m_TaskData.IsIgnore404       = t.IsIgnore404;
            m_TaskData.IsErrorLog        = t.IsErrorLog;
            m_TaskData.IsDelRepRow       = t.IsDelRepRow;
            m_TaskData.IsTrigger         = t.IsTrigger;
            if (t.IsTrigger == true)
            {
                m_TaskData.TriggerType = t.TriggerType;
                m_TaskData.TriggerTask = t.TriggerTask;
            }

            // Copy the page links. A link containing a "{...}" parameter section is
            // expanded into concrete URLs first, so the number of links may grow here.
            Task.cUrlAnalyze u = new Task.cUrlAnalyze();

            for (int i = 0; i < t.WebpageLink.Count; i++)
            {
                Task.cWebLink source = t.WebpageLink[i];
                string address = source.Weblink.ToString();

                if (!Regex.IsMatch(address, "{.*}"))
                {
                    m_TaskData.Weblink.Add(source);
                    continue;
                }

                // NOTE(review): the original branched on m_TaskData.IsUrlEncode here, but
                // both branches made this identical call, so the branch was removed.
                // Confirm whether an encoding-aware overload was intended.
                List <string> Urls = u.SplitWebUrl(address);

                for (int j = 0; j < Urls.Count; j++)
                {
                    Task.cWebLink w = new Task.cWebLink();
                    w.IsGathered   = source.IsGathered;
                    w.IsNavigation = source.IsNavigation;
                    w.IsNextpage   = source.IsNextpage;
                    w.NextPageRule = source.NextPageRule;
                    w.Weblink      = Urls[j];

                    // Navigation rules are only carried over for navigation links.
                    if (source.IsNavigation == true)
                    {
                        w.NavigRules = source.NavigRules;
                    }

                    m_TaskData.Weblink.Add(w);
                }
            }

            m_TaskData.CutFlag = t.WebpageCutFlag;

            string sPath = m_TaskData.SavePath + "\\" + m_TaskData.TaskName + "_file";

            // Partition only when there are more URLs than threads and more than one thread.
            if (m_TaskData.UrlCount > m_TaskData.ThreadCount && m_TaskData.ThreadCount > 1)
            {
                int SplitUrlCount = (int)Math.Ceiling((decimal)m_TaskData.UrlCount / (decimal)m_TaskData.ThreadCount);
                int linkCount     = m_TaskData.Weblink.Count;
                int EndIndex      = 0;

                for (int i = 1; i <= m_TaskData.ThreadCount; i++)
                {
                    int StartIndex = EndIndex;

                    if (i == m_TaskData.ThreadCount)
                    {
                        // The last chunk takes whatever is left.
                        EndIndex = linkCount;
                    }
                    else
                    {
                        // Because the chunk size is rounded up, i * SplitUrlCount can run
                        // past the end of the list (e.g. 5 URLs on 4 threads gives a chunk
                        // size of 2 and a third chunk of 4..6). Clamp to avoid indexing
                        // beyond the last link; trailing chunks may then be empty.
                        EndIndex = Math.Min(i * SplitUrlCount, linkCount);
                    }

                    List <Task.cWebLink> tWeblink = new List <Task.cWebLink>();

                    for (int j = StartIndex; j < EndIndex; j++)
                    {
                        tWeblink.Add(m_TaskData.Weblink[j]);
                    }

                    // Initialise the sub-task with its inclusive index range and links.
                    cGatherTaskSplit dtc = CreateTaskSplit(sPath);
                    dtc.SetSplitData(StartIndex, EndIndex - 1, tWeblink, m_TaskData.CutFlag);

                    m_TaskData.TaskSplitData.Add(dtc.TaskSplitData);
                }
            }
            else
            {
                // A single sub-task covers all links.
                cGatherTaskSplit dtc = CreateTaskSplit(sPath);

                dtc.SetSplitData(0, m_TaskData.UrlCount - 1, m_TaskData.Weblink, m_TaskData.CutFlag);
                m_TaskData.TaskSplitData.Add(dtc.TaskSplitData);
            }
        }

        // Creates a sub-task object pre-populated with the settings every split shares.
        private cGatherTaskSplit CreateTaskSplit(string savePath)
        {
            cGatherTaskSplit dtc = new cGatherTaskSplit();

            dtc.TaskManage  = m_TaskManage;
            dtc.TaskID      = m_TaskData.TaskID;
            dtc.WebCode     = m_TaskData.WebCode;
            dtc.IsUrlEncode = m_TaskData.IsUrlEncode;
            dtc.UrlEncode   = m_TaskData.UrlEncode;
            dtc.Cookie      = m_TaskData.Cookie;
            dtc.StartPos    = m_TaskData.StartPos;
            dtc.EndPos      = m_TaskData.EndPos;
            dtc.SavePath    = savePath;
            dtc.AgainNumber = m_TaskData.GatherAgainNumber;
            dtc.Ignore404   = m_TaskData.IsIgnore404;
            dtc.IsErrorLog  = m_TaskData.IsErrorLog;

            return dtc;
        }
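        // Load the task information for the supplied task id.
        // The data should really be read from file rather than passed in, but transactions
        // are not supported yet, so it is passed in by the caller.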
        /// <summary>
        /// Reads the run-area task file for the given id and copies its publishing
        /// settings into <c>m_pTaskData</c>, using the supplied table as the publish data.
        /// </summary>
        /// <param name="TaskID">Id used to build the task file name "tasks\run\task{TaskID}.xml" below the project path.</param>
        /// <param name="dData">Table stored as the publish data; its TableName is overwritten with "{TaskName}-{TaskID}.xml".</param>
        private void LoadTaskInfo(Int64 TaskID, System.Data.DataTable dData)
        {
            string taskFile = Program.getPrjPath() + "tasks\\run\\task" + TaskID + ".xml";

            Task.cTask loaded = new Task.cTask();
            loaded.LoadTask(taskFile);

            // "<name>-<id>.xml" names both the result file and the published table.
            string xmlName = loaded.TaskName + "-" + loaded.TaskID + ".xml";

            m_pTaskData.TaskID     = loaded.TaskID;
            m_pTaskData.TaskName   = loaded.TaskName;
            m_pTaskData.ExportFile = loaded.ExportFile;
            m_pTaskData.DataSource = loaded.DataSource;
            m_pTaskData.FileName   = loaded.SavePath + "\\" + xmlName;

            // The data to save or export is still passed in because temporary data has
            // to be kept too; the original author planned to separate temporary data
            // from published data in a later version.
            m_pTaskData.PublishData = dData;
            m_pTaskData.PublishData.TableName = xmlName;

            // ExportType holds the numeric value of the publish type as text.
            int publishCode = int.Parse(loaded.ExportType);
            m_pTaskData.PublishType   = (cGlobalParas.PublishType)publishCode;
            m_pTaskData.DataTableName = loaded.DataTableName;

            m_pTaskData.InsertSql     = loaded.InsertSql;
            m_pTaskData.ExportUrl     = loaded.ExportUrl;
            m_pTaskData.ExportUrlCode = loaded.ExportUrlCode;
            m_pTaskData.ExportCookie  = loaded.ExportCookie;

            m_pTaskData.IsErrorLog     = loaded.IsErrorLog;
            m_pTaskData.IsTrigger      = loaded.IsTrigger;
            m_pTaskData.IsExportHeader = loaded.IsExportHeader;

            // Trigger details only matter when the task has a trigger configured.
            if (loaded.IsTrigger == true)
            {
                m_pTaskData.TriggerType = loaded.TriggerType;
                m_pTaskData.TriggerTask = loaded.TriggerTask;
            }
        }