/// <summary>
/// Loads the run copy of the task with the given ID and transfers its
/// publishing settings into <c>m_pTaskData</c>.
/// </summary>
/// <param name="TaskID">
/// Task ID; selects the file "tasks\run\task{TaskID}.xml" below the path
/// returned by <c>Program.getPrjPath()</c>.
/// </param>
/// <param name="dData">
/// Table stored as the publish data. Ideally it would be read from the task's
/// data file, but transactions are not supported yet, so the caller passes it in.
/// </param>
private void LoadTaskInfo(Int64 TaskID, System.Data.DataTable dData)
{
    Task.cTask task = new Task.cTask();
    task.LoadTask(Program.getPrjPath() + "tasks\\run\\task" + TaskID + ".xml");

    // "<TaskName>-<TaskID>.xml" is used both as the data file name and as the table name.
    string dataName = task.TaskName + "-" + task.TaskID + ".xml";

    m_pTaskData.TaskID = task.TaskID;
    m_pTaskData.TaskName = task.TaskName;
    m_pTaskData.ExportFile = task.ExportFile;
    m_pTaskData.DataSource = task.DataSource;
    m_pTaskData.FileName = task.SavePath + "\\" + dataName;

    // The data to save or export is still handed in by the caller because the
    // temporary data has to be kept; a later version should separate temporary
    // data storage from published data.
    m_pTaskData.PublishData = dData;
    m_pTaskData.PublishData.TableName = dataName;

    // ExportType holds the numeric value of the PublishType enumeration.
    int publishTypeValue = int.Parse(task.ExportType);
    m_pTaskData.PublishType = (cGlobalParas.PublishType)publishTypeValue;

    m_pTaskData.DataTableName = task.DataTableName;
    m_pTaskData.InsertSql = task.InsertSql;
    m_pTaskData.ExportUrl = task.ExportUrl;
    m_pTaskData.ExportUrlCode = task.ExportUrlCode;
    m_pTaskData.ExportCookie = task.ExportCookie;
    m_pTaskData.IsErrorLog = task.IsErrorLog;
    m_pTaskData.IsTrigger = task.IsTrigger;
    m_pTaskData.IsExportHeader = task.IsExportHeader;

    // Trigger details are only copied when the task defines a trigger.
    if (task.IsTrigger == true)
    {
        m_pTaskData.TriggerType = task.TriggerType;
        m_pTaskData.TriggerTask = task.TriggerTask;
    }
}
/// <summary>
/// Registers a new run instance of a task: appends a summary entry to the
/// task-run index held by <c>xmlConfig</c>, copies the task definition into
/// the run directory as "Task{id}.xml" and rewrites the ID inside the copy.
/// </summary>
/// <param name="Path">Directory that contains the task definition file.</param>
/// <param name="File">File name of the task definition inside <paramref name="Path"/>.</param>
/// <returns>The ID obtained from <c>GetNewID()</c> for the new run entry.</returns>
public Int64 InsertTaskRun(string Path, string File)
{
    // The run area is a fixed directory below the program path; create it on first use.
    string RunPath = Program.getPrjPath() + "Tasks\\run";
    if (!System.IO.Directory.Exists(RunPath))
    {
        System.IO.Directory.CreateDirectory(RunPath);
    }

    string sourceFile = Path + "\\" + File;

    // Load the task so that its summary can be written to the run index.
    Task.cTask t = new Task.cTask();
    t.LoadTask(sourceFile);

    // NOTE(review): presumably this prepares xmlConfig for the insert below - confirm.
    LoadTaskRunData();

    Int64 maxID = GetNewID();
    string runFile = RunPath + "\\" + "Task" + maxID + ".xml";

    // Free-text values (task name, file paths) are escaped so that characters
    // such as '&' or '<' cannot produce a malformed XML fragment.
    System.Text.StringBuilder tRunxml = new System.Text.StringBuilder();
    tRunxml.Append("<TaskID>").Append(maxID).Append("</TaskID>");
    tRunxml.Append("<TaskName>").Append(System.Security.SecurityElement.Escape(t.TaskName)).Append("</TaskName>");
    tRunxml.Append("<TaskState>").Append((int)cGlobalParas.TaskState.UnStart).Append("</TaskState>");
    tRunxml.Append("<TaskType>").Append(t.TaskType).Append("</TaskType>");
    tRunxml.Append("<RunType>").Append(t.RunType).Append("</RunType>");
    tRunxml.Append("<ExportFile>").Append(System.Security.SecurityElement.Escape(t.ExportFile)).Append("</ExportFile>");
    tRunxml.Append("<tempFile>")
           .Append(System.Security.SecurityElement.Escape(t.SavePath + "\\" + t.TaskName + "-" + maxID + ".xml"))
           .Append("</tempFile>");
    tRunxml.Append("<StartDate>").Append(DateTime.Now).Append("</StartDate>");
    tRunxml.Append("<EndDate></EndDate>");
    tRunxml.Append("<ThreadCount>").Append(t.ThreadCount).Append("</ThreadCount>");
    tRunxml.Append("<UrlCount>").Append(t.UrlCount).Append("</UrlCount>");

    // TrueUrlCount: when the URLs to gather include navigation URLs, the real
    // number of URLs cannot be computed up front; it is recalculated while the
    // task runs, so it is recorded separately to track progress. UrlCount itself
    // must not change because task splitting relies on it. At creation time
    // TrueUrlCount equals UrlCount.
    tRunxml.Append("<TrueUrlCount>").Append(t.UrlCount).Append("</TrueUrlCount>");
    tRunxml.Append("<GatheredUrlCount>0</GatheredUrlCount>");
    tRunxml.Append("<GatheredTrueUrlCount>0</GatheredTrueUrlCount>");
    tRunxml.Append("<ErrUrlCount>0</ErrUrlCount>");
    tRunxml.Append("<TrueErrUrlCount>0</TrueErrUrlCount>");
    tRunxml.Append("<IsLogin>").Append(t.IsLogin).Append("</IsLogin>");
    tRunxml.Append("<PublishType>").Append(t.ExportType).Append("</PublishType>");

    xmlConfig.InsertElement("Tasks", "Task", tRunxml.ToString());
    xmlConfig.Save();
    xmlConfig = null;

    // The run copy has the same format as the task file but is named
    // "Task" + run ID, so the same task can have several run instances (the
    // user may edit the task and start another instance while one is running).
    System.IO.File.Copy(sourceFile, runFile, true);

    // The ID stored inside the copy must match the ID in the run index,
    // otherwise loading the file fails: the ID is used as the unique key.
    cXmlIO xmlFile = new cXmlIO(runFile);
    string tID = xmlFile.GetNodeValue("Task/BaseInfo/ID");
    xmlFile.EditNode("ID", tID, maxID.ToString());
    xmlFile.Save();

    return maxID;
}
/// <summary>
/// Registers a new run instance of a task: appends a summary entry to the
/// task-run index held by <c>xmlConfig</c>, copies the task definition into
/// the run directory as "Task{id}.xml" and rewrites the ID inside the copy.
/// </summary>
/// <param name="Path">Directory that contains the task definition file.</param>
/// <param name="File">File name of the task definition inside <paramref name="Path"/>.</param>
/// <returns>The ID obtained from <c>GetNewID()</c> for the new run entry.</returns>
public Int64 InsertTaskRun(string Path, string File)
{
    // The run area is a fixed directory below the program path; create it on first use.
    string RunPath = Program.getPrjPath() + "Tasks\\run";
    if (!System.IO.Directory.Exists(RunPath))
    {
        System.IO.Directory.CreateDirectory(RunPath);
    }

    string sourceFile = Path + "\\" + File;

    // Load the task so that its summary can be written to the run index.
    Task.cTask t = new Task.cTask();
    t.LoadTask(sourceFile);

    // NOTE(review): presumably this prepares xmlConfig for the insert below - confirm.
    LoadTaskRunData();

    Int64 maxID = GetNewID();
    string runFile = RunPath + "\\" + "Task" + maxID + ".xml";

    // Free-text values (task name, file paths) are escaped so that characters
    // such as '&' or '<' cannot produce a malformed XML fragment.
    System.Text.StringBuilder tRunxml = new System.Text.StringBuilder();
    tRunxml.Append("<TaskID>").Append(maxID).Append("</TaskID>");
    tRunxml.Append("<TaskName>").Append(System.Security.SecurityElement.Escape(t.TaskName)).Append("</TaskName>");
    tRunxml.Append("<TaskState>").Append((int)cGlobalParas.TaskState.UnStart).Append("</TaskState>");
    tRunxml.Append("<TaskType>").Append(t.TaskType).Append("</TaskType>");
    tRunxml.Append("<RunType>").Append(t.RunType).Append("</RunType>");
    tRunxml.Append("<ExportFile>").Append(System.Security.SecurityElement.Escape(t.ExportFile)).Append("</ExportFile>");
    tRunxml.Append("<tempFile>")
           .Append(System.Security.SecurityElement.Escape(t.SavePath + "\\" + t.TaskName + "-" + maxID + ".xml"))
           .Append("</tempFile>");
    tRunxml.Append("<StartDate>").Append(DateTime.Now).Append("</StartDate>");
    tRunxml.Append("<EndDate></EndDate>");
    tRunxml.Append("<ThreadCount>").Append(t.ThreadCount).Append("</ThreadCount>");
    tRunxml.Append("<UrlCount>").Append(t.UrlCount).Append("</UrlCount>");

    // TrueUrlCount: when the URLs to gather include navigation URLs, the real
    // number of URLs cannot be computed up front; it is recalculated while the
    // task runs, so it is recorded separately to track progress. UrlCount itself
    // must not change because task splitting relies on it. At creation time
    // TrueUrlCount equals UrlCount.
    tRunxml.Append("<TrueUrlCount>").Append(t.UrlCount).Append("</TrueUrlCount>");
    tRunxml.Append("<GatheredUrlCount>0</GatheredUrlCount>");
    tRunxml.Append("<GatheredTrueUrlCount>0</GatheredTrueUrlCount>");
    tRunxml.Append("<ErrUrlCount>0</ErrUrlCount>");
    tRunxml.Append("<TrueErrUrlCount>0</TrueErrUrlCount>");
    tRunxml.Append("<IsLogin>").Append(t.IsLogin).Append("</IsLogin>");
    tRunxml.Append("<PublishType>").Append(t.ExportType).Append("</PublishType>");

    xmlConfig.InsertElement("Tasks", "Task", tRunxml.ToString());
    xmlConfig.Save();
    xmlConfig = null;

    // The run copy has the same format as the task file but is named
    // "Task" + run ID, so the same task can have several run instances (the
    // user may edit the task and start another instance while one is running).
    System.IO.File.Copy(sourceFile, runFile, true);

    // The ID stored inside the copy must match the ID in the run index,
    // otherwise loading the file fails: the ID is used as the unique key.
    cXmlIO xmlFile = new cXmlIO(runFile);
    string tID = xmlFile.GetNodeValue("Task/BaseInfo/ID");
    xmlFile.EditNode("ID", tID, maxID.ToString());
    xmlFile.Save();

    return maxID;
}
/// <summary>
/// Loads the definition of the task identified by <c>m_TaskData.TaskID</c>,
/// copies its gather settings into <c>m_TaskData</c>, expands parameterised
/// URLs and splits the URL list into one part per thread.
/// </summary>
/// <remarks>
/// If the task definition cannot be loaded (for example because the file is
/// missing), <c>m_TaskData</c> is filled with empty values and no split data
/// is created, so the UI can still show the task and let the user delete it.
/// </remarks>
private void SplitTask()
{
    Task.cTask t = new Task.cTask();

    // Load the task definition for the current task ID.
    try
    {
        t.LoadTask(Int64.Parse(m_TaskData.TaskID.ToString()));
    }
    catch (System.Exception)
    {
        // Deliberate best effort: the task file may be lost. Empty values are
        // still loaded so the lost task stays visible and removable in the UI.
        m_TaskData.SavePath = "";
        m_TaskData.TaskDemo = "";
        m_TaskData.StartPos = "";
        m_TaskData.EndPos = "";
        m_TaskData.Cookie = "";
        m_TaskData.WebCode = cGlobalParas.WebCode.auto;
        m_TaskData.IsLogin = false;
        m_TaskData.LoginUrl = "";
        m_TaskData.PublishType = cGlobalParas.PublishType.NoPublish;
        m_TaskData.IsUrlEncode = false;
        m_TaskData.UrlEncode = "";
        m_TaskData.Weblink = null;
        m_TaskData.CutFlag = null;

        return;
    }

    // These values are stored in the task's own xml file, not in taskrun.
    // m_TaskData was loaded from taskrun (that load is shared with the UI
    // listing), so the gather positions, rules and URLs are filled in here.
    m_TaskData.SavePath = t.SavePath;
    m_TaskData.TaskDemo = t.TaskDemo;
    m_TaskData.StartPos = t.StartPos;
    m_TaskData.EndPos = t.EndPos;
    m_TaskData.Cookie = t.Cookie;
    m_TaskData.WebCode = (cGlobalParas.WebCode)int.Parse(t.WebCode);
    m_TaskData.IsLogin = t.IsLogin;
    m_TaskData.LoginUrl = t.LoginUrl;
    m_TaskData.PublishType = (cGlobalParas.PublishType)int.Parse(t.ExportType);
    m_TaskData.IsUrlEncode = t.IsUrlEncode;
    m_TaskData.UrlEncode = t.UrlEncode;
    m_TaskData.GatherAgainNumber = t.GatherAgainNumber;
    m_TaskData.IsIgnore404 = t.IsIgnore404;
    m_TaskData.IsErrorLog = t.IsErrorLog;
    m_TaskData.IsDelRepRow = t.IsDelRepRow;
    m_TaskData.IsTrigger = t.IsTrigger;

    if (t.IsTrigger == true)
    {
        m_TaskData.TriggerType = t.TriggerType;
        m_TaskData.TriggerTask = t.TriggerTask;
    }

    // Load the URLs. A URL that contains a "{...}" parameter is expanded into
    // its concrete URLs, so the number of links may differ from the stored
    // UrlCount after this loop.
    Task.cUrlAnalyze u = new Task.cUrlAnalyze();

    for (int i = 0; i < t.WebpageLink.Count; i++)
    {
        if (!Regex.IsMatch(t.WebpageLink[i].Weblink.ToString(), "{.*}"))
        {
            m_TaskData.Weblink.Add(t.WebpageLink[i]);
            continue;
        }

        // NOTE(review): the original branched on m_TaskData.IsUrlEncode here but
        // made the same call in both branches; confirm whether an encoding-aware
        // variant was intended.
        List<string> Urls = u.SplitWebUrl(t.WebpageLink[i].Weblink.ToString());

        // Each expanded URL inherits the settings of the link it came from.
        for (int j = 0; j < Urls.Count; j++)
        {
            Task.cWebLink w = new Task.cWebLink();
            w.IsGathered = t.WebpageLink[i].IsGathered;
            w.IsNavigation = t.WebpageLink[i].IsNavigation;
            w.IsNextpage = t.WebpageLink[i].IsNextpage;
            w.NextPageRule = t.WebpageLink[i].NextPageRule;
            w.Weblink = Urls[j];

            // Navigation rules are only copied for navigation links.
            if (t.WebpageLink[i].IsNavigation == true)
            {
                w.NavigRules = t.WebpageLink[i].NavigRules;
            }

            m_TaskData.Weblink.Add(w);
        }
    }

    m_TaskData.CutFlag = t.WebpageCutFlag;

    string sPath = m_TaskData.SavePath + "\\" + m_TaskData.TaskName + "_file";

    // The task is only split when there are more URLs than threads and more
    // than one thread; otherwise a single part covers everything.
    if (m_TaskData.UrlCount > m_TaskData.ThreadCount && m_TaskData.ThreadCount > 1)
    {
        int SplitUrlCount = (int)Math.Ceiling((decimal)m_TaskData.UrlCount / (decimal)m_TaskData.ThreadCount);
        int linkCount = m_TaskData.Weblink.Count;

        // Each part covers the half-open index range [StartIndex, EndIndex).
        int EndIndex = 0;

        for (int i = 1; i <= m_TaskData.ThreadCount; i++)
        {
            int StartIndex = EndIndex;

            if (i == m_TaskData.ThreadCount)
            {
                // The last part takes whatever is left.
                EndIndex = linkCount;
            }
            else
            {
                // Because the chunk size is rounded up, i * SplitUrlCount can
                // run past the end of the list (e.g. 5 URLs on 4 threads gives
                // chunks of 2, and the third chunk would end at 6). Clamp it so
                // the slice below never reads beyond the list.
                EndIndex = Math.Min(i * SplitUrlCount, linkCount);
            }

            cGatherTaskSplit dtc = NewGatherTaskSplit(sPath);

            List<Task.cWebLink> tWeblink = m_TaskData.Weblink.GetRange(StartIndex, EndIndex - StartIndex);

            // SetSplitData takes an inclusive end index.
            dtc.SetSplitData(StartIndex, EndIndex - 1, tWeblink, m_TaskData.CutFlag);

            m_TaskData.TaskSplitData.Add(dtc.TaskSplitData);
        }
    }
    else
    {
        cGatherTaskSplit dtc = NewGatherTaskSplit(sPath);

        dtc.SetSplitData(0, m_TaskData.UrlCount - 1, m_TaskData.Weblink, m_TaskData.CutFlag);

        m_TaskData.TaskSplitData.Add(dtc.TaskSplitData);
    }
}

/// <summary>
/// Creates a <c>cGatherTaskSplit</c> initialised with the settings from
/// <c>m_TaskData</c> that are common to every part of the current task.
/// </summary>
/// <param name="savePath">Value assigned to the splitter's <c>SavePath</c>.</param>
/// <returns>The initialised splitter; its split data is not set yet.</returns>
private cGatherTaskSplit NewGatherTaskSplit(string savePath)
{
    cGatherTaskSplit dtc = new cGatherTaskSplit();
    dtc.TaskManage = m_TaskManage;
    dtc.TaskID = m_TaskData.TaskID;
    dtc.WebCode = m_TaskData.WebCode;
    dtc.IsUrlEncode = m_TaskData.IsUrlEncode;
    dtc.UrlEncode = m_TaskData.UrlEncode;
    dtc.Cookie = m_TaskData.Cookie;
    dtc.StartPos = m_TaskData.StartPos;
    dtc.EndPos = m_TaskData.EndPos;
    dtc.SavePath = savePath;
    dtc.AgainNumber = m_TaskData.GatherAgainNumber;
    dtc.Ignore404 = m_TaskData.IsIgnore404;
    dtc.IsErrorLog = m_TaskData.IsErrorLog;
    return dtc;
}
/// <summary>
/// Reads the run copy of the task with the given ID and fills
/// <c>m_pTaskData</c> with the settings needed to publish its data.
/// </summary>
/// <param name="TaskID">
/// Task ID; the definition is loaded from "tasks\run\task{TaskID}.xml" below
/// the path returned by <c>Program.getPrjPath()</c>.
/// </param>
/// <param name="dData">
/// Table to publish. It is supplied by the caller instead of being read from
/// the task's data file because transactions are not supported yet.
/// </param>
private void LoadTaskInfo(Int64 TaskID, System.Data.DataTable dData)
{
    Task.cTask loaded = new Task.cTask();
    loaded.LoadTask(Program.getPrjPath() + "tasks\\run\\task" + TaskID + ".xml");

    // Shared "<TaskName>-<TaskID>.xml" name: file name of the data file and table name.
    string xmlName = loaded.TaskName + "-" + loaded.TaskID + ".xml";
    string dataFile = loaded.SavePath + "\\" + xmlName;

    m_pTaskData.TaskID = loaded.TaskID;
    m_pTaskData.TaskName = loaded.TaskName;
    m_pTaskData.ExportFile = loaded.ExportFile;
    m_pTaskData.DataSource = loaded.DataSource;
    m_pTaskData.FileName = dataFile;

    // The data to be saved or exported is still passed in, because temporary
    // data must be kept; separating temporary storage from published data is
    // planned for a later version.
    m_pTaskData.PublishData = dData;
    m_pTaskData.PublishData.TableName = xmlName;

    // ExportType carries the PublishType enumeration value as text.
    m_pTaskData.PublishType = (cGlobalParas.PublishType)int.Parse(loaded.ExportType);

    m_pTaskData.DataTableName = loaded.DataTableName;
    m_pTaskData.InsertSql = loaded.InsertSql;
    m_pTaskData.ExportUrl = loaded.ExportUrl;
    m_pTaskData.ExportUrlCode = loaded.ExportUrlCode;
    m_pTaskData.ExportCookie = loaded.ExportCookie;
    m_pTaskData.IsErrorLog = loaded.IsErrorLog;
    m_pTaskData.IsTrigger = loaded.IsTrigger;
    m_pTaskData.IsExportHeader = loaded.IsExportHeader;

    // Trigger settings only apply to tasks that define a trigger.
    if (loaded.IsTrigger == true)
    {
        m_pTaskData.TriggerType = loaded.TriggerType;
        m_pTaskData.TriggerTask = loaded.TriggerTask;
    }
}