/// <summary>
/// Runs a paged scraping task. For every page index from the task's <c>Current</c>
/// up to <c>StarEnd</c> (advancing by <c>StarGap</c>), each processor of the task is
/// applied to the content returned by <c>getor</c> for the task's <c>CurrentURL</c>.
/// Every extracted item is appended to <c>listResult</c> as a new result whose
/// <c>c0</c> is that item. When a processor has sub-processors, every extracted item
/// is scraped again with each sub-processor and one cloned result per sub-item
/// (with <c>c9</c> set to the sub-item) is appended as well. Afterwards the method
/// asks the OS to open the output workbook.
/// </summary>
/// <param name="args">Not read by this method.</param>
/// <remarks>
/// Any exception raised while running the task is printed to the console and written
/// to an <c>ERROR@yyyyMMdd HHmmss.log</c> file; it is not rethrown.
/// </remarks>
public void workWithTaskFileDB(string[] args)
{
    try
    {
        string EXTRA_COLUMN1 = "未定义1";
        string EXTRA_COLUMN2 = "未定义2";
        string OUTPUT_FILENAME = "OUTPUT " + EXTRA_COLUMN1 + " " + EXTRA_COLUMN2 + ".xlsx";

        // Open question from the original author: should these parameters be written
        // into the task file?
        // NOTE(review): the task JSON is an empty string here, so the deserializer is
        // given no real task data — confirm where the JSON is supposed to come from.
        string taskJson = "";
        var taskk = Tools.Serializer.DeSerializeTSK(taskJson);
        Console.WriteLine("读取正常任务文件...ok!");

        // A step of zero (or less) would never move past the current page, so the
        // loop below would scrape forever and grow listResult without bound.
        // Fail fast instead; the catch block below logs the problem.
        if (taskk.Current <= taskk.StarEnd && taskk.StarGap <= 0)
        {
            throw new InvalidOperationException(
                "Task StarGap must be greater than zero; otherwise the page loop never terminates.");
        }

        for (int i = taskk.Current; i <= taskk.StarEnd; i = i + taskk.StarGap)
        {
            // Keep the task's position in sync with the loop.
            // NOTE(review): presumably CurrentURL is derived from Current — confirm.
            taskk.Current = i;
            Console.WriteLine("当前进行到页码.............................." + i + "/" + taskk.StarEnd);

            for (int j = 0; j < taskk.Processor.Count; j++)
            {
                var processor = taskk.Processor[j];
                Console.WriteLine("提取器位置..." + (j + 1).ToString() + "/" + taskk.Processor.Count);

                // Original note: "field1 — number of items per page".
                var field0 = Tools.Scraper.Scrape(getor(taskk.CurrentURL), processor);
                foreach (var c0Item in field0)
                {
                    listResult.Add(new Models.Result() { c0 = c0Item });
                }
                Console.WriteLine("提取到数据条数===" + field0.Count);

                // The processor has sub-processors: every first-level item is scraped
                // again with each of them.
                if (processor.SubProcessor != null && processor.SubProcessor.Count > 0)
                {
                    Console.WriteLine("发现此项为含有扩展任务...爬取扩展任务~");

                    foreach (var c0Item in field0)
                    {
                        foreach (var subpro in processor.SubProcessor)
                        {
                            var subItems = Tools.Scraper.Scrape(getor(c0Item), subpro);

                            // Look up the Result object that was stored for this c0 value.
                            // The lookup intentionally stays after the scrape and inside
                            // this loop, exactly as before, because listResult changes
                            // while clones are being appended.
                            Models.Result c0Result = findByC0(c0Item);

                            foreach (var subItem in subItems)
                            {
                                // Direct cast: a Clone() of an unexpected type now fails
                                // here with InvalidCastException instead of surfacing
                                // later as a NullReferenceException.
                                var cloneR = (Models.Result)c0Result.Clone();
                                cloneR.c9 = subItem; // original note: "will be c8 later"
                                listResult.Add(cloneR);
                            }
                        }
                    }

                    // Original note: field0 is handed to another function, after which
                    // it should hold the processed result. The returned value is not
                    // used here; the call is kept for whatever effects it has.
                    ProcessSub(processor.SubProcessor, field0);
                    Console.WriteLine("扩展任务完成!");
                }

#if outputView
                Console.WriteLine("保存数据.............................OK!");
#endif
            }
        }

        // NOTE(review): nothing in this method writes OUTPUT_FILENAME; if no other code
        // creates it, this call throws and ends up in the catch block — confirm.
        System.Diagnostics.Process.Start(OUTPUT_FILENAME);
    }
    catch (Exception err)
    {
        // Best-effort reporting: console plus a timestamped log file.
        Console.WriteLine(err.ToString());
        System.IO.File.WriteAllText("ERROR@" + DateTime.Now.ToString("yyyyMMdd HHmmss") + ".log", err.ToString());
    }

    Console.WriteLine("按任意键退出...");
    // The key wait was disabled by the original author, so the method returns
    // immediately after printing the prompt.
    //Console.ReadKey();
}