Пример #1
0
        /// <summary>
        /// Drives a scraping task: deserializes the task description, walks its page range,
        /// applies every processor to the current page, appends each extracted value to
        /// <c>listResult</c>, follows the sub-processors of each extracted value, and finally
        /// asks the shell to open the output file.
        /// </summary>
        /// <param name="args">Not read by this method.</param>
        /// <remarks>
        /// Every exception raised inside the method is printed to the console and written to an
        /// "ERROR@&lt;timestamp&gt;.log" file (relative path); nothing is rethrown to the caller.
        /// </remarks>
        public void workWithTaskFileDB(string[] args)
        {
            try
            {
                string extraColumn1   = "未定义1";
                string extraColumn2   = "未定义2";
                string outputFileName = "OUTPUT " + extraColumn1 + " " + extraColumn2 + ".xlsx";

                // NOTE(review): the task JSON is an empty string here, so the deserializer is
                // handed no content. Original open question: should these parameters be
                // written in the task file?
                string taskJson = "";

                var taskk = Tools.Serializer.DeSerializeTSK(taskJson);
                Console.WriteLine("读取正常任务文件...ok!");

                // A zero or negative step never moves the page index past StarEnd, so the loop
                // below would keep scraping and growing listResult indefinitely. Fail fast; the
                // catch block below logs the problem.
                if (taskk.Current <= taskk.StarEnd && taskk.StarGap <= 0)
                {
                    throw new InvalidOperationException(
                        "Task StarGap must be greater than zero, but was " + taskk.StarGap + ".");
                }

                for (int i = taskk.Current; i <= taskk.StarEnd; i = i + taskk.StarGap)
                {
                    // Record the page being processed on the task before CurrentURL is read.
                    taskk.Current = i;
                    Console.WriteLine("当前进行到页码.............................." + i + "/" + taskk.StarEnd);

                    for (int j = 0; j < taskk.Processor.Count; j++)
                    {
                        var processor = taskk.Processor[j];
                        Console.WriteLine("提取器位置..." + (j + 1).ToString() + "/" + taskk.Processor.Count);

                        // Values extracted from the current page by this processor.
                        var field0 = Tools.Scraper.Scrape(getor(taskk.CurrentURL), processor);

                        // Every extracted value becomes its own result row, stored in column c0.
                        foreach (var c0Item in field0)
                        {
                            listResult.Add(new Models.Result()
                            {
                                c0 = c0Item
                            });
                        }

                        Console.WriteLine("提取到数据条数===" + field0.Count);

                        // The processor has sub-processors: each extracted value is fetched via
                        // getor and scraped again with every sub-processor.
                        if (processor.SubProcessor != null && processor.SubProcessor.Count > 0)
                        {
                            Console.WriteLine("发现此项为含有扩展任务...爬取扩展任务~");

                            foreach (var c0Item in field0)
                            {
                                foreach (var subpro in processor.SubProcessor)
                                {
                                    var subValues = Tools.Scraper.Scrape(getor(c0Item), subpro);

                                    // Look up the Result that carries this c0 value; it is the
                                    // template cloned for every sub-value.
                                    Models.Result c0Result = findByC0(c0Item);

                                    foreach (var subValue in subValues)
                                    {
                                        // Direct cast: a Clone() of the wrong type should fail
                                        // here with a clear InvalidCastException rather than a
                                        // NullReferenceException on the next line.
                                        var cloneR = (Models.Result)c0Result.Clone();
                                        cloneR.c9 = subValue; // original note: will become c8 later

                                        listResult.Add(cloneR);
                                    }
                                }
                            }

                            // NOTE(review): the return value of ProcessSub was never used. The
                            // call is kept in case it has side effects - confirm whether it
                            // duplicates the loop above.
                            ProcessSub(processor.SubProcessor, field0);

                            Console.WriteLine("扩展任务完成!");
                        }

#if outputView
                        Console.WriteLine("保存数据.............................OK!");
#endif
                    }
                }

                // NOTE(review): nothing visible in this method writes this file - confirm it is
                // produced elsewhere before it is opened.
                System.Diagnostics.Process.Start(outputFileName);
            }
            catch (Exception err)
            {
                // Best-effort reporting: echo to the console and persist to a timestamped log.
                Console.WriteLine(err.ToString());
                System.IO.File.WriteAllText("ERROR@" + DateTime.Now.ToString("yyyyMMdd HHmmss") + ".log", err.ToString());
            }
            Console.WriteLine("按任意键退出...");
            // The key wait is disabled, so the method returns right after printing the prompt.
            //Console.ReadKey();
        }