/// <summary>
/// Single-company lookup using the Qg crawler.
/// When both <paramref name="guid"/> and <paramref name="searchInfo"/> are blank, only a fresh
/// GUID is placed in the ViewBag and the view is rendered with a null model. Otherwise a
/// one-item task is recorded, the crawler is run synchronously, and the first stored
/// <c>QgOrgCodeEntity</c> matching the search text (with a non-null <c>jgmc</c>) becomes the model.
/// </summary>
/// <param name="guid">Task identifier supplied by the client; converted with <c>Conv.ToGuid</c>.</param>
/// <param name="searchInfo">Company name to look up.</param>
/// <returns>The view, with the matching entity or null as its model.</returns>
public ActionResult SingelSearch(string guid = null, string searchInfo = null)
{
    CheckTime();

    // Kept as a typed local so that View(object model) is the overload chosen even for null.
    QgOrgCodeEntity result = null;

    var nothingSupplied = string.IsNullOrWhiteSpace(guid) && string.IsNullOrWhiteSpace(searchInfo);
    if (nothingSupplied)
    {
        ViewBag.Guid = Guid.NewGuid();
        return View(result);
    }

    ViewBag.Guid = guid;

    var names = new List<string> { searchInfo };
    var singleTask = new TaskEntity
    {
        TaskType = EnumTaskType.QgCrawler,
        TaskName = $"单个任务[{DateTime.Now.ToString("G")}]",
        Unique = Conv.ToGuid(guid),
        TaskStateDicId = 2,
        TaskNum = 1,
        CreateTime = DateTime.Now,
        IsSingelSearch = true
    };
    new TaskDomain().Add(singleTask);

    var queue = Crawler.QgOrgCode.TaskList.GetInstance();
    queue.AddTask(names);
    new QgCrawler(singleTask).RunCrawler(queue, 1);

    result = new QgOrgCodeDomain()
        .Get(t => t.companyName == searchInfo && t.jgmc != null)
        .FirstOrDefault();
    return View(result);
}
/// <summary>
/// Initializes the data to be queried: inserts one <c>TargeCompanyEntity</c> per company name,
/// then sets the task's <c>TaskStateDicId</c> to 2, saves it, and invokes the callback.
/// </summary>
/// <param name="lists">
/// Company names to import. The list is consumed: each name is removed after its row has been
/// added, so the list is empty when the method returns normally. It is also used as the lock object.
/// </param>
/// <param name="operatorName">Operator name stored on every inserted row.</param>
/// <param name="model">Task model; its <c>Unique</c> value becomes each row's <c>TaskGuid</c>.</param>
/// <param name="action">
/// Callback executed after the import has finished. It is passed this instance's
/// <c>TaskEntity</c> member (not <paramref name="model"/>).
/// </param>
public void InsertMetadata(List<string> lists, string operatorName, TaskEntity model, Action<TaskEntity> action)
{
    const int workerCount = 3;
    var workers = new Task[workerCount];
    for (var i = 0; i < workerCount; i++)
    {
        var worker = new Task(() =>
        {
            while (true)
            {
                // Take, insert and remove happen under one lock, so no two workers
                // can ever insert the same list entry.
                lock (lists)
                {
                    if (lists.Count <= 0) break;

                    // Index access + RemoveAt replaces Last() + Remove(value): Remove(value)
                    // scans from the front (O(n) per item) and deletes the first duplicate.
                    var lastIndex = lists.Count - 1;
                    var companyName = lists[lastIndex];

                    new TargeCompanyDomain().Add(new TargeCompanyEntity
                    {
                        TaskGuid = model.Unique,
                        CompanyName = companyName,
                        CreateTime = TaskEntity.CreateTime,
                        IsSearched = false,
                        OperatorName = operatorName
                    });

                    // Fix: the arguments were swapped relative to the placeholders
                    // ({0} is the inserted company, {1} is the task id).
                    Console.WriteLine("成功插入:{0} 线程 {1}", companyName, Task.CurrentId);

                    // Remove only after a successful insert, as the original did.
                    lists.RemoveAt(lastIndex);
                }
            }
        });
        worker.Start();
        workers[i] = worker;
    }

    Task.WaitAll(workers);

    // Update the task status.
    model.TaskStateDicId = 2;
    new TaskDomain().Update(model);
    action(TaskEntity);
    Console.WriteLine("数据导入完毕");
}
/// <summary>
/// Adds or updates the given task in a short-lived <c>LiGatherContext</c> and saves the change.
/// </summary>
/// <param name="model">Task entity passed to <c>AddOrUpdate</c>.</param>
public void Update(TaskEntity model)
{
    using (var context = new LiGatherContext())
    {
        var taskSet = context.TaskEntities;
        taskSet.AddOrUpdate(model);
        context.SaveChanges();
    }
}
/// <summary>
/// Single-company lookup using the Bj crawler.
/// When both arguments are blank, only a fresh GUID is placed in the ViewBag and the view is
/// rendered with a null model. Otherwise, unless a target-company record with the same name
/// already exists, a one-item task is created, imported via <c>BaseData.InsertMetadata</c>, and
/// four identical searches are started; the import callback returns once any one of them finishes.
/// The model is the first stored <c>CrawlerEntity</c> matching the search text.
/// </summary>
/// <param name="guid">Task identifier supplied by the client; converted with <c>Conv.ToGuid</c>.</param>
/// <param name="searchInfo">Company name to look up.</param>
/// <returns>The view, with the matching entity or null as its model.</returns>
public ActionResult SingelSearch(string guid = null, string searchInfo = null)
{
    // Kept as a typed local so that View(object model) is the overload chosen even for null.
    CrawlerEntity found = null;

    var nothingSupplied = string.IsNullOrWhiteSpace(guid) && string.IsNullOrWhiteSpace(searchInfo);
    if (nothingSupplied)
    {
        ViewBag.Guid = Guid.NewGuid();
        return View(found);
    }

    ViewBag.Guid = guid;

    var knownCount = new TargeCompanyDomain().Get(t => t.CompanyName.Equals(searchInfo))?.Count;

    // A null count (no list returned) compares false here, so it also takes the online-search path.
    var alreadyInHistory = knownCount > 0;
    if (!alreadyInHistory)
    {
        // Not in the history records: search online.
        var model = new TaskEntity
        {
            TaskType = EnumTaskType.BjCrawler,
            TaskName = $"单个任务[{DateTime.Now.ToString("G")}]",
            Unique = Conv.ToGuid(guid),
            TaskStateDicId = 1,
            TaskNum = 1,
            CreateTime = DateTime.Now,
            IsSingelSearch = true
        };
        new TaskDomain().Add(model);

        var pending = new List<string> { searchInfo };
        new BaseData(model).InsertMetadata(pending, model.TaskName, model, importedTask =>
        {
            const int racerCount = 4;
            Action searchOnce = () =>
            {
                var bjCrawler = new Crawler.Bjqyxy.BjCrawler(importedTask, t => t.TaskGuid.Equals(importedTask.Unique));
                bjCrawler.SingelSearch(searchInfo);
            };

            var racers = new Task[racerCount];
            for (var slot = 0; slot < racerCount; slot++)
            {
                var racer = new Task(searchOnce);
                racers[slot] = racer;
                racer.Start();
            }

            // Continue as soon as the first search completes.
            Task.WaitAny(racers);
        });
    }

    found = new CrawlerDomain()
        .Get(t => t.搜索名称 == searchInfo && t.名称 != null)
        .FirstOrDefault();
    return View(found);
}
/// <summary>
/// Accepts an uploaded text file with one company name per line, records a Qg crawler task
/// for it, and starts the crawl on a background task.
/// </summary>
/// <param name="taskEntity">Bound task model; type, state, count, creation time and single-search flag are overwritten here.</param>
/// <param name="txtfile">Uploaded file, decoded with <c>Encoding.Default</c>.</param>
/// <returns>JSON object whose <c>msg</c> reports how many lines were received.</returns>
public ActionResult Create(TaskEntity taskEntity, HttpPostedFileBase txtfile)
{
    var companyList = new List<string>();

    // Fix: the reader was never disposed. Disposing it also closes the upload stream,
    // which is not used again after this point.
    using (var reader = new StreamReader(txtfile.InputStream, Encoding.Default))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            companyList.Add(line);
        }
    }

    taskEntity.TaskType = EnumTaskType.QgCrawler;
    taskEntity.TaskStateDicId = 2;
    taskEntity.TaskNum = companyList.Count;
    taskEntity.CreateTime = DateTime.Now;
    taskEntity.IsSingelSearch = false;

    var tasklist = Crawler.QgOrgCode.TaskList.GetInstance();
    new TaskDomain().Add(taskEntity);

    // Fire-and-forget: the response is returned without waiting for the crawl.
    new Task(() =>
    {
        tasklist.AddTask(companyList);
        new QgCrawler(taskEntity).RunCrawler(tasklist);
    }).Start();

    return Json(new { msg = $"成功上传了任务文件,系统接受到{companyList.Count}条记录,即将执行查询." });
}
/// <summary>
/// Accepts an uploaded text file with one company name per line, records a Bj crawler task
/// for it, and starts the import (followed by the crawl) on a background task.
/// </summary>
/// <param name="model">Bound task model; type, state, count, creation time and single-search flag are overwritten here.</param>
/// <param name="txtfile">Uploaded file, decoded with <c>Encoding.Default</c>.</param>
/// <returns>JSON object whose <c>msg</c> reports how many lines were received.</returns>
public ActionResult Create(TaskEntity model, HttpPostedFileBase txtfile)
{
    var companyList = new List<string>();

    // Fix: the reader was never disposed. Disposing it also closes the upload stream,
    // which is not used again after this point.
    using (var reader = new StreamReader(txtfile.InputStream, Encoding.Default))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            companyList.Add(line);
        }
    }

    model.TaskType = EnumTaskType.BjCrawler;
    model.TaskStateDicId = 1;
    model.TaskNum = companyList.Count;
    model.CreateTime = DateTime.Now;
    model.IsSingelSearch = false;
    new TaskDomain().Add(model);

    // Fire-and-forget: the response is returned without waiting for the import.
    new Task(() =>
    {
        // InsertMetadata empties the list it is given, so it receives a copy.
        // NOTE(review): the task name is passed as the operator name — confirm this is intended.
        new BaseData(model).InsertMetadata(companyList.ToList(), model.TaskName, model, taskEntity =>
        {
            var bjqyxy = new Crawler.Bjqyxy.BjCrawler(taskEntity, t => t.TaskGuid.Equals(taskEntity.Unique));

            // The crawl runs on its own task so the import callback returns immediately.
            new Task(() => { bjqyxy.CrawlerWork(); }).Start();
        });
    }).Start();

    return Json(new { msg = $"成功上传了任务文件,系统接受到{companyList.Count}条记录,正在导入系统中。。。" });
}
/// <summary>
/// Manual driver: reads company names from a fixed local file, imports them under a newly
/// created test task, then runs the Bj crawler for that task.
/// </summary>
private static void BjCrawler()
{
    // Operator recorded on the task and on every imported row.
    const string operatorName = "张雪艳";

    var testTask = new TaskEntity
    {
        OperatorName = operatorName,
        Unique = Guid.NewGuid(),
        CreateTime = DateTime.Now,
        TaskName = "测试任务",
        TaskStateDicId = 1,
    };

    // Proxy calls, currently disabled:
    //Proxy.Proxy.GetInstance().ProxySave(100, 10);
    //Proxy.Proxy.GetInstance().ValidateCanUse(7);

    // Initialize the data to be queried.
    var companyNames = File.ReadAllLines("E:/1.txt", Encoding.Default).ToList();

    new BaseData(testTask).InsertMetadata(companyNames, operatorName, testTask, importedTask =>
    {
        // Crawl the data once the import has finished.
        var crawler = new Crawler.Bjqyxy.BjCrawler(importedTask, t => t.TaskGuid.Equals(importedTask.Unique));
        crawler.CrawlerWork();
    });
}
/// <summary>
/// Exports the crawl results of one task as an Excel download, newest crawl update first.
/// </summary>
/// <param name="model">Task whose <c>Unique</c> value selects the rows.</param>
/// <param name="isOptimize">Passed through to <c>ListToExcel</c>.</param>
/// <returns>The .xls file, or a script alert when the task has no rows.</returns>
public ActionResult Export(TaskEntity model, bool isOptimize)
{
    var rows = new CrawlerDomain()
        .Get(t => t.TaskGuid == model.Unique)
        .OrderByDescending(t => t.爬行更新时间)
        .ToList();

    if (rows.Count == 0)
    {
        return Content("<script>alert('未找到内容');</script>");
    }

    var workbook = rows.ListToExcel(isOptimize);
    var downloadName = "导出北京企业采集信息[" + DateTime.Now.ToString("yyyy-M-d dddd") + "].xls";
    return File(workbook, "application/vnd.ms-excel", downloadName);
}
/// <summary>
/// Reports crawl progress for a task: the value <c>GetInt</c> returns for target-company
/// rows of this task that are flagged <c>IsSearched</c>.
/// </summary>
/// <param name="model">Task whose <c>Unique</c> value selects the rows.</param>
/// <returns>JSON object with <c>state</c> = "doing" and <c>num</c> = that value.</returns>
public ActionResult CheckGoGather(TaskEntity model)
{
    var companies = new TargeCompanyDomain();
    var searchedCount = companies.GetInt(t => t.TaskGuid == model.Unique && t.IsSearched);

    var progress = new { state = "doing", num = searchedCount };
    return Json(progress);
}
/// <summary>
/// Starts the Bj crawler for the given task on a background task and returns immediately.
/// </summary>
/// <param name="model">Task to crawl; target companies are selected by its <c>Unique</c> value.</param>
/// <returns>JSON object with a fixed <c>state</c> value.</returns>
public ActionResult GoGather(TaskEntity model)
{
    // A default three-second wait used to sit here; it is disabled:
    //Thread.Sleep(1000 * 3);

    // Crawl the data.
    var crawler = new Crawler.Bjqyxy.BjCrawler(model, t => t.TaskGuid.Equals(model.Unique));
    var crawlJob = new Task(() => { crawler.CrawlerWork(); });
    crawlJob.Start();

    return Json(new { state = "nothion" });
}
/// <summary>
/// Reports import progress for a task: the value <c>GetInt</c> returns for target-company
/// rows belonging to this task.
/// </summary>
/// <param name="model">Task whose <c>Unique</c> value selects the rows.</param>
/// <returns>JSON object with <c>state</c> = "doing" and <c>num</c> = that value.</returns>
public ActionResult CheckInsertMetadata(TaskEntity model)
{
    var companies = new TargeCompanyDomain();
    var importedCount = companies.GetInt(t => t.TaskGuid == model.Unique);

    var progress = new { state = "doing", num = importedCount };
    return Json(progress);
}
/// <summary>
/// Crawler for the Beijing Enterprise Credit Information website.
/// Maintained: 2016-01-18 15:47:02
/// </summary>
/// <param name="model">Task stored in <c>TaskEntity</c>.</param>
/// <param name="queryCondition">Query condition used to select the companies to look up; stored in <c>QueryCondition</c>.</param>
public BjCrawler(TaskEntity model, Expression<Func<TargeCompanyEntity, bool>> queryCondition)
{
    this.TaskEntity = model;
    this.QueryCondition = queryCondition;
}
/// <summary>
/// Reports crawl progress for a task: the <c>Count</c> of <c>QgOrgCodeDomain</c> results
/// whose <c>TaskGuid</c> matches the task.
/// </summary>
/// <param name="model">Task whose <c>Unique</c> value selects the rows.</param>
/// <returns>JSON object with <c>state</c> = "doing" and <c>num</c> = that count.</returns>
public ActionResult CheckGoGather(TaskEntity model)
{
    var crawled = new QgOrgCodeDomain().Get(t => t.TaskGuid == model.Unique);
    var crawledCount = crawled.Count;

    var progress = new { state = "doing", num = crawledCount };
    return Json(progress);
}
/// <summary>
/// Basic data operations.
/// </summary>
/// <param name="taskEntity">Task stored in <c>TaskEntity</c> for later use by this instance.</param>
public BaseData(TaskEntity taskEntity)
{
    TaskEntity = taskEntity;
}
/// <summary>
/// Creates a Qg crawler bound to the given task.
/// </summary>
/// <param name="taskEntity">Task stored in <c>TaskEntity</c>.</param>
public QgCrawler(TaskEntity taskEntity)
{
    this.TaskEntity = taskEntity;
}