/// <summary>
/// Callback invoked when the crawler has received a page.
/// The received arguments are only handed to the shared
/// <c>DataReceivedEventArgs_Kiwi</c> queue; no parsing or persistence happens here.
/// </summary>
/// <param name="args">Event data describing the received page.</param>
private static void MasterDataReceivedEvent(DataReceivedEventArgs args)
{
    // The page could be parsed right here, e.g. with an HTML parsing component
    // such as HtmlAgilityPack, with regular expressions, or with hand-written
    // string analysis. An NSoup-based parse of args.Html was tried and is disabled.

    // Received-data handling: the arguments are queued instead of being processed
    // inline. An earlier version dispatched WriteToDB() through
    // ThreadPool.QueueUserWorkItem; multi-threading remains an option if this
    // turns out to be a problem.
    var receivedQueue = DataReceivedEventArgs_Kiwi.Instance;
    receivedQueue.EnQueue(args);
}
/// <summary>
/// Persists a received page through <c>Capturedata_kBll</c>, using the MD5 of the
/// trimmed HTML to decide what to do:
/// <list type="bullet">
/// <item>a row with the same MD5 exists: nothing is written (logged as "存在");</item>
/// <item>no such MD5 but a row with the same URL exists: that row is updated (logged as "更新");</item>
/// <item>neither exists: a new row is added (logged as "添加").</item>
/// </list>
/// </summary>
/// <param name="dataReceived">Received page; its <c>Html</c> and <c>Url</c> are read.</param>
private static void WriteToFiles(DataReceivedEventArgs dataReceived)
{
    KiwiCrawler.BLL.Capturedata_kBll bll = new KiwiCrawler.BLL.Capturedata_kBll();
    KiwiCrawler.Model.Capturedata_k model = new KiwiCrawler.Model.Capturedata_k();
    model.kContent = dataReceived.Html.Trim();
    // MD5 of the trimmed content serves as the page fingerprint.
    model.kPageMD5 = MD5Helper.MD5Helper.ComputeMd5String(model.kContent);

    // Same fingerprint already stored: the page exists and has not changed.
    KiwiCrawler.Model.Capturedata_k getModel =
        bll.GetModelList("kPageMD5=" + ToSqlStringLiteral(model.kPageMD5)).FirstOrDefault();
    if (getModel != null)
    {
        writeToLogView(dataReceived, "存在");
        return;
    }

    // Unknown fingerprint: either a known URL whose content changed, or a new page.
    model.kUrl = dataReceived.Url;
    // The URL comes from crawled content, so it must not be spliced raw into the filter.
    getModel = bll.GetModelList("kUrl=" + ToSqlStringLiteral(model.kUrl)).FirstOrDefault();
    if (getModel != null)
    {
        // Known URL, new content: refresh the stored row and flag it as updated.
        getModel.kContent = model.kContent;
        getModel.kExtracted = 0;
        getModel.kPageMD5 = model.kPageMD5;
        getModel.kUpdateTime = DateTime.Now;
        getModel.kIsUpdated = 1;
        bll.Update(getModel);
        writeToLogView(dataReceived, "更新");
    }
    else
    {
        // Newly discovered page.
        // Open question carried over from the original author: how the completion
        // figure should be updated when a scan turns up newly added data.
        model.kCaptureDateTime = DateTime.Now;
        // Business type from configuration (e.g. civil affairs department,
        // work-safety supervision bureau, seismological bureau).
        model.kType = configModel.kAddressBusinessType.Trim();
        fileId++;
        model.kNumber = fileId;
        model.kExtracted = 0;
        model.kNotes = configModel.kId + ":" + configModel.kKeyWords;
        model.kUpdateTime = model.kCaptureDateTime;
        model.kIndexId = configModel.kId;
        model.kIsUpdated = 0;
        bll.Add(model);
        writeToLogView(dataReceived, "添加");
    }
}

/// <summary>
/// Wraps <paramref name="value"/> in single quotes for use in a string-built filter,
/// doubling any embedded single quote so the value cannot terminate the literal.
/// A null value yields an empty literal (<c>''</c>), matching plain concatenation.
/// </summary>
/// <remarks>
/// NOTE(review): this is a mitigation only. Quote doubling is the standard SQL escape,
/// but engines that also treat backslash as an escape character need more than this.
/// A parameterized query in the BLL/DAL would be the proper fix.
/// </remarks>
private static string ToSqlStringLiteral(string value)
{
    return "'" + (value ?? string.Empty).Replace("'", "''") + "'";
}
/// <summary>
/// Writes one green line to the Kiwi log console for a processed page. The line holds
/// the current time, the current <c>fileId</c>, the managed thread id, the action
/// label and the page URL, terminated by CRLF.
/// </summary>
/// <param name="dataReceived">Received page; only its <c>Url</c> is read.</param>
/// <param name="str">Action label shown between the second pair of brackets.</param>
private static void writeToLogView(DataReceivedEventArgs dataReceived, String str)
{
    // Operands are captured in the same left-to-right order the message uses.
    string timestamp = DateTime.Now.ToString();
    object sequence = fileId;
    object threadId = Thread.CurrentThread.ManagedThreadId;

    string line = string.Concat(new object[]
    {
        timestamp, " -", sequence, "-【", threadId, "】-", "【", str, "】", dataReceived.Url, "\r\n"
    });

    kiwiConsole.WriteOutput(line, Color.Green);
}