/// <summary>
/// Rebuilds the crawler tree: one top-level node per crawler group and, beneath it,
/// one node per crawler type registered in <c>ToolComm.DictTypeCrawllers</c>.
/// </summary>
/// <remarks>
/// If the tree already has nodes, <c>SaveConfigs</c> is called before the tree is cleared.
/// After the nodes are rebuilt, <c>LoadConfigs</c> is applied and the tree is fully expanded.
/// Crawlers that cannot be instantiated as <c>WebSiteCrawller</c>, or that have an empty
/// <c>Title</c>, are skipped.
/// </remarks>
/// <param name="tree">The tree view to populate.</param>
public static void InitCrawller(TreeView tree)
{
    if (tree.Nodes.Count > 0)
    {
        SaveConfigs(tree);
    }
    tree.Nodes.Clear();

    Dictionary<string, Type> crawllerTypes = ToolComm.DictTypeCrawllers;
    foreach (KeyValuePair<string, Type> entry in crawllerTypes)
    {
        string typeFullName = entry.Key;
        WebSiteCrawller crawller =
            Assembly.GetAssembly(entry.Value).CreateInstance(typeFullName) as WebSiteCrawller;

        // CreateInstance yields null when the type name is not found in the assembly, and the
        // "as" cast yields null for a non-crawler type; skip those instead of dereferencing null.
        if (crawller == null || string.IsNullOrEmpty(crawller.Title))
        {
            continue;
        }

        // Crawlers without an explicit group are collected under the fallback group.
        string groupName = string.IsNullOrEmpty(crawller.Group) ? "其他" : crawller.Group;

        TreeNode group;
        TreeNode[] existing = tree.Nodes.Find(groupName, true);
        if (existing != null && existing.Length > 0)
        {
            group = existing[0];
        }
        else
        {
            group = new TreeNode();
            group.Text = groupName;
            group.Name = groupName;
            group.Checked = true;
            tree.Nodes.Add(group);
        }

        TreeNode node = new TreeNode();
        node.Text = crawller.Title;
        node.Name = typeFullName;
        node.ToolTipText = crawller.Description;
        node.Checked = crawller.Enabled;
        node.ForeColor = crawller.Enabled ? Color.Blue : Color.Gray;
        node.Tag = crawller;
        group.Nodes.Add(node);
    }

    LoadConfigs(tree);
    tree.ExpandAll();
}
/// <summary>
/// Walks <paramref name="node"/> and all of its descendants (parent before children) and
/// appends the <c>Config</c> of every node whose <c>Tag</c> is a <c>WebSiteCrawller</c>.
/// </summary>
/// <param name="node">The node to start from.</param>
/// <param name="configs">The list that receives the collected configurations.</param>
protected static void GetNodeConfigs(TreeNode node, List<WebSiteCrawlConfig> configs)
{
    WebSiteCrawller tagged = node.Tag as WebSiteCrawller;
    if (tagged != null)
    {
        configs.Add(tagged.Config);
    }

    // Iterating an empty child collection is a no-op, so no explicit count guard is needed.
    TreeNodeCollection children = node.Nodes;
    for (int i = 0; i < children.Count; i++)
    {
        GetNodeConfigs(children[i], configs);
    }
}
/// <summary>
/// Collects, keyed by <c>Key</c>, the crawlers attached to checked nodes in the subtree
/// rooted at <paramref name="node"/>.
/// </summary>
/// <param name="node">The node to start from.</param>
/// <param name="crawllers">The dictionary that receives the collected crawlers.</param>
protected static void GetNodeCrawllers(TreeNode node, Dictionary<string, WebSiteCrawller> crawllers)
{
    // An unchecked node excludes itself and its entire subtree.
    if (!node.Checked)
    {
        return;
    }

    WebSiteCrawller tagged = node.Tag as WebSiteCrawller;
    if (tagged != null)
    {
        crawllers.Add(tagged.Key, tagged);
    }

    TreeNodeCollection children = node.Nodes;
    for (int i = 0; i < children.Count; i++)
    {
        GetNodeCrawllers(children[i], crawllers);
    }
}
/// <summary>
/// Refreshes a tree node (text, name, tooltip, checked state and colour) from the
/// <c>WebSiteCrawller</c> stored in its <c>Tag</c>, if any.
/// </summary>
/// <param name="node">The node to refresh.</param>
/// <param name="updateChilds">
/// When <c>true</c>, all descendants of <paramref name="node"/> are refreshed as well;
/// when <c>false</c>, only <paramref name="node"/> itself is touched.
/// </param>
public static void UpdateNodeConfigs(TreeNode node, bool updateChilds)
{
    WebSiteCrawller crawller = node.Tag as WebSiteCrawller;
    if (crawller != null)
    {
        node.Text = crawller.Title;
        node.Name = crawller.Key;
        node.ToolTipText = crawller.Description;
        node.Checked = crawller.Enabled;
        // Enabled crawlers are shown in blue, disabled ones in gray.
        node.ForeColor = crawller.Enabled ? Color.Blue : Color.Gray;
    }

    // Previously the flag was ignored and children were always visited, which made
    // callers that walk the tree themselves refresh every descendant repeatedly.
    if (!updateChilds)
    {
        return;
    }

    foreach (TreeNode child in node.Nodes)
    {
        UpdateNodeConfigs(child, true);
    }
}
/// <summary>
/// Restores the node colours of the crawler tree: a top-level node whose text contains the
/// trimmed search text is set to blue, and every child node that carries a
/// <c>WebSiteCrawller</c> is set to blue when enabled and gray when disabled.
/// </summary>
private void ClearSelect()
{
    // The search text does not change while the tree is walked, so trim it once.
    string keyword = txtSertch.Text.Trim();

    foreach (TreeNode groupNode in treCrawlers.Nodes)
    {
        if (groupNode.Text.Contains(keyword))
        {
            groupNode.ForeColor = Color.Blue;
        }

        // Visit every child. The former "Count > 1" guard skipped groups that contain
        // exactly one crawler, so that crawler's colour was never restored.
        foreach (TreeNode node in groupNode.Nodes)
        {
            WebSiteCrawller crawller = node.Tag as WebSiteCrawller;
            if (crawller != null)
            {
                node.ForeColor = crawller.Enabled ? Color.Blue : Color.Gray;
            }
        }
    }
}
/// <summary>
/// Handles a check-state change in the crawler tree: copies the node's checked state to its
/// crawler's <c>Enabled</c> flag and propagates the state to the node's direct children.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">Event data carrying the node whose check state changed.</param>
private void treCrawlers_AfterCheck(object sender, TreeViewEventArgs e)
{
    TreeNode changed = e.Node;
    if (changed == null)
    {
        return;
    }

    WebSiteCrawller ownCrawller = changed.Tag as WebSiteCrawller;
    if (ownCrawller != null)
    {
        ownCrawller.Enabled = changed.Checked;
    }

    TreeNodeCollection children = changed.Nodes;
    for (int i = 0; i < children.Count; i++)
    {
        TreeNode child = children[i];
        child.Checked = changed.Checked;

        WebSiteCrawller childCrawller = child.Tag as WebSiteCrawller;
        if (childCrawller != null)
        {
            childCrawller.Enabled = child.Checked;
        }
    }
}
/// <summary>
/// Starts the scheduled crawl: groups the enabled crawlers by their <c>PlanTime</c>,
/// saves the current configuration and starts the one-minute timer.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">Event data (unused).</param>
private void BtnBegin_Click(object sender, EventArgs e)
{
    dictTimerCrawllers.Clear();

    Dictionary<string, WebSiteCrawller> crawllers = ToolComm.GetEnabledCrawllers(treCrawlers);
    foreach (WebSiteCrawller crawller in crawllers.Values)
    {
        // Crawlers sharing the same plan time are collected in one list.
        if (dictTimerCrawllers.ContainsKey(crawller.PlanTime))
        {
            dictTimerCrawllers[crawller.PlanTime].Add(crawller);
        }
        else
        {
            List<WebSiteCrawller> list = new List<WebSiteCrawller>();
            list.Add(crawller);
            dictTimerCrawllers.Add(crawller.PlanTime, list);
        }
    }

    if (crawllers.Count == 0)
    {
        MessageBox.Show("请选择要抓取的选项!");
        return;
    }

    ToolComm.SaveConfigs(treCrawlers);

    // Detach before attaching so that starting again never stacks duplicate handlers
    // (removing a handler that is not subscribed is a no-op).
    timer.Elapsed -= new System.Timers.ElapsedEventHandler(timer_Tick);
    timer.Elapsed += new System.Timers.ElapsedEventHandler(timer_Tick);

    // Configure the timer fully before enabling it.
    timer.Interval = 60 * 1000; // fire once per minute
    timer.AutoReset = true;
    timer.Enabled = true;

    AppendText("开始抓取数据......");
    BtnBegin.Enabled = false;
    grpCurr.Enabled = false;
}
/// <summary>
/// Builds a multi-line text summary of a crawler: description, key, end time of the last
/// crawl, planned times, maximum item count and site URL.
/// </summary>
/// <param name="crawller">The crawler to describe; may be <c>null</c>.</param>
/// <returns>
/// The summary text (each entry terminated by a line break), or an empty string when
/// <paramref name="crawller"/> is <c>null</c>.
/// </returns>
public static string GetCrawlerInfo(WebSiteCrawller crawller)
{
    if (crawller == null)
    {
        return string.Empty;
    }

    StringBuilder info = new StringBuilder();
    info.AppendFormat("抓取说明:{0}", crawller.Description).AppendLine();
    info.AppendFormat("抓取程序:{0}", crawller.Key).AppendLine();
    info.AppendFormat("上次抓取:{0:yyyy-MM-dd HH:mm:ss}", crawller.LastCrawlEnd).AppendLine();
    // Insert a space after each comma of the plan-time list for readability.
    info.AppendFormat("计划时间:{0}", crawller.PlanTime.Replace(",", ", ")).AppendLine();
    info.AppendFormat("抓取数量:最近 {0} 条", crawller.MaxCount).AppendLine();
    info.AppendFormat("抓取地址:{0}", crawller.SiteUrl).AppendLine();
    return info.ToString();
}
/// <summary>
/// Applies stored configurations to the subtree rooted at <paramref name="node"/>: for each
/// node carrying a <c>WebSiteCrawller</c> with a non-empty <c>Key</c>, a matching non-null
/// configuration with a non-empty <c>Key</c> is assigned to the crawler and the node is refreshed.
/// </summary>
/// <param name="node">The node to start from.</param>
/// <param name="configs">Stored configurations, looked up by crawler key.</param>
protected static void SetNodeConfigs(TreeNode node, Dictionary<string, WebSiteCrawlConfig> configs)
{
    WebSiteCrawller crawller = node.Tag as WebSiteCrawller;
    if (crawller != null && !string.IsNullOrEmpty(crawller.Key))
    {
        WebSiteCrawlConfig stored;
        bool usable = configs.TryGetValue(crawller.Key, out stored)
                      && stored != null
                      && !string.IsNullOrEmpty(stored.Key);
        if (usable)
        {
            crawller.Config = stored;
            UpdateNodeConfigs(node, false);
        }
    }

    TreeNodeCollection children = node.Nodes;
    for (int i = 0; i < children.Count; i++)
    {
        SetNodeConfigs(children[i], configs);
    }
}
/// <summary>
/// Handles a selection change in the crawler tree: remembers the selected node and the
/// crawler stored in its <c>Tag</c> (null when the tag is not a crawler), then refreshes
/// the detail view via <c>SetCurrCrawllerDetails</c>.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">Event data carrying the newly selected node.</param>
private void treCrawlers_AfterSelect(object sender, TreeViewEventArgs e)
{
    TreeNode selected = e.Node;
    currNode = selected;
    currCrawller = selected.Tag as WebSiteCrawller;

    SetCurrCrawllerDetails();
}
/// <summary>
/// Creates a collection bound to the given crawler.
/// </summary>
/// <param name="crawller">The crawler stored in <c>WebCrawller</c>.</param>
public WebSiteCollection(WebSiteCrawller crawller)
{
    WebCrawller = crawller;
}
/// <summary>
/// Runs one crawl for <paramref name="crawller"/>, saves the crawled records and their
/// attachments through <c>ToolCoreDb.SaveDatas</c>, and returns a summary message.
/// </summary>
/// <param name="crawller">The crawler to execute.</param>
/// <returns>
/// A message with the saved/total counts for records and attachments, or an error message
/// when an exception occurred (the exception itself is passed to <c>Logger.Error</c>).
/// </returns>
public static string DealEntity(WebSiteCrawller crawller)
{
    StringBuilder result = new StringBuilder();
    try
    {
        // Crawl the records.
        IList infoList = crawller.Crawl(false);
        if (infoList == null || infoList.Count == 0)
        {
            result.Append("【").Append(crawller.Title).Append("】信息【").Append("0").Append("/").Append("0").Append("】条;");
            return result.ToString();
        }

        object[] successList;
        List<BaseAttach> tattachList = crawller.AttachList;
        int count = ToolCoreDb.SaveDatas(infoList, crawller.ExistCompareFields, tattachList, out successList,
            crawller.ExistsUpdate, crawller.ExistsHtlCtx, crawller.ExistsUpdateAttach);
        result.Append("【").Append(crawller.Title).Append("】信息【").Append(count).Append("/").Append(infoList.Count).Append("】条;");

        // Save the attachments. When existing attachments are to be updated, attachments of
        // every crawled record are saved; otherwise only those of the records reported in successList.
        IEnumerable attachmentOwners = crawller.ExistsUpdateAttach ? (IEnumerable)infoList : successList;
        List<BaseAttach> newAttachList = CollectAttachments(attachmentOwners, tattachList);

        count = ToolCoreDb.SaveDatas(newAttachList, "SourceID,AttachServerPath");
        int attachTotal = tattachList != null ? tattachList.Count : 0;
        result.Append("附件【").Append(count).Append("/").Append(attachTotal).Append("】条;");

        if (crawller.AttachList != null)
        {
            crawller.AttachList.Clear();
        }
    }
    catch (Exception ex)
    {
        Logger.Error(ex);
        // Discard any partial summary and report the failure instead.
        result.Length = 0;
        // Guard the title lookup so that a null crawler does not throw again from inside the handler.
        string title = crawller != null ? crawller.Title : string.Empty;
        result.Append("抓取【").Append(title).Append("】出现异常,详见日志文件!");
    }
    return result.ToString();
}

/// <summary>
/// Collects, in item order, the attachments whose <c>SourceID</c> equals the value of the
/// "Id" property (read via reflection) of each item. Null items and items without a
/// readable, non-null "Id" are skipped.
/// </summary>
private static List<BaseAttach> CollectAttachments(IEnumerable items, List<BaseAttach> attachList)
{
    List<BaseAttach> matched = new List<BaseAttach>();
    if (items == null || attachList == null)
    {
        return matched;
    }

    foreach (object item in items)
    {
        if (item == null)
        {
            continue;
        }

        System.Reflection.PropertyInfo idProperty = item.GetType().GetProperty("Id");
        object idValue = idProperty != null ? idProperty.GetValue(item, null) : null;
        if (idValue == null)
        {
            continue;
        }

        string id = idValue.ToString();
        matched.AddRange(attachList.FindAll(a => a.SourceID == id));
    }
    return matched;
}