/// <summary>
/// Initializes the item from an extraction rule: the layer number becomes
/// the item's own text and the rule's fields become its sub-items.
/// </summary>
/// <param name="rule">Extraction rule represented by this item.</param>
/// <param name="index">Layer number displayed as the item's text.</param>
public ExtractionRulesItem(ExtractionRule rule, int index)
{
    // Start from an empty sub-item collection before anything is populated.
    SubItems.Clear();
    this.rule = rule;

    Text = index.ToString();
    ImageKey = "taskmin.png";
    Font = new System.Drawing.Font("宋体", 10);

    // One sub-item per entry, appended in this exact order.
    string[] columnTexts =
    {
        rule.Name,
        rule.Layer.ToString(),
        rule.DataColumn,
        rule.DataUnique.ToString(),
        rule.PreviousFlag,
        rule.FollowingFlag
    };

    foreach (string columnText in columnTexts)
    {
        SubItems.Add(new ListViewSubItem(this, columnText));
    }
}
/// <summary>
/// Builds an extraction rule from the current values of the editor controls.
/// </summary>
/// <remarks>
/// A few properties are not read from controls: <c>CurrentSubDirectory</c> is
/// set to an empty string, <c>Filters</c> to null, <c>IsDownloadUrl</c> to
/// false and <c>Layer</c> to <c>Layer.Terminator</c>.
/// </remarks>
/// <returns>The newly populated extraction rule.</returns>
private ExtractionRule GetExtraction()
{
    ExtractionRule result = new ExtractionRule();

    // Attachment URL identifier
    result.AttachmentUrlIdentifier = cbxAttachmentUrlIdentifier.Text;
    result.ClassDirectoryField = cbxClassDirectory.Text;
    // Use a constant value as the result
    result.ConstantAsResult = chbFixedAsResult.Checked;
    // The constant value itself
    result.ConstantValue = txtFixeValue.Text;
    // Create subdirectories
    result.CreateSubDirectories = chbCreateSubDirectories.Checked;
    // Current subdirectory
    result.CurrentSubDirectory = "";
    // Database column
    result.DataColumn = cbxDataColumn.Text;
    // Unique data
    result.DataUnique = chbDataUnique.Checked;
    // Detect the real URL
    result.DetectRealUrl = chbDetectRealUrl.Checked;
    // Download attachments
    result.DownloadAttachments = chbDownloadAttachments.Checked;
    // Download directory
    result.DownloadDirectory = txtDownloadDirectory.Text;
    // Download Flash files
    result.DownloadFlashes = chbDownloadFlashes.Checked;
    // Download images
    result.DownloadImages = chbDownloadImages.Checked;
    // Essential rule
    result.Essential = chkKeyRule.Checked;
    // File name extension
    result.FileNameExtension = cbxFileNameExtension.Text;
    // Number of files per subdirectory
    result.FilesPerSubDirectory = Convert.ToInt32(nudFilesPerSubDirectory.Value);
    // Filter options
    result.Filters = null;
    // Marker that follows the information
    result.FollowingFlag = txtFollowingFlag.Text;
    // Global rule
    result.Global = chbGlobal.Checked;
    // Download URL
    result.IsDownloadUrl = false;
    // Page layer
    result.Layer = Layer.Terminator;
    // Use the link text as the result
    result.LinkTextAsResult = chkLinkTextAsResult.Checked;
    // Separator placed between merged pages
    result.MergenceSeparator = txtMergenceSeparator.Text;
    // Merge paginated content
    result.MergePages = chbMergePages.Checked;
    // Rule name
    result.Name = cbxName.Text;
    // Use the POST parameters as the result
    result.PostParametersAsResult = chbPostParametersAsResult.Checked;
    // Marker that precedes the information
    result.PreviousFlag = txtPreviousFlag.Text;
    // Keep all HTML marks
    result.ReserveAllHtmlMarks = rdoReserveAllHtmlMarks.Checked;
    // Use an HTTP response header as the result
    result.ResponseHeaderAsResult = chbResponseHeaderAsResult.Checked;
    // Response header name
    result.ResponseHeaderName = cbxResponseHeaderName.Text;
    // Skip when the file already exists
    result.SkipIfFileExisted = chbSkipIfFileExisted.Checked;
    // Static rule
    result.Static = chbStatic.Checked;
    // Record the extraction time
    result.TimeAsResult = chbTimeAsResult.Checked;
    // Record the current URL
    result.UrlAsResult = chbUrlAsResult.Checked;
    // Use the class directory
    result.UseClassDirectory = chbUseClassDirectory.Checked;
    // Use a plugin to extract the data
    result.UsePlugin = chbUsePlugin.Checked;
    // Use a random file name
    result.UseRandomFileName = chbUseRandomFileName.Checked;
    // Virtual path
    result.VirtualPath = txtVirtualPath.Text;

    // Result replacements are taken from the selected list items; each
    // iteration overwrites the previous one, so the last selected item wins.
    foreach (Utility.ExtractionRulesItem selected in LivExtractionRule.SelectedItems)
    {
        result.Replacements = selected.rule.Replacements;
    }

    // Reserved HTML marks: for every selected list entry, add each known
    // mark whose display name equals the entry's text.
    int entryCount = chkHtmlMakeList.Items.Count;
    for (int entryIndex = 0; entryIndex < entryCount; entryIndex++)
    {
        if (!chkHtmlMakeList.GetSelected(entryIndex))
        {
            continue;
        }

        foreach (HtmlMark mark in htmlMarks)
        {
            if (!((string)chkHtmlMakeList.Items[entryIndex]).Equals(mark.DisplayName))
            {
                continue;
            }

            result.ReservedHtmlMarks.Add(mark);
        }
    }

    return result;
}
/// <summary>
/// Collects the current values of the editor controls into a new extraction rule.
/// </summary>
/// <remarks>
/// <c>CurrentSubDirectory</c> (empty string), <c>Filters</c> (null),
/// <c>IsDownloadUrl</c> (false) and <c>Layer</c> (<c>Layer.Terminator</c>)
/// are assigned fixed values rather than being read from a control.
/// </remarks>
/// <returns>The newly populated extraction rule.</returns>
private ExtractionRule GetExtraction()
{
    ExtractionRule extraction = new ExtractionRule();

    extraction.AttachmentUrlIdentifier = cbxAttachmentUrlIdentifier.Text;             // attachment URL identifier
    extraction.ClassDirectoryField = cbxClassDirectory.Text;
    extraction.ConstantAsResult = chbFixedAsResult.Checked;                           // use a constant value as the result
    extraction.ConstantValue = txtFixeValue.Text;                                     // the constant value itself
    extraction.CreateSubDirectories = chbCreateSubDirectories.Checked;                // create subdirectories
    extraction.CurrentSubDirectory = "";                                              // current subdirectory
    extraction.DataColumn = cbxDataColumn.Text;                                       // database column
    extraction.DataUnique = chbDataUnique.Checked;                                    // unique data
    extraction.DetectRealUrl = chbDetectRealUrl.Checked;                              // detect the real URL
    extraction.DownloadAttachments = chbDownloadAttachments.Checked;                  // download attachments
    extraction.DownloadDirectory = txtDownloadDirectory.Text;                         // download directory
    extraction.DownloadFlashes = chbDownloadFlashes.Checked;                          // download Flash files
    extraction.DownloadImages = chbDownloadImages.Checked;                            // download images
    extraction.Essential = chkKeyRule.Checked;                                        // essential rule
    extraction.FileNameExtension = cbxFileNameExtension.Text;                         // file name extension
    extraction.FilesPerSubDirectory = Convert.ToInt32(nudFilesPerSubDirectory.Value); // files per subdirectory
    extraction.Filters = null;                                                        // filter options
    extraction.FollowingFlag = txtFollowingFlag.Text;                                 // marker that follows the information
    extraction.Global = chbGlobal.Checked;                                            // global rule
    extraction.IsDownloadUrl = false;                                                 // download URL
    extraction.Layer = Layer.Terminator;                                              // page layer
    extraction.LinkTextAsResult = chkLinkTextAsResult.Checked;                        // use the link text as the result
    extraction.MergenceSeparator = txtMergenceSeparator.Text;                         // separator between merged pages
    extraction.MergePages = chbMergePages.Checked;                                    // merge paginated content
    extraction.Name = cbxName.Text;                                                   // rule name
    extraction.PostParametersAsResult = chbPostParametersAsResult.Checked;            // use the POST parameters as the result
    extraction.PreviousFlag = txtPreviousFlag.Text;                                   // marker that precedes the information
    extraction.ReserveAllHtmlMarks = rdoReserveAllHtmlMarks.Checked;                  // keep all HTML marks
    extraction.ResponseHeaderAsResult = chbResponseHeaderAsResult.Checked;            // use an HTTP response header as the result
    extraction.ResponseHeaderName = cbxResponseHeaderName.Text;                       // response header name
    extraction.SkipIfFileExisted = chbSkipIfFileExisted.Checked;                      // skip when the file already exists
    extraction.Static = chbStatic.Checked;                                            // static rule
    extraction.TimeAsResult = chbTimeAsResult.Checked;                                // record the extraction time
    extraction.UrlAsResult = chbUrlAsResult.Checked;                                  // record the current URL
    extraction.UseClassDirectory = chbUseClassDirectory.Checked;                      // use the class directory
    extraction.UsePlugin = chbUsePlugin.Checked;                                      // use a plugin to extract the data
    extraction.UseRandomFileName = chbUseRandomFileName.Checked;                      // use a random file name
    extraction.VirtualPath = txtVirtualPath.Text;                                     // virtual path

    // Result replacements come from the selected list items. The assignment
    // is repeated per item, so the value from the last selected item remains.
    foreach (Utility.ExtractionRulesItem chosen in LivExtractionRule.SelectedItems)
    {
        extraction.Replacements = chosen.rule.Replacements;
    }

    // Reserved HTML marks: match each selected list entry against the known
    // marks by display name and add every match to the rule.
    for (int position = 0; position < chkHtmlMakeList.Items.Count; position++)
    {
        bool entrySelected = chkHtmlMakeList.GetSelected(position);
        if (entrySelected)
        {
            foreach (HtmlMark candidate in htmlMarks)
            {
                bool namesMatch = ((string)chkHtmlMakeList.Items[position]).Equals(candidate.DisplayName);
                if (namesMatch)
                {
                    extraction.ReservedHtmlMarks.Add(candidate);
                }
            }
        }
    }

    return extraction;
}