/// <summary>
/// Applies a single extraction rule to the current page and returns the value for one cell.
/// </summary>
/// <param name="r">The extraction rule to apply.</param>
/// <returns>
/// The extraction result. The time, constant, URL and response-header shortcuts are returned
/// as-is; otherwise the text cut from the page HTML is post-processed (optional regex matching,
/// replacements, HTML-mark filtering) and stripped of spaces, tabs and line breaks.
/// </returns>
private string ExtractionColumn(ExtractionRule r)
{
    // Use the extraction time as the result.
    if (r.TimeAsResult)
    {
        return DateTime.Now.ToString();
    }

    // Use the constant value as the result. When Static is set, the constant is
    // a regular expression instead (handled further down), so it is not returned here.
    if (r.ConstantAsResult && !r.Static)
    {
        return r.ConstantValue;
    }

    // Use the URL of the current response as the result.
    if (r.UrlAsResult)
    {
        return _response.ResponseUri.AbsoluteUri;
    }

    // Use a response header as the result.
    if (r.ResponseHeaderAsResult)
    {
        return _response.Headers[r.ResponseHeaderName];
    }

    // Disabled: use the POST parameters as the result.
    // if (extractionRule.PostParametersAsResult) return this.HttpHelper.WebRequest.GetRequestStream();
    // Disabled: use the link text as the result.
    // if (extractionRule.LinkTextAsResult) return "";

    // Cut the content out of the page HTML using the rule's preceding/following flags.
    string result = Smart.Utility.StringHelper.SubString(_htmlText, r.PreviousFlag, r.FollowingFlag);

    // When the static rule is selected, the constant value is treated as a regular
    // expression and the result is the concatenation of all of its matches.
    if (r.Static)
    {
        MatchCollection matches = Regex.Matches(result, r.ConstantValue);

        // A StringBuilder avoids re-copying the accumulated text for every match.
        System.Text.StringBuilder joined = new System.Text.StringBuilder();
        foreach (Match m in matches)
        {
            joined.Append(m.Value);
        }

        result = joined.ToString();
    }

    // Apply the rule's replacements to the extracted text.
    result = ResultReplace(result, r.Replacements);

    // Filter HTML marks, passing along the rule's reserved marks.
    result = FilterHtmlMark(result, r.ReservedHtmlMarks);

    // Strip unwanted characters: spaces, tabs and line breaks.
    result = result.Replace(" ", "");
    result = result.Replace("\t", "");
    result = result.Replace("\n", "");
    result = result.Replace("\r", "");

    // Trim any other leading/trailing white-space characters that remain.
    result = result.Trim();

    return result;
}
/// <summary>
/// Applies a single extraction rule to the current page and returns the value for one cell.
/// </summary>
/// <param name="r">The extraction rule to apply.</param>
/// <returns>
/// The extraction result. The time, constant, URL and response-header shortcuts are returned
/// as-is; otherwise the text cut from the page HTML is post-processed (optional regex matching,
/// replacements, HTML-mark filtering) and stripped of spaces, tabs and line breaks.
/// </returns>
private string ExtractionColumn(ExtractionRule r)
{
    // Use the extraction time as the result.
    if (r.TimeAsResult)
    {
        return DateTime.Now.ToString();
    }

    // Use the constant value as the result. When Static is set, the constant is
    // a regular expression instead (handled further down), so it is not returned here.
    if (r.ConstantAsResult && !r.Static)
    {
        return r.ConstantValue;
    }

    // Use the URL of the current response as the result.
    if (r.UrlAsResult)
    {
        return _response.ResponseUri.AbsoluteUri;
    }

    // Use a response header as the result.
    if (r.ResponseHeaderAsResult)
    {
        return _response.Headers[r.ResponseHeaderName];
    }

    // Disabled: use the POST parameters as the result.
    // if (extractionRule.PostParametersAsResult) return this.HttpHelper.WebRequest.GetRequestStream();
    // Disabled: use the link text as the result.
    // if (extractionRule.LinkTextAsResult) return "";

    // Cut the content out of the page HTML using the rule's preceding/following flags.
    string result = Smart.Utility.StringHelper.SubString(_htmlText, r.PreviousFlag, r.FollowingFlag);

    // When the static rule is selected, the constant value is treated as a regular
    // expression and the result is the concatenation of all of its matches.
    if (r.Static)
    {
        MatchCollection matches = Regex.Matches(result, r.ConstantValue);

        // A StringBuilder avoids re-copying the accumulated text for every match.
        System.Text.StringBuilder joined = new System.Text.StringBuilder();
        foreach (Match m in matches)
        {
            joined.Append(m.Value);
        }

        result = joined.ToString();
    }

    // Apply the rule's replacements to the extracted text.
    result = ResultReplace(result, r.Replacements);

    // Filter HTML marks, passing along the rule's reserved marks.
    result = FilterHtmlMark(result, r.ReservedHtmlMarks);

    // Strip unwanted characters: spaces, tabs and line breaks.
    result = result.Replace(" ", "");
    result = result.Replace("\t", "");
    result = result.Replace("\n", "");
    result = result.Replace("\r", "");

    // Trim any other leading/trailing white-space characters that remain.
    result = result.Trim();

    return result;
}