private SpiderSetting spiderSetting; // Spider settings associated with this request

#endregion Fields

#region Constructors

/// <summary>
/// Initializes the context from an HTTP response: copies the response headers and derives
/// the MIME type, charset, content type and content encoding from the Content-Type header
/// (for example "text/html;charset=gb2312").
/// </summary>
/// <param name="setting">Spider settings associated with this request.</param>
/// <param name="url">The requested URL.</param>
/// <param name="response">The HTTP response whose headers are read.</param>
internal RequestContext(SpiderSetting setting, Url url, HttpWebResponse response)
{
    this.spiderSetting = setting;
    this.requestUrl = url;
    this.headers = new NameValueCollection(response.Headers);

    // Split the Content-Type header into its "mime" and "charset" parts.
    StringDictionary items = Utils.DetectContentTypeHeader(response.Headers[HttpResponseHeader.ContentType]);
    this.mime = items["mime"] ?? "";
    this.charset = items["charset"] ?? "";

    // Media type names are case-insensitive, so compare ordinally and ignore case
    // instead of relying on the culture-sensitive default of StartsWith.
    bool isText = this.mime.StartsWith("text/", StringComparison.OrdinalIgnoreCase)
        || string.Equals(this.mime, "application/x-javascript", StringComparison.OrdinalIgnoreCase);
    this.contentType = isText ? Content.ContentType.Text : Content.ContentType.Binary;

    this.contentEncoding = Encoding.Default;
    if (this.charset.Length != 0)
    {
        try
        {
            this.contentEncoding = Encoding.GetEncoding(this.charset);
        }
        catch (ArgumentException)
        {
            // The server announced a charset name this platform does not know.
            // Keep Encoding.Default rather than failing the whole request.
        }
    }
}
private NameValueCollection headers; // HTTP response header collection

/// <summary>
/// Initializes the context from an HTTP response: copies the response headers and derives
/// the MIME type, charset, content type and content encoding from the Content-Type header
/// (for example "text/html;charset=gb2312").
/// </summary>
/// <param name="setting">Spider settings associated with this request.</param>
/// <param name="url">The requested URL.</param>
/// <param name="response">The HTTP response whose headers are read.</param>
internal RequestContext(SpiderSetting setting, Url url, HttpWebResponse response)
{
    this.spiderSetting = setting;
    this.requestUrl = url;
    this.headers = new NameValueCollection(response.Headers);

    // Split the Content-Type header into its "mime" and "charset" parts.
    StringDictionary items = Utils.DetectContentTypeHeader(response.Headers[HttpResponseHeader.ContentType]);
    this.mime = items["mime"] ?? "";
    this.charset = items["charset"] ?? "";

    // Media type names are case-insensitive, so compare ordinally and ignore case
    // instead of relying on the culture-sensitive default of StartsWith.
    bool isText = this.mime.StartsWith("text/", StringComparison.OrdinalIgnoreCase)
        || string.Equals(this.mime, "application/x-javascript", StringComparison.OrdinalIgnoreCase);
    this.contentType = isText ? Content.ContentType.Text : Content.ContentType.Binary;

    this.contentEncoding = Encoding.Default;
    if (this.charset.Length != 0)
    {
        try
        {
            this.contentEncoding = Encoding.GetEncoding(this.charset);
        }
        catch (ArgumentException)
        {
            // The server announced a charset name this platform does not know.
            // Keep Encoding.Default rather than failing the whole request.
        }
    }
}
/// <summary>
/// Deserialization constructor: restores every field from the values stored in
/// <paramref name="info"/> under the field's own name.
/// </summary>
/// <param name="info">The SerializationInfo holding the serialized field values.</param>
/// <param name="context">The StreamingContext of the deserialization (not read here).</param>
private RequestContext(SerializationInfo info, StreamingContext context)
{
    spiderSetting = info.GetValue("spiderSetting", typeof(SpiderSetting)) as SpiderSetting;
    requestUrl = info.GetValue("requestUrl", typeof(Url)) as Url;
    mime = info.GetString("mime");
    contentType = (Content.ContentType)info.GetValue("contentType", typeof(Content.ContentType));
    contentEncoding = info.GetValue("contentEncoding", typeof(Encoding)) as Encoding;
    charset = info.GetString("charset");
    headers = info.GetValue("headers", typeof(NameValueCollection)) as NameValueCollection;
}
/// <summary>
/// Deserialization constructor: restores every field from the values stored in
/// <paramref name="info"/> under the field's own name.
/// </summary>
/// <param name="info">The SerializationInfo holding the serialized field values.</param>
/// <param name="context">The StreamingContext of the deserialization (not read here).</param>
private RequestContext(SerializationInfo info, StreamingContext context)
{
    spiderSetting = info.GetValue("spiderSetting", typeof(SpiderSetting)) as SpiderSetting;
    requestUrl = info.GetValue("requestUrl", typeof(Url)) as Url;
    mime = info.GetString("mime");
    contentType = (Content.ContentType)info.GetValue("contentType", typeof(Content.ContentType));
    contentEncoding = info.GetValue("contentEncoding", typeof(Encoding)) as Encoding;
    charset = info.GetString("charset");
    headers = info.GetValue("headers", typeof(NameValueCollection)) as NameValueCollection;
}
/// <summary>
/// Loads the configuration file named by <c>fileName</c> as an XML document.
/// NOTE(review): in the code visible here the loaded document is not yet applied to
/// <paramref name="setting"/>.
/// </summary>
/// <param name="setting">The SpiderSetting instance to initialize.</param>
/// <exception cref="FileNotFoundException">The configuration file does not exist.</exception>
public void ReadSetting(SpiderSetting setting)
{
    // The original tested Directory.Exists, which reported "not found" when a directory
    // of that name existed and let a genuinely missing file through. Check the file itself.
    if (!File.Exists(this.fileName))
    {
        throw new FileNotFoundException(this.fileName + " not be found!", this.fileName);
    }

    XmlDocument doc = new XmlDocument();
    doc.Load(this.fileName);
}
/// <summary>
/// Loads the configuration file named by <c>fileName</c> as an XML document.
/// NOTE(review): in the code visible here the loaded document is not yet applied to
/// <paramref name="setting"/>.
/// </summary>
/// <param name="setting">The SpiderSetting instance to initialize.</param>
/// <exception cref="FileNotFoundException">The configuration file does not exist.</exception>
public void ReadSetting(SpiderSetting setting)
{
    // The original tested Directory.Exists, which reported "not found" when a directory
    // of that name existed and let a genuinely missing file through. Check the file itself.
    if (!File.Exists(this.fileName))
    {
        throw new FileNotFoundException(this.fileName + " not be found!", this.fileName);
    }

    XmlDocument doc = new XmlDocument();
    doc.Load(this.fileName);
}
private UrlQueue urlQueue; // URL queue

#endregion Fields

#region Constructors

/// <summary>
/// Creates a spider: runs the parameterless constructor first, then stores the given
/// settings and URL manager.
/// </summary>
/// <param name="st">The SpiderSetting to use.</param>
/// <param name="urlMgr">The UrlManager to use.</param>
public SpiderBase(SpiderSetting st, UrlManager urlMgr)
    : this()
{
    settings = st;
    urlManager = urlMgr;
}
/// <summary>
/// Writes the configuration information to the file.
/// NOTE(review): the body is currently empty, so calling this method writes nothing.
/// </summary>
/// <param name="setting">The SpiderSetting instance (unused by the current empty body).</param>
public void WriteSetting(SpiderSetting setting) { }
/// <summary>
/// Creates a LibaSpider, forwarding both arguments to the base constructor and then
/// setting maxMemoSize to 0.
/// </summary>
/// <param name="st">The SpiderSetting passed on to the base constructor.</param>
/// <param name="m">The UrlManager passed on to the base constructor.</param>
public LibaSpider(SpiderSetting st, UrlManager m) : base(st, m) { maxMemoSize = 0; }
/// <summary>
/// Writes the configuration information to the file.
/// NOTE(review): the body is currently empty, so calling this method writes nothing.
/// </summary>
/// <param name="setting">The SpiderSetting instance (unused by the current empty body).</param>
public void WriteSetting(SpiderSetting setting) { }
/// <summary>
/// Creates a spider: runs the parameterless constructor first, then stores the given
/// settings and URL manager.
/// </summary>
/// <param name="st">The SpiderSetting to use.</param>
/// <param name="urlMgr">The UrlManager to use.</param>
public SpiderBase(SpiderSetting st, UrlManager urlMgr)
    : this()
{
    settings = st;
    urlManager = urlMgr;
}