Ejemplo n.º 1
0
        private SpiderSetting spiderSetting; // Associated Spider configuration

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Builds a request context from an HTTP response: copies the response headers and
        /// derives the MIME type, charset, content type and text encoding from the
        /// Content-Type header (for example "text/html;charset=gb2312").
        /// </summary>
        /// <param name="setting">Spider settings associated with this request.</param>
        /// <param name="url">The requested URL.</param>
        /// <param name="response">HTTP response whose headers are copied and inspected.</param>
        internal RequestContext(SpiderSetting setting, Url url, HttpWebResponse response)
        {
            this.spiderSetting = setting;
            this.requestUrl = url;
            this.headers = new NameValueCollection(response.Headers);

            // Read the "mime" and "charset" entries parsed from the Content-Type header;
            // an absent entry is stored as an empty string.
            StringDictionary items = Utils.DetectContentTypeHeader(response.Headers[HttpResponseHeader.ContentType]);
            this.mime = items["mime"] ?? "";
            this.charset = items["charset"] ?? "";

            // MIME type names are case-insensitive, so compare ordinally while ignoring case.
            bool isText =
                this.mime.StartsWith("text/", System.StringComparison.OrdinalIgnoreCase)
                || string.Equals(this.mime, "application/x-javascript", System.StringComparison.OrdinalIgnoreCase);
            this.contentType = isText ? Content.ContentType.Text : Content.ContentType.Binary;

            // Start from the default encoding and override it only when the advertised
            // charset is one the runtime actually knows.
            this.contentEncoding = Encoding.Default;
            if (this.charset.Length > 0)
            {
                try
                {
                    this.contentEncoding = Encoding.GetEncoding(this.charset);
                }
                catch (System.ArgumentException)
                {
                    // Encoding.GetEncoding rejects unknown charset names; keep the default
                    // encoding instead of failing construction over a bad header value.
                }
            }
        }
Ejemplo n.º 2
0
        private NameValueCollection headers;        // HTTP response header collection

        /// <summary>
        /// Initializes the context for one request from its HTTP response. The response headers
        /// are copied, and the Content-Type header (e.g. "text/html;charset=gb2312") is used to
        /// fill in the MIME type, charset, content type and content encoding.
        /// </summary>
        /// <param name="setting">Spider settings associated with this request.</param>
        /// <param name="url">The requested URL.</param>
        /// <param name="response">HTTP response whose headers are copied and inspected.</param>
        internal RequestContext(SpiderSetting setting, Url url, HttpWebResponse response)
        {
            this.spiderSetting   = setting;
            this.requestUrl      = url;
            this.headers         = new NameValueCollection(response.Headers);

            // Read the "mime" and "charset" entries parsed from the Content-Type header;
            // an absent entry is stored as an empty string.
            StringDictionary items = Utils.DetectContentTypeHeader(response.Headers[HttpResponseHeader.ContentType]);

            this.mime    = items["mime"] ?? "";
            this.charset = items["charset"] ?? "";

            // MIME type names are case-insensitive, so compare ordinally while ignoring case.
            if (this.mime.StartsWith("text/", System.StringComparison.OrdinalIgnoreCase)
                || string.Equals(this.mime, "application/x-javascript", System.StringComparison.OrdinalIgnoreCase))
            {
                this.contentType = Content.ContentType.Text;
            }
            else
            {
                this.contentType = Content.ContentType.Binary;
            }

            // Default encoding unless the response names a charset the runtime recognises.
            this.contentEncoding = Encoding.Default;

            if (this.charset.Length > 0)
            {
                try
                {
                    this.contentEncoding = Encoding.GetEncoding(this.charset);
                }
                catch (System.ArgumentException)
                {
                    // Encoding.GetEncoding rejects unknown charset names; keep the default
                    // encoding instead of failing construction over a bad header value.
                }
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Deserialization constructor: restores each field from the entry stored in
 /// <paramref name="info"/> under that field's name.
 /// </summary>
 /// <param name="info">Serialized data the field values are read from.</param>
 /// <param name="context">Streaming context; not used by this constructor.</param>
 private RequestContext(SerializationInfo info, StreamingContext context)
 {
     object storedSetting = info.GetValue("spiderSetting", typeof(SpiderSetting));
     spiderSetting = storedSetting as SpiderSetting;

     object storedUrl = info.GetValue("requestUrl", typeof(Url));
     requestUrl = storedUrl as Url;

     mime = info.GetString("mime");

     object storedContentType = info.GetValue("contentType", typeof(Content.ContentType));
     contentType = (Content.ContentType)storedContentType;

     object storedEncoding = info.GetValue("contentEncoding", typeof(Encoding));
     contentEncoding = storedEncoding as Encoding;

     charset = info.GetString("charset");

     object storedHeaders = info.GetValue("headers", typeof(NameValueCollection));
     headers = storedHeaders as NameValueCollection;
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Rebuilds a request context from serialized data. Every field is read back from
 /// <paramref name="info"/> using the field name as the entry key.
 /// </summary>
 /// <param name="info">Serialized data the field values are read from.</param>
 /// <param name="context">Streaming context; not used by this constructor.</param>
 private RequestContext(SerializationInfo info, StreamingContext context)
 {
     spiderSetting =
         info.GetValue("spiderSetting", typeof(SpiderSetting)) as SpiderSetting;
     requestUrl =
         info.GetValue("requestUrl", typeof(Url)) as Url;
     mime =
         info.GetString("mime");
     contentType =
         (Content.ContentType)info.GetValue("contentType", typeof(Content.ContentType));
     contentEncoding =
         info.GetValue("contentEncoding", typeof(Encoding)) as Encoding;
     charset =
         info.GetString("charset");
     headers =
         info.GetValue("headers", typeof(NameValueCollection)) as NameValueCollection;
 }
        /// <summary>
        /// Loads the configuration file named by the <c>fileName</c> field as an XML document.
        /// </summary>
        /// <remarks>
        /// The body shown here only loads the document; nothing is copied into
        /// <paramref name="setting"/> by this code.
        /// </remarks>
        /// <param name="setting">SpiderSetting instance the configuration is intended for.</param>
        /// <exception cref="FileNotFoundException">
        /// The path in <c>fileName</c> does not refer to an existing file.
        /// </exception>
        public void ReadSetting(SpiderSetting setting)
        {
            // File.Exists is false both for a missing path and for a directory,
            // so either case is rejected before the XML load is attempted.
            if (!File.Exists(this.fileName))
            {
                throw new FileNotFoundException(this.fileName + " not be found!", this.fileName);
            }

            XmlDocument doc = new XmlDocument();
            doc.Load(this.fileName);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Reads the configuration file whose path is held in the <c>fileName</c> field by
        /// loading it as an XML document.
        /// </summary>
        /// <remarks>
        /// The body shown here only loads the document; nothing is copied into
        /// <paramref name="setting"/> by this code.
        /// </remarks>
        /// <param name="setting">SpiderSetting instance the configuration is intended for.</param>
        /// <exception cref="FileNotFoundException">
        /// The path in <c>fileName</c> does not refer to an existing file.
        /// </exception>
        public void ReadSetting(SpiderSetting setting)
        {
            // Reject a missing file (File.Exists also returns false for a directory path)
            // so the caller gets FileNotFoundException rather than a failure inside Load.
            if (!File.Exists(this.fileName))
            {
                throw new FileNotFoundException(this.fileName + " not be found!", this.fileName);
            }

            XmlDocument doc = new XmlDocument();

            doc.Load(this.fileName);
        }
Ejemplo n.º 7
0
        private UrlQueue urlQueue; // URL queue

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Creates a spider from the given settings and URL manager. The parameterless
        /// constructor runs first; the two arguments are then stored.
        /// </summary>
        /// <param name="st">Settings stored in <c>settings</c>.</param>
        /// <param name="urlMgr">URL manager stored in <c>urlManager</c>.</param>
        public SpiderBase(SpiderSetting st, UrlManager urlMgr) : this()
        {
            settings = st;
            urlManager = urlMgr;
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Writes the configuration information to a file.
 /// </summary>
 /// <remarks>
 /// The body is empty in this source, so calling the method currently writes nothing.
 /// </remarks>
 /// <param name="setting">SpiderSetting instance to be written (unused by the empty body).</param>
 public void WriteSetting(SpiderSetting setting)
 {
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Creates a LibaSpider: forwards both arguments to the base-class constructor and
 /// then sets <c>maxMemoSize</c> to 0.
 /// </summary>
 /// <param name="st">Spider settings passed on to the base constructor.</param>
 /// <param name="m">URL manager passed on to the base constructor.</param>
 public LibaSpider(SpiderSetting st, UrlManager m)
     : base(st, m)
 {
     maxMemoSize = 0;
 }
 /// <summary>
 /// Writes the configuration information to a file.
 /// </summary>
 /// <remarks>
 /// No implementation is present here: the body is empty and nothing is written.
 /// </remarks>
 /// <param name="setting">SpiderSetting instance to be written (unused by the empty body).</param>
 public void WriteSetting(SpiderSetting setting)
 {
 }
Ejemplo n.º 11
0
 /// <summary>
 /// Constructs the spider base with its settings and URL manager, after chaining to
 /// the parameterless constructor.
 /// </summary>
 /// <param name="st">Settings stored in <c>settings</c>.</param>
 /// <param name="urlMgr">URL manager stored in <c>urlManager</c>.</param>
 public SpiderBase(SpiderSetting st, UrlManager urlMgr) : this()
 {
     settings = st;
     urlManager = urlMgr;
 }