コード例 #1
0
        /// <summary>
        /// Wait callback for a pending request. When the wait timed out and
        /// <paramref name="state"/> is a <c>RequestState</c>, the request is aborted,
        /// the timeout is logged and written to the database, and the request slot
        /// is released and handed to <c>RequestResource</c> again.
        /// </summary>
        /// <param name="state">Expected to be the <c>RequestState</c> of the pending request; anything else is ignored.</param>
        /// <param name="timedOut">True when the wait expired; false means nothing is done here.</param>
        private void TimeoutCallback(object state, bool timedOut)
        {
            if (!timedOut)
            {
                return;
            }

            RequestState pending = state as RequestState;
            if (pending == null)
            {
                return;
            }

            // NOTE(review): aborting the request presumably also drives the response
            // callback into its WebException path, which frees and refills the same
            // slot - confirm that the slot is not dispatched twice per timeout.
            pending.Req.Abort();

            _log.Error("TimeoutCallback: url={0},HttpStatus={1}.", pending.Url, "Timeout");

            UrlInfo timeoutRecord = new UrlInfo(pending.Url, "TimeoutCallback:TimeOut");
            _dbm.write_to_db(timeoutRecord);

            // Mark the slot idle, then let it pick up its next URL.
            _reqsBusy[pending.Index] = false;
            RequestResource(pending.Index);
        }
コード例 #2
0
        /// <summary>
        /// Starts an asynchronous HTTP request for the next URL assigned to the
        /// request slot <paramref name="index"/>. If starting the request fails, the
        /// failure is logged and written to the database, the slot is marked idle,
        /// and the next URL is tried.
        /// </summary>
        /// <param name="index">Index of the request slot (position in <c>_reqsBusy</c>).</param>
        private void RequestResource(int index)
        {
            // Iterate instead of recursing: a long run of URLs that fail to start
            // would otherwise add one stack frame per failure and could end in a
            // StackOverflowException, which cannot be caught.
            while (true)
            {
                var urlAndType = GetUrlAndType(index);

                if (urlAndType == null)
                {
                    // Nothing to fetch for this slot.
                    return;
                }

                string  url     = urlAndType.Item1;
                UrlType urltype = urlAndType.Item2;

                try
                {
                    _log.Info("Request {0} Time:{1}.", url, DateTime.Now.ToString());

                    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
                    req.Method          = _method;    // request method
                    req.Accept          = _accept;    // accepted content types
                    req.CookieContainer = GetCookie();
                    req.UserAgent       = _userAgent; // user agent
                    RequestState rs     = new RequestState(req, url, urltype, index);
                    var          result = req.BeginGetResponse(new AsyncCallback(ReceivedResource), rs);

                    // One-shot wait on the request's handle; TimeoutCallback receives
                    // timedOut == true if it is not signalled within _maxTime.
                    // NOTE(review): the returned RegisteredWaitHandle is never
                    // unregistered - consider keeping it on the request state.
                    ThreadPool.RegisterWaitForSingleObject(result.AsyncWaitHandle,
                                                           TimeoutCallback, rs, _maxTime, true);
                }
                catch (WebException we)
                {
                    _log.Error("RequestResource: url={0},HttpStatus={1}, Exception:{2}.", url, we.Status, we.Message);
                    _log.Error(we.StackTrace);

                    UrlInfo urlInfo = new UrlInfo(url, we.Status.ToString());
                    _dbm.write_to_db(urlInfo);

                    _reqsBusy[index] = false;
                }
                catch (Exception e)
                {
                    // WebRequest.Create and the HttpWebRequest cast fail with
                    // non-WebException types for malformed or non-HTTP URLs. Handle
                    // them like the response callbacks do, so one bad URL neither
                    // escapes the calling callback nor leaves the slot stuck.
                    _log.Error("RequestResource: url={0}, Exception:{1}.", url, e.Message);
                    _log.Error(e.StackTrace);

                    UrlInfo urlInfo = new UrlInfo(url, e.Message);
                    _dbm.write_to_db(urlInfo);

                    _reqsBusy[index] = false;
                }

                if (_reqsBusy[index])
                {
                    // A request is in flight for this slot; its callbacks continue from here.
                    return;
                }
            }
        }
コード例 #3
0
        /// <summary>
        /// Normalises <paramref name="url"/> (trims it, cuts it at the first space and
        /// strips trailing '/') and, when <c>UrlAvailable</c> accepts it and it contains
        /// "book.douban.com/tag" or "book.douban.com/subject", adds it to the pending
        /// book or tag collection and records it through <c>_dbm.write_to_db</c>.
        /// </summary>
        /// <param name="url">Raw URL text.</param>
        /// <param name="urlType">Kind of URL; values at or above <c>UrlType.UrlTypeMax</c> are ignored.</param>
        private void AddUrls(string url, UrlType urlType)
        {
            if (urlType >= UrlType.UrlTypeMax)
            {
                return;
            }

            // Keep only the text before the first space, without trailing slashes.
            string candidate = url.Trim();
            int    spaceAt   = candidate.IndexOf(' ');
            if (spaceAt > 0)
            {
                candidate = candidate.Substring(0, spaceAt);
            }
            candidate = candidate.TrimEnd('/');

            if (!UrlAvailable(candidate))
            {
                return;
            }

            bool isBookSiteUrl = candidate.Contains("book.douban.com/tag")
                                 || candidate.Contains("book.douban.com/subject");
            if (!isBookSiteUrl)
            {
                _log.Debug("Try add url failed:{0}.", candidate);
                return;
            }

            // Single-book pages and all other accepted pages are queued separately.
            if (urlType != UrlType.OneBookUrl)
            {
                _urlsUnloadTags.Add(candidate, urlType);
            }
            else
            {
                _urlsUnloadBooks.Add(candidate, urlType);
            }

            _dbm.write_to_db(new UrlInfo(candidate, urlType));
        }
コード例 #4
0
        /// <summary>
        /// Queues <paramref name="urlInfo"/> under <c>_urlLocker</c>: into the update
        /// cache when its <c>_WebUrl</c> is already contained in <c>_LoadedWebUrl</c>,
        /// otherwise into the insert cache. Whenever a cache holds at least
        /// <c>_cache_cnt</c> entries, the matching <c>insertWebrlToDb</c> /
        /// <c>updateWeburlToDb</c> routine is called.
        /// </summary>
        /// <param name="urlInfo">The URL record to queue.</param>
        public void write_to_db(UrlInfo urlInfo)
        {
            lock (_urlLocker)
            {
                bool alreadyLoaded = _LoadedWebUrl.Contains(urlInfo._WebUrl);

                if (!alreadyLoaded)
                {
                    _insertUrl_cache.Add(urlInfo);
                }
                else
                {
                    _updateUrl_cache.Add(urlInfo);
                }

                // Both thresholds are checked on every call, inserts first.
                bool insertCacheFull = _insertUrl_cache.Count >= _cache_cnt;
                if (insertCacheFull)
                {
                    insertWebrlToDb();
                }

                bool updateCacheFull = _updateUrl_cache.Count >= _cache_cnt;
                if (updateCacheFull)
                {
                    updateWeburlToDb();
                }
            }
        }
コード例 #5
0
        /// <summary>
        /// Read callback for the response stream. Each non-empty read is decoded with
        /// <c>_encoding</c>, appended to <c>rs.Html</c>, and followed by another
        /// asynchronous read. On end of stream the accumulated HTML is passed through
        /// <c>SgmlReader</c> into indented XML and handed to <c>SaveContents</c>.
        /// On the terminal path (success or failure) the response stream is closed,
        /// the status is written to the database, <c>ContentsSaved</c> is raised, and
        /// the request slot is released and handed to <c>RequestResource</c>.
        /// </summary>
        /// <param name="ar">Async result whose <c>AsyncState</c> is the <c>RequestState</c> of the download.</param>
        private void ReceivedData(IAsyncResult ar)
        {
            RequestState   rs        = (RequestState)ar.AsyncState;
            HttpWebRequest req       = rs.Req;
            Stream         resStream = rs.ResStream;
            string         url       = rs.Url;
            UrlType        urltype   = rs.WebUrlType;
            int            index     = rs.Index;
            string         HttpStatus;

            try
            {
                int read = resStream.EndRead(ar);
                if (_stop)
                {
                    // Stopping: drop the transfer; the slot is not refilled.
                    rs.ResStream.Close();
                    req.Abort();
                    return;
                }
                if (read > 0)
                {
                    // NOTE(review): every buffer is decoded on its own, so a multi-byte
                    // character that straddles two reads would be decoded incorrectly
                    // if _encoding is a multi-byte encoding - confirm and consider a
                    // stateful Decoder kept on the request state.
                    MemoryStream ms     = new MemoryStream(rs.Data, 0, read);
                    StreamReader reader = new StreamReader(ms, _encoding);
                    rs.Html.Append(reader.ReadToEnd());

                    // More data may follow; this callback runs again for the next chunk.
                    resStream.BeginRead(rs.Data, 0, rs.BufferSize,
                                        new AsyncCallback(ReceivedData), rs);
                    return;
                }

                // read == 0: end of stream, convert the collected HTML.
                string     html       = rs.Html.ToString();
                SgmlReader sgmlReader = new SgmlReader();
                sgmlReader.DocType     = "HTML";
                sgmlReader.InputStream = new StringReader(html);
                StringWriter  sw     = new StringWriter();
                XmlTextWriter writer = new XmlTextWriter(sw);
                writer.Formatting = Formatting.Indented;
                while (sgmlReader.Read())
                {
                    if (sgmlReader.NodeType != XmlNodeType.Whitespace)
                    {
                        writer.WriteNode(sgmlReader, true);
                    }
                }

                SaveContents(sw.ToString(), url, urltype);
                HttpStatus = WebExceptionStatus.Success.ToString();
            }
            catch (WebException we)
            {
                _log.Error("ReceivedData: url = {0}, HttpStatus = {1}, Exception:{2}.", url, we.Status, we.Message);
                _log.Error(we.StackTrace);

                HttpStatus = we.Status.ToString();
            }
            catch (Exception e)
            {
                _log.Error("ReceivedData: url = {0}, Exception:{1}.", url, e.Message);
                _log.Error(e.StackTrace);

                HttpStatus = e.Message;
            }

            // Only the terminal path reaches this point (the "stop" and "read more"
            // paths returned above). Close the response stream so the underlying
            // HTTP connection is released; otherwise the per-host connection limit
            // is eventually exhausted and later requests stall.
            if (resStream != null)
            {
                try
                {
                    resStream.Close();
                }
                catch (Exception closeError)
                {
                    _log.Error("ReceivedData: url = {0}, Exception:{1}.", url, closeError.Message);
                }
            }

            UrlInfo urlInfo = new UrlInfo(url, HttpStatus);

            _dbm.write_to_db(urlInfo);

            // Copy the delegate so an unsubscribe between the null check and the
            // call cannot cause a NullReferenceException.
            var contentsSaved = ContentsSaved;
            if (contentsSaved != null)
            {
                contentsSaved(HttpStatus, url);
            }

            _reqsBusy[index] = false;
            RequestResource(index);
        }
コード例 #6
0
        /// <summary>
        /// Response callback for <c>BeginGetResponse</c>. For an OK response the
        /// response stream is stored on the request state and the first asynchronous
        /// read is started with <c>ReceivedData</c> as its callback. For any other
        /// response the response is closed, the request aborted and the slot marked
        /// idle. On an exception the failure is logged, any response still held is
        /// closed, the status is written to the database, <c>ContentsSaved</c> is
        /// raised and the slot is marked idle. An idle slot is handed to
        /// <c>RequestResource</c> before returning.
        /// </summary>
        /// <param name="ar">Async result whose <c>AsyncState</c> is the <c>RequestState</c> of the request.</param>
        private void ReceivedResource(IAsyncResult ar)
        {
            RequestState    rs  = (RequestState)ar.AsyncState;
            HttpWebRequest  req = rs.Req;
            string          url = rs.Url;
            HttpWebResponse res = null;

            bool        failed        = false;
            string      failureStatus = null;
            WebResponse errorResponse = null;

            try
            {
                res = (HttpWebResponse)req.EndGetResponse(ar);
                if (_stop)
                {
                    // Stopping: drop the response; the slot is not refilled.
                    res.Close();
                    req.Abort();
                    return;
                }
                if (res != null && res.StatusCode == HttpStatusCode.OK)
                {
                    Stream resStream = res.GetResponseStream();
                    rs.ResStream = resStream;
                    resStream.BeginRead(rs.Data, 0, rs.BufferSize,
                                        new AsyncCallback(ReceivedData), rs);
                }
                else
                {
                    // This branch is also taken when res is null, so guard the Close.
                    if (res != null)
                    {
                        res.Close();
                    }
                    rs.Req.Abort();
                    _reqsBusy[rs.Index] = false;
                }
            }
            catch (WebException we)
            {
                _log.Error("ReceivedResource: url = {0}, HttpStatus = {1}, Exception:{2}.", url, we.Status, we.Message);
                _log.Error(we.StackTrace);

                failed        = true;
                failureStatus = we.Status.ToString();
                // A protocol error carries its own response, which holds a connection.
                errorResponse = we.Response;
            }
            catch (Exception e)
            {
                _log.Error("ReceivedResource: url = {0}, Exception:{1}.", url, e.Message);
                _log.Error(e.StackTrace);

                failed        = true;
                failureStatus = e.Message;
            }

            if (failed)
            {
                // Release every response still held (the one from EndGetResponse if a
                // later step threw, and the one attached to the WebException) so the
                // HTTP connections go back to the pool.
                foreach (WebResponse leftover in new WebResponse[] { res, errorResponse })
                {
                    if (leftover == null)
                    {
                        continue;
                    }
                    try
                    {
                        leftover.Close();
                    }
                    catch (Exception closeError)
                    {
                        _log.Error("ReceivedResource: url = {0}, Exception:{1}.", url, closeError.Message);
                    }
                }

                UrlInfo urlInfo = new UrlInfo(url, failureStatus);
                _dbm.write_to_db(urlInfo);

                // Copy the delegate so an unsubscribe between the null check and the
                // call cannot cause a NullReferenceException.
                var contentsSaved = ContentsSaved;
                if (contentsSaved != null)
                {
                    contentsSaved(failureStatus, url);
                }

                _reqsBusy[rs.Index] = false;
            }

            if (!_reqsBusy[rs.Index])
            {
                RequestResource(rs.Index);
            }
        }