Beispiel #1
0
 /// <summary>
 /// Event handler that records a loaded child page and signals <c>waitForPages</c>.
 /// Both steps run while holding the monitor of the <c>childPages</c> collection.
 /// </summary>
 /// <param name="sender">Event source; not used.</param>
 /// <param name="page">The page to append to <c>childPages</c>.</param>
 private void OnWebPageLoaded(object sender, ChildPage page)
 {
     // Capture the lock target once so that enter and exit always act on the
     // same object, exactly as a lock statement does.
     object gate      = childPages;
     bool   lockTaken = false;

     try
     {
         System.Threading.Monitor.Enter(gate, ref lockTaken);

         childPages.Add(page);
         waitForPages.Set();
     }
     finally
     {
         if (lockTaken)
         {
             System.Threading.Monitor.Exit(gate);
         }
     }
 }
Beispiel #2
0
        /// <summary>
        /// Creates one <see cref="ChildPage"/> per entry in <paramref name="links"/>, hands each
        /// to the web crawler's work queue, and blocks until <c>childPages</c> contains at least
        /// as many pages as <paramref name="links"/> has entries.
        /// </summary>
        /// <param name="links">
        /// Entries to load; each key and its associated <see cref="ObjectId"/> collection are
        /// passed to the <see cref="ChildPage"/> constructor.
        /// </param>
        private void LoadChildPages(MultiValueDictionary <string, ObjectId> links)
        {
            childPages   = new List <ChildPage>();
            waitForPages = new ManualResetEvent(false);

            foreach (KeyValuePair <string, IReadOnlyCollection <ObjectId> > pair in links)
            {
                ChildPage page = new ChildPage(pair.Key, DateTime.Now, pair.Value);

                page.WebPageLoaded += new EventHandler <ChildPage>(OnWebPageLoaded);
                page.LoadError     += new EventHandler(OnLoadError);
                WebCrawler.Instance.EnqueueWork(page);
            }

            while (true)
            {
                // The count check and the Reset must be atomic with respect to the
                // WebPageLoaded handler, which adds a page and signals the event while
                // holding this same lock. Otherwise a Set() arriving between the check
                // and the Reset() is wiped out and the wait below never returns.
                lock (childPages)
                {
                    // ">=" rather than "==" so an unexpected extra page cannot make
                    // this loop spin forever.
                    if (childPages.Count >= links.Count)
                    {
                        break;
                    }

                    waitForPages.Reset();
                }

                // Wait outside the lock so the handler can acquire it. A Set() issued
                // after the lock was released stays latched on a manual-reset event.
                waitForPages.WaitOne();
            }
        }
Beispiel #3
0
 /// <summary>
 /// Appends <paramref name="page"/> to the <c>childPages</c> collection.
 /// This method performs no synchronization of its own.
 /// </summary>
 /// <param name="page">The child page to add.</param>
 public void AddChildPage(ChildPage page)
 {
     childPages.Add(page);
 }
Beispiel #4
0
        /// <summary>
        /// Completion callback for an asynchronous <see cref="HttpWebRequest"/>. Ends the
        /// request and, when a response was obtained, passes it to <c>HandleResponse</c>.
        /// </summary>
        /// <remarks>
        /// On a <see cref="WebException"/> the failure is logged and reported (via
        /// <c>jobStatus</c> for an <c>HtmlRecord</c>, via the load-error event for a
        /// <see cref="ChildPage"/>). Then, while <c>waitTime</c> is below
        /// <c>WebCrawler.TimeoutPeriod</c>:
        /// <list type="bullet">
        /// <item>HTTP 403 / 400: the status is stored in <c>serverResponse</c> and
        /// <c>KillProcess</c> is called;</item>
        /// <item>anything else: the wait time is raised by 10000 and this item is re-queued.</item>
        /// </list>
        /// Once the timeout period is reached, a <see cref="ChildPage"/> raises its loaded
        /// event so that whoever is waiting on it can continue. Any other exception is only
        /// written to the debug output.
        /// </remarks>
        /// <param name="webRequest">
        /// Async result whose <c>AsyncState</c> is the originating <see cref="HttpWebRequest"/>.
        /// </param>
        private void GetResponse(IAsyncResult webRequest)
        {
            HttpWebRequest  request  = (HttpWebRequest)webRequest.AsyncState;
            HttpWebResponse response = null;

            try
            {
                response = (HttpWebResponse)request.EndGetResponse(webRequest);
                System.Diagnostics.Debug.Print("\tLoading: " + domain.AbsoluteUri);
            }
            catch (WebException webEx)
            {
                System.Diagnostics.Debug.Print("\t[" + domain.AbsoluteUri + "]");
                System.Diagnostics.Debug.Print("\t" + webEx.ToString());

                // Null when this instance is not a ChildPage; every use below is guarded so
                // an unexpected subtype cannot throw from inside this catch block.
                ChildPage childPage = this as ChildPage;

                if (this is HtmlRecord)
                {
                    jobStatus = JobStatus.ErrorRequesting;
                }
                else if (childPage != null)
                {
                    childPage.InvokeLoadErrorEvent();
                }

                // Read what is needed from the error response, then dispose it right away:
                // a response attached to a WebException holds its connection until closed.
                HttpStatusCode? rejection = null;
                using (WebResponse errorResponse = webEx.Response)
                {
                    HttpWebResponse httpError = errorResponse as HttpWebResponse;
                    if (httpError != null)
                    {
                        HttpStatusCode statusCode = httpError.StatusCode;
                        if (statusCode == HttpStatusCode.Forbidden ||
                            statusCode == HttpStatusCode.BadRequest)
                        {
                            rejection = statusCode;
                        }
                    }
                }

                if (waitTime < WebCrawler.TimeoutPeriod)
                {
                    if (rejection.HasValue)
                    {
                        // The server refused the request outright; retrying is pointless.
                        serverResponse = rejection.Value;
                        KillProcess();
                    }
                    else
                    {
                        // Back off and try again later.
                        SetWaitTime(waitTime + 10000);
                        WebCrawler.Instance.EnqueueWork(this);
                    }
                }
                else if (childPage != null)
                {
                    // Allow parent thread to continue, disregard this web page
                    childPage.InvokeLoadedEvent();
                }
            }
            catch (Exception ex)
            {
                System.Diagnostics.Debug.WriteLine(ex.ToString());
            }
            finally
            {
                if (response != null)
                {
                    HandleResponse(response);
                }
            }
        }