/// <summary>
/// Handler for a child page's WebPageLoaded event: records the page in
/// <c>childPages</c> and signals <c>waitForPages</c>, all while holding the
/// lock on <c>childPages</c>.
/// </summary>
/// <param name="sender">Event source; not used here.</param>
/// <param name="page">The page that finished loading.</param>
private void OnWebPageLoaded(object sender, ChildPage page)
{
    lock (childPages)
    {
        // Look for an already-recorded page with the same url.
        bool urlAlreadyRecorded = false;
        foreach (ChildPage recorded in childPages)
        {
            if (recorded.url == page.url)
            {
                urlAlreadyRecorded = true;
                break;
            }
        }

        if (urlAlreadyRecorded)
        {
            // NOTE(review): emits only a blank line; looks like a debugging hook — confirm intent.
            Console.WriteLine();
        }

        // The page is added even when its url was already present.
        childPages.Add(page);

        // Wake any thread blocked on waitForPages.
        waitForPages.Set();
    }
}
// NOTE(review): disabled implementation of HandleResponse, kept as found.
/*protected virtual void HandleResponse(HttpWebResponse response)
 * {
 *     string htmlString = DecompressHtml(response);
 *
 *     jobStatus = JobStatus.HandlingResponse;
 *     if (htmlString != string.Empty)
 *     {
 *         if (results.Any(obj => obj.Value.GetType() == typeof(TextUpdate)))
 *         {
 *             foreach (TextUpdate textUpdate in results.Values)
 *             {
 *                 textUpdate.FilterByTags(htmlString);
 *             }
 *         }
 *
 *         if (results.Any(obj => obj.Value.GetType() == typeof(LinkFeed)))
 *         {
 *             MultiValueDictionary<string, ObjectId> links = new MultiValueDictionary<string, ObjectId>();
 *             foreach (LinkFeed feed in results.Values)
 *             {
 *                 HashSet<string> filteredLinks = feed.FilterByTags(htmlString);
 *                 foreach (string link in filteredLinks)
 *                 {
 *                     links.Add(link, feed.recordid);
 *                 }
 *             }
 *
 *             jobStatus = JobStatus.LoadingPages;
 *             LoadChildPages(links);
 *
 *             foreach (ChildPage page in childPages)
 *             {
 *                 foreach (ObjectId jobId in page.jobIds)
 *                 {
 *                     results[jobId].AddChildPage(page);
 *                 }
 *             }
 *
 *             jobStatus = JobStatus.RankingPages;
 *             foreach (LinkFeed feed in results.Values)
 *             {
 *                 feed.ProcessKeywordScores();
 *             }
 *         }
 *
 *         jobStatus = JobStatus.Finished;
 *         timeStamp = DateTime.UtcNow;
 *         WebCrawler.Instance.EnqueueResult(this);
 *     }
 * }*/

/// <summary>
/// Creates one <see cref="ChildPage"/> per key in <paramref name="links"/>, hands each
/// to the crawler's work queue, and blocks the calling thread until <c>childPages</c>
/// contains as many entries as <paramref name="links"/> has keys.
/// </summary>
/// <param name="links">
/// Links to load; each key is passed to the ChildPage constructor together with the
/// collection of ids stored under it.
/// </param>
private void LoadChildPages(MultiValueDictionary<string, ObjectId> links)
{
    // Fresh result list and wait handle for this batch.
    childPages = new List<ChildPage>();
    waitForPages = new ManualResetEvent(false);

    foreach (KeyValuePair<string, IReadOnlyCollection<ObjectId>> pair in links)
    {
        ChildPage page = new ChildPage(pair.Key, DateTime.Now, pair.Value);
        page.WebPageLoaded += new EventHandler<ChildPage>(OnWebPageLoaded);
        page.LoadError += new EventHandler(OnLoadError);
        WebCrawler.Instance.EnqueueWork(page);
    }

    while (true)
    {
        // OnWebPageLoaded adds a page and calls Set() while holding this same lock.
        // Checking the count and resetting the event under the lock means a Set()
        // can no longer slip in between the check and the Reset() and be erased,
        // which previously could leave WaitOne() blocked forever.
        lock (childPages)
        {
            if (childPages.Count == links.Count)
            {
                break;
            }

            waitForPages.Reset();
        }

        // NOTE(review): no timeout here; whether OnLoadError unblocks this wait
        // cannot be seen from this method — confirm.
        waitForPages.WaitOne();
    }
}
/// <summary>
/// Finishes an asynchronous web request. The <see cref="HttpWebRequest"/> is taken from
/// <paramref name="webRequest"/>'s AsyncState; a successful response is passed to
/// HandleResponse. When the request fails with a <see cref="WebException"/>, the failure
/// is written to the console, the failure is flagged on this instance, and the item is
/// either re-queued with a longer wait time or abandoned.
/// </summary>
/// <param name="webRequest">Async result whose AsyncState is the originating HttpWebRequest.</param>
/// <exception cref="Exception">
/// Any exception other than WebException is logged to the console and rethrown.
/// </exception>
private void GetResponse(IAsyncResult webRequest)
{
    HttpWebRequest request = (HttpWebRequest)webRequest.AsyncState;
    HttpWebResponse response = null;
    try
    {
        response = (HttpWebResponse)request.EndGetResponse(webRequest);
        //Console.WriteLine("\n\t\t\tLoading {0}", Domain.AbsoluteUri);
    }
    catch (WebException webEx)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine(url);
        Console.ForegroundColor = ConsoleColor.Gray;
        Console.WriteLine(webEx.ToString());

        // Null when this instance is not a ChildPage; checked before every use so a
        // third kind of record cannot cause a NullReferenceException inside this handler.
        ChildPage childPage = this as ChildPage;

        if (this is HtmlRecord)
        {
            jobStatus = JobStatus.ErrorRequesting;
        }
        else if (childPage != null)
        {
            childPage.InvokeLoadErrorEvent();
        }

        if (waitTime < WebCrawler.TimeoutPeriod)
        {
            // Retry unless the server answered with a status that retrying will not change.
            bool retry = true;

            HttpWebResponse resp = webEx.Response as HttpWebResponse;
            if (resp != null)
            {
                HttpStatusCode statuscode = resp.StatusCode;
                switch (statuscode)
                {
                    case HttpStatusCode.Forbidden:
                        // Remember the refusal and give up.
                        serverResponse = statuscode;
                        retry = false;
                        break;
                    //throw webEx;
                    case HttpStatusCode.BadRequest:
                        retry = false;
                        break;
                }
            }

            if (retry)
            {
                // NOTE(review): 10000 is presumably milliseconds — confirm against SetWaitTime.
                SetWaitTime(waitTime + 10000);
                WebCrawler.Instance.EnqueueWork(this);
            }
        }
        else if (childPage != null)
        {
            // Wait time has reached the timeout period: report the page as loaded
            // instead of queueing another attempt.
            childPage.InvokeLoadedEvent();
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
        // Bare rethrow keeps the original stack trace ("throw ex;" would reset it).
        throw;
    }
    finally
    {
        // response is only non-null when EndGetResponse succeeded.
        if (response != null)
        {
            HandleResponse(response);
        }
    }
}
/// <summary>
/// Appends <paramref name="page"/> to this instance's <c>childPages</c> collection.
/// </summary>
/// <param name="page">The child page to record.</param>
public void AddChildPage(ChildPage page)
{
    // No locking or duplicate check is performed here.
    childPages.Add(page);
}