/// <summary>
/// Processes the URLs queued in <c>UrlToDo</c> one at a time until the queue is empty
/// or the crawler status becomes <c>onPause</c> or <c>onStop</c>.
/// </summary>
/// <remarks>
/// For each URL the page is downloaded with <c>GetHtmlDocument</c>; on success it is
/// passed to <c>AnalizePage</c> and its absolute URI is appended to <c>UrlDone</c>,
/// otherwise the URL is appended to <c>UrlError</c>. The URL is removed from the head
/// of the queue whether or not processing succeeded, so a failing URL can never be
/// retried forever.
/// The method is <c>async void</c> (kept for compatibility with existing callers), so
/// callers cannot await it or observe exceptions that escape it.
/// </remarks>
private async void StartCrawlerAsync()
{
    while ((UrlToDo.Count != 0)
        && (urlStatus.Status != Models.EnumStatus.onPause)
        && (urlStatus.Status != Models.EnumStatus.onStop))
    {
        try
        {
            CurrentUrl = UrlToDo[0];
            // Notify with the property name, not with the URL value itself.
            NotifyPropertyChanged("CurrentUrl");

            HtmlResponseData replay = await GetHtmlDocument(CurrentUrl);
            if (replay != null)
            {
                AnalizePage(replay);
                UrlDone.Add(replay.AbsoluteUri);
                NotifyPropertyChanged("UrlDone");
            }
            else
            {
                // A null response means the download failed.
                UrlError.Add(CurrentUrl);
                NotifyPropertyChanged("UrlError");
            }
        }
        catch (Exception)
        {
            UrlError.Add(CurrentUrl);
            NotifyPropertyChanged("UrlError");
        }

        // Always dequeue the URL that was just handled, including after a failure;
        // leaving it at the head would make the loop spin on the same URL forever.
        // New links are appended at the end of the list, so index 0 is still the
        // current URL. The count guard covers the queue being emptied while the
        // download was awaited.
        if (UrlToDo.Count != 0)
        {
            UrlToDo.RemoveAt(0);
        }
        NotifyPropertyChanged("UrlToDo");
    }

    if (UrlToDo.Count == 0)
    {
        // Nothing left to crawl.
        urlStatus.Status = Models.EnumStatus.finish;
        CurrentUrl = "";
    }
    else
    {
        // The loop ended because of a pause/stop request while URLs remain queued.
        // NOTE(review): this goes through UrlStatus while the rest of the method uses
        // urlStatus — presumably a property and its backing field; confirm.
        UrlStatus.Status = Models.EnumStatus.onStartup;
    }

    NotifyPropertyChanged("UrlStatus");
}
/// <summary>
/// Collects the <c>href</c> of every anchor in the downloaded page and appends each
/// one accepted by <c>IsValidLink</c> to <c>UrlToDo</c> (lower-cased and trimmed).
/// </summary>
/// <param name="replay">The downloaded page whose <c>document</c> is scanned.</param>
/// <returns>
/// <c>true</c> when the page contained at least one anchor with an <c>href</c> and the
/// scan completed; <c>false</c> when no such anchor exists or any exception occurred.
/// </returns>
private bool AnalizePage(HtmlResponseData replay)
{
    try
    {
        HtmlNodeCollection anchors = replay.document.DocumentNode.SelectNodes("//a[@href]");

        // HtmlAgilityPack's SelectNodes can return null rather than an empty
        // collection when nothing matches, so test for null explicitly instead of
        // relying on the catch-all below to swallow a NullReferenceException.
        if (anchors == null || anchors.Count == 0)
        {
            return false;
        }

        foreach (HtmlNode link in anchors)
        {
            HtmlAttribute att;
            try
            {
                att = link.Attributes["href"];
            }
            catch (Exception)
            {
                // Best effort: skip an anchor whose attribute cannot be read
                // instead of aborting the whole page.
                continue;
            }

            if (att == null)
            {
                continue;
            }

            // A null result from IsValidLink means the link is not to be crawled.
            string linkToAdd = IsValidLink(att.Value);
            if (linkToAdd != null)
            {
                UrlToDo.Add(linkToAdd.ToLower().Trim());
                // The collection that just changed is UrlToDo, not UrlDone.
                NotifyPropertyChanged("UrlToDo");
            }
        }

        // NOTE(review): called without a property name, as in the original; what this
        // notifies depends on NotifyPropertyChanged's default argument — confirm.
        NotifyPropertyChanged();
    }
    catch (Exception)
    {
        // Any unexpected failure (e.g. a null document) is reported as "not analysed".
        return false;
    }

    return true;
}
/// <summary>
/// Downloads <paramref name="url"/> and parses the response body into an
/// <c>HtmlDocument</c>.
/// </summary>
/// <param name="url">The address to request.</param>
/// <returns>
/// An <c>HtmlResponseData</c> holding the parsed document and the trimmed, lower-cased
/// absolute URI the response actually came from, or <c>null</c> when the request could
/// not be created, the download failed, or the body could not be loaded.
/// </returns>
private async Task<HtmlResponseData> GetHtmlDocument(string url)
{
    try
    {
        // Created inside the try so that a malformed or non-HTTP URL yields null like
        // every other failure instead of throwing to the caller.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        // Dispose the response and its stream: an undisposed WebResponse keeps its
        // connection open, and the per-host connection limit would eventually stall
        // the crawl.
        using (WebResponse myResponse = await request.GetResponseAsync())
        using (var responseStream = myResponse.GetResponseStream())
        {
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.OptionFixNestedTags = true;
            htmlDoc.Load(responseStream);

            return new HtmlResponseData()
            {
                // ResponseUri is the URI that actually answered the request.
                AbsoluteUri = myResponse.ResponseUri.AbsoluteUri.Trim().ToLower(),
                document = htmlDoc
            };
        }
    }
    catch (Exception err)
    {
        // Best effort by design: callers treat null as "download failed". Leave a
        // trace of the reason in debug builds instead of discarding it silently.
        System.Diagnostics.Debug.WriteLine("GetHtmlDocument failed for '" + url + "': " + err.Message);
        return null;
    }
}