/// <summary>
/// Starts from the original URL and keeps exploring links until no element of
/// the full URL list is still flagged as unexplored.
/// </summary>
public void WeaveWeb()
{
    Reset();

    // Seed the list with the root link built from the original URL.
    _fullUrlList.Add(new LinkElement() { url = _originalUrl.AbsoluteUri });

    // Each pass works on a materialized snapshot of the unexplored links, so the
    // batch handed to Parallel.ForEach is fixed for the duration of the pass.
    List<LinkElement> pending = _fullUrlList.Where(candidate => candidate.explored == false).ToList();
    while (pending.Count > 0)
    {
        Parallel.ForEach(pending, candidate => ExploreLink(candidate));

        // Re-query after the pass to pick up whatever is still unexplored.
        pending = _fullUrlList.Where(candidate => candidate.explored == false).ToList();
    }
}
/// <summary>
/// Flags a link as explored while holding the shared lock.
/// </summary>
/// <param name="linkElement">The link whose exploration has finished.</param>
private void MarkLinkAsExplored(LinkElement linkElement)
{
    lock (_lockbject)
    {
        // When Add reports that the element was not inserted, the flag is set on
        // the first stored element with the same url; otherwise it is set on the
        // element that was just inserted.
        LinkElement target = _fullUrlList.Add(linkElement)
            ? linkElement
            : _fullUrlList.First(stored => stored.url == linkElement.url);

        target.explored = true;
    }
}
/// <summary>
/// Starts from the original URL and keeps exploring links until no element of
/// the full URL list is still flagged as unexplored.
/// </summary>
public void WeaveWeb()
{
    Reset();

    // Seed the list with the root link built from the original URL.
    _fullUrlList.Add(new LinkElement() { url = _originalUrl.AbsoluteUri });

    // Each pass works on a materialized snapshot of the unexplored links, so the
    // batch handed to Parallel.ForEach is fixed for the duration of the pass.
    List<LinkElement> pending = _fullUrlList.Where(candidate => candidate.explored == false).ToList();
    while (pending.Count > 0)
    {
        Parallel.ForEach(pending, candidate => ExploreLink(candidate));

        // Re-query after the pass to pick up whatever is still unexplored.
        pending = _fullUrlList.Where(candidate => candidate.explored == false).ToList();
    }
}
/// <summary>
/// Processes only the page at the original URL: registers it as the root link,
/// downloads its content and hands that content to
/// GetCompleteLinksFromHtmlFragment. If the download throws, the URL is added
/// to the broken URL list and the method returns without processing anything.
/// </summary>
public void WeaveSinglePage()
{
    Reset();

    _fullUrlList.Add(new LinkElement() { url = _originalUrl.AbsoluteUri });

    string pageContent = string.Empty;
    try
    {
        // Blocks the calling thread until the download completes.
        pageContent = _httpClient.GetStringAsync(_originalUrl.AbsoluteUri).Result;
    }
    catch
    {
        // Any failure while fetching counts as a broken URL.
        _brokenUrlList.Add(_originalUrl.AbsoluteUri);
        return;
    }

    GetCompleteLinksFromHtmlFragment(pageContent);
}
/// <summary>
/// Explores one link. A link whose url matches the exploration filters is
/// flagged as explored without being downloaded. Otherwise its content is
/// downloaded, passed to GetCompleteLinksFromHtmlFragment, and the link is
/// marked as explored. A link that fails to download is recorded in the broken
/// URL list and is also marked as explored.
/// </summary>
/// <param name="linkElement">The link to explore.</param>
private void ExploreLink(LinkElement linkElement)
{
    if (MatchExplorationFilters(linkElement.url))
    {
        linkElement.explored = true;
        return;
    }

    string htmlFragment;
    try
    {
        // Blocks the calling thread until the download completes.
        htmlFragment = _httpClient.GetStringAsync(linkElement.url).Result;
    }
    catch
    {
        // NOTE(review): WeaveWeb calls this method from Parallel.ForEach, so this
        // Add can run concurrently - confirm _brokenUrlList tolerates that.
        _brokenUrlList.Add(linkElement.url);

        // A broken link must still be flagged as explored. WeaveWeb re-selects
        // every unexplored link on each pass, so leaving the flag unset would
        // retry this url forever and record it as broken again every time.
        MarkLinkAsExplored(linkElement);
        return;
    }

    GetCompleteLinksFromHtmlFragment(htmlFragment);
    MarkLinkAsExplored(linkElement);
}
/// <summary>
/// Flags a link as explored while holding the shared lock.
/// </summary>
/// <param name="linkElement">The link whose exploration has finished.</param>
private void MarkLinkAsExplored(LinkElement linkElement)
{
    lock (_lockbject)
    {
        // When Add reports that the element was not inserted, the flag is set on
        // the first stored element with the same url; otherwise it is set on the
        // element that was just inserted.
        LinkElement target = _fullUrlList.Add(linkElement)
            ? linkElement
            : _fullUrlList.First(stored => stored.url == linkElement.url);

        target.explored = true;
    }
}