/// <summary>
/// Starts a background task that executes <paramref name="webRequest"/> and records the
/// HTTP status code it produced.
/// </summary>
/// <param name="fluentConfiguration">Configuration whose <c>Bot</c>, when set, is given the request before it is sent.</param>
/// <param name="webRequest">The prepared request to execute.</param>
/// <param name="originalUrl">The link as found in the markup; stored (relative or absolute) in the result's <c>Url</c>.</param>
/// <returns>A started task that yields the <c>LinkResult</c> for this link.</returns>
private Task<LinkResult> CreateTask(FluentConfiguration fluentConfiguration, HttpWebRequest webRequest, string originalUrl)
{
    return Task<LinkResult>.Factory.StartNew(() =>
    {
        if (fluentConfiguration.Bot != null)
        {
            fluentConfiguration.Bot.OnRequest(webRequest);
        }

        _beforeConfigurers.ForEach(x => x(webRequest));

        var result = new LinkResult { Url = new Uri(originalUrl, UriKind.RelativeOrAbsolute) };
        HttpWebResponse webResponse = null;
        try
        {
            webResponse = (HttpWebResponse)webRequest.GetResponse();
            result.StatusCode = webResponse.StatusCode;
        }
        catch (WebException we)
        {
            if (we.Response != null)
            {
                try
                {
                    result.StatusCode = ((HttpWebResponse)we.Response).StatusCode;
                }
                finally
                {
                    // The error response holds the underlying connection just like a
                    // successful one; close it instead of waiting for the GC.
                    we.Response.Close();
                }
            }
            else
            {
                // No response at all (e.g. the request never reached a server):
                // reported as NotFound, as before.
                result.StatusCode = HttpStatusCode.NotFound;
            }
        }
        finally
        {
            if (webResponse != null)
            {
                try
                {
                    _afterConfigurers.ForEach(x => x(webResponse));
                }
                finally
                {
                    // Close even when an after-configurer throws.
                    webResponse.Close();
                }
            }
        }

        return result;
    });
}
/// <summary>
/// Runs the link check: builds the configuration through the registered configurers,
/// extracts the <c>href</c> of each anchor element in the configured HTML, starts one
/// request task per distinct well-formed link, and blocks until all tasks have finished.
/// </summary>
/// <returns>
/// One <c>LinkResult</c> per distinct, well-formed href, in the order the links were
/// first encountered in the markup.
/// </returns>
public List<LinkResult> Start()
{
    var fluentConfiguration = new FluentConfiguration();
    _onConfigurer(new OnConfigurer(fluentConfiguration));
    if (_botConfigurer != null)
    {
        _botConfigurer(new AsBotConfigurer(fluentConfiguration));
    }

    var markup = fluentConfiguration.Html.GetHtml();
    // A set gives constant-time duplicate detection; its default string comparer is
    // ordinal, the same equality the previous list scan with == used.
    var seenLinks = new HashSet<string>();
    var tasks = new List<Task<LinkResult>>();

    // \b after "<a" restricts matches to anchor elements, so tags that merely start
    // with "a" (<abbr>, <area>, <article>, ...) are no longer treated as links.
    foreach (Match anchor in Regex.Matches(markup, @"(<a\b.*?>.*?</a>)", RegexOptions.Singleline))
    {
        var hrefMatch = Regex.Match(anchor.Groups[1].Value, @"href=\""(.*?)\""", RegexOptions.Singleline);
        if (!hrefMatch.Success)
        {
            continue;
        }

        var href = hrefMatch.Groups[1].Value;
        if (!Uri.IsWellFormedUriString(href, fluentConfiguration.UriKind))
        {
            continue;
        }

        // Add returns false when the href was already queued.
        if (!seenLinks.Add(href))
        {
            continue;
        }

        // NOTE(review): the cast assumes the resolved URL yields an HTTP(S) request.
        var webRequest = (HttpWebRequest)WebRequest.Create(href.AsAbsoluteUrl(fluentConfiguration.BaseUrl));
        tasks.Add(CreateTask(fluentConfiguration, webRequest, href));
    }

    // Reading Result blocks until each task completes.
    return tasks.Select(x => x.Result).ToList();
}
/// <summary>
/// Runs the link check: builds the configuration through the registered configurers,
/// extracts the <c>href</c> of each anchor element in the configured HTML, starts one
/// request task per distinct well-formed link, and blocks until all tasks have finished.
/// </summary>
/// <returns>
/// One <c>LinkResult</c> per distinct, well-formed href, in the order the links were
/// first encountered in the markup.
/// </returns>
public List<LinkResult> Start()
{
    var fluentConfiguration = new FluentConfiguration();
    _onConfigurer(new OnConfigurer(fluentConfiguration));
    if (_botConfigurer != null)
    {
        _botConfigurer(new AsBotConfigurer(fluentConfiguration));
    }

    var markup = fluentConfiguration.Html.GetHtml();
    // A set gives constant-time duplicate detection; its default string comparer is
    // ordinal, the same equality the previous list scan with == used.
    var seenLinks = new HashSet<string>();
    var tasks = new List<Task<LinkResult>>();

    // \b after "<a" restricts matches to anchor elements, so tags that merely start
    // with "a" (<abbr>, <area>, <article>, ...) are no longer treated as links.
    foreach (Match anchor in Regex.Matches(markup, @"(<a\b.*?>.*?</a>)", RegexOptions.Singleline))
    {
        var hrefMatch = Regex.Match(anchor.Groups[1].Value, @"href=\""(.*?)\""", RegexOptions.Singleline);
        if (!hrefMatch.Success)
        {
            continue;
        }

        var href = hrefMatch.Groups[1].Value;
        if (!Uri.IsWellFormedUriString(href, fluentConfiguration.UriKind))
        {
            continue;
        }

        // Add returns false when the href was already queued.
        if (!seenLinks.Add(href))
        {
            continue;
        }

        // NOTE(review): the cast assumes the resolved URL yields an HTTP(S) request.
        var webRequest = (HttpWebRequest)WebRequest.Create(href.AsAbsoluteUrl(fluentConfiguration.BaseUrl));
        tasks.Add(CreateTask(fluentConfiguration, webRequest, href));
    }

    // Reading Result blocks until each task completes.
    return tasks.Select(x => x.Result).ToList();
}
/// <summary>
/// Starts a background task that executes <paramref name="webRequest"/> and records the
/// HTTP status code it produced.
/// </summary>
/// <param name="fluentConfiguration">Configuration whose <c>Bot</c>, when set, is given the request before it is sent.</param>
/// <param name="webRequest">The prepared request to execute.</param>
/// <param name="originalUrl">The link as found in the markup; stored (relative or absolute) in the result's <c>Url</c>.</param>
/// <returns>A started task that yields the <c>LinkResult</c> for this link.</returns>
private Task<LinkResult> CreateTask(FluentConfiguration fluentConfiguration, HttpWebRequest webRequest, string originalUrl)
{
    return Task<LinkResult>.Factory.StartNew(() =>
    {
        if (fluentConfiguration.Bot != null)
        {
            fluentConfiguration.Bot.OnRequest(webRequest);
        }

        _beforeConfigurers.ForEach(x => x(webRequest));

        var result = new LinkResult { Url = new Uri(originalUrl, UriKind.RelativeOrAbsolute) };
        HttpWebResponse webResponse = null;
        try
        {
            webResponse = (HttpWebResponse)webRequest.GetResponse();
            result.StatusCode = webResponse.StatusCode;
        }
        catch (WebException we)
        {
            if (we.Response != null)
            {
                try
                {
                    result.StatusCode = ((HttpWebResponse)we.Response).StatusCode;
                }
                finally
                {
                    // The error response holds the underlying connection just like a
                    // successful one; close it instead of waiting for the GC.
                    we.Response.Close();
                }
            }
            else
            {
                // No response at all (e.g. the request never reached a server):
                // reported as NotFound, as before.
                result.StatusCode = HttpStatusCode.NotFound;
            }
        }
        finally
        {
            if (webResponse != null)
            {
                try
                {
                    _afterConfigurers.ForEach(x => x(webResponse));
                }
                finally
                {
                    // Close even when an after-configurer throws.
                    webResponse.Close();
                }
            }
        }

        return result;
    });
}
/// <summary>
/// Creates the configurer, forwarding <paramref name="configuration"/> to the base
/// constructor and storing <paramref name="baseUrl"/> in <c>_baseUrl</c>.
/// </summary>
/// <param name="baseUrl">The base URL to keep for this configurer.</param>
/// <param name="configuration">The configuration handed to the base class.</param>
internal HtmlWithBaseUrlConfigurer(Uri baseUrl, FluentConfiguration configuration)
    : base(configuration)
{
    _baseUrl = baseUrl;
}
/// <summary>
/// Creates the configurer; all it does is forward <paramref name="configuration"/>
/// to the base constructor.
/// </summary>
/// <param name="configuration">The configuration handed to the base class.</param>
internal AsBotConfigurer(FluentConfiguration configuration)
    : base(configuration)
{
}
/// <summary>
/// Creates the configurer; all it does is forward <paramref name="configuration"/>
/// to the base constructor.
/// </summary>
/// <param name="configuration">The configuration handed to the base class.</param>
internal HtmlConfigurer(FluentConfiguration configuration)
    : base(configuration)
{
}