/// <summary>
/// Creates a crawler from the supplied outputs, url parser and settings.
/// </summary>
/// <param name="outputs">Stored in <c>Outputs</c>.</param>
/// <param name="validUrlParser">Stored in <c>ValidUrlParser</c>.</param>
/// <param name="settings">
/// Provides <c>BaseUrl</c>, <c>CheckImages</c> and <c>OnlyReportBrokenLinksToOutput</c>;
/// the instance itself is also kept in <c>_settings</c>.
/// </param>
public LinkCrawler(IEnumerable<IOutput> outputs, IValidUrlParser validUrlParser, ISettings settings)
{
    // Collaborators handed in by the caller.
    _settings = settings;
    Outputs = outputs;
    ValidUrlParser = validUrlParser;

    // Options copied once from the settings object.
    BaseUrl = settings.BaseUrl;
    CheckImages = settings.CheckImages;
    OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput;

    // Fresh per-crawler state: nothing visited yet.
    VisitedUrlList = new List<string>();

    // GET request passed through SetHeader with "Accept: */*".
    var getRequest = new RestRequest(Method.GET);
    RestRequest = getRequest.SetHeader("Accept", "*/*");
}
/// <summary>
/// Creates a crawler from a Slack client, url parser and settings.
/// </summary>
/// <param name="slackClient">Stored in <c>SlackClient</c>.</param>
/// <param name="validUrlParser">Stored in <c>ValidUrlParser</c>.</param>
/// <param name="settings">
/// Provides <c>BaseUrl</c>, <c>CheckImages</c> and <c>OnlyReportBrokenLinksToOutput</c>.
/// </param>
public LinkCrawler(ISlackClient slackClient, IValidUrlParser validUrlParser, ISettings settings)
{
    // Collaborators handed in by the caller.
    SlackClient = slackClient;
    ValidUrlParser = validUrlParser;

    // Options copied once from the settings object.
    BaseUrl = settings.BaseUrl;
    CheckImages = settings.CheckImages;
    OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput;

    // Objects this crawler creates for itself.
    RestClient = new RestClient();
    VisitedUrlList = new List<string>();

    // GET request passed through SetHeader with "Accept: */*".
    var getRequest = new RestRequest(Method.GET);
    RestRequest = getRequest.SetHeader("Accept", "*/*");
}
/// <summary>
/// Initializes the crawler: keeps the given outputs and url parser, copies the
/// crawl options out of <paramref name="settings"/> and starts with an empty
/// visited-url list.
/// </summary>
/// <param name="outputs">Assigned to <c>Outputs</c>.</param>
/// <param name="validUrlParser">Assigned to <c>ValidUrlParser</c>.</param>
/// <param name="settings">
/// Read for <c>BaseUrl</c>, <c>CheckImages</c> and <c>OnlyReportBrokenLinksToOutput</c>,
/// then retained in <c>_settings</c>.
/// </param>
public LinkCrawler(IEnumerable<IOutput> outputs, IValidUrlParser validUrlParser, ISettings settings)
{
    this.BaseUrl = settings.BaseUrl;
    this.Outputs = outputs;
    this.ValidUrlParser = validUrlParser;
    this.CheckImages = settings.CheckImages;

    // No url has been visited at construction time.
    this.VisitedUrlList = new List<string>();

    // One GET request object, passed through SetHeader with "Accept: */*".
    this.RestRequest = new RestRequest(Method.GET)
        .SetHeader("Accept", "*/*");

    this.OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput;
    this._settings = settings;
}
/// <summary>
/// Gets every url found in the markup and keeps those the parser accepts, in the
/// form the parser returns. The parser is meant to fix urls that RestSharp would
/// have a problem with (i.e. relative urls, urls with no scheme, mailto links, etc.).
/// </summary>
/// <param name="markup">Markup passed to <c>GetAllUrlsFromMarkup</c>.</param>
/// <param name="parser">Called once per url; its <c>out</c> value is kept when it returns true.</param>
/// <param name="checkImages">Forwarded unchanged to <c>GetAllUrlsFromMarkup</c>.</param>
/// <returns>List of urls that will work with RestSharp for sending http get.</returns>
public static List<string> GetValidUrlListFromMarkup(string markup, IValidUrlParser parser, bool checkImages)
{
    var candidates = GetAllUrlsFromMarkup(markup, checkImages);
    var accepted = new List<string>();

    foreach (var candidate in candidates)
    {
        string parsed;
        if (!parser.Parse(candidate, out parsed))
        {
            // Parser rejected this url; leave it out of the result.
            continue;
        }

        accepted.Add(parsed);
    }

    return accepted;
}
/// <summary>
/// Extracts all urls from the markup and returns the ones the parser could turn
/// into a valid url. The parser is meant to repair urls that RestSharp would have
/// a problem with (i.e. relative urls, urls with no scheme, mailto links, etc.).
/// </summary>
/// <param name="markup">Markup handed to <c>GetAllUrlsFromMarkup</c>.</param>
/// <param name="parser">Invoked for each url; when it returns true its <c>out</c> value is added to the result.</param>
/// <param name="checkImages">Passed through to <c>GetAllUrlsFromMarkup</c>.</param>
/// <returns>List of urls that will work with RestSharp for sending http get.</returns>
public static List<string> GetValidUrlListFromMarkup(string markup, IValidUrlParser parser, bool checkImages)
{
    var result = new List<string>();

    foreach (var rawUrl in GetAllUrlsFromMarkup(markup, checkImages))
    {
        string fixedUrl;
        var isUsable = parser.Parse(rawUrl, out fixedUrl);

        if (isUsable)
        {
            result.Add(fixedUrl);
        }
    }

    return result;
}
/// <summary>
/// Creates a crawler with its own <see cref="HttpClient"/>, the supplied outputs
/// and url parser, and options taken from <paramref name="settings"/>.
/// </summary>
/// <param name="outputs">Stored in <c>Outputs</c>.</param>
/// <param name="validUrlParser">Stored in <c>ValidUrlParser</c>.</param>
/// <param name="settings">
/// Provides <c>TimeMsBetweenRequests</c> and <c>OnlyReportBrokenLinksToOutput</c>;
/// the instance itself is also kept in <c>_settings</c>.
/// </param>
public LinkCrawler(IEnumerable<IOutput> outputs, IValidUrlParser validUrlParser, ISettings settings)
{
    // Every request from this client carries these two default headers.
    _httpClient = new HttpClient();
    var defaultHeaders = _httpClient.DefaultRequestHeaders;
    defaultHeaders.Add("User-Agent", "curl/7.54.0");
    defaultHeaders.Add("Accept", "*/*");

    msSleepBetweenRequests = settings.TimeMsBetweenRequests;

    Outputs = outputs;
    ValidUrlParser = validUrlParser;

    // A set with a custom comparer, so that only unique addresses are kept.
    var linkComparer = new LinkListComparer();
    UrlList = new HashSet<LinkModel>(linkComparer);

    OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput;
    _settings = settings;

    timer = new Stopwatch();

    // Kept as the final step, as in the original ordering — ReadStopWords may
    // read state assigned above (TODO confirm against its implementation).
    stopWordsList = ReadStopWords();
}