Exemple #1
0
 /// <summary>
 /// Creates a crawler that reports to the supplied outputs and takes its options from
 /// <paramref name="settings"/>.
 /// </summary>
 /// <param name="outputs">Stored in <c>Outputs</c> as-is; not validated here.</param>
 /// <param name="validUrlParser">Stored in <c>ValidUrlParser</c> as-is; not validated here.</param>
 /// <param name="settings">
 /// Supplies <c>BaseUrl</c>, <c>CheckImages</c> and <c>OnlyReportBrokenLinksToOutput</c>;
 /// the instance itself is also kept in <c>_settings</c>.
 /// </param>
 /// <exception cref="System.ArgumentNullException"><paramref name="settings"/> is null.</exception>
 public LinkCrawler(IEnumerable<IOutput> outputs, IValidUrlParser validUrlParser, ISettings settings)
 {
     // settings is dereferenced right below; report the offending parameter by name
     // instead of failing with a bare NullReferenceException.
     if (settings == null)
     {
         throw new System.ArgumentNullException(nameof(settings));
     }

     _settings = settings;
     BaseUrl = settings.BaseUrl;
     CheckImages = settings.CheckImages;
     OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput;

     Outputs = outputs;
     ValidUrlParser = validUrlParser;

     // Starts with no visited URLs.
     VisitedUrlList = new List<string>();

     // One GET request object, configured through SetHeader with "Accept: */*".
     RestRequest = new RestRequest(Method.GET).SetHeader("Accept", "*/*");
 }
 /// <summary>
 /// Creates a crawler that holds a Slack client and takes its options from
 /// <paramref name="settings"/>.
 /// </summary>
 /// <param name="slackClient">Stored in <c>SlackClient</c> as-is; not validated here.</param>
 /// <param name="validUrlParser">Stored in <c>ValidUrlParser</c> as-is; not validated here.</param>
 /// <param name="settings">
 /// Supplies <c>BaseUrl</c>, <c>CheckImages</c> and <c>OnlyReportBrokenLinksToOutput</c>.
 /// </param>
 /// <exception cref="System.ArgumentNullException"><paramref name="settings"/> is null.</exception>
 public LinkCrawler(ISlackClient slackClient, IValidUrlParser validUrlParser, ISettings settings)
 {
     // settings is dereferenced right below; report the offending parameter by name
     // instead of failing with a bare NullReferenceException.
     if (settings == null)
     {
         throw new System.ArgumentNullException(nameof(settings));
     }

     BaseUrl = settings.BaseUrl;
     CheckImages = settings.CheckImages;
     OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput;

     SlackClient = slackClient;
     ValidUrlParser = validUrlParser;

     // Starts with no visited URLs.
     VisitedUrlList = new List<string>();

     RestClient = new RestClient();
     // One GET request object, configured through SetHeader with "Accept: */*".
     RestRequest = new RestRequest(Method.GET).SetHeader("Accept", "*/*");
 }
Exemple #3
0
 /// <summary>
 /// Builds a crawler from the given outputs, URL parser and settings.
 /// </summary>
 /// <param name="outputs">Assigned to <c>Outputs</c> without validation.</param>
 /// <param name="validUrlParser">Assigned to <c>ValidUrlParser</c> without validation.</param>
 /// <param name="settings">
 /// Read for <c>BaseUrl</c>, <c>CheckImages</c> and <c>OnlyReportBrokenLinksToOutput</c>,
 /// and retained in <c>_settings</c>.
 /// </param>
 /// <exception cref="System.ArgumentNullException"><paramref name="settings"/> is null.</exception>
 public LinkCrawler(IEnumerable <IOutput> outputs, IValidUrlParser validUrlParser, ISettings settings)
 {
     // Every option below is read from settings, so reject null up front with a
     // named-argument exception rather than an anonymous NullReferenceException.
     if (settings == null)
     {
         throw new System.ArgumentNullException(nameof(settings));
     }

     _settings = settings;
     BaseUrl = settings.BaseUrl;
     CheckImages = settings.CheckImages;
     OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput;

     Outputs = outputs;
     ValidUrlParser = validUrlParser;

     // The visited-URL list begins empty.
     VisitedUrlList = new List <string>();

     // GET request configured through SetHeader with "Accept: */*".
     RestRequest = new RestRequest(Method.GET).SetHeader("Accept", "*/*");
 }
        /// <summary>
        /// Gets all URLs from the markup and tries to fix the ones RestSharp would have a problem with
        /// (e.g. relative URLs, URLs with no scheme, mailto links, etc.). Only URLs for which
        /// <paramref name="parser"/> reports success are kept.
        /// </summary>
        /// <param name="markup">Markup handed to <c>GetAllUrlsFromMarkup</c>.</param>
        /// <param name="parser">Its <c>Parse(url, out validUrl)</c> result decides whether a URL is kept.</param>
        /// <param name="checkImages">Forwarded unchanged to <c>GetAllUrlsFromMarkup</c>.</param>
        /// <returns>List of URLs that will work with RestSharp for sending HTTP GET requests.</returns>
        public static List<string> GetValidUrlListFromMarkup(string markup, IValidUrlParser parser, bool checkImages)
        {
            var accepted = new List<string>();

            foreach (var candidate in GetAllUrlsFromMarkup(markup, checkImages))
            {
                string parsed;
                if (!parser.Parse(candidate, out parsed))
                {
                    // The parser rejected this URL; leave it out of the result.
                    continue;
                }

                accepted.Add(parsed);
            }

            return accepted;
        }
Exemple #5
0
        /// <summary>
        /// Extracts the URLs contained in <paramref name="markup"/> and runs each one through
        /// <paramref name="parser"/>, which tries to fix URLs that RestSharp would have a problem with
        /// (e.g. relative URLs, URLs with no scheme, mailto links, etc.).
        /// </summary>
        /// <param name="markup">Markup passed on to <c>GetAllUrlsFromMarkup</c>.</param>
        /// <param name="parser">A URL is included only when its <c>Parse</c> call returns true.</param>
        /// <param name="checkImages">Passed on to <c>GetAllUrlsFromMarkup</c>.</param>
        /// <returns>List of URLs that will work with RestSharp for sending HTTP GET requests.</returns>
        public static List <string> GetValidUrlListFromMarkup(string markup, IValidUrlParser parser, bool checkImages)
        {
            var usableUrls = new List <string>();
            var rawUrls = GetAllUrlsFromMarkup(markup, checkImages);

            foreach (var rawUrl in rawUrls)
            {
                string fixedUrl;
                bool parsedOk = parser.Parse(rawUrl, out fixedUrl);
                if (!parsedOk)
                {
                    // Skip anything the parser could not turn into a valid URL.
                    continue;
                }

                usableUrls.Add(fixedUrl);
            }

            return usableUrls;
        }
        /// <summary>
        /// Creates a crawler with its own <c>HttpClient</c>, reporting to the supplied outputs and
        /// configured from <paramref name="settings"/>.
        /// </summary>
        /// <param name="outputs">Assigned to <c>Outputs</c> without validation.</param>
        /// <param name="validUrlParser">Assigned to <c>ValidUrlParser</c> without validation.</param>
        /// <param name="settings">
        /// Read for <c>TimeMsBetweenRequests</c> and <c>OnlyReportBrokenLinksToOutput</c>,
        /// and retained in <c>_settings</c>.
        /// </param>
        /// <exception cref="System.ArgumentNullException"><paramref name="settings"/> is null.</exception>
        public LinkCrawler(IEnumerable <IOutput> outputs, IValidUrlParser validUrlParser, ISettings settings)
        {
            // Validate before allocating anything: previously a null settings threw a
            // NullReferenceException only after the HttpClient had been created, leaving
            // that client undisposed.
            if (settings == null)
            {
                throw new System.ArgumentNullException(nameof(settings));
            }

            _settings = settings;
            msSleepBetweenRequests = settings.TimeMsBetweenRequests;
            OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput;

            // Every request from this client carries these default headers.
            _httpClient = new HttpClient();
            _httpClient.DefaultRequestHeaders.Add("User-Agent", "curl/7.54.0");
            _httpClient.DefaultRequestHeaders.Add("Accept", "*/*");

            Outputs        = outputs;
            ValidUrlParser = validUrlParser;
            UrlList        = new HashSet <LinkModel>(new LinkListComparer()); //to have only unique addresses in the list

            this.timer    = new Stopwatch();
            stopWordsList = ReadStopWords();
        }