Exemplo n.º 1
0
        /// <summary>
        /// Creates a crawler wired with its default collaborators and starts it.
        /// </summary>
        /// <remarks>
        /// The link extractor is the <c>Extract</c> method of a fresh
        /// <c>RegexLinkExtractor</c>, the requester is a new <c>HttpClient</c>,
        /// and the store is a new <c>InMemoryUrlStore</c>.
        /// NOTE(review): <c>Start()</c> is invoked from inside the constructor, so
        /// whatever it does happens as part of construction.
        /// </remarks>
        public Crawler()
        {
            // Default link extraction: delegate to a regex-based extractor instance.
            var defaultExtractor = new RegexLinkExtractor();
            LinkExtractor = defaultExtractor.Extract;

            // Default requester.
            var defaultRequester = new HttpClient();
            Requester = defaultRequester;

            // Default URL store.
            var defaultStore = new InMemoryUrlStore();
            Store = defaultStore;

            // TODO: read from config file
            Start();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Default constructor: assigns the built-in link extractor, requester and
        /// URL store, then calls <c>Start()</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the defaults are hard-coded here; the TODO below records
        /// the intent to load them from a configuration file instead.
        /// </remarks>
        public Crawler()
        {
            // The extractor instance is kept alive by the delegate bound to its Extract method.
            var regexExtractor = new RegexLinkExtractor();
            LinkExtractor = regexExtractor.Extract;

            var httpRequester = new HttpClient();
            Requester = httpRequester;

            var urlStore = new InMemoryUrlStore();
            Store = urlStore;

            // TODO: read from config file
            Start();
        }
        /// <summary>
        /// Checks that <c>RegexLinkExtractor.GetLinks</c> returns both anchors of a
        /// small HTML document, each as a pair whose <c>Item1</c> is the href and
        /// whose <c>Item2</c> is the anchor text.
        /// </summary>
        /// <remarks>
        /// The extractor is created with its parameterless constructor; presumably
        /// that is the "no filter on class" condition named in the test (confirm
        /// against <c>RegexLinkExtractor</c>).
        /// </remarks>
        public void GetLinks_WithNoFilterOnClass_ReturnsLinks()
        {
            // Expected (href, text) pairs.
            const string microsoftHref = "www.microsoft.com";
            const string microsoftText = "A link to microsoft";
            const string googleHref = "www.google.com";
            const string googleText = "A link to google";

            // Arrange
            var extractor = new RegexLinkExtractor();

            var markup = "<html>" +
                         "<body>" +
                         "<a href=\"www.microsoft.com\">A link to microsoft</a>" +
                         "<a href=\"www.google.com\">A link to google</a>" +
                         "</body>" +
                         "</html>";

            // Act
            var links = extractor.GetLinks(new Document(markup));

            // Assert: each anchor must appear in the extracted links.
            Assert.IsTrue(links.Any(link => link.Item1 == microsoftHref && link.Item2 == microsoftText));
            Assert.IsTrue(links.Any(link => link.Item1 == googleHref && link.Item2 == googleText));
        }