示例#1
0
        /// <summary>
        /// Parses up to ten hashtag pages, starting from "hamburg", and logs every image the crawler reports.
        /// Tags attached to reported images are queued so that they can be parsed on later iterations.
        /// </summary>
        public void SpeedCrawlerTest()
        {
            const string seedTag = "hamburg";

            // Maximum number of hashtag pages parsed by this run.
            var limit = 10;

            var speedCrawler = new CrawlerV2();
            var queue        = new ConcurrentQueue<string>();

            // Every tag that has ever been queued. Looking only at the pending queue would let a tag
            // that was already dequeued and parsed (the seed included) be queued again and burn the budget.
            var seenTags = new ConcurrentDictionary<string, byte>();

            seenTags.TryAdd(seedTag, 0);
            queue.Enqueue(seedTag);

            speedCrawler.FoundImage += i =>
            {
                this.TestConsole.WriteLine(i.ToString());
                foreach (var humanoidTag in i.HumanoidTags)
                {
                    // TryAdd checks and records the tag in one atomic step, so a tag is queued at most once
                    // even if this handler runs concurrently. Null tags are skipped: they cannot be used as
                    // dictionary keys and there is no page to parse for them.
                    if (humanoidTag != null && seenTags.TryAdd(humanoidTag, 0))
                    {
                        queue.Enqueue(humanoidTag);
                    }
                }
            };

            while (limit > 0 && queue.TryDequeue(out var hashtag))
            {
                limit--;

                this.TestConsole.WriteLine("Queue Size: " + queue.Count);
                this.TestConsole.WriteLine("Parsing HashTag: #" + hashtag);
                speedCrawler.ParseHashTagPage(hashtag);
            }

            // Whatever is still pending once the budget is used up or the queue ran dry.
            this.TestConsole.WriteLine("Remained Queue: " + string.Join(", ", queue));
        }
        /// <summary>
        /// Awaits a crawler search, hands its response to a <c>ChildElementLocator</c>, and checks that the
        /// located URI positions are unique, non-null and of type <see cref="int"/>.
        /// </summary>
        public async Task ChildElementLocatorTest()
        {
            // Arrange
            const string searchAddress = "https://www.google.com.au/search?gl=au&hl=en&pws=0&num=100&q=";
            const string searchTerms   = "online title search";
            const string parentMarkup  = "<div class=\"ZINbbc xpd O9g5cc uUPGi\"><div class=\"kCrYT\">";
            const string childMarkup   = "<div class=\"kCrYT\"><a href=\"/url?q=";
            const string targetUri     = "https://www.infotrack.com.au";

            var crawler = new CrawlerV2(searchAddress, searchTerms);
            await crawler.Search();

            var locator = new ChildElementLocator(crawler.GetResponse(), parentMarkup, childMarkup, targetUri);

            // Act
            locator.FindLocationsOfUri();

            // Assert
            // The locations are re-read for each assertion, exactly as many times as before.
            CollectionAssert.AllItemsAreUnique(locator.GetUriLocations());
            CollectionAssert.AllItemsAreNotNull(locator.GetUriLocations());
            CollectionAssert.AllItemsAreInstancesOfType(locator.GetUriLocations(), typeof(int));
        }
示例#3
0
        /// <summary>
        /// Expects <c>CrawlerV2.Search()</c> to fail with an <see cref="ArgumentException"/> when the crawler
        /// was created with a string that is not an address.
        /// </summary>
        public async Task GetResponse_InvalidAddressAndKeywords_throw()
        {
            // Arrange
            const string malformedAddress = "not an address";
            const string searchTerms      = "some keyworkd";

            var crawler = new CrawlerV2(malformedAddress, searchTerms);

            // The search is wrapped in a delegate so the assertion can invoke it and observe the failure.
            Func<Task> runSearch = () => crawler.Search();

            // Act and Assert
            await Assert.ThrowsExceptionAsync<ArgumentException>(runSearch);
        }
示例#4
0
        /// <summary>
        /// Form submission: runs a search for <paramref name="keywords"/> and reports where
        /// <paramref name="uri"/> occurs among the links of the returned result page.
        /// </summary>
        /// <param name="uri">The URI to look for in the search results; echoed back in the response.</param>
        /// <param name="keywords">The search terms handed to the crawler.</param>
        /// <returns>
        /// A JSON result whose data is either an <c>error</c> message (no response could be retrieved) or a
        /// <c>success</c> object carrying <c>uri</c>, <c>keywords</c>, <c>uriCount</c> and <c>uriLocations</c>.
        /// </returns>
        public async Task <JsonResult> Crawl(string uri, string keywords)
        {
            // create a new crawler and do a crawl using the provided search terms
            var crawlerV2 = new CrawlerV2(Address, keywords);

            await crawlerV2.Search();

            var data = crawlerV2.GetResponse();

            // Nothing to analyse: report the failure instead of handing a null/empty page to the locator.
            if (string.IsNullOrEmpty(data))
            {
                return new JsonResult
                {
                    Data = new { error = "Could not retrieve search engine results." }
                };
            }

            // parent element
            const string htmlOfDivContainingLink = "<div class=\"ZINbbc xpd O9g5cc uUPGi\"><div class=\"kCrYT\">";
            // child element
            const string htmlOfAnchorElement = "<div class=\"kCrYT\"><a href=\"/url?q=";

            // create a new uri locator so we can figure out where all the links are
            var uriLocator = new ChildElementLocator(data, htmlOfDivContainingLink, htmlOfAnchorElement, uri, new FindIndex());

            uriLocator.FindLocationsOfUri();
            uriLocator.CountUriOccurrences();

            var uriLocations = uriLocator.GetUriLocations();
            var uriCount     = uriLocator.GetUriCount();

            // display "no results" if there are no links to the requested uri in the search results
            if (uriCount == 0)
            {
                keywords = $"{keywords} (no results)";
            }

            return new JsonResult
            {
                Data = new
                {
                    success = new
                    {
                        uri,
                        keywords,
                        uriCount,
                        uriLocations
                    }
                }
            };
        }
    }
示例#5
0
        /// <summary>
        /// Checks that the crawler exposes a non-null response once a search with a well-formed address
        /// and keywords has been awaited.
        /// </summary>
        public async Task GetResponse_ValidInputs_NotNull()
        {
            // Arrange
            const string searchAddress = "https://www.google.com.au/search?gl=au&hl=en&pws=0&num=100&q=";
            const string searchTerms   = "online title search";

            var crawler = new CrawlerV2(searchAddress, searchTerms);

            // Act
            await crawler.Search();
            var response = crawler.GetResponse();

            // Assert
            Assert.IsNotNull(response);
        }
        /// <summary>
        /// Awaits a crawler search, counts the occurrences of the target URI in its response and checks
        /// that the resulting count is not null.
        /// </summary>
        public async Task CountUriOccurrencesTest_NotNull()
        {
            // Arrange
            const string searchAddress = "https://www.google.com.au/search?gl=au&hl=en&pws=0&num=100&q=";
            const string searchTerms   = "online title search";
            const string parentMarkup  = "<div class=\"ZINbbc xpd O9g5cc uUPGi\"><div class=\"kCrYT\">";
            const string childMarkup   = "<div class=\"kCrYT\"><a href=\"/url?q=";
            const string targetUri     = "https://www.infotrack.com.au";

            var crawler = new CrawlerV2(searchAddress, searchTerms);
            await crawler.Search();

            var locator = new ChildElementLocator(crawler.GetResponse(), parentMarkup, childMarkup, targetUri);

            // Act
            locator.CountUriOccurrences();
            var uriCount = locator.GetUriCount();

            // Assert
            // NOTE(review): if GetUriCount() returns a non-nullable value type, this assertion can never
            // fail — confirm the return type and consider asserting on the expected count instead.
            Assert.IsNotNull(uriCount);
        }