public void Should_download_content()
        {
            // Checks that DownloadContent hands back the text served by the stubbed
            // web client together with the very job it was asked to process.

            // Arrange: a web client stub that answers any URL with the same text.
            const string expectedUrl  = "url";
            const string expectedText = "text";

            var webClientStub = new Mock<ICrawlerWebClient>();
            webClientStub
                .Setup(client => client.Download(It.IsAny<string>()))
                .Returns(expectedText);

            // The stub is registered before the routines are created and invoked.
            ServiceLocator.RegisterForDependency(webClientStub.Object);

            var job = new JobItem { Url = new UrlItem { Url = expectedUrl } };

            // Act
            var routines   = new PipelineRoutines();
            var downloaded = routines.DownloadContent(job);

            // Assert
            Assert.NotNull(downloaded);
            Assert.Equal(expectedText, downloaded.Content);
            Assert.Equal(job, downloaded.Job);
            Assert.Equal(expectedUrl, downloaded.Job.Url.Url);
        }
        public void Should_return_parsing_rules()
        {
            // Checks that GetParsingRules produces one entry per rule returned by the
            // stubbed settings repository, each carrying the same job and content.

            // Arrange: a settings stub that returns a Link rule followed by a Picture rule.
            const string pageUrl  = "url";
            const string pageText = "text";

            var settingsStub = new Mock<ICrawlerSettingsRepository>();
            var stubbedRules = new[]
            {
                new CrawlRule { DataType = DataBlockType.Link },
                new CrawlRule { DataType = DataBlockType.Picture }
            };
            settingsStub
                .Setup(repo => repo.GetParsingRules(It.IsAny<JobItem>()))
                .Returns(stubbedRules);

            ServiceLocator.RegisterForDependency(settingsStub.Object);

            var job = new JobItem { Url = new UrlItem { Url = pageUrl } };

            // Act
            var downloaded = new DownloadedContentData(job, pageText);
            var rulesData  = new PipelineRoutines().GetParsingRules(downloaded).ToList();

            // Assert: exactly two entries, checked property by property.
            Assert.NotNull(rulesData);
            Assert.Equal(2, rulesData.Count);

            foreach (var entry in rulesData)
            {
                Assert.Equal(pageText, entry.Content);
            }

            foreach (var entry in rulesData)
            {
                Assert.Equal(job, entry.Job);
            }

            // Rule order must follow the order supplied by the stub.
            Assert.Equal(DataBlockType.Link, rulesData[0].Rule.DataType);
            Assert.Equal(DataBlockType.Picture, rulesData[1].Rule.DataType);
        }
        public void Should_return_all_matching_text_block_when_parsing()
        {
            // Checks that ParseContent yields one block per regular-expression match,
            // in order of appearance, each tagged with the job and the rule's data type.

            // Arrange: content holding three bracketed tokens and a rule matching them.
            const string pageUrl  = "url";
            const string pageText = "<a>text [text0] should be found in [text1] square [text2]brackets</a>";

            var job = new JobItem { Url = new UrlItem { Url = pageUrl } };

            var rule = new CrawlRule
            {
                DataType      = DataBlockType.Link,
                RegExpression = @"\[\w+\d{1}\]"
            };

            string[] expectedMatches = { "[text0]", "[text1]", "[text2]" };

            // Act
            var parsingInput = new ParsingRulesData(job, rule, pageText);
            var blocks       = new PipelineRoutines().ParseContent(parsingInput).ToList();

            // Assert: three blocks, checked property by property.
            Assert.NotNull(blocks);
            Assert.Equal(3, blocks.Count);

            for (var i = 0; i < expectedMatches.Length; i++)
            {
                Assert.Equal(expectedMatches[i], blocks[i].Data);
            }

            for (var i = 0; i < expectedMatches.Length; i++)
            {
                Assert.Equal(job, blocks[i].Job);
            }

            for (var i = 0; i < expectedMatches.Length; i++)
            {
                Assert.Equal(DataBlockType.Link, blocks[i].BlockType);
            }
        }