Transcode_returns_title_if_it_can_be_extracted()
        {
            // Checks that the text inside the <title> element of the supplied HTML comes back
            // unchanged as result.Title, including its non-ASCII characters.

            // arrange
            // NOTE(review): the non-ASCII tail of the title looks like mis-decoded Polish
            // diacritics (source file read with the wrong code page?) — confirm the file encoding.
            const string expectedTitle = "Some title ¹ê³ó¿Ÿñæ";
            const string htmlContent   = "<html><head><title>" + expectedTitle + "</title></head><body></body></html>";

            var nReadabilityTranscoder    = new ReadabilityTranscoder();
            // The fetcher stub is built from the HTML above; presumably it returns that HTML
            // for any requested URL, which is why the request URL below is a dummy — confirm.
            var urlFetcher                = new SimpleUrlFetcherStub(htmlContent);
            var nReadabilityWebTranscoder = new ReadabilityWebTranscoder(nReadabilityTranscoder, urlFetcher);

            var request = new WebTranscodeRequest("http://dummy.com/");

            // act
            var result = await nReadabilityWebTranscoder.TranscodeAsync(request);

            // assert
            Assert.NotNull(result.Title);
            Assert.AreEqual(expectedTitle, result.Title);
        }
        public async Task Semplice1()
        {
            // Transcodes an article fetched through UrlFetcher (presumably a real HTTP fetch,
            // since no stub is involved — confirm) and saves the extracted content to
            // SampleWebOutput/SampleOutput_a1.html for manual inspection.
            const string outputDir = "SampleWebOutput";

            // arrange
            var fetcher = new UrlFetcher();

            var nReadabilityTranscoder    = new ReadabilityTranscoder();
            var nReadabilityWebTranscoder = new ReadabilityWebTranscoder(nReadabilityTranscoder, fetcher);

            var webTranscodingInput = new WebTranscodeRequest("https://vanschneider.com/the-art-of-pricing-freelance-projects");

            // act
            var result = await nReadabilityWebTranscoder.TranscodeAsync(webTranscodingInput);

            // assert
            Assert.IsTrue(result.ContentExtracted);

            // File.WriteAllText does not create missing directories, so make sure the output
            // directory is there first. CreateDirectory does nothing if it already exists.
            Directory.CreateDirectory(outputDir);

            File.WriteAllText(
                Path.Combine(outputDir, "SampleOutput_a1.html"),
                result.Content,
                Encoding.UTF8);
        }
        // SampleWebOutput
        // [Test]
        public async Task NewYorker()
        {
            // Transcodes a New Yorker article fetched through UrlFetcher (presumably a real
            // HTTP fetch, since no stub is involved — confirm) and saves the extracted content
            // to SampleWebOutput/SampleOutput_newyorker.html for manual inspection.
            const string outputDir = "SampleWebOutput";

            // arrange
            var fetcher = new UrlFetcher();

            var nReadabilityTranscoder    = new ReadabilityTranscoder();
            var nReadabilityWebTranscoder = new ReadabilityWebTranscoder(nReadabilityTranscoder, fetcher);

            var request = new WebTranscodeRequest("https://www.newyorker.com/culture/postscript/a-few-words-about-jerry-stiller");

            // act
            var result = await nReadabilityWebTranscoder.TranscodeAsync(request);

            // assert
            Assert.IsTrue(result.ContentExtracted);

            // File.WriteAllText does not create missing directories, so make sure the output
            // directory is there first. CreateDirectory does nothing if it already exists.
            Directory.CreateDirectory(outputDir);

            File.WriteAllText(
                Path.Combine(outputDir, "SampleOutput_newyorker.html"),
                result.Content,
                Encoding.UTF8);
        }
        public async Task TestSampleInputs([Values(1, 2, 3, 4, 5, 6, 7, 8)] int sampleInputNumber)
        {
            // Transcodes the sample input with the given 1-based number using a file-based
            // fetcher stub, saves the result under SampleWebOutput for manual inspection, and
            // then checks sample-specific expectations about the extracted content.
            const string outputDir = "SampleWebOutput";

            string sampleInputNumberStr = sampleInputNumber.ToString().PadLeft(2, '0');

            // _Urls is indexed from zero; the first URL of each entry is the one passed to the transcoder.
            string[] urls       = _Urls[sampleInputNumber - 1];
            string   initialUrl = urls[0];

            var fetcher       = new FileBasedUrlFetcherStub(sampleInputNumber, urls);
            var transcoder    = new ReadabilityTranscoder();
            var webTranscoder = new ReadabilityWebTranscoder(transcoder, fetcher);

            var result = await webTranscoder.TranscodeAsync(initialUrl);

            // The output is written before any assertion so it can be inspected when the test fails.
            // CreateDirectory does nothing if the directory already exists.
            Directory.CreateDirectory(outputDir);

            File.WriteAllText(
                Path.Combine(outputDir, string.Format("SampleOutput_{0}.html", sampleInputNumberStr)),
                result.Content,
                Encoding.UTF8);

            // Checked before the fragment assertions: if extraction failed outright, report that
            // instead of a missing-fragment failure.
            Assert.IsTrue(result.ContentExtracted, "Content was not extracted for sample input " + sampleInputNumberStr + ".");

            string content = result.Content;

            switch (sampleInputNumber)
            {
            case 1:
                AssertContentContains(content, " freedom of movement or expression would constitute a new and unacceptable denial");
                AssertContentContains(content, "Those expectations were on display in the crowd outside her house on Saturday.");
                Assert.That(Regex.Matches(content, "Myanmar Junta Frees Dissident Daw Aung San Suu Kyi").Count, Is.EqualTo(4));
                break;

            case 2:
                AssertContentContains(content, "For Louie and Phil, the conversations did more than keep their minds sharp.");
                AssertContentContains(content, "It was absolutely dark and absolutely silent, save for the chattering of Phil’s teeth.");
                AssertContentContains(content, "A serial runaway and artful dodger");
                Assert.That(Regex.Matches(content, @"Adrift but Unbroken \| Politics").Count, Is.EqualTo(2));
                break;

            case 3:
                AssertContentContains(content, "The Chinese system as a whole has great weaknesses as well as great strengths.");
                AssertContentContains(content, " This emphasis on limits is what begins pointing us back to coal.");
                AssertContentContains(content, ". For example, the possibility of dramatic rises in ocean levels, which could affect the habitability");
                Assert.That(Regex.Matches(content, "Dirty Coal, Clean Future - Magazine").Count, Is.EqualTo(3));     // Makes sure the title isn't duplicated
                break;

            case 4:      // Test duplicate content on subsequent page
                Assert.That(Regex.Matches(content, "his may seem paradoxical, or backward").Count, Is.EqualTo(1));
                break;

            case 5:
                // page 1
                AssertContentContains(content, "The pressure's on, and as you glance back and forth between your rack and the board, you can hardly believe your eyes at the play you can make.");
                AssertContentContains(content, "How can you take your game to the next level? Let's start by looking at game play.");
                // page 2
                AssertContentContains(content, "The object of Scrabble is to get the most points by creating words.");
                AssertContentContains(content, "Now that you know the parts of the game, let's take a look at how to play it.");
                // page 3
                AssertContentContains(content, "To determine who goes first, put all the tiles into the bag and mix them up.");
                AssertContentContains(content, "The game continues until one player uses all of his tiles and there aren't any in the pouch, or if there are no more tiles and no one can make a word. Add up the total of your unplayed tiles and deduct it from your score. If you've used all of your tiles, add the total of the unplayed tiles to your score. The winner has the most points.");
                // page 4
                AssertContentContains(content, "If you play often enough, you'll need to learn how to play the board in order to get the highest score");
                AssertContentContains(content, "With the game's popularity, it now comes in many variations. Let's take a look at some different ways to play Scrabble.");
                // page 5
                AssertContentContains(content, "Many people play Scrabble on a traditional flat board with the grid imprinted on it.");
                AssertContentContains(content, "With its worldwide popularity, it only makes sense that Scrabble comes in languages other than English. ");
                break;

            case 6:
                // page 1
                AssertContentContains(content, "In the aftermath of his resignation and then his death");
                AssertContentContains(content, "Curb Your Enthusiasm");
                // page 2
                AssertContentContains(content, "Jobs also seemed to suspect that he");
                AssertContentContains(content, "And, sadly, it may remain one forever.");
                break;

            case 7:
                // page 1
                AssertContentContains(content, "post also betrays some misconceptions regarding our report.");
                AssertContentContains(content, "After all, none of us can resist the occasional study");
                // "page" 2 (false positive)
                AssertContentDoesNotContain(content, "In expressing this view, Clinton joins many Americans who worry about online misinformation, loss of privacy, and identity theft.");
                break;

            case 8:
                // page 1
                AssertContentContains(content, "For the last couple of days we’ve been asking people");
                AssertContentContains(content, "list your favorite tools for slowing down feeds in the comments");
                // "page" 2 (false positive)
                AssertContentDoesNotContain(content, "signature fake news programs");
                break;

            default:
                throw new NotSupportedException("Unknown sample input number (" + sampleInputNumber + "). Have you added another sample input? If so, then add appropriate asserts here as well.");
            }
        }

        // Asserts that content contains fragment; the failure message names the missing fragment.
        private static void AssertContentContains(string content, string fragment)
        {
            Assert.IsTrue(content.Contains(fragment), "Extracted content is missing expected fragment: " + fragment);
        }

        // Asserts that content does not contain fragment; the failure message names the offending fragment.
        private static void AssertContentDoesNotContain(string content, string fragment)
        {
            Assert.IsFalse(content.Contains(fragment), "Extracted content contains unexpected fragment: " + fragment);
        }