Ejemplo n.º 1
0
        /// <summary>
        /// Runs a <c>MultiExtractor</c> configured from the <c>*.json</c> files in the
        /// <c>TestData</c> folder against the saved <c>answers.microsoft.com.html</c> page
        /// and verifies the extracted JSON: the question (title, content, views, hints),
        /// the answers summary, and the lists attached to each answer.
        /// </summary>
        public void MultiWebsiteExtractionTest()
        {
            var multiExtractor = new MultiExtractor(configRootFolder: "TestData", configFilesPattern: "*.json");
            var json           = multiExtractor.ParsePage(
                url: "http://answers.microsoft.com/en-us/windows/forum/windows_10-win_upgrade/i-want-to-reserve-my-free-copy-of-windows-10-but-i/9c3f7f56-3da8-4b40-a30f-e33772439ee1",
                html: File.ReadAllText(Path.Combine("TestData", "answers.microsoft.com.html")));

            // Deserialized as dynamic so nested tokens can be navigated with indexers below.
            dynamic parsedJson = JsonConvert.DeserializeObject(json);

            // Question
            Assert.AreNotEqual(null, parsedJson["question"], "Extractor should find a question in the HTML file");

            var question = parsedJson["question"];

            Assert.AreEqual("I want to reserve my free copy of Windows 10, but I don’t see the icon on the taskbar", question["title"].Value, "The extracted title is incorrect");
            Assert.AreNotEqual(null, question["content"], "The extracted question should have a content");
            Assert.IsTrue(question["content"].Value.Length > 0, "The extracted question content should have a length > 0");
            Assert.AreEqual(1642653, question["views"].Value, "The extracted views snippet is incorrect");

            // Question context
            Assert.AreNotEqual(null, question["hints"], "The extracted question should have hints");
            Assert.AreEqual(4, question["hints"].Count, "The extracted question should have 4 hints");
            Assert.AreEqual("PC", question["hints"][3].ToString(), "The 4th hint of the extracted question should be PC");

            // Answers
            Assert.AreNotEqual(null, parsedJson["answers"], "Extractor should find answers in the HTML file");
            Assert.AreEqual(2, parsedJson["answers"].Count, "Extractor should find two answers in the thread summary of the HTML file");

            var secondAnswer = parsedJson["answers"][1];

            Assert.AreEqual("Most Helpful Reply", secondAnswer["type"].Value, "The extracted type of the answer is incorrect");
            Assert.AreNotEqual(null, secondAnswer["content"], "The content array in the extracted answer should not be null");
            Assert.IsTrue(secondAnswer["content"].Count > 0, "The content array in the extracted answer should have one or more items");

            // Assert presence first: dereferencing a missing (null) token through dynamic
            // would throw a binder exception instead of producing a readable test failure.
            Assert.AreNotEqual(null, secondAnswer["lists"], "The extracted answer should have a lists array");
            Assert.AreEqual(4, secondAnswer["lists"].Count, "The lists array should have 4 items");
            Assert.IsTrue(secondAnswer["lists"][0]["items"].Count > 0, "First item in the lists array should have at least one item");

            // Check if textAboveLength exists in each list
            foreach (var answer in parsedJson["answers"])
            {
                var lists = answer["lists"];

                // Answers without a lists property are allowed and simply skipped.
                if (lists != null)
                {
                    foreach (var list in lists)
                    {
                        var textAboveLengthToken = list["textAboveLength"];

                        // Fail with a clear message when the property is absent rather than
                        // crashing on the .Type access below.
                        Assert.AreNotEqual(null, textAboveLengthToken, "Each extracted list should have a textAboveLength property");
                        Assert.AreEqual(JTokenType.Integer, textAboveLengthToken.Type, "The extracted textAboveLength should be an integer");

                        var textAboveLength = ((JValue)textAboveLengthToken).ToObject<int>();
                        Assert.IsTrue(textAboveLength > 0, string.Format(CultureInfo.InvariantCulture, "textAboveLength was not greater than 0. The extracted value is: {0}", textAboveLength));
                    }
                }
            }
        }