// Builds a MultiExtractor from the "*.json" config files in the TestData folder, runs it
// against a saved answers.microsoft.com thread page, and verifies the JSON it returns:
// the question (title, content, views), the question hints, the answers, and the lists
// nested inside each answer.
public void MultiWebsiteExtractionTest()
{
    var multiExtractor = new MultiExtractor(configRootFolder: "TestData", configFilesPattern: "*.json");

    var json = multiExtractor.ParsePage(
        url: "http://answers.microsoft.com/en-us/windows/forum/windows_10-win_upgrade/i-want-to-reserve-my-free-copy-of-windows-10-but-i/9c3f7f56-3da8-4b40-a30f-e33772439ee1",
        html: File.ReadAllText(Path.Combine("TestData", "answers.microsoft.com.html")));

    // The result is navigated dynamically; indexing a property that is absent yields null,
    // so every node is asserted to exist before any of its members are dereferenced.
    dynamic parsedJson = JsonConvert.DeserializeObject(json);

    // Question
    Assert.AreNotEqual(null, parsedJson["question"], "Extractor should find a question in the HTML file");

    var question = parsedJson["question"];
    Assert.AreEqual("I want to reserve my free copy of Windows 10, but I don’t see the icon on the taskbar", question["title"].Value, "The extracted title is incorrect");
    Assert.AreNotEqual(null, question["content"], "The extracted question should have a content");
    Assert.IsTrue(question["content"].Value.Length > 0, "The extracted question content should have a length > 0");
    Assert.AreEqual(1642653, question["views"].Value, "The extracted views snippet is incorrect");

    // Question context
    Assert.AreNotEqual(null, question["hints"], "The extracted question should have hints");
    Assert.AreEqual(4, question["hints"].Count, "The extracted question should have 4 hints");
    Assert.AreEqual("PC", question["hints"][3].ToString(), "The 4th hint of the extracted question should be PC");

    // Answers
    Assert.AreNotEqual(null, parsedJson["answers"], "Extractor should find answers in the HTML file");
    Assert.AreEqual(2, parsedJson["answers"].Count, "Extractor should find two answers in the thread summary of the HTML file");

    var secondAnswer = parsedJson["answers"][1];
    Assert.AreEqual("Most Helpful Reply", secondAnswer["type"].Value, "The extracted type of the answer is incorrect");
    Assert.AreNotEqual(null, secondAnswer["content"], "The content array in the extracted answer should not be null");
    Assert.IsTrue(secondAnswer["content"].Count > 0, "The content array in the extracted answer should have one or more items");

    // Fail with a clear message (rather than a runtime binder error) when the lists array is missing.
    Assert.AreNotEqual(null, secondAnswer["lists"], "The extracted answer should have a lists array");
    Assert.AreEqual(4, secondAnswer["lists"].Count, "The lists array should have 4 items");
    Assert.IsTrue(secondAnswer["lists"][0]["items"].Count > 0, "First item in the lists array should have at least one item");

    // Check if textAboveLength exists in each list
    foreach (var answer in parsedJson["answers"])
    {
        var lists = answer["lists"];

        // Answers without a lists array are skipped.
        if (lists != null)
        {
            foreach (var list in lists)
            {
                // Assert existence first so a missing property is reported as a test failure
                // instead of an exception from dereferencing a null dynamic value.
                Assert.AreNotEqual(null, list["textAboveLength"], "Each extracted list should have a textAboveLength property");
                Assert.AreEqual(JTokenType.Integer, list["textAboveLength"].Type, "The extracted textAboveLength should be an integer");

                var textAboveLength = ((JValue)list["textAboveLength"]).ToObject<int>();
                Assert.IsTrue(textAboveLength > 0, string.Format(CultureInfo.InvariantCulture, "textAboveLength was not greater than 0. The extracted value is: {0}", textAboveLength));
            }
        }
    }
}