/// <summary>
/// Checks that a misspelled charset name ("UFT8") in the Content-Type header is
/// treated as unsupported, and that parsing falls back to the default charset.
/// </summary>
public void handlesDodgyCharset()
{
    string url = "http://direct.infohound.net/tools/bad-charset.pl";
    IResponse response = NSoupClient.Connect(url).Execute();

    // The raw header is reported as received, misspelling included.
    var contentTypeHeader = response.Header("Content-Type");
    Assert.AreEqual("text/html; charset=UFT8", contentTypeHeader);

    // The header charset is not a supported one, so none is reported before parsing.
    Assert.AreEqual(null, response.Charset());

    // Parsing would throw here if the unsupported charset were actually used.
    Document parsed = response.Parse();
    var bodyText = parsed.Text();
    Assert.IsTrue(bodyText.Contains("Hello!"));

    // Once parsed, the response reports the default charset it fell back to.
    Assert.AreEqual("UTF-8", response.Charset());
}
/// <summary>
/// Fetches the Baidu home page and checks the charset reported by the parsed
/// document and by the response, the presence of the BAIDUID cookie, and the
/// response content type.
/// </summary>
public void fetchBaidu()
{
    // 10 * 1000 is presumably a 10-second timeout in milliseconds; confirm against Timeout().
    var connection = NSoup.NSoupClient.Connect("http://www.baidu.com/").Timeout(10 * 1000);
    IResponse response = connection.Execute();
    Document page = response.Parse();

    // Charset names are upper-cased before comparison so casing differences do not matter.
    var documentCharset = page.OutputSettings().Encoding.WebName.ToUpperInvariant();
    Assert.AreEqual("GB2312", documentCharset);

    var responseCharset = response.Charset().ToUpperInvariant();
    Assert.AreEqual("GB2312", responseCharset);

    Assert.IsTrue(response.HasCookie("BAIDUID"));
    Assert.AreEqual("text/html;charset=gbk", response.ContentType());
}