Ejemplo n.º 1
0
        /// <summary>
        /// Adopts the absolute <c>href</c> of the given base element as the base URI,
        /// unless a base URI has already been taken from the document.
        /// </summary>
        /// <param name="base">the element whose <c>href</c> attribute is resolved via <c>AbsUrl</c></param>
        internal virtual void MaybeSetBaseUri(iText.StyledXmlParser.Jsoup.Nodes.Element @base)
        {
            // Only the first <base href> encountered during the parse is honoured.
            if (baseUriSetFromDoc)
            {
                return;
            }

            String resolvedHref = @base.AbsUrl("href");

            // An empty result means there is no usable href (e.g. <base target>); ignore it.
            if (resolvedHref.Length == 0)
            {
                return;
            }

            baseUri           = resolvedHref;
            baseUriSetFromDoc = true;
            doc.SetBaseUri(resolvedHref);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses the <c>baidu-cn-home.html</c> fixture without an explicit charset and checks that
        /// the charset is picked up from the document itself, that Chinese text is usable in
        /// attribute values and selectors, and that output is escaped after switching to ASCII.
        /// </summary>
        public virtual void TestBaidu()
        {
            // tests <meta http-equiv="Content-Type" content="text/html;charset=gb2312">
            FileInfo @in = iText.StyledXmlParser.Jsoup.PortTestUtil.GetFile("/htmltests/baidu-cn-home.html");
            // http charset is gb2312, but NOT specifying it (null), to test http-equiv parse
            Document doc = iText.StyledXmlParser.Jsoup.Jsoup.Parse(@in, null, "http://www.baidu.com");

            iText.StyledXmlParser.Jsoup.Nodes.Element submit = doc.Select("#su").First();
            NUnit.Framework.Assert.AreEqual("百度一下", submit.Attr("value"));
            // test from attribute match
            submit = doc.Select("input[value=百度一下]").First();
            NUnit.Framework.Assert.AreEqual("su", submit.Id());
            iText.StyledXmlParser.Jsoup.Nodes.Element newsLink = doc.Select("a:contains(新)").First();
            NUnit.Framework.Assert.AreEqual(newsHref, newsLink.AbsUrl("href"));
            // check auto-detect from meta
            NUnit.Framework.Assert.AreEqual("GB2312", doc.OutputSettings().Charset().DisplayName());
            // The title contains a full-width comma (U+FF0C), as the &#xff0c; escape in the ASCII
            // assertion below shows; the expected text must use that same character.
            NUnit.Framework.Assert.AreEqual("<title>百度一下，你就知道      </title>", doc.Select("title").OuterHtml());
            // After switching the output charset to ASCII, the non-ASCII characters are expected as
            // numeric character references.
            doc.OutputSettings().Charset("ascii");
            NUnit.Framework.Assert.AreEqual("<title>&#x767e;&#x5ea6;&#x4e00;&#x4e0b;&#xff0c;&#x4f60;&#x5c31;&#x77e5;&#x9053;      </title>"
                                            , doc.Select("title").OuterHtml());
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks whether the value of an attribute starts with one of the allowed protocols.
        /// </summary>
        /// <remarks>
        /// The attribute value is first resolved to an absolute URL through the element. Unless
        /// <c>preserveRelativeLinks</c> is set, the attribute is overwritten with the value that was
        /// tested, so the attribute may be modified as a side effect of this call.
        /// </remarks>
        /// <param name="el">the element owning the attribute; used to resolve the absolute URL</param>
        /// <param name="attr">the attribute to test (and possibly rewrite)</param>
        /// <param name="protocols">the allowed protocols; the entry <c>#</c> stands for anchor links</param>
        /// <returns>
        /// <c>true</c> if the value is accepted by <c>IsValidAnchor</c> for a <c>#</c> entry, or if
        /// its lower-cased form starts with <c>protocol + ":"</c> for any other entry;
        /// <c>false</c> otherwise.
        /// </returns>
        private bool TestValidProtocol(iText.StyledXmlParser.Jsoup.Nodes.Element el, iText.StyledXmlParser.Jsoup.Nodes.Attribute
                                       attr, ICollection <Whitelist.Protocol> protocols)
        {
            // try to resolve relative urls to abs, and optionally update the attribute so output html has abs.
            String value = el.AbsUrl(attr.Key);

            if (value.Length == 0)
            {
                // if it could not be made abs, run as-is to allow custom unknown protocols
                value = attr.Value;
            }
            if (!preserveRelativeLinks)
            {
                attr.SetValue(value);
            }

            // Lower-cased copy of the value, computed at most once and only when a scheme
            // comparison is actually needed (it does not depend on the protocol being tested).
            String lowerValue = null;

            foreach (Whitelist.Protocol protocol in protocols)
            {
                String prot = protocol.ToString();
                if (prot.Equals("#"))
                {
                    // allows anchor links
                    if (IsValidAnchor(value))
                    {
                        return true;
                    }
                    continue;
                }

                if (lowerValue == null)
                {
                    lowerValue = value.ToLowerInvariant();
                }
                // Ordinal comparison: the scheme prefix must match character for character.
                // A culture-sensitive StartsWith may ignore zero-width characters and behaves
                // differently depending on the current culture, which is unsafe for an allow-list.
                if (lowerValue.StartsWith(prot + ":", System.StringComparison.Ordinal))
                {
                    return true;
                }
            }
            return false;
        }