Ejemplo n.º 1
0
        /// <summary>
        /// Builds an <c>HtmlUriExtractor</c> over <paramref name="html"/> and
        /// <paramref name="root"/>, reads it until it reports EOF, and returns
        /// the extractor's <c>Uris</c> list.
        /// </summary>
        /// <param name="html">Markup handed to the extractor's constructor.</param>
        /// <param name="root">Uri handed to the extractor's constructor.</param>
        /// <returns>The <c>Uris</c> list exposed by the extractor after reading completes.</returns>
        public static List<HtmlUri> Extract(string html, Uri root)
        {
            HtmlUriExtractor parser = new HtmlUriExtractor(html, root);

            // Each Read() call advances the extractor; the loop body itself has nothing to do.
            for (; !parser.EOF; parser.Read())
            {
            }

            return parser.Uris;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Passes the value of <c>src</c> and <c>href</c> attributes (matched
        /// case-insensitively) through <c>HtmlUriExtractor.TryCreate</c> when
        /// <c>BaseHref</c> is set, then defers to the base implementation.
        /// </summary>
        /// <param name="name">Attribute name; forwarded to the base class unchanged.</param>
        /// <param name="value">Attribute value; replaced for src/href when a base href exists.</param>
        /// <returns>Whatever the base implementation returns.</returns>
        protected override bool OnAttributeFound(ref string name, ref string value)
        {
            string lowered = name.ToLower();
            bool carriesUri = lowered == "src" || lowered == "href";

            if (carriesUri && BaseHref != null)
            {
                value = HtmlUriExtractor.TryCreate(BaseHref, value);
            }

            return base.OnAttributeFound(ref name, ref value);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Writes the attributes found at the reader's current position. This method is
        /// overridden to filter out attributes which are not allowed: when
        /// <c>Options.FilterOutput</c> is set, disallowed attributes are dropped and selected
        /// values (anchor hrefs, img/embed sources, img sizes, stylesheet links) are rewritten.
        /// Otherwise the call is forwarded unchanged to the base implementation.
        /// </summary>
        /// <param name="reader">Reader positioned on an element, XML declaration or attribute node.</param>
        /// <param name="defattr">When true, attributes for which <c>reader.IsDefault</c> is true are written too.</param>
        /// <exception cref="ArgumentNullException">Filtering is enabled and <paramref name="reader"/> is null.</exception>
        /// <exception cref="XmlException">Filtering is enabled and the reader is on any other node type.</exception>
        public override void WriteAttributes(XmlReader reader, bool defattr)
        {
            if (!Options.FilterOutput)
            {
                base.WriteAttributes(reader, defattr);
                return;
            }

            // The following code is copied from implementation of XmlWriter's
            // WriteAttributes method.
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            if ((reader.NodeType == XmlNodeType.Element) || (reader.NodeType == XmlNodeType.XmlDeclaration))
            {
                // Step onto the first attribute, process the whole attribute list through
                // the branch below, then restore the reader to the owning element.
                if (reader.MoveToFirstAttribute())
                {
                    WriteAttributes(reader, defattr);
                    reader.MoveToElement();
                }
                return;
            }

            if (reader.NodeType != XmlNodeType.Attribute)
            {
                throw new XmlException("Xml_InvalidPosition");
            }

            do
            {
                if (!defattr && reader.IsDefault)
                {
                    continue;
                }

                // Invariant lower-casing: a culture-sensitive ToLower() would turn "WIDTH"
                // into "wıdth" under a Turkish/Azeri current culture, so the comparisons
                // below (and the allow-list lookup) would miss upper-case attributes.
                string attributename = reader.LocalName.ToLowerInvariant();

                // Check if the attribute is allowed
                bool canWrite = true;

                switch (LastStartElement)
                {
                case "embed":
                    canWrite = true;
                    break;

                case "img":
                    // NOTE(review): every attribute on <img> (and <embed> above) bypasses
                    // Options.AllowedAttributes; only width/height can be dropped here.
                    // Confirm this is intended.
                    if (Options.RewriteImgSize.HasValue &&
                        Options.RewriteImgSize.Value.Width <= 0 &&
                        Options.RewriteImgSize.Value.Height <= 0 &&
                        (attributename == "width" || attributename == "height"))
                    {
                        canWrite = false;
                    }
                    break;

                default:
                    canWrite = (Array.IndexOf(Options.AllowedAttributes, attributename) >= 0);
                    break;
                }

                // If allowed, write the attribute
                if (canWrite)
                {
                    WriteStartAttribute(reader.Prefix, reader.LocalName, reader.NamespaceURI);
                }

                // The value nodes are always consumed, even for a dropped attribute, so the
                // reader advances the same way in both cases.
                while (reader.ReadAttributeValue())
                {
                    if (reader.NodeType == XmlNodeType.EntityReference)
                    {
                        if (canWrite)
                        {
                            WriteEntityRef(reader.Name);
                        }

                        continue;
                    }

                    if (!canWrite)
                    {
                        continue;
                    }

                    string value = reader.Value;

                    if (Options.BaseHref != null && LastStartElement == "a" && attributename == "href")
                    {
                        value = HtmlUriExtractor.TryCreate(Options.BaseHref, reader.Value, value);
                    }

                    if (Options.RewriteHref != null && LastStartElement == "a" && attributename == "href")
                    {
                        // NOTE(review): the literal already contains "&amp;"; if WriteString
                        // escapes '&' the output would be double-escaped. Confirm.
                        value = string.Format("{0}&amp;Url={1}", Options.RewriteHref.OriginalString,
                                              Renderer.UrlEncode(value));
                    }

                    if (Options.BaseHref != null &&
                        (LastStartElement == "img" || LastStartElement == "embed") &&
                        attributename == "src")
                    {
                        value = HtmlUriExtractor.TryCreate(
                            Options.BaseHref, reader.Value, value);
                    }

                    if (Options.RewriteImgSrc != null &&
                        LastStartElement == "img" &&
                        attributename == "src")
                    {
                        // Substitute the encoded source into the "{url}" placeholder.
                        value = Options.RewriteImgSrc.ToString().Replace("{url}",
                                                                         Renderer.UrlEncode(value));
                    }
                    else if (Options.RewriteImgSize.HasValue &&
                             LastStartElement == "img" &&
                             attributename == "width")
                    {
                        value = Options.RewriteImgSize.Value.Width.ToString();
                    }
                    else if (Options.RewriteImgSize.HasValue &&
                             LastStartElement == "img" &&
                             attributename == "height")
                    {
                        value = Options.RewriteImgSize.Value.Height.ToString();
                    }

                    if (LastStartElement == "link" &&
                        attributename == "rel")
                    {
                        // rel="stylesheet" is replaced by a different token.
                        switch (value.ToLowerInvariant())
                        {
                        case "stylesheet":
                            value = "stylesheet-stripped";
                            break;
                        }
                    }

                    WriteString(value);
                }

                if (canWrite)
                {
                    WriteEndAttribute();
                }
            } while (reader.MoveToNextAttribute());
        }
        /// <summary>
        /// Copies the attributes at the reader's current position to this writer. When
        /// <c>BaseHref</c> is set, <c>href</c> values on <c>a</c> elements and <c>src</c>
        /// values on <c>img</c>/<c>embed</c> elements are passed through
        /// <c>HtmlUriExtractor.TryCreate</c> first.
        /// </summary>
        /// <param name="reader">Reader positioned on an element, XML declaration or attribute node.</param>
        /// <param name="defattr">When true, attributes for which <c>reader.IsDefault</c> is true are written too.</param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="XmlException">The reader is positioned on any other node type.</exception>
        public override void WriteAttributes(XmlReader reader, bool defattr)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            XmlNodeType startType = reader.NodeType;

            if (startType == XmlNodeType.Element || startType == XmlNodeType.XmlDeclaration)
            {
                // Walk the attribute list via the attribute branch below, then return
                // the reader to the owning element.
                if (reader.MoveToFirstAttribute())
                {
                    WriteAttributes(reader, defattr);
                    reader.MoveToElement();
                }
                return;
            }

            if (startType != XmlNodeType.Attribute)
            {
                throw new XmlException("Xml_InvalidPosition");
            }

            do
            {
                if (!defattr && reader.IsDefault)
                {
                    continue;
                }

                string loweredName = reader.LocalName.ToLower();

                WriteStartAttribute(reader.Prefix, reader.LocalName, reader.NamespaceURI);

                while (reader.ReadAttributeValue())
                {
                    if (reader.NodeType == XmlNodeType.EntityReference)
                    {
                        WriteEntityRef(reader.Name);
                    }
                    else
                    {
                        string text = reader.Value;

                        // Anchor hrefs and img/embed sources are the only values resolved
                        // against the base href.
                        bool resolve = BaseHref != null &&
                                       ((LastStartElement == "a" && loweredName == "href") ||
                                        ((LastStartElement == "img" || LastStartElement == "embed") &&
                                         loweredName == "src"));

                        if (resolve)
                        {
                            text = HtmlUriExtractor.TryCreate(BaseHref, reader.Value, text);
                        }

                        WriteString(text);
                    }
                }

                WriteEndAttribute();
            } while (reader.MoveToNextAttribute());
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Creates an <c>HtmlUriExtractor</c> for the given markup and root, reads it
 /// until EOF is reported, and hands back the extractor's <c>Uris</c> list.
 /// </summary>
 /// <param name="html">Markup handed to the extractor's constructor.</param>
 /// <param name="root">Uri handed to the extractor's constructor.</param>
 /// <returns>The <c>Uris</c> list exposed by the extractor once reading has finished.</returns>
 public static List<HtmlUri> Extract(string html, Uri root)
 {
     HtmlUriExtractor extractor = new HtmlUriExtractor(html, root);

     // Keep reading until the extractor signals that the input is exhausted.
     while (!extractor.EOF)
     {
         extractor.Read();
     }

     return extractor.Uris;
 }