/// <summary>
/// Runs an <c>HtmlUriExtractor</c> over the given markup until it reports EOF
/// and returns the URIs it collected.
/// </summary>
/// <param name="html">The HTML text handed to the extractor.</param>
/// <param name="root">The root URI handed to the extractor.</param>
/// <returns>The extractor's <c>Uris</c> list once reading has finished.</returns>
public static List<HtmlUri> Extract(string html, Uri root)
{
    HtmlUriExtractor extractor = new HtmlUriExtractor(html, root);

    // All the work happens inside Read(); the loop only drives it to the end of input.
    for (; !extractor.EOF; extractor.Read())
    {
    }

    return extractor.Uris;
}
/// <summary>
/// Passes the value of "src" and "href" attributes through
/// <c>HtmlUriExtractor.TryCreate</c> together with <c>BaseHref</c> (when one is set),
/// then hands the attribute on to the base implementation.
/// </summary>
/// <param name="name">The attribute name; matched case-insensitively via lower-casing.</param>
/// <param name="value">The attribute value; replaced in place when it is rewritten.</param>
/// <returns>Whatever the base implementation returns for the (possibly rewritten) attribute.</returns>
protected override bool OnAttributeFound(ref string name, ref string value)
{
    string loweredName = name.ToLower();
    bool carriesUri = loweredName == "src" || loweredName == "href";

    if (carriesUri && BaseHref != null)
    {
        value = HtmlUriExtractor.TryCreate(BaseHref, value);
    }

    return base.OnAttributeFound(ref name, ref value);
}
/// <summary>
/// Writes the attributes found at the reader's current position. When
/// <c>Options.FilterOutput</c> is enabled, attributes that are not allowed are dropped
/// and selected attribute values are rewritten; otherwise the call is forwarded
/// unchanged to the base implementation.
/// </summary>
/// <param name="reader">
/// The reader to copy attributes from. It must be positioned on an element, an XML
/// declaration or an attribute.
/// </param>
/// <param name="defattr">
/// <c>true</c> to also copy attributes for which <c>reader.IsDefault</c> is true.
/// </param>
/// <exception cref="ArgumentNullException">
/// Filtering is enabled and <paramref name="reader"/> is <c>null</c>.
/// </exception>
/// <exception cref="XmlException">
/// Filtering is enabled and the reader is not positioned on an element, an XML
/// declaration or an attribute.
/// </exception>
public override void WriteAttributes(XmlReader reader, bool defattr)
{
    if (!Options.FilterOutput)
    {
        base.WriteAttributes(reader, defattr);
        return;
    }

    // The traversal below mirrors XmlWriter.WriteAttributes, with filtering and
    // value rewriting woven in.
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    if ((reader.NodeType == XmlNodeType.Element) || (reader.NodeType == XmlNodeType.XmlDeclaration))
    {
        // Step onto the first attribute, process the whole attribute list through the
        // attribute branch below, then restore the reader to its owning node.
        if (reader.MoveToFirstAttribute())
        {
            WriteAttributes(reader, defattr);
            reader.MoveToElement();
        }
        return;
    }

    if (reader.NodeType != XmlNodeType.Attribute)
    {
        throw new XmlException("Xml_InvalidPosition");
    }

    do
    {
        if (!defattr && reader.IsDefault)
        {
            continue;
        }

        // Invariant lower-casing: a culture-sensitive ToLower() maps 'I' to a dotless
        // 'ı' under Turkish/Azeri cultures, so names such as WIDTH, HEIGHT or ID would
        // stop matching the literals and the allow-list below.
        string attributename = reader.LocalName.ToLowerInvariant();
        bool canWrite = IsFilteredAttributeAllowed(attributename);

        if (canWrite)
        {
            WriteStartAttribute(reader.Prefix, reader.LocalName, reader.NamespaceURI);
        }

        // The value nodes must be consumed even for a dropped attribute so the reader
        // keeps advancing; they are simply not written.
        while (reader.ReadAttributeValue())
        {
            if (!canWrite)
            {
                continue;
            }

            if (reader.NodeType == XmlNodeType.EntityReference)
            {
                WriteEntityRef(reader.Name);
            }
            else
            {
                WriteString(RewriteFilteredAttributeValue(attributename, reader.Value));
            }
        }

        if (canWrite)
        {
            WriteEndAttribute();
        }
    } while (reader.MoveToNextAttribute());
}

/// <summary>
/// Decides whether an attribute of the element named by <c>LastStartElement</c> may be written.
/// </summary>
/// <param name="attributename">The lower-cased local name of the attribute.</param>
/// <returns><c>true</c> if the attribute should be written; otherwise <c>false</c>.</returns>
private bool IsFilteredAttributeAllowed(string attributename)
{
    // NOTE(review): "embed" and "img" bypass Options.AllowedAttributes entirely, so any
    // attribute on those elements (other than the img size case below) is written
    // unfiltered. Confirm this is intended for untrusted markup.
    switch (LastStartElement)
    {
        case "embed":
            return true;

        case "img":
            // A non-positive width AND height in RewriteImgSize strips both size attributes.
            bool stripSize = Options.RewriteImgSize.HasValue
                && Options.RewriteImgSize.Value.Width <= 0
                && Options.RewriteImgSize.Value.Height <= 0
                && (attributename == "width" || attributename == "height");
            return !stripSize;

        default:
            return Array.IndexOf(Options.AllowedAttributes, attributename) >= 0;
    }
}

/// <summary>
/// Applies the configured rewrites (base href resolution, link and image redirection,
/// forced image size, stylesheet stripping) to a single attribute value chunk.
/// </summary>
/// <param name="attributename">The lower-cased local name of the attribute.</param>
/// <param name="rawValue">The value text as reported by the reader.</param>
/// <returns>The text to write for this chunk.</returns>
private string RewriteFilteredAttributeValue(string attributename, string rawValue)
{
    string value = rawValue;
    bool isAnchorHref = LastStartElement == "a" && attributename == "href";
    bool isImgSrc = LastStartElement == "img" && attributename == "src";
    bool isEmbedSrc = LastStartElement == "embed" && attributename == "src";

    // Anchor targets: resolve against the base href first, then wrap in the redirect URL.
    if (Options.BaseHref != null && isAnchorHref)
    {
        value = HtmlUriExtractor.TryCreate(Options.BaseHref, rawValue, value);
    }
    if (Options.RewriteHref != null && isAnchorHref)
    {
        value = string.Format("{0}&Url={1}", Options.RewriteHref.OriginalString, Renderer.UrlEncode(value));
    }

    // Image / embed sources: resolve against the base href.
    if (Options.BaseHref != null && (isImgSrc || isEmbedSrc))
    {
        value = HtmlUriExtractor.TryCreate(Options.BaseHref, rawValue, value);
    }

    // Image source redirection, or forced image dimensions.
    if (Options.RewriteImgSrc != null && isImgSrc)
    {
        value = Options.RewriteImgSrc.ToString().Replace("{url}", Renderer.UrlEncode(value));
    }
    else if (Options.RewriteImgSize.HasValue && LastStartElement == "img" && attributename == "width")
    {
        value = Options.RewriteImgSize.Value.Width.ToString();
    }
    else if (Options.RewriteImgSize.HasValue && LastStartElement == "img" && attributename == "height")
    {
        value = Options.RewriteImgSize.Value.Height.ToString();
    }

    // Rename rel="stylesheet" on link elements to "stylesheet-stripped".
    if (LastStartElement == "link" && attributename == "rel" && value.ToLowerInvariant() == "stylesheet")
    {
        value = "stylesheet-stripped";
    }

    return value;
}
/// <summary>
/// Copies the attributes at the reader's current position to this writer. While
/// <c>BaseHref</c> is set, the <c>href</c> of "a" elements and the <c>src</c> of
/// "img"/"embed" elements are passed through <c>HtmlUriExtractor.TryCreate</c>.
/// </summary>
/// <param name="reader">
/// The reader to copy from; it must be positioned on an element, an XML declaration
/// or an attribute.
/// </param>
/// <param name="defattr">
/// <c>true</c> to also copy attributes for which <c>reader.IsDefault</c> is true.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
/// <exception cref="XmlException">The reader is positioned on any other node type.</exception>
public override void WriteAttributes(XmlReader reader, bool defattr)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    XmlNodeType startType = reader.NodeType;
    if (startType == XmlNodeType.Element || startType == XmlNodeType.XmlDeclaration)
    {
        // Step onto the first attribute, let the attribute branch below handle the
        // whole list, then return the reader to the owning node.
        if (reader.MoveToFirstAttribute())
        {
            this.WriteAttributes(reader, defattr);
            reader.MoveToElement();
        }
        return;
    }

    if (startType != XmlNodeType.Attribute)
    {
        throw new XmlException("Xml_InvalidPosition");
    }

    do
    {
        if (!defattr && reader.IsDefault)
        {
            continue;
        }

        string loweredName = reader.LocalName.ToLower();
        WriteStartAttribute(reader.Prefix, reader.LocalName, reader.NamespaceURI);

        while (reader.ReadAttributeValue())
        {
            if (reader.NodeType == XmlNodeType.EntityReference)
            {
                WriteEntityRef(reader.Name);
            }
            else
            {
                string text = reader.Value;

                if (BaseHref != null)
                {
                    bool anchorTarget = LastStartElement == "a" && loweredName == "href";
                    bool mediaSource = (LastStartElement == "img" || LastStartElement == "embed")
                        && loweredName == "src";

                    // The two cases are mutually exclusive, so a single call covers both.
                    if (anchorTarget || mediaSource)
                    {
                        text = HtmlUriExtractor.TryCreate(BaseHref, reader.Value, text);
                    }
                }

                WriteString(text);
            }
        }

        WriteEndAttribute();
    } while (reader.MoveToNextAttribute());
}
/// <summary>
/// Creates an <c>HtmlUriExtractor</c> for the supplied markup and root URI, reads
/// until the extractor reports EOF, and returns its <c>Uris</c> collection.
/// </summary>
/// <param name="html">The HTML text handed to the extractor.</param>
/// <param name="root">The root URI handed to the extractor.</param>
/// <returns>The list exposed by the extractor's <c>Uris</c> property after reading completes.</returns>
public static List<HtmlUri> Extract(string html, Uri root)
{
    HtmlUriExtractor scanner = new HtmlUriExtractor(html, root);

    while (true)
    {
        if (scanner.EOF)
        {
            break;
        }

        scanner.Read();
    }

    return scanner.Uris;
}