/// <summary>
/// Parses the supplied HTML into a <see cref="Document"/> using this parser's tree builder.
/// </summary>
/// <remarks>
/// The <c>errors</c> field is replaced on every call: with <c>ParseErrorList.Tracking(maxErrors)</c>
/// when <c>CanTrackErrors</c> is true, otherwise with <c>ParseErrorList.NoTracking()</c>.
/// </remarks>
/// <param name="html">the HTML to parse</param>
/// <param name="baseUri">base URI passed to the tree builder along with the HTML</param>
/// <returns>the document returned by the tree builder</returns>
public Document ParseInput(string html, string baseUri)
{
    // Pick the error list first; the tree builder receives it as its third argument.
    if (CanTrackErrors)
    {
        errors = ParseErrorList.Tracking(maxErrors);
    }
    else
    {
        errors = ParseErrorList.NoTracking();
    }

    return treeBuilder.Parse(html, baseUri, errors);
}
/// <summary>
/// Utility method that unescapes the HTML entities found in a string.
/// </summary>
/// <remarks>
/// The work is delegated to a <c>Tokeniser</c> reading from the input string, created with a
/// non-tracking error list.
/// </remarks>
/// <param name="string">the HTML-escaped string</param>
/// <param name="inAttribute">whether the string should be handled in strict mode (as attributes are)</param>
/// <returns>the unescaped string</returns>
public static string UnescapeEntities(string @string, bool inAttribute)
{
    var reader = new CharacterReader(@string);
    var entityTokeniser = new Tokeniser(reader, ParseErrorList.NoTracking());

    return entityTokeniser.UnescapeEntities(inAttribute);
}
/// <summary>
/// Parses a fragment of XML into a list of nodes.
/// </summary>
/// <remarks>
/// A new <c>XmlTreeBuilder</c> is created for each call and is given a non-tracking error list.
/// </remarks>
/// <param name="fragmentXml">the XML fragment to parse</param>
/// <param name="baseUri">base URI of the document (i.e. original fetch location), used for resolving relative URLs</param>
/// <returns>the list of nodes parsed from the input XML</returns>
public static IReadOnlyList<Node> ParseXmlFragment(string fragmentXml, string baseUri)
{
    var xmlBuilder = new XmlTreeBuilder();
    var untracked = ParseErrorList.NoTracking();

    return xmlBuilder.ParseFragment(fragmentXml, baseUri, untracked);
}
/// <summary>
/// Parses a fragment of HTML into a list of nodes.
/// </summary>
/// <remarks>
/// When a context element is supplied it provides the parsing context. A new
/// <c>HtmlTreeBuilder</c> is created for each call and is given a non-tracking error list.
/// </remarks>
/// <param name="fragmentHtml">the HTML fragment to parse</param>
/// <param name="context">
/// (optional) the element this HTML fragment is being parsed for (i.e. for inner HTML); it
/// provides stack context (for implicit element creation).
/// </param>
/// <param name="baseUri">
/// base URI of the document (i.e. original fetch location), used for resolving relative URLs.
/// </param>
/// <returns>
/// the list of nodes parsed from the input HTML. Note that the context element, if supplied, is not modified.
/// </returns>
public static IReadOnlyList<Node> ParseFragment(string fragmentHtml, Element context, string baseUri)
{
    var htmlBuilder = new HtmlTreeBuilder();
    var untracked = ParseErrorList.NoTracking();

    return htmlBuilder.ParseFragment(fragmentHtml, context, baseUri, untracked);
}
// utility methods

/// <summary>
/// Parses HTML into a <see cref="Document"/>.
/// </summary>
/// <remarks>
/// A new <c>HtmlTreeBuilder</c> is created for each call and is given a non-tracking error list.
/// </remarks>
/// <param name="html">the HTML to parse</param>
/// <param name="baseUri">base URI of the document (i.e. original fetch location), used for resolving relative URLs</param>
/// <returns>the parsed document</returns>
public static Document Parse(string html, string baseUri)
{
    // The local is deliberately typed as the base TreeBuilder so the same Parse overload is bound.
    TreeBuilder htmlBuilder = new HtmlTreeBuilder();
    var untracked = ParseErrorList.NoTracking();

    return htmlBuilder.Parse(html, baseUri, untracked);
}
/// <summary>
/// Parses the input into a <see cref="Document"/> by delegating to the three-argument
/// <c>Parse</c> overload with a non-tracking error list.
/// </summary>
/// <param name="input">the text to parse</param>
/// <param name="baseUri">base URI forwarded unchanged to the three-argument overload</param>
/// <returns>the document returned by the three-argument overload</returns>
internal Document Parse(string input, string baseUri)
{
    var untracked = ParseErrorList.NoTracking();

    return Parse(input, baseUri, untracked);
}