/// <summary>
/// Parses the supplied HTML with this parser's tree builder.
/// </summary>
/// <remarks>
/// The error list is replaced on every call: it is created with
/// <c>ParseErrorList.Tracking(_maxErrors)</c> when <c>IsTrackErrors</c> is set,
/// and with <c>ParseErrorList.NoTracking()</c> otherwise.
/// </remarks>
/// <param name="html">HTML to parse</param>
/// <param name="baseUri">base URI handed to the tree builder</param>
/// <returns>the document produced by the tree builder</returns>
public Document ParseInput(string html, string baseUri)
{
    if (IsTrackErrors)
    {
        _errors = ParseErrorList.Tracking(_maxErrors);
    }
    else
    {
        _errors = ParseErrorList.NoTracking();
    }

    return _treeBuilder.Parse(html, baseUri, _errors);
}
/// <summary>
/// Runs a complete parse: initialises the parser state for the given input,
/// runs the parser, and hands back the document that was built.
/// </summary>
/// <param name="input">text to parse</param>
/// <param name="baseUri">base URI passed to <c>InitialiseParse</c></param>
/// <param name="errors">error list passed to <c>InitialiseParse</c></param>
/// <returns>the document held in <c>_doc</c> after the parser has run</returns>
public virtual Document Parse(string input, string baseUri, ParseErrorList errors)
{
    InitialiseParse(input, baseUri, errors);
    RunParser();

    return _doc;
}
protected ParseErrorList _errors; // error list for the current parse (original note: null when not tracking errors)

/// <summary>
/// Resets the builder's per-parse state: a new document, character reader,
/// tokeniser and element stack are created, and the error list and base URI
/// are recorded.
/// </summary>
/// <param name="input">text to parse; must not be null</param>
/// <param name="baseUri">base URI for the new document; must not be null</param>
/// <param name="errors">error list stored on the builder and passed to the tokeniser</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="baseUri"/> is null.
/// </exception>
protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    // The single-string ArgumentNullException constructor treats its argument as the
    // parameter name, so the (paramName, message) overload is used to keep the
    // descriptive text as the actual message.
    if (input == null)
    {
        throw new ArgumentNullException("input", "String input must not be null");
    }

    if (baseUri == null)
    {
        throw new ArgumentNullException("baseUri", "BaseURI must not be null");
    }

    _doc = new Document(baseUri);
    _reader = new CharacterReader(input);
    _errors = errors;
    _tokeniser = new Tokeniser(_reader, errors);
    _stack = new DescendableLinkedList<Element>();
    _baseUri = baseUri;
}
/// <summary>
/// Convenience overload that parses with a non-tracking error list.
/// </summary>
/// <param name="input">text to parse</param>
/// <param name="baseUri">base URI forwarded to the three-argument overload</param>
/// <returns>the document returned by the three-argument overload</returns>
public Document Parse(string input, string baseUri)
{
    ParseErrorList untracked = ParseErrorList.NoTracking();
    return Parse(input, baseUri, untracked);
}
/// <summary>
/// Performs the base initialisation and then adds the document to the end of the stack.
/// </summary>
/// <param name="input">text to parse</param>
/// <param name="baseUri">base URI forwarded to the base implementation</param>
/// <param name="errors">error list forwarded to the base implementation</param>
protected override void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    base.InitialiseParse(input, baseUri, errors);

    // The document itself goes onto the stack here; this differs from
    // HtmlTreeBuilder, where the document is not on the stack.
    _stack.AddLast(_doc);
}
/// <summary>
/// Creates a tokeniser bound to the given reader and error list.
/// </summary>
/// <param name="reader">character reader stored for use by the tokeniser</param>
/// <param name="errors">error list stored for use by the tokeniser</param>
public Tokeniser(CharacterReader reader, ParseErrorList errors)
{
    _reader = reader;
    _errors = errors;
}
// static parse functions below

/// <summary>
/// Parses HTML into a Document using a new <c>HtmlTreeBuilder</c> and a non-tracking error list.
/// </summary>
/// <param name="html">HTML to parse</param>
/// <param name="baseUri">base URI of document (i.e. original fetch location), for resolving relative URLs.</param>
/// <returns>parsed Document</returns>
public static Document Parse(string html, string baseUri)
{
    return new HtmlTreeBuilder().Parse(html, baseUri, ParseErrorList.NoTracking());
}
/// <summary>
/// Utility method to unescape HTML entities from a string.
/// </summary>
/// <remarks>
/// The string is wrapped in a <c>CharacterReader</c> and handed to a throwaway
/// <c>Tokeniser</c> created with a non-tracking error list.
/// </remarks>
/// <param name="s">HTML escaped string</param>
/// <param name="inAttribute">If the string is to be unescaped in strict mode (as attributes are)</param>
/// <returns>An unescaped string</returns>
public static string UnescapeEntities(string s, bool inAttribute)
{
    CharacterReader source = new CharacterReader(s);
    Tokeniser unescaper = new Tokeniser(source, ParseErrorList.NoTracking());

    return unescaper.UnescapeEntities(inAttribute);
}
/// <summary>
/// Parse a fragment of HTML into a list of nodes, using a new <c>HtmlTreeBuilder</c>
/// and a non-tracking error list. The context element, if supplied, supplies parsing context.
/// </summary>
/// <param name="fragmentHtml">the fragment of HTML to parse</param>
/// <param name="context">(optional) the element that this HTML fragment is being parsed for (i.e. for inner HTML). This provides stack context (for implicit element creation).</param>
/// <param name="baseUri">base URI of document (i.e. original fetch location), for resolving relative URLs.</param>
/// <returns>list of nodes parsed from the input HTML. Note that the context element, if supplied, is not modified.</returns>
public static IList<Node> ParseFragment(string fragmentHtml, Element context, string baseUri)
{
    return new HtmlTreeBuilder().ParseFragment(fragmentHtml, context, baseUri, ParseErrorList.NoTracking());
}
/// <summary>
/// Parses an HTML fragment, optionally in the context of an existing element.
/// </summary>
/// <remarks>
/// Without a context element the fragment is parsed directly into the document and the
/// document's child nodes are returned. With a context element, the tokeniser's starting
/// state is chosen from the context's tag name, a synthetic <c>html</c> root element is
/// appended to the document and pushed onto the front of the stack, and that root's child
/// nodes are returned.
/// </remarks>
/// <param name="inputFragment">the fragment of HTML to parse</param>
/// <param name="context">element supplying parsing context; may be null</param>
/// <param name="baseUri">base URI used for the document and the synthetic root element</param>
/// <param name="errors">error list passed to <c>InitialiseParse</c></param>
/// <returns>the nodes parsed from the fragment</returns>
public IList<Node> ParseFragment(string inputFragment, Element context, string baseUri, ParseErrorList errors)
{
    _state = HtmlTreeBuilderState.Initial;
    InitialiseParse(inputFragment, baseUri, errors);
    _contextElement = context;
    _fragmentParsing = true;

    // No context element: nothing to set up, parse straight into the document.
    if (context == null)
    {
        RunParser();
        return _doc.ChildNodes;
    }

    // Quirks setup: copy the quirks mode of the context's owner document, if it has one.
    if (context.OwnerDocument != null)
    {
        _doc.QuirksMode(context.OwnerDocument.QuirksMode());
    }

    // Initialise the tokeniser state from the context element's tag name.
    string contextTag = context.TagName();
    if (StringUtil.In(contextTag, "title", "textarea"))
    {
        _tokeniser.Transition(TokeniserState.RcData);
    }
    else if (StringUtil.In(contextTag, "iframe", "noembed", "noframes", "style", "xmp"))
    {
        _tokeniser.Transition(TokeniserState.RawText);
    }
    else if (contextTag.Equals("script"))
    {
        _tokeniser.Transition(TokeniserState.ScriptData);
    }
    else
    {
        // Default state. "noscript" (would be rawtext if scripting were enabled)
        // and "plaintext" also start in the data state.
        _tokeniser.Transition(TokeniserState.Data);
    }

    Element fragmentRoot = new Element(Tag.ValueOf("html"), baseUri);
    _doc.AppendChild(fragmentRoot);
    _stack.AddFirst(fragmentRoot);
    ResetInsertionMode();
    // todo: setup form element to nearest form on context (up ancestor chain)

    RunParser();
    return fragmentRoot.ChildNodes;
}
/// <summary>
/// Resets the tree builder state to <c>HtmlTreeBuilderState.Initial</c> and then
/// delegates to the base parse implementation.
/// </summary>
/// <param name="input">text to parse</param>
/// <param name="baseUri">base URI forwarded to the base implementation</param>
/// <param name="errors">error list forwarded to the base implementation</param>
/// <returns>the document returned by the base implementation</returns>
public override Document Parse(string input, string baseUri, ParseErrorList errors)
{
    _state = HtmlTreeBuilderState.Initial;

    Document parsed = base.Parse(input, baseUri, errors);
    return parsed;
}