/// <summary>
/// Parses a fragment of markup, optionally in the context of an element.
/// </summary>
/// <param name="inputFragment">The fragment text to parse.</param>
/// <param name="context">The context element; may be null.</param>
/// <param name="baseUri">Base URI passed to the parse initialisation and used for the synthetic root element.</param>
/// <param name="errors">Error list passed through to <c>InitialiseParse</c>.</param>
/// <returns>
/// The child nodes of the synthetic <c>html</c> root when <paramref name="context"/> is given;
/// otherwise the child nodes of the document.
/// </returns>
public IList<Node> ParseFragment(string inputFragment, Element context, string baseUri, ParseErrorList errors)
{
    _state = HtmlTreeBuilderState.Initial;
    InitialiseParse(inputFragment, baseUri, errors);
    _contextElement = context;
    _fragmentParsing = true;

    // Without a context element there is nothing to set up: parse and hand back the document's children.
    if (context == null)
    {
        RunParser();
        return _doc.ChildNodes;
    }

    // Quirks setup: copy the quirks mode of the context's owner document, if it has one.
    if (context.OwnerDocument != null)
    {
        _doc.QuirksMode(context.OwnerDocument.QuirksMode());
    }

    // Initialise the tokeniser state from the context element's tag name.
    string tagName = context.TagName();
    if (StringUtil.In(tagName, "title", "textarea"))
    {
        _tokeniser.Transition(TokeniserState.RcData);
    }
    else if (StringUtil.In(tagName, "iframe", "noembed", "noframes", "style", "xmp"))
    {
        _tokeniser.Transition(TokeniserState.RawText);
    }
    else if (tagName.Equals("script"))
    {
        _tokeniser.Transition(TokeniserState.ScriptData);
    }
    else
    {
        // "noscript" (if scripting enabled, rawtext), "plaintext" and every other tag
        // all start in the Data state.
        _tokeniser.Transition(TokeniserState.Data);
    }

    // Parse into a synthetic html root that is attached to the document and pushed onto the stack.
    Element fragmentRoot = new Element(Tag.ValueOf("html"), baseUri);
    _doc.AppendChild(fragmentRoot);
    _stack.AddFirst(fragmentRoot);
    ResetInsertionMode();
    // todo: setup form element to nearest form on context (up ancestor chain)

    RunParser();
    return fragmentRoot.ChildNodes;
}
/// <summary>
/// Runs the base initialisation, then switches the document's output syntax to XML
/// and places the document itself onto the stack.
/// </summary>
/// <param name="input">Input text, forwarded to the base implementation.</param>
/// <param name="baseUri">Base URI, forwarded to the base implementation.</param>
/// <param name="errors">Error list, forwarded to the base implementation.</param>
internal override void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    base.InitialiseParse(input, baseUri, errors);

    doc.OutputSettings.Syntax = DocumentSyntax.Xml;

    // Place the document onto the stack. Differs from HtmlTreeBuilder (not on stack).
    stack.AddLast(doc);
}
/// <summary>
/// Initialises the parse state for the given input, runs the parser, and returns the built document.
/// </summary>
/// <param name="input">Input text to parse.</param>
/// <param name="baseUri">Base URI passed to <c>InitialiseParse</c>.</param>
/// <param name="errors">Error list passed to <c>InitialiseParse</c>.</param>
/// <returns>The document held in <c>_doc</c> after the parser has run.</returns>
public virtual Document Parse(string input, string baseUri, ParseErrorList errors)
{
    // Set up the per-parse state first; RunParser works on that state.
    InitialiseParse(input, baseUri, errors);

    RunParser();

    return _doc;
}
// current doc we are building into
// the stack of open elements
// current base uri, for creating new elements
// currentToken is used only for error tracking.
// null when not tracking errors

/// <summary>
/// Validates the arguments and creates fresh per-parse state: document, character reader,
/// tokeniser and element stack.
/// </summary>
/// <param name="input">Input text; checked with <c>Validate.NotNull</c>.</param>
/// <param name="baseUri">Base URI; checked with <c>Validate.NotNull</c>.</param>
/// <param name="errors">Error list stored on this builder and handed to the tokeniser.</param>
internal virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    Validate.NotNull(input, "String input must not be null");
    Validate.NotNull(baseUri, "BaseURI must not be null");

    // Plain argument captures.
    this.errors = errors;
    this.baseUri = baseUri;

    // Fresh output document and an empty stack of elements.
    doc = new Document(baseUri);
    stack = new DescendableLinkedList<Element>();

    // Input pipeline: the tokeniser reads from the character reader.
    reader = new CharacterReader(input);
    tokeniser = new Tokeniser(reader, errors);
}
/// <summary>
/// Creates a tokeniser that keeps the given character reader and error list.
/// </summary>
/// <param name="reader">Character reader stored on this tokeniser.</param>
/// <param name="errors">Error list stored on this tokeniser.</param>
internal Tokeniser(CharacterReader reader, ParseErrorList errors)
{
    // NOTE(review): the comments below do not describe any statement in this constructor;
    // they look like descriptions of fields declared elsewhere in the class - confirm.
    // replaces null character
    // html input
    // errors found while tokenising
    // current tokenisation state
    // the token we are about to emit on next read
    // buffers characters to output as one token
    // buffers data looking for </script>
    // tag we are building up
    // doctype building up
    // comment building up
    // the last start tag emitted, to test appropriate end tag

    this.errors = errors;
    this.reader = reader;
}
protected ParseErrorList _errors; // null when not tracking errors

/// <summary>
/// Validates the arguments and creates fresh per-parse state: document, character reader,
/// tokeniser and element stack.
/// </summary>
/// <param name="input">Input text to parse; must not be null.</param>
/// <param name="baseUri">Base URI for the new document; must not be null.</param>
/// <param name="errors">Error list stored in <c>_errors</c> and handed to the tokeniser.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="input"/> or <paramref name="baseUri"/> is null.
/// </exception>
protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    // The single-string ArgumentNullException constructor interprets its argument as the
    // parameter name, so the (paramName, message) overload is used to keep ParamName and
    // Message meaningful.
    if (input == null)
    {
        throw new ArgumentNullException("input", "String input must not be null");
    }

    if (baseUri == null)
    {
        throw new ArgumentNullException("baseUri", "BaseURI must not be null");
    }

    _doc = new Document(baseUri);
    _reader = new CharacterReader(input);
    _errors = errors;
    _tokeniser = new Tokeniser(_reader, errors);
    _stack = new DescendableLinkedList<Element>();
    _baseUri = baseUri;
}
/// <summary>
/// Parses a fragment: initialises the parse state, runs the parser, and returns the
/// child nodes of the resulting document.
/// </summary>
/// <param name="inputFragment">Fragment text to parse.</param>
/// <param name="baseUri">Base URI passed to <c>InitialiseParse</c>.</param>
/// <param name="errors">Error list passed to <c>InitialiseParse</c>.</param>
/// <returns>The document's child nodes after the parser has run.</returns>
internal IReadOnlyList<Node> ParseFragment(string inputFragment, string baseUri, ParseErrorList errors)
{
    InitialiseParse(inputFragment, baseUri, errors);

    RunParser();

    return doc.ChildNodes;
}
/// <summary>
/// Runs the base initialisation and then adds the document to the end of the stack.
/// </summary>
/// <param name="input">Input text, forwarded to the base implementation.</param>
/// <param name="baseUri">Base URI, forwarded to the base implementation.</param>
/// <param name="errors">Error list, forwarded to the base implementation.</param>
protected override void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    // The base call must come first: it creates the stack and document used below.
    base.InitialiseParse(input, baseUri, errors);

    // Place the document onto the stack. Differs from HtmlTreeBuilder (not on stack).
    _stack.AddLast(_doc);
}
/// <summary>
/// Parses the given markup into a <c>Document</c> using the configured tree builder.
/// </summary>
/// <param name="html">Markup to parse.</param>
/// <param name="baseUri">Base URI passed to the tree builder.</param>
/// <returns>The document returned by the tree builder's <c>Parse</c>.</returns>
public Document ParseInput(string html, string baseUri)
{
    // Pick an error list: a tracking one capped at maxErrors when tracking is enabled,
    // otherwise the non-tracking list.
    if (CanTrackErrors)
    {
        errors = ParseErrorList.Tracking(maxErrors);
    }
    else
    {
        errors = ParseErrorList.NoTracking();
    }

    return treeBuilder.Parse(html, baseUri, errors);
}
/// <summary>
/// Resets the builder state to <c>HtmlTreeBuilderState.Initial</c> and delegates to the base parse.
/// </summary>
/// <param name="input">Input text to parse.</param>
/// <param name="baseUri">Base URI forwarded to the base implementation.</param>
/// <param name="errors">Error list forwarded to the base implementation.</param>
/// <returns>The document returned by the base implementation.</returns>
public override Document Parse(string input, string baseUri, ParseErrorList errors)
{
    // The state is reset before the base parse runs.
    _state = HtmlTreeBuilderState.Initial;

    Document parsed = base.Parse(input, baseUri, errors);
    return parsed;
}
/// <summary>
/// Parses a fragment of markup, optionally in the context of an element.
/// </summary>
/// <param name="inputFragment">The fragment text to parse.</param>
/// <param name="context">The context element; may be null.</param>
/// <param name="baseUri">Base URI passed to the parse initialisation and used for the synthetic root element.</param>
/// <param name="errors">Error list passed through to <c>InitialiseParse</c>.</param>
/// <returns>
/// The child nodes of the synthetic <c>html</c> root when <paramref name="context"/> is given;
/// otherwise the child nodes of the document.
/// </returns>
internal IReadOnlyList<Node> ParseFragment(string inputFragment, Element context, string baseUri, ParseErrorList errors)
{
    state = HtmlTreeBuilderState.Initial;
    InitialiseParse(inputFragment, baseUri, errors);
    contextElement = context;
    fragmentParsing = true;

    // Without a context element there is nothing to set up: parse and hand back the document's children.
    if (context == null)
    {
        RunParser();
        return doc.ChildNodes;
    }

    // Quirks setup: copy the quirks mode of the context's owner document, if it has one.
    if (context.OwnerDocument != null)
    {
        doc.QuirksMode = context.OwnerDocument.QuirksMode;
    }

    // Initialise the tokeniser state from the context element's tag name.
    string tagName = context.TagName;
    if (StringUtil.In(tagName, "title", "textarea"))
    {
        tokeniser.Transition(TokeniserState.Rcdata);
    }
    else if (StringUtil.In(tagName, "iframe", "noembed", "noframes", "style", "xmp"))
    {
        tokeniser.Transition(TokeniserState.Rawtext);
    }
    else if (tagName.Equals("script"))
    {
        tokeniser.Transition(TokeniserState.ScriptData);
    }
    else
    {
        // "noscript" (if scripting enabled, rawtext), "plaintext" and every other tag
        // all start in the Data state.
        tokeniser.Transition(TokeniserState.Data);
    }

    // Parse into a synthetic html root that is attached to the document and pushed onto the stack.
    Element fragmentRoot = new Element(Tag.ValueOf("html"), baseUri);
    doc.AppendChild(fragmentRoot);
    stack.Push(fragmentRoot);
    ResetInsertionMode();

    // Setup form element to nearest form on context (up ancestor chain). Ensures form controls
    // are associated with form correctly. The context itself is checked first.
    Elements ancestry = context.Parents;
    ancestry.Insert(0, context);
    foreach (Element candidate in ancestry)
    {
        if (!(candidate is FormElement))
        {
            continue;
        }

        formElement = (FormElement)candidate;
        break;
    }

    RunParser();
    return fragmentRoot.ChildNodes;
}
// tag searches
//private static final String[] TagsScriptStyle = new String[]{"script", "style"};
// the current state
// original / marked state
// the current head element
// the current form element
// fragment parse context -- could be null even if fragment parsing
// active (open) formatting elements
// chars in table to be shifted out
// if ok to go into frameset
// if next inserts should be fostered
// if parsing a fragment of html

/// <summary>
/// Resets the builder state and the <c>baseUriSetFromDoc</c> flag, then delegates to the base parse.
/// </summary>
/// <param name="input">Input text to parse.</param>
/// <param name="baseUri">Base URI forwarded to the base implementation.</param>
/// <param name="errors">Error list forwarded to the base implementation.</param>
/// <returns>The document returned by the base implementation.</returns>
internal override Document Parse(string input, string baseUri, ParseErrorList errors)
{
    // Both resets happen before the base parse runs.
    baseUriSetFromDoc = false;
    state = HtmlTreeBuilderState.Initial;

    Document parsed = base.Parse(input, baseUri, errors);
    return parsed;
}
/// <summary>
/// Creates a tokeniser that keeps the given character reader and error list.
/// </summary>
/// <param name="reader">Character reader stored in <c>_reader</c>.</param>
/// <param name="errors">Error list stored in <c>_errors</c>.</param>
public Tokeniser(CharacterReader reader, ParseErrorList errors)
{
    _errors = errors;
    _reader = reader;
}