/// <summary>
/// Parses <paramref name="html"/> into a document using the configured tree builder.
/// </summary>
/// <param name="html">The markup to parse.</param>
/// <param name="baseUri">Base URI handed through to the tree builder.</param>
/// <returns>The document produced by the tree builder.</returns>
public HtmlDocument ParseInput(String html, Uri baseUri)
{
    // Every call replaces the error collection: a tracking one (capped at
    // maxErrors) when error tracking is switched on, a non-tracking one otherwise.
    if (IsTrackErrors())
    {
        errors = HtmlParseErrorCollection.Tracking(maxErrors);
    }
    else
    {
        errors = HtmlParseErrorCollection.NoTracking();
    }

    return treeBuilder.Parse(html, baseUri, errors);
}
/// <summary>
/// Resets the builder's per-parse state: document, character reader, error
/// collection, tokeniser, element stack and base URI.
/// </summary>
/// <param name="input">The text to parse; must not be null.</param>
/// <param name="baseUri">Base URI used to create the document and stored on the builder.</param>
/// <param name="errors">Error collection shared with the tokeniser.</param>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
protected virtual void InitialiseParse(string input, Uri baseUri, HtmlParseErrorCollection errors)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    doc = CreateDocument(baseUri);

    // The tokeniser reads from the reader created here and reports into
    // the same error collection that is stored on the builder.
    reader = new CharacterReader(input);
    this.errors = errors;
    tokeniser = new Tokeniser(reader, errors);

    stack = new DescendableLinkedList<DomContainer>();
    this.baseUri = baseUri;
}
/// <summary>
/// Parses a fragment of markup and returns the resulting nodes.
/// Implemented by the concrete tree builders.
/// </summary>
/// <param name="inputFragment">The fragment text to parse.</param>
/// <param name="context">
/// Element giving the parsing context for the fragment.
/// NOTE(review): at least one implementation treats null as valid here - confirm per implementation.
/// </param>
/// <param name="baseUri">Base URI for the parse.</param>
/// <param name="errors">Collection that receives parse errors.</param>
/// <returns>The list of nodes parsed from the fragment.</returns>
public abstract IList <DomNode> ParseFragment(string inputFragment, HtmlElement context, Uri baseUri, HtmlParseErrorCollection errors);
/// <summary>
/// Parses a complete input: initialises the parse state, runs the parser,
/// and returns the document that was built.
/// </summary>
/// <param name="input">The text to parse.</param>
/// <param name="baseUri">Base URI for the parse.</param>
/// <param name="errors">Collection that receives parse errors.</param>
/// <returns>The builder's document after the parser has run.</returns>
public virtual HtmlDocument Parse(string input, Uri baseUri, HtmlParseErrorCollection errors)
{
    InitialiseParse(input, baseUri, errors);

    RunParser();

    return doc;
}
/// <summary>
/// Parses a fragment beneath a synthetic "root" element and returns a copy
/// of that element's child nodes.
/// </summary>
/// <param name="inputFragment">The fragment text to parse.</param>
/// <param name="context">Not read by this implementation.</param>
/// <param name="baseUri">Base URI for the parse.</param>
/// <param name="errors">Collection that receives parse errors.</param>
/// <returns>A list snapshot of the nodes parsed under the synthetic root.</returns>
public override IList<DomNode> ParseFragment(string inputFragment, HtmlElement context, Uri baseUri, HtmlParseErrorCollection errors)
{
    InitialiseParse(inputFragment, baseUri, errors);

    // The synthetic root is pushed onto the stack before the parser runs.
    var fragmentRoot = (HtmlElement)doc.CreateElement("root");
    stack.AddLast(fragmentRoot);

    RunParser();

    // TODO Shouldn't need ToList when concurrent modifications can be avoided
    IList<DomNode> parsedNodes = fragmentRoot.ChildNodes.ToList();
    return parsedNodes;
}
/// <summary>
/// Performs the base initialisation and then pushes the freshly created
/// document onto the stack.
/// </summary>
/// <param name="input">The text to parse.</param>
/// <param name="baseUri">Base URI for the parse.</param>
/// <param name="errors">Collection that receives parse errors.</param>
protected override void InitialiseParse(String input, Uri baseUri, HtmlParseErrorCollection errors)
{
    base.InitialiseParse(input, baseUri, errors);

    // place the document onto the stack. differs from HtmlTreeBuilder (not on stack)
    stack.AddLast(doc);
}
/// <summary>
/// Creates a tokeniser bound to the given character reader and error collection.
/// </summary>
/// <param name="reader">Character source stored on the tokeniser.</param>
/// <param name="errors">Error collection stored on the tokeniser.</param>
public Tokeniser(CharacterReader reader, HtmlParseErrorCollection errors)
{
    // Both arguments are only stored here; nothing is read from the reader yet.
    this.errors = errors;
    this.reader = reader;
}
/// <summary>
/// Parses an HTML fragment with a new <c>HtmlTreeBuilder</c> and a
/// non-tracking error collection.
/// </summary>
/// <param name="fragmentHtml">The fragment markup to parse.</param>
/// <param name="context">Context element passed through to the tree builder.</param>
/// <param name="baseUri">Base URI for the parse.</param>
/// <returns>The nodes produced by the tree builder's fragment parse.</returns>
public static IList<DomNode> ParseFragment(String fragmentHtml, HtmlElement context, Uri baseUri)
{
    var builder = new HtmlTreeBuilder();
    var untrackedErrors = HtmlParseErrorCollection.NoTracking();

    return builder.ParseFragment(fragmentHtml, context, baseUri, untrackedErrors);
}
/// <summary>
/// Parses HTML into a document with a new <c>HtmlTreeBuilder</c> and a
/// non-tracking error collection.
/// </summary>
/// <param name="html">The markup to parse.</param>
/// <param name="baseUri">Base URI for the parse.</param>
/// <returns>The parsed document.</returns>
public static HtmlDocument Parse(String html, Uri baseUri)
{
    TreeBuilder builder = new HtmlTreeBuilder();
    var untrackedErrors = HtmlParseErrorCollection.NoTracking();

    return builder.Parse(html, baseUri, untrackedErrors);
}
/// <summary>
/// Parses an HTML fragment, optionally in the context of an existing element.
/// </summary>
/// <remarks>
/// With a non-null <paramref name="context"/>, the tokeniser's starting state is
/// chosen from the context's node name, a synthetic "html" element is appended to
/// the document and placed on the stack, and that element's children are returned.
/// With a null context the document's own children are returned.
/// </remarks>
/// <param name="inputFragment">The fragment text to parse.</param>
/// <param name="context">Context element; may be null.</param>
/// <param name="baseUri">Base URI for the parse; also assigned to the synthetic root.</param>
/// <param name="errors">Collection that receives parse errors.</param>
/// <returns>A list snapshot of the parsed nodes.</returns>
public override IList<DomNode> ParseFragment(string inputFragment, HtmlElement context, Uri baseUri, HtmlParseErrorCollection errors)
{
    // context may be null
    InitialiseParse(inputFragment, baseUri, errors);
    contextElement = context;
    fragmentParsing = true;

    HtmlElement fragmentRoot = null;

    if (context != null)
    {
        // quirks setup: copy the quirks mode of the context's owner document, if any
        var ownerDocument = context.OwnerDocument;
        if (ownerDocument != null)
        {
            doc.QuirksMode = ownerDocument.GetQuirksMode();
        }

        // initialise the tokeniser state:
        TokeniserState initialState;
        switch (context.NodeName)
        {
            case "title":
            case "textarea":
                initialState = TokeniserState.Rcdata;
                break;

            case "script":
                initialState = TokeniserState.ScriptData;
                break;

            case "iframe":
            case "noembed":
            case "noframes":
            case "style":
            case "xmp":
                initialState = TokeniserState.Rawtext;
                break;

            default:
                // Covers "noscript" (if scripting enabled, rawtext), "plaintext",
                // and every other tag name: all start in the Data state.
                initialState = TokeniserState.Data;
                break;
        }

        tokeniser.Transition(initialState);

        fragmentRoot = (HtmlElement)doc.CreateElement("html");
        fragmentRoot.BaseUri = baseUri;
        doc.Append(fragmentRoot);
        stack.AddFirst(fragmentRoot);
        ResetInsertionMode();
        // TODO: setup form element to nearest form on context (up ancestor chain)
    }

    RunParser();

    // TODO Shouldn't need ToList when concurrent modifications can be saved
    if (context == null)
    {
        return doc.ChildNodes.ToList();
    }

    return fragmentRoot.ChildNodes.ToList();
}
/// <summary>
/// Resets the tree builder state to <c>HtmlTreeBuilderState.Initial</c> and then
/// delegates to the base parse.
/// </summary>
/// <param name="input">The text to parse.</param>
/// <param name="baseUri">Base URI for the parse.</param>
/// <param name="errors">Collection that receives parse errors.</param>
/// <returns>The document returned by the base implementation.</returns>
public override HtmlDocument Parse(string input, Uri baseUri, HtmlParseErrorCollection errors)
{
    // The state must be reset before the base class starts running the parser.
    _state = HtmlTreeBuilderState.Initial;

    HtmlDocument parsed = base.Parse(input, baseUri, errors);
    return parsed;
}
/// <summary>
/// Records an "unexpected end of file" parse error at the reader's current position.
/// </summary>
/// <param name="errors">Collection the error is added to.</param>
/// <param name="reader">Reader whose position is recorded with the error.</param>
/// <param name="state">Tokeniser state included in the error message.</param>
public static void UnexpectedlyReachedEof(HtmlParseErrorCollection errors, CharacterReader reader, TokeniserState state)
{
    var position = reader.Position;

    errors.Add(position, "Unexpectedly reached end of file (EOF) in input state [{0}]", state);
}
/// <summary>
/// Records an "unexpected character" parse error at the reader's current position.
/// </summary>
/// <param name="errors">Collection the error is added to.</param>
/// <param name="reader">Reader supplying the position and the current character.</param>
/// <param name="state">Tokeniser state included in the error message.</param>
public static void UnexpectedChar(HtmlParseErrorCollection errors, CharacterReader reader, TokeniserState state)
{
    // Read position first, then the current character, matching argument order.
    var position = reader.Position;
    var offendingChar = reader.Current;

    errors.Add(position, "Unexpected char '{0}' in input state [{1}]", offendingChar, state);
}
/// <summary>
/// Extension method that records an "unexpected token" parse error.
/// </summary>
/// <param name="errors">Collection the error is added to.</param>
/// <param name="readerPos">Position recorded with the error.</param>
/// <param name="state">Tree builder state included in the error message.</param>
/// <param name="currentToken">Token whose type name is included in the error message.</param>
public static void UnexpectedToken(this HtmlParseErrorCollection errors, int readerPos, HtmlTreeBuilderState state, Token currentToken)
{
    var tokenTypeName = currentToken.TokenTypeName;

    errors.Add(readerPos, "Unexpected token [{0}] when in state [{1}]", tokenTypeName, state);
}