Example #1
0
        /// <summary>
        /// Parses the supplied HTML with this parser's tree builder. The error list is
        /// re-created on every call: a tracking list (bounded by the configured maximum)
        /// when error tracking is on, otherwise a non-tracking list.
        /// </summary>
        /// <param name="html">markup to parse</param>
        /// <param name="baseUri">base URI passed through to the tree builder</param>
        /// <returns>the Document returned by the tree builder</returns>
        public Document ParseInput(string html, string baseUri)
        {
            if (IsTrackErrors)
            {
                _errors = ParseErrorList.Tracking(_maxErrors);
            }
            else
            {
                _errors = ParseErrorList.NoTracking();
            }

            return _treeBuilder.Parse(html, baseUri, _errors);
        }
Example #2
0
        /// <summary>
        /// Runs a complete parse: initialises the builder for the given input, runs the
        /// parser, and hands back the document that was built.
        /// </summary>
        /// <param name="input">markup to parse</param>
        /// <param name="baseUri">base URI for the resulting document</param>
        /// <param name="errors">error list forwarded to the initialisation step</param>
        /// <returns>the document held by this builder after the parser has run</returns>
        public virtual Document Parse(string input, string baseUri, ParseErrorList errors)
        {
            this.InitialiseParse(input, baseUri, errors);
            this.RunParser();
            return this._doc;
        }
Example #3
0
        // Error list for the current parse; assigned from the 'errors' argument in InitialiseParse.
        // NOTE(review): the trailing comment says null means "not tracking" — confirm whether
        // callers actually pass null or a non-tracking list instance.
        protected ParseErrorList _errors;                 // null when not tracking errors

        /// <summary>
        /// Resets the per-parse state of this builder for a new input: a fresh document,
        /// character reader, tokeniser and element stack, plus the error list and base URI.
        /// </summary>
        /// <param name="input">markup to parse; must not be null</param>
        /// <param name="baseUri">base URI for the new document; must not be null</param>
        /// <param name="errors">error list stored on the builder and passed to the tokeniser</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="input"/> or <paramref name="baseUri"/> is null.
        /// </exception>
        protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
        {
            // ArgumentNullException(string) interprets its single argument as the parameter
            // name, not the message, so use the (paramName, message) overload to keep both
            // ParamName and Message meaningful.
            if (input == null)
            {
                throw new ArgumentNullException("input", "String input must not be null");
            }
            if (baseUri == null)
            {
                throw new ArgumentNullException("baseUri", "BaseURI must not be null");
            }

            _doc          = new Document(baseUri);
            _reader       = new CharacterReader(input);
            _errors       = errors;
            _tokeniser    = new Tokeniser(_reader, errors);
            _stack        = new DescendableLinkedList <Element>();
            this._baseUri = baseUri;
        }
Example #4
0
 /// <summary>
 /// Convenience overload: parses the input with a non-tracking error list.
 /// </summary>
 /// <param name="input">markup to parse</param>
 /// <param name="baseUri">base URI for the resulting document</param>
 /// <returns>the document produced by the three-argument Parse overload</returns>
 public Document Parse(string input, string baseUri)
 {
     ParseErrorList untracked = ParseErrorList.NoTracking();

     return Parse(input, baseUri, untracked);
 }
Example #5
0
 /// <summary>
 /// Performs the base initialisation and then seeds the element stack with the document.
 /// </summary>
 /// <param name="input">markup to parse</param>
 /// <param name="baseUri">base URI for the new document</param>
 /// <param name="errors">error list forwarded to the base initialisation</param>
 protected override void InitialiseParse(string input, string baseUri, ParseErrorList errors)
 {
     base.InitialiseParse(input, baseUri, errors);

     // Unlike HtmlTreeBuilder (which keeps the document off the stack), this builder
     // places the document itself onto the stack.
     this._stack.AddLast(this._doc);
 }
Example #6
0
 /// <summary>
 /// Creates a tokeniser that keeps references to the given reader and error list.
 /// </summary>
 /// <param name="reader">character reader stored on the tokeniser</param>
 /// <param name="errors">error list stored on the tokeniser</param>
 public Tokeniser(CharacterReader reader, ParseErrorList errors)
 {
     _errors = errors;
     _reader = reader;
 }
Example #7
0
        // static parse functions below
        /// <summary>
        /// Parses an HTML string into a Document using a fresh HtmlTreeBuilder and a
        /// non-tracking error list.
        /// </summary>
        /// <param name="html">HTML to parse</param>
        /// <param name="baseUri">base URI of the document (i.e. original fetch location), used for resolving relative URLs.</param>
        /// <returns>the parsed Document</returns>
        public static Document Parse(string html, string baseUri)
        {
            return new HtmlTreeBuilder().Parse(html, baseUri, ParseErrorList.NoTracking());
        }
Example #8
0
        /// <summary>
        /// Utility method that unescapes HTML entities in a string by running a tokeniser
        /// (with a non-tracking error list) over it.
        /// </summary>
        /// <param name="s">HTML-escaped string</param>
        /// <param name="inAttribute">Whether the string should be unescaped in strict mode (as attributes are); passed straight to the tokeniser</param>
        /// <returns>the unescaped string</returns>
        public static string UnescapeEntities(string s, bool inAttribute)
        {
            CharacterReader reader = new CharacterReader(s);
            Tokeniser entityTokeniser = new Tokeniser(reader, ParseErrorList.NoTracking());

            return entityTokeniser.UnescapeEntities(inAttribute);
        }
Example #9
0
        /// <summary>
        /// Parses a fragment of HTML into a list of nodes using a fresh HtmlTreeBuilder and a
        /// non-tracking error list. The context element, if supplied, provides parsing context.
        /// </summary>
        /// <param name="fragmentHtml">the fragment of HTML to parse</param>
        /// <param name="context">(optional) the element this fragment is being parsed for (i.e. for inner HTML); it provides stack context for implicit element creation.</param>
        /// <param name="baseUri">base URI of the document (i.e. original fetch location), used for resolving relative URLs.</param>
        /// <returns>list of nodes parsed from the input HTML. Note that the context element, if supplied, is not modified.</returns>
        public static IList <Node> ParseFragment(string fragmentHtml, Element context, string baseUri)
        {
            return new HtmlTreeBuilder().ParseFragment(fragmentHtml, context, baseUri, ParseErrorList.NoTracking());
        }
Example #10
0
        /// <summary>
        /// Parses an HTML fragment, optionally in the context of an element.
        /// With a context element, a synthetic "html" root is appended to the document and
        /// pushed onto the stack before parsing, and that root's children are returned;
        /// without one, the document's own children are returned.
        /// </summary>
        /// <param name="inputFragment">fragment markup to parse</param>
        /// <param name="context">context element; may be null</param>
        /// <param name="baseUri">base URI used for the document and the synthetic root</param>
        /// <param name="errors">error list forwarded to the initialisation step</param>
        /// <returns>child nodes of the synthetic root (context given) or of the document (no context)</returns>
        public IList <Node> ParseFragment(string inputFragment, Element context, string baseUri, ParseErrorList errors)
        {
            _state = HtmlTreeBuilderState.Initial;
            InitialiseParse(inputFragment, baseUri, errors);
            _contextElement  = context;
            _fragmentParsing = true;

            // No context element: parse straight into the document.
            if (context == null)
            {
                RunParser();
                return _doc.ChildNodes;
            }

            // Quirks setup: inherit the quirks mode of the context's owner document, if any.
            if (context.OwnerDocument != null)
            {
                _doc.QuirksMode(context.OwnerDocument.QuirksMode());
            }

            // Pick the tokeniser's starting state from the context element's tag name.
            string contextTag = context.TagName();
            TokeniserState startState;

            if (StringUtil.In(contextTag, "title", "textarea"))
            {
                startState = TokeniserState.RcData;
            }
            else if (StringUtil.In(contextTag, "iframe", "noembed", "noframes", "style", "xmp"))
            {
                startState = TokeniserState.RawText;
            }
            else if (contextTag.Equals("script"))
            {
                startState = TokeniserState.ScriptData;
            }
            else
            {
                // "noscript" (would be rawtext if scripting were enabled), "plaintext" and
                // every other tag all start in the Data state.
                startState = TokeniserState.Data;
            }

            _tokeniser.Transition(startState);

            // Synthetic root that receives the parsed fragment.
            Element root = new Element(Tag.ValueOf("html"), baseUri);
            _doc.AppendChild(root);
            _stack.AddFirst(root);
            ResetInsertionMode();
            // todo: setup form element to nearest form on context (up ancestor chain)

            RunParser();
            return root.ChildNodes;
        }
Example #11
0
 /// <summary>
 /// Resets the builder state to Initial and then delegates to the base parse.
 /// </summary>
 /// <param name="input">markup to parse</param>
 /// <param name="baseUri">base URI for the resulting document</param>
 /// <param name="errors">error list forwarded to the base parse</param>
 /// <returns>the document returned by the base parse</returns>
 public override Document Parse(string input, string baseUri, ParseErrorList errors)
 {
     this._state = HtmlTreeBuilderState.Initial;

     Document parsed = base.Parse(input, baseUri, errors);
     return parsed;
 }