Пример #1
0
        /// <summary>
        /// Parses an HTML fragment, optionally within the context of an existing element.
        /// </summary>
        /// <param name="inputFragment">Fragment markup passed on to <c>InitialiseParse</c>.</param>
        /// <param name="context">Element the fragment is parsed within; may be null.</param>
        /// <param name="baseUri">Base URI used for the parse and for the synthetic root element.</param>
        /// <param name="errors">Error list passed on to <c>InitialiseParse</c>.</param>
        /// <returns>
        /// The child nodes of the synthetic <c>html</c> root when a context is supplied,
        /// otherwise the child nodes of the document.
        /// </returns>
        public IList<Node> ParseFragment(string inputFragment, Element context, string baseUri, ParseErrorList errors)
        {
            _state = HtmlTreeBuilderState.Initial;
            InitialiseParse(inputFragment, baseUri, errors);
            _contextElement = context;
            _fragmentParsing = true;

            // No context element: nothing to configure, parse straight into the document.
            if (context == null)
            {
                RunParser();
                return _doc.ChildNodes;
            }

            // Quirks setup: inherit the quirks mode of the document that owns the context.
            var ownerDocument = context.OwnerDocument;
            if (ownerDocument != null)
            {
                _doc.QuirksMode(ownerDocument.QuirksMode());
            }

            // Initialise the tokeniser state from the context element's tag.
            string tagName = context.TagName();
            if (StringUtil.In(tagName, "title", "textarea"))
            {
                _tokeniser.Transition(TokeniserState.RcData);
            }
            else if (StringUtil.In(tagName, "iframe", "noembed", "noframes", "style", "xmp"))
            {
                _tokeniser.Transition(TokeniserState.RawText);
            }
            else if (tagName.Equals("script"))
            {
                _tokeniser.Transition(TokeniserState.ScriptData);
            }
            else
            {
                // Default. "noscript" (rawtext if scripting were enabled) and "plaintext"
                // also start in the data state.
                _tokeniser.Transition(TokeniserState.Data);
            }

            // The fragment is parsed beneath a synthetic <html> root.
            Element fragmentRoot = new Element(Tag.ValueOf("html"), baseUri);
            _doc.AppendChild(fragmentRoot);
            _stack.AddFirst(fragmentRoot);
            ResetInsertionMode();
            // todo: setup form element to nearest form on context (up ancestor chain)

            RunParser();
            return fragmentRoot.ChildNodes;
        }
Пример #2
0
 /// <summary>
 /// Runs the base initialisation, then switches the new document's output syntax
 /// to XML and places the document on the stack.
 /// </summary>
 /// <param name="input">Markup to parse.</param>
 /// <param name="baseUri">Base URI for the parse.</param>
 /// <param name="errors">Error list passed to the base initialisation.</param>
 internal override void InitialiseParse(string input, string baseUri, ParseErrorList errors)
 {
     base.InitialiseParse(input, baseUri, errors);

     doc.OutputSettings.Syntax = DocumentSyntax.Xml;

     // Place the document onto the stack; this differs from HtmlTreeBuilder,
     // where the document is not on the stack.
     stack.AddLast(doc);
 }
Пример #3
0
        /// <summary>
        /// Initialises the parse state for the given input, runs the parser and
        /// returns the document that was built.
        /// </summary>
        /// <param name="input">Markup to parse.</param>
        /// <param name="baseUri">Base URI for the parse.</param>
        /// <param name="errors">Error list passed on to <c>InitialiseParse</c>.</param>
        /// <returns>The document held in <c>_doc</c> once the parser has run.</returns>
        public virtual Document Parse(string input, string baseUri, ParseErrorList errors)
        {
            InitialiseParse(input, baseUri, errors);
            RunParser();
            return _doc;
        }
Пример #4
0
 // current doc we are building into
 // the stack of open elements
 // current base uri, for creating new elements
 // currentToken is used only for error tracking.
 // null when not tracking errors
 /// <summary>
 /// Validates the arguments and resets the builder's per-parse state: a fresh
 /// document, character reader, tokeniser and element stack.
 /// </summary>
 /// <param name="input">Markup to parse; checked with <c>Validate.NotNull</c>.</param>
 /// <param name="baseUri">Base URI for the parse; checked with <c>Validate.NotNull</c>.</param>
 /// <param name="errors">Error list stored on the builder and passed to the tokeniser.</param>
 internal virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
 {
     Validate.NotNull(input, "String input must not be null");
     Validate.NotNull(baseUri, "BaseURI must not be null");

     // Plain bookkeeping first.
     this.baseUri = baseUri;
     this.errors = errors;

     // Fresh output containers.
     doc = new Document(baseUri);
     stack = new DescendableLinkedList<Element>();

     // Input pipeline: the tokeniser consumes the reader.
     reader = new CharacterReader(input);
     tokeniser = new Tokeniser(reader, errors);
 }
Пример #5
0
 /// <summary>
 /// Creates a tokeniser that stores the given reader and error list.
 /// </summary>
 /// <param name="reader">Character reader kept on the tokeniser.</param>
 /// <param name="errors">Error list kept on the tokeniser.</param>
 internal Tokeniser(CharacterReader reader, ParseErrorList errors)
 {
     // NOTE(review): the notes below read like descriptions of the class's fields
     // rather than of this constructor - confirm against the field declarations:
     //   - replaces null character
     //   - html input
     //   - errors found while tokenising
     //   - current tokenisation state
     //   - the token we are about to emit on next read
     //   - buffers characters to output as one token
     //   - buffers data looking for </script>
     //   - tag we are building up
     //   - doctype building up
     //   - comment building up
     //   - the last start tag emitted, to test appropriate end tag
     this.errors = errors;
     this.reader = reader;
 }
Пример #6
0
        /// <summary>Parse error list; null when not tracking errors.</summary>
        protected ParseErrorList _errors;

        /// <summary>
        /// Validates the arguments and resets the builder's per-parse state: a fresh
        /// document, character reader, tokeniser and element stack.
        /// </summary>
        /// <param name="input">Markup to parse; must not be null.</param>
        /// <param name="baseUri">Base URI for the parse; must not be null.</param>
        /// <param name="errors">Error list stored on the builder and passed to the tokeniser.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="input"/> or <paramref name="baseUri"/> is null.
        /// </exception>
        protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
        {
            // The single-string ArgumentNullException constructor interprets its argument
            // as the parameter name, so the name and the message are passed separately.
            if (input == null)
            {
                throw new ArgumentNullException("input", "String input must not be null");
            }
            if (baseUri == null)
            {
                throw new ArgumentNullException("baseUri", "BaseURI must not be null");
            }

            _doc = new Document(baseUri);
            _reader = new CharacterReader(input);
            _errors = errors;
            _tokeniser = new Tokeniser(_reader, errors);
            _stack = new DescendableLinkedList<Element>();
            _baseUri = baseUri;
        }
Пример #7
0
 /// <summary>
 /// Parses a fragment with no context element and returns the top-level nodes
 /// of the resulting document.
 /// </summary>
 /// <param name="inputFragment">Fragment markup to parse.</param>
 /// <param name="baseUri">Base URI for the parse.</param>
 /// <param name="errors">Error list passed on to <c>InitialiseParse</c>.</param>
 /// <returns>The child nodes of the document after the parser has run.</returns>
 internal IReadOnlyList<Node> ParseFragment(string inputFragment, string baseUri, ParseErrorList errors)
 {
     InitialiseParse(inputFragment, baseUri, errors);
     RunParser();

     IReadOnlyList<Node> parsedNodes = doc.ChildNodes;
     return parsedNodes;
 }
Пример #8
0
 /// <summary>
 /// Runs the base initialisation, then places the new document on the stack.
 /// </summary>
 /// <param name="input">Markup to parse.</param>
 /// <param name="baseUri">Base URI for the parse.</param>
 /// <param name="errors">Error list passed to the base initialisation.</param>
 protected override void InitialiseParse(string input, string baseUri, ParseErrorList errors)
 {
     base.InitialiseParse(input, baseUri, errors);

     // Place the document onto the stack; this differs from HtmlTreeBuilder,
     // where the document is not on the stack.
     _stack.AddLast(_doc);
 }
Пример #9
0
 /// <summary>
 /// Parse HTML into a Document.
 /// </summary>
 /// <param name="html">Markup handed to the tree builder.</param>
 /// <param name="baseUri">Base URI handed to the tree builder.</param>
 /// <returns>The document returned by the tree builder's <c>Parse</c> call.</returns>
 public Document ParseInput(string html, string baseUri)
 {
     // Pick the error list for this parse: tracking up to maxErrors when error
     // tracking is switched on, a non-tracking list otherwise.
     if (CanTrackErrors)
     {
         errors = ParseErrorList.Tracking(maxErrors);
     }
     else
     {
         errors = ParseErrorList.NoTracking();
     }

     return treeBuilder.Parse(html, baseUri, errors);
 }
Пример #10
0
 /// <summary>
 /// Resets the tree builder to its initial state and delegates to the base parse.
 /// </summary>
 /// <param name="input">Markup to parse.</param>
 /// <param name="baseUri">Base URI for the parse.</param>
 /// <param name="errors">Error list passed on to the base parse.</param>
 /// <returns>The document produced by the base <c>Parse</c>.</returns>
 public override Document Parse(string input, string baseUri, ParseErrorList errors)
 {
     _state = HtmlTreeBuilderState.Initial;

     Document parsed = base.Parse(input, baseUri, errors);
     return parsed;
 }
Пример #11
0
        /// <summary>
        /// Parses an HTML fragment, optionally within the context of an existing element.
        /// </summary>
        /// <param name="inputFragment">Fragment markup passed on to <c>InitialiseParse</c>.</param>
        /// <param name="context">Element the fragment is parsed within; may be null.</param>
        /// <param name="baseUri">Base URI used for the parse and for the synthetic root element.</param>
        /// <param name="errors">Error list passed on to <c>InitialiseParse</c>.</param>
        /// <returns>
        /// The child nodes of the synthetic <c>html</c> root when a context is supplied,
        /// otherwise the child nodes of the document.
        /// </returns>
        internal IReadOnlyList<Node> ParseFragment(string inputFragment, Element context, string baseUri, ParseErrorList errors)
        {
            state = HtmlTreeBuilderState.Initial;
            InitialiseParse(inputFragment, baseUri, errors);
            contextElement = context;
            fragmentParsing = true;

            // No context element: nothing to configure, parse straight into the document.
            if (context == null)
            {
                RunParser();
                return doc.ChildNodes;
            }

            // Quirks setup: inherit the quirks mode of the document that owns the context.
            var ownerDocument = context.OwnerDocument;
            if (ownerDocument != null)
            {
                doc.QuirksMode = ownerDocument.QuirksMode;
            }

            // Initialise the tokeniser state from the context element's tag.
            string tagName = context.TagName;
            if (StringUtil.In(tagName, "title", "textarea"))
            {
                tokeniser.Transition(TokeniserState.Rcdata);
            }
            else if (StringUtil.In(tagName, "iframe", "noembed", "noframes", "style", "xmp"))
            {
                tokeniser.Transition(TokeniserState.Rawtext);
            }
            else if (tagName.Equals("script"))
            {
                tokeniser.Transition(TokeniserState.ScriptData);
            }
            else
            {
                // Default. "noscript" (rawtext if scripting were enabled) and "plaintext"
                // also start in the data state.
                tokeniser.Transition(TokeniserState.Data);
            }

            // The fragment is parsed beneath a synthetic <html> root.
            Element fragmentRoot = new Element(Tag.ValueOf("html"), baseUri);
            doc.AppendChild(fragmentRoot);
            stack.Push(fragmentRoot);
            ResetInsertionMode();

            // Set the form element to the nearest form on the context (the context itself
            // first, then up the ancestor chain) so form controls are associated with the
            // form correctly.
            Elements ancestry = context.Parents;
            ancestry.Insert(0, context);
            foreach (Element candidate in ancestry)
            {
                if (!(candidate is FormElement))
                {
                    continue;
                }

                formElement = (FormElement)candidate;
                break;
            }

            RunParser();
            return fragmentRoot.ChildNodes;
        }
Пример #12
0
 // tag searches
 //private static final String[] TagsScriptStyle = new String[]{"script", "style"};
 // the current state
 // original / marked state
 // the current head element
 // the current form element
 // fragment parse context -- could be null even if fragment parsing
 // active (open) formatting elements
 // chars in table to be shifted out
 // if ok to go into frameset
 // if next inserts should be fostered
 // if parsing a fragment of html
 /// <summary>
 /// Resets the builder's state and the base-URI-from-document flag, then
 /// delegates to the base parse.
 /// </summary>
 /// <param name="input">Markup to parse.</param>
 /// <param name="baseUri">Base URI for the parse.</param>
 /// <param name="errors">Error list passed on to the base parse.</param>
 /// <returns>The document produced by the base <c>Parse</c>.</returns>
 internal override Document Parse(string input, string baseUri, ParseErrorList errors)
 {
     baseUriSetFromDoc = false;
     state = HtmlTreeBuilderState.Initial;

     Document parsed = base.Parse(input, baseUri, errors);
     return parsed;
 }
Пример #13
0
 /// <summary>
 /// Creates a tokeniser that stores the given reader and error list.
 /// </summary>
 /// <param name="reader">Character reader kept on the tokeniser.</param>
 /// <param name="errors">Error list kept on the tokeniser.</param>
 public Tokeniser(CharacterReader reader, ParseErrorList errors)
 {
     _errors = errors;
     _reader = reader;
 }