Exemple #1
0
        /// <summary>
        /// Parses <paramref name="html"/> with the configured tree builder and returns the document it produces.
        /// </summary>
        /// <remarks>
        /// The <c>errors</c> field is replaced on every call: a tracking collection created from
        /// <c>maxErrors</c> when <c>IsTrackErrors()</c> returns true, a non-tracking collection otherwise.
        /// </remarks>
        /// <param name="html">Markup handed to the tree builder.</param>
        /// <param name="baseUri">Base URI handed to the tree builder.</param>
        /// <returns>The document returned by the tree builder.</returns>
        public HtmlDocument ParseInput(String html, Uri baseUri)
        {
            if (IsTrackErrors())
            {
                errors = HtmlParseErrorCollection.Tracking(maxErrors);
            }
            else
            {
                errors = HtmlParseErrorCollection.NoTracking();
            }

            return treeBuilder.Parse(html, baseUri, errors);
        }
Exemple #2
0
        /// <summary>
        /// Sets up the per-parse state: a new document, character reader, tokeniser and element stack,
        /// and stores the supplied error collection and base URI.
        /// </summary>
        /// <param name="input">Text to parse; must not be null.</param>
        /// <param name="baseUri">Base URI used to create the document and stored on the builder.</param>
        /// <param name="errors">Error collection shared with the tokeniser.</param>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        protected virtual void InitialiseParse(string input, Uri baseUri, HtmlParseErrorCollection errors)
        {
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }

            // The assignment order is kept deliberately: the document is created first and
            // the base URI field is written last.
            doc = CreateDocument(baseUri);

            CharacterReader characters = new CharacterReader(input);
            reader = characters;

            this.errors = errors;
            tokeniser = new Tokeniser(characters, errors);
            stack = new DescendableLinkedList<DomContainer>();
            this.baseUri = baseUri;
        }
Exemple #3
0
 /// <summary>
 /// Parses <paramref name="inputFragment"/> and returns the resulting nodes as a list.
 /// Concrete builders must supply the implementation.
 /// </summary>
 /// <param name="inputFragment">Text of the fragment to parse.</param>
 /// <param name="context">Element the fragment is parsed in the context of. NOTE(review): whether null is permitted is up to the implementation - confirm against the overrides.</param>
 /// <param name="baseUri">Base URI for the parse.</param>
 /// <param name="errors">Collection that receives parse errors.</param>
 /// <returns>The nodes produced from the fragment.</returns>
 public abstract IList <DomNode> ParseFragment(string inputFragment, HtmlElement context, Uri baseUri, HtmlParseErrorCollection errors);
Exemple #4
0
 /// <summary>
 /// Initialises the parse state for <paramref name="input"/>, runs the parser, and returns the document.
 /// </summary>
 /// <param name="input">Text to parse.</param>
 /// <param name="baseUri">Base URI forwarded to <c>InitialiseParse</c>.</param>
 /// <param name="errors">Error collection forwarded to <c>InitialiseParse</c>.</param>
 /// <returns>The builder's <c>doc</c> field after <c>RunParser</c> has completed.</returns>
 public virtual HtmlDocument Parse(string input, Uri baseUri, HtmlParseErrorCollection errors)
 {
     InitialiseParse(input, baseUri, errors);
     RunParser();

     HtmlDocument parsed = doc;
     return parsed;
 }
Exemple #5
0
        /// <summary>
        /// Parses <paramref name="inputFragment"/> beneath a synthetic "root" element and returns
        /// a copy of that element's child node list.
        /// </summary>
        /// <remarks><paramref name="context"/> is not read by this implementation.</remarks>
        /// <param name="inputFragment">Text of the fragment to parse.</param>
        /// <param name="context">Unused here.</param>
        /// <param name="baseUri">Base URI forwarded to <c>InitialiseParse</c>.</param>
        /// <param name="errors">Error collection forwarded to <c>InitialiseParse</c>.</param>
        /// <returns>A list copy of the synthetic root's children.</returns>
        public override IList <DomNode> ParseFragment(string inputFragment, HtmlElement context, Uri baseUri, HtmlParseErrorCollection errors)
        {
            InitialiseParse(inputFragment, baseUri, errors);

            // The synthetic root is pushed onto the stack before the parser runs.
            var fragmentRoot = (HtmlElement)doc.CreateElement("root");
            stack.AddLast(fragmentRoot);

            RunParser();

            // TODO Shouldn't need ToList when concurrent modifications can be avoided
            IList<DomNode> children = fragmentRoot.ChildNodes.ToList();
            return children;
        }
Exemple #6
0
 /// <summary>
 /// Runs the base initialisation, then places the document itself onto the stack.
 /// </summary>
 /// <param name="input">Text to parse.</param>
 /// <param name="baseUri">Base URI for the parse.</param>
 /// <param name="errors">Error collection for the parse.</param>
 protected override void InitialiseParse(String input, Uri baseUri, HtmlParseErrorCollection errors)
 {
     base.InitialiseParse(input, baseUri, errors);

     // Unlike HtmlTreeBuilder (which keeps the document off the stack),
     // this builder pushes the document onto the stack.
     stack.AddLast(doc);
 }
Exemple #7
0
 /// <summary>
 /// Creates a tokeniser that stores the given reader and error collection.
 /// </summary>
 /// <param name="reader">Character reader stored on the tokeniser.</param>
 /// <param name="errors">Error collection stored on the tokeniser.</param>
 public Tokeniser(CharacterReader reader, HtmlParseErrorCollection errors)
 {
     // Plain field stores; the two assignments are independent of each other.
     this.errors = errors;
     this.reader = reader;
 }
Exemple #8
0
        /// <summary>
        /// Parses <paramref name="fragmentHtml"/> with a new <c>HtmlTreeBuilder</c> and a
        /// non-tracking error collection.
        /// </summary>
        /// <param name="fragmentHtml">Fragment markup to parse.</param>
        /// <param name="context">Context element forwarded to the tree builder.</param>
        /// <param name="baseUri">Base URI forwarded to the tree builder.</param>
        /// <returns>The node list returned by the tree builder.</returns>
        public static IList <DomNode> ParseFragment(String fragmentHtml, HtmlElement context, Uri baseUri)
        {
            var builder = new HtmlTreeBuilder();
            var ignoredErrors = HtmlParseErrorCollection.NoTracking();

            return builder.ParseFragment(fragmentHtml, context, baseUri, ignoredErrors);
        }
Exemple #9
0
        /// <summary>
        /// Parses <paramref name="html"/> with a new <c>HtmlTreeBuilder</c> and a
        /// non-tracking error collection.
        /// </summary>
        /// <param name="html">Markup to parse.</param>
        /// <param name="baseUri">Base URI forwarded to the tree builder.</param>
        /// <returns>The document returned by the tree builder.</returns>
        public static HtmlDocument Parse(String html, Uri baseUri)
        {
            // The variable is typed as the base class so the call binds to TreeBuilder.Parse.
            TreeBuilder builder = new HtmlTreeBuilder();
            var ignoredErrors = HtmlParseErrorCollection.NoTracking();

            return builder.Parse(html, baseUri, ignoredErrors);
        }
Exemple #10
0
        /// <summary>
        /// Parses <paramref name="inputFragment"/>, optionally in the context of an existing element,
        /// and returns a list copy of the parsed nodes.
        /// </summary>
        /// <remarks>
        /// With a null <paramref name="context"/> the parser runs directly and the document's children are
        /// returned. Otherwise the document inherits the quirks mode of the context's owner document (when
        /// it has one), the tokeniser's starting state is chosen from the context's node name, and parsing
        /// happens beneath a new "html" root element whose children are returned.
        /// </remarks>
        /// <param name="inputFragment">Text of the fragment to parse.</param>
        /// <param name="context">Context element; may be null.</param>
        /// <param name="baseUri">Base URI for the parse; also assigned to the synthetic root.</param>
        /// <param name="errors">Error collection forwarded to <c>InitialiseParse</c>.</param>
        /// <returns>A list copy of the root's children, or of the document's children when there is no context.</returns>
        public override IList <DomNode> ParseFragment(string inputFragment, HtmlElement context, Uri baseUri, HtmlParseErrorCollection errors)
        {
            InitialiseParse(inputFragment, baseUri, errors);
            contextElement  = context;
            fragmentParsing = true;

            // No context element: parse straight into the document.
            if (context == null)
            {
                RunParser();
                // TODO Shouldn't need ToList when concurrent modifications can be saved
                return(doc.ChildNodes.ToList());
            }

            // quirks setup:
            if (context.OwnerDocument != null)
            {
                doc.QuirksMode = context.OwnerDocument.GetQuirksMode();
            }

            // Choose the tokeniser's starting state from the context element's name.
            string tagName = context.NodeName;
            TokeniserState initialState;

            switch (tagName)
            {
            case "title":
            case "textarea":
                initialState = TokeniserState.Rcdata;
                break;

            case "script":
                initialState = TokeniserState.ScriptData;
                break;

            case "iframe":
            case "noembed":
            case "noframes":
            case "style":
            case "xmp":
                initialState = TokeniserState.Rawtext;
                break;

            default:
                // Covers "noscript" (would be rawtext if scripting were enabled),
                // "plaintext", and every other tag name.
                initialState = TokeniserState.Data;
                break;
            }

            tokeniser.Transition(initialState);

            var fragmentRoot = (HtmlElement)doc.CreateElement("html");
            fragmentRoot.BaseUri = baseUri;
            doc.Append(fragmentRoot);
            stack.AddFirst(fragmentRoot);
            ResetInsertionMode();
            // TODO: setup form element to nearest form on context (up ancestor chain)

            RunParser();
            // TODO Shouldn't need ToList when concurrent modifications can be saved
            return(fragmentRoot.ChildNodes.ToList());
        }
Exemple #11
0
 /// <summary>
 /// Resets the builder state to <c>HtmlTreeBuilderState.Initial</c> and then delegates to the base parse.
 /// </summary>
 /// <param name="input">Text to parse.</param>
 /// <param name="baseUri">Base URI for the parse.</param>
 /// <param name="errors">Error collection for the parse.</param>
 /// <returns>The document returned by the base implementation.</returns>
 public override HtmlDocument Parse(string input, Uri baseUri, HtmlParseErrorCollection errors)
 {
     // The state is reset before the base class initialises and runs the parser.
     _state = HtmlTreeBuilderState.Initial;

     HtmlDocument parsed = base.Parse(input, baseUri, errors);
     return parsed;
 }
Exemple #12
0
 /// <summary>
 /// Adds an "unexpected end of file" entry to <paramref name="errors"/>, using the reader's
 /// current position and the tokeniser state as the message argument.
 /// </summary>
 /// <param name="errors">Collection the entry is added to.</param>
 /// <param name="reader">Reader whose <c>Position</c> is passed along.</param>
 /// <param name="state">Tokeniser state inserted into the message.</param>
 public static void UnexpectedlyReachedEof(HtmlParseErrorCollection errors, CharacterReader reader, TokeniserState state)
 {
     const string message = "Unexpectedly reached end of file (EOF) in input state [{0}]";

     errors.Add(reader.Position, message, state);
 }
Exemple #13
0
 /// <summary>
 /// Adds an "unexpected char" entry to <paramref name="errors"/>, using the reader's current
 /// position, its current character, and the tokeniser state.
 /// </summary>
 /// <param name="errors">Collection the entry is added to.</param>
 /// <param name="reader">Reader whose <c>Position</c> and <c>Current</c> are passed along.</param>
 /// <param name="state">Tokeniser state inserted into the message.</param>
 public static void UnexpectedChar(HtmlParseErrorCollection errors, CharacterReader reader, TokeniserState state)
 {
     const string message = "Unexpected char '{0}' in input state [{1}]";

     errors.Add(reader.Position, message, reader.Current, state);
 }
Exemple #14
0
 /// <summary>
 /// Extension method that adds an "unexpected token" entry to <paramref name="errors"/>, using the
 /// given position, the token's type name, and the tree-builder state.
 /// </summary>
 /// <param name="errors">Collection the entry is added to.</param>
 /// <param name="readerPos">Position passed along with the entry.</param>
 /// <param name="state">Tree-builder state inserted into the message.</param>
 /// <param name="currentToken">Token whose <c>TokenTypeName</c> is inserted into the message.</param>
 public static void UnexpectedToken(this HtmlParseErrorCollection errors, int readerPos, HtmlTreeBuilderState state, Token currentToken)
 {
     const string message = "Unexpected token [{0}] when in state [{1}]";

     errors.Add(readerPos, message, currentToken.TokenTypeName, state);
 }