Beispiel #1
0
 /// <summary>
 /// Creates an HTML document in the given context and asynchronously
 /// parses the source supplied by the creation options into it.
 /// </summary>
 /// <param name="context">The browsing context hosting the document.</param>
 /// <param name="options">The creation options, including the source.</param>
 /// <param name="cancelToken">Token for cancellation.</param>
 /// <returns>The task that yields the parsed document.</returns>
 internal async static Task<IDocument> LoadAsync(IBrowsingContext context, CreateDocumentOptions options, CancellationToken cancelToken)
 {
     var parserOptions = new HtmlParserOptions
     {
         IsScripting = context.Configuration.IsScripting()
     };
     var htmlDocument = new HtmlDocument(context, options.Source);
     var builder = new HtmlDomBuilder(htmlDocument);
     htmlDocument.Setup(options);
     context.NavigateTo(htmlDocument);

     // Signal that parsing is about to start.
     context.Fire(new HtmlParseEvent(htmlDocument, completed: false));
     await builder.ParseAsync(parserOptions, cancelToken).ConfigureAwait(false);

     // Signal that parsing has finished.
     context.Fire(new HtmlParseEvent(htmlDocument, completed: true));
     return htmlDocument;
 }
Beispiel #2
0
        /// <summary>
        /// Creates a new document fragment whose children are the nodes
        /// obtained by parsing the given HTML with the fragment algorithm,
        /// using the given element as parsing context.
        /// </summary>
        /// <param name="context">The context for the fragment mode.</param>
        /// <param name="html">The HTML source code to use.</param>
        internal DocumentFragment(Element context, String html)
            : this(context.Owner)
        {
            var textSource = new TextSource(html);
            var scratchDocument = new HtmlDocument(Owner.Context, textSource);
            var builder = new HtmlDomBuilder(scratchDocument);
            var parserOptions = new HtmlParserOptions
            {
                IsEmbedded = false,
                IsScripting = Owner.Options.IsScripting()
            };
            var parsedRoot = builder.ParseFragment(parserOptions, context).DocumentElement;

            // Move every parsed node from the temporary root into this
            // fragment, one at a time and in document order.
            while (parsedRoot.HasChildNodes)
            {
                var node = parsedRoot.FirstChild;
                parsedRoot.RemoveChild(node);
                this.PreInsert(node, null);
            }
        }
Beispiel #3
0
 /// <summary>
 /// Creates a document that presents the source as plain text: an
 /// html/head/body skeleton is built and the complete source text is
 /// placed inside a single pre element. The source is not parsed as HTML.
 /// </summary>
 /// <param name="context">The browsing context hosting the document.</param>
 /// <param name="options">The creation options, including the source.</param>
 /// <param name="cancelToken">Token for cancellation.</param>
 /// <returns>The task that yields the created document.</returns>
 internal async static Task<IDocument> LoadTextAsync(IBrowsingContext context, CreateDocumentOptions options, CancellationToken cancelToken)
 {
     // No parser options are created here: nothing in this method runs
     // the HTML parser, so they would be unused.
     var document = new HtmlDocument(context, options.Source);
     document.Setup(options);
     context.NavigateTo(document);

     // Build the minimal element skeleton that wraps the text.
     var root = document.CreateElement(TagNames.Html);
     var head = document.CreateElement(TagNames.Head);
     var body = document.CreateElement(TagNames.Body);
     var pre = document.CreateElement(TagNames.Pre);
     document.AppendChild(root);
     root.AppendChild(head);
     root.AppendChild(body);
     body.AppendChild(pre);
     pre.SetAttribute(AttributeNames.Style, "word-wrap: break-word; white-space: pre-wrap;");

     // The full text is only read after the whole source was prefetched.
     await options.Source.PrefetchAllAsync(cancelToken).ConfigureAwait(false);
     pre.TextContent = options.Source.Text;
     return document;
 }
        /// <summary>
        /// Parses the given source code and returns the resulting nodes.
        /// When the context is an Element the fragment algorithm is used
        /// and the children of the resulting document element are
        /// returned; otherwise the source is parsed as a document and the
        /// document's child nodes are returned.
        /// </summary>
        /// <param name="sourceCode">The HTML source code to parse.</param>
        /// <param name="context">The optional context element.</param>
        /// <param name="configuration">The optional configuration.</param>
        /// <returns>The list of parsed nodes.</returns>
        public static INodeList ToHtmlFragment(this String sourceCode, IElement context = null, IConfiguration configuration = null)
        {
            // NOTE(review): browsingContext, textSource and htmlDocument are
            // not handed to the builder below - confirm whether they are
            // still required before removing them.
            var browsingContext = BrowsingContext.New(configuration);
            var textSource = new TextSource(sourceCode);
            var htmlDocument = new HtmlDocument(browsingContext, textSource);
            var builder = new HtmlDomBuilder(sourceCode, configuration);
            var contextElement = context as Element;

            if (contextElement == null)
            {
                return builder.Parse(default(HtmlParserOptions)).ChildNodes;
            }

            var fragmentOptions = new HtmlParserOptions
            {
                IsEmbedded = false,
                IsScripting = configuration.IsScripting()
            };
            var fragmentDocument = builder.ParseFragment(fragmentOptions, contextElement);
            return fragmentDocument.DocumentElement.ChildNodes;
        }
        /// <summary>
        /// Asynchronously consumes tokens from the tokenizer until the
        /// end-of-file token has been processed and returns the document.
        /// </summary>
        /// <param name="options">The options to use for parsing.</param>
        /// <param name="cancelToken">The cancellation token to use.</param>
        /// <returns>The task that yields the parsed document.</returns>
        public async Task<HtmlDocument> ParseAsync(HtmlParserOptions options, CancellationToken cancelToken)
        {
            var textSource = _document.Source;
            HtmlToken current;
            _options = options;

            while (true)
            {
                // Request more input when fewer than 1024 characters
                // remain between the current index and the known length.
                var remaining = textSource.Length - textSource.Index;

                if (remaining < 1024)
                {
                    await textSource.Prefetch(8192, cancelToken).ConfigureAwait(false);
                }

                current = _tokenizer.Get();
                Consume(current);

                // If a pending task was recorded, await it before
                // fetching the next token.
                if (_waiting != null)
                {
                    await _waiting.ConfigureAwait(false);
                    _waiting = null;
                }

                if (current.Type == HtmlTokenType.EndOfFile)
                {
                    break;
                }
            }

            return _document;
        }
 /// <summary>
 /// Initializes the parser by storing the supplied options and browsing
 /// context for later use.
 /// </summary>
 /// <param name="options">The parser options to store.</param>
 /// <param name="context">The browsing context to store.</param>
 public HtmlParser(HtmlParserOptions options, IBrowsingContext context)
 {
     _context = context;
     _options = options;
 }
 /// <summary>
 /// Creates a new parser with the custom options and configuration.
 /// A browsing context is obtained from the configuration via
 /// BrowsingContext.New and passed on to the context-based constructor.
 /// </summary>
 /// <param name="options">The options to use.</param>
 /// <param name="configuration">The configuration to create the context from.</param>
 public HtmlParser(HtmlParserOptions options, IConfiguration configuration)
     : this(options, BrowsingContext.New(configuration))
 {
 }
 /// <summary>
 /// Creates a new parser with the custom options. Configuration.Default
 /// is passed on as the configuration.
 /// </summary>
 /// <param name="options">The options to use.</param>
 public HtmlParser(HtmlParserOptions options)
     : this(options, Configuration.Default)
 {
 }
Beispiel #9
0
 /// <summary>
 /// Initializes the parser by storing the supplied options and browsing
 /// context for later use.
 /// </summary>
 /// <param name="options">The parser options to store.</param>
 /// <param name="context">The browsing context to store.</param>
 public HtmlParser(HtmlParserOptions options, IBrowsingContext context)
 {
     _context = context;
     _options = options;
 }
Beispiel #10
0
 /// <summary>
 /// Creates a new parser with the custom options and configuration.
 /// A browsing context is obtained from the configuration via
 /// BrowsingContext.New and passed on to the context-based constructor.
 /// </summary>
 /// <param name="options">The options to use.</param>
 /// <param name="configuration">The configuration to create the context from.</param>
 public HtmlParser(HtmlParserOptions options, IConfiguration configuration)
     : this(options, BrowsingContext.New(configuration))
 {
 }
Beispiel #11
0
        /// <summary>
        /// With strict mode switched off, parsing markup whose attribute
        /// value has an opening quote that is never closed is expected to
        /// raise exactly one parse error event on the browsing context.
        /// </summary>
        public void NormalModeShouldError()
        {
            var markup = @"<!DOCTYPE html>
<title>Test</title>
<body>
    <div myAttribute=""blabla>123</div>
</body>";
            var reported = new List<HtmlErrorEvent>();
            var context = BrowsingContext.New(Configuration.Default);

            // Collect every parse error published through the context.
            context.ParseError += (sender, args) => reported.Add((HtmlErrorEvent)args);

            var parser = new HtmlParser(new HtmlParserOptions { IsStrictMode = false }, context);
            parser.Parse(markup);

            Assert.AreEqual(1, reported.Count);
        }
        /// <summary>
        /// Creates a document for the given source, fills its metadata
        /// from the response (content type, referrer, address and cookie),
        /// publishes a parse start event if an event aggregator is
        /// configured, and then parses the source asynchronously.
        /// </summary>
        /// <param name="context">The browsing context.</param>
        /// <param name="response">The response to take the metadata from.</param>
        /// <param name="source">The source to parse.</param>
        /// <param name="cancelToken">Token for cancellation.</param>
        /// <returns>The task that builds the document.</returns>
        internal async static Task<HtmlDocument> LoadAsync(IBrowsingContext context, IResponse response, TextSource source, CancellationToken cancelToken)
        {
            var htmlDocument = new HtmlDocument(context, source);

            // The start event is disposed when this block is left, whether
            // parsing completed or threw.
            using (var startEvent = new HtmlParseStartEvent(htmlDocument))
            {
                var configuration = context.Configuration;
                var aggregator = configuration.Events;
                var builder = new HtmlDomBuilder(htmlDocument);

                htmlDocument.ContentType = response.Headers.GetOrDefault(HeaderNames.ContentType, MimeTypes.Html);
                htmlDocument.Referrer = response.Headers.GetOrDefault(HeaderNames.Referer, String.Empty);
                htmlDocument.DocumentUri = response.Address.Href;
                htmlDocument.Cookie = response.Headers.GetOrDefault(HeaderNames.SetCookie, String.Empty);
                htmlDocument.ReadyState = DocumentReadyState.Loading;

                if (aggregator != null)
                {
                    aggregator.Publish(startEvent);
                }

                var parserOptions = new HtmlParserOptions
                {
                    IsScripting = configuration.IsScripting()
                };
                await builder.ParseAsync(parserOptions, cancelToken).ConfigureAwait(false);
            }

            return htmlDocument;
        }
Beispiel #13
0
        /// <summary>
        /// Creates a document in the provided context from the source in
        /// the creation options, publishes a parse start event if an event
        /// aggregator is configured, parses the source asynchronously and
        /// finally fires the end of the start event.
        /// </summary>
        /// <param name="context">The browsing context.</param>
        /// <param name="options">The creation options to consider.</param>
        /// <param name="cancelToken">Token for cancellation.</param>
        /// <returns>The task that builds the document.</returns>
        internal async static Task<IDocument> LoadAsync(IBrowsingContext context, CreateDocumentOptions options, CancellationToken cancelToken)
        {
            var htmlDocument = new HtmlDocument(context, options.Source);
            var startEvent = new HtmlParseStartEvent(htmlDocument);
            var configuration = context.Configuration;
            var aggregator = configuration.Events;
            var builder = new HtmlDomBuilder(htmlDocument);
            var parserOptions = new HtmlParserOptions { IsScripting = configuration.IsScripting() };

            htmlDocument.Setup(options);
            context.NavigateTo(htmlDocument);

            if (aggregator != null)
            {
                aggregator.Publish(startEvent);
            }

            await builder.ParseAsync(parserOptions, cancelToken).ConfigureAwait(false);

            // NOTE(review): FireEnd is only reached when parsing completes
            // without throwing - confirm that this is intended.
            startEvent.FireEnd();
            return htmlDocument;
        }
Beispiel #14
0
        /// <summary>
        /// Synchronously consumes tokens from the tokenizer until the
        /// end-of-file token has been processed and returns the document.
        /// The tokenizer's strict mode is taken from the options.
        /// </summary>
        /// <param name="options">The options to use for parsing.</param>
        /// <returns>The parsed document.</returns>
        public HtmlDocument Parse(HtmlParserOptions options)
        {
            _tokenizer.IsStrictMode = options.IsStrictMode;
            _options = options;

            HtmlToken current;

            do
            {
                current = _tokenizer.Get();
                Consume(current);

                // Block until a pending task (if any) has finished before
                // the next token is fetched.
                if (_waiting != null)
                {
                    _waiting.Wait();
                }

                _waiting = null;
            }
            while (current.Type != HtmlTokenType.EndOfFile);

            return _document;
        }
Beispiel #15
0
        /// <summary>
        /// With strict mode switched on, parsing markup whose attribute
        /// value has an opening quote that is never closed is expected to
        /// throw an HtmlParseException.
        /// </summary>
        public void StrictModeShouldYieldException()
        {
            var markup = @"<!DOCTYPE html>
<title>Test</title>
<body>
    <div myAttribute=""blabla>123</div>
</body>";
            var strictOptions = new HtmlParserOptions { IsStrictMode = true };
            var strictParser = new HtmlParser(strictOptions);

            Assert.Catch<HtmlParseException>(() => strictParser.Parse(markup));
        }
        /// <summary>
        /// Synchronously consumes tokens from the tokenizer until the
        /// end-of-file token has been processed and returns the document.
        /// </summary>
        /// <param name="options">The options to use for parsing.</param>
        /// <returns>The parsed document.</returns>
        public HtmlDocument Parse(HtmlParserOptions options)
        {
            _options = options;

            HtmlToken current;

            do
            {
                current = _tokenizer.Get();
                Consume(current);

                // Block until a pending task (if any) has finished, then
                // clear it before the next token is fetched.
                _waiting?.Wait();
                _waiting = null;
            }
            while (current.Type != HtmlTokenType.EndOfFile);

            return _document;
        }
        /// <summary>
        /// Prepares the parser for the fragment algorithm with the given
        /// context element and then parses the source: the tokenizer state
        /// is chosen from the context's local name, a fresh html root is
        /// added to the document and pushed on the open elements, the
        /// insertion mode is reset, and the closest form element among the
        /// context and its ancestors is recorded as the current form.
        /// </summary>
        /// <param name="options">The options to use for parsing.</param>
        /// <param name="context">
        /// The context element where the algorithm is applied to.
        /// </param>
        /// <returns>The document that received the parsed nodes.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown if no context element is given.
        /// </exception>
        public HtmlDocument ParseFragment(HtmlParserOptions options, Element context)
        {
            if (context == null)
            {
                throw new ArgumentNullException(nameof(context));
            }

            var localName = context.LocalName;

            // Pick the initial tokenizer state from the context element;
            // any other element keeps the tokenizer's current state.
            if (localName.IsOneOf(TagNames.Title, TagNames.Textarea))
            {
                _tokenizer.State = HtmlParseMode.RCData;
            }
            else if (localName.IsOneOf(TagNames.Style, TagNames.Xmp, TagNames.Iframe, TagNames.NoEmbed, TagNames.NoFrames) ||
                     (localName.Is(TagNames.NoScript) && options.IsScripting))
            {
                // noscript is only treated as raw text when scripting is on.
                _tokenizer.State = HtmlParseMode.Rawtext;
            }
            else if (localName.Is(TagNames.Script))
            {
                _tokenizer.State = HtmlParseMode.Script;
            }
            else if (localName.Is(TagNames.Plaintext))
            {
                _tokenizer.State = HtmlParseMode.Plaintext;
            }

            var htmlRoot = new HtmlHtmlElement(_document);
            _document.AddNode(htmlRoot);
            _openElements.Add(htmlRoot);

            if (context is HtmlTemplateElement)
            {
                _templateModes.Push(HtmlTreeMode.InTemplate);
            }

            Reset(context);

            _fragmentContext = context;
            _tokenizer.IsAcceptingCharacterData = !AdjustedCurrentNode.Flags.HasFlag(NodeFlags.HtmlMember);

            // Walk from the context element up through its ancestors and
            // remember the first form element that is found.
            for (var ancestor = context; ancestor != null; ancestor = ancestor.ParentElement as Element)
            {
                var form = ancestor as HtmlFormElement;

                if (form != null)
                {
                    _currentFormElement = form;
                    break;
                }
            }

            return Parse(options);
        }
Beispiel #18
0
 /// <summary>
 /// Creates a new parser with the custom options. Configuration.Default
 /// is passed on as the configuration.
 /// </summary>
 /// <param name="options">The options to use.</param>
 public HtmlParser(HtmlParserOptions options)
     : this(options, Configuration.Default)
 {
 }