/// <summary>
/// Stores the supplied document, resets the state brush and re-runs the current query.
/// </summary>
/// <param name="url">The URL passed along with the document (not read here).</param>
/// <param name="document">The document that subsequent queries operate on.</param>
/// <param name="cancel">Cancellation token (not observed here).</param>
/// <returns>A task that completes after a single yield.</returns>
protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
 {
     // Reset the visual state before the new document is queried.
     State = Brushes.LightGray;

     // Remember the document, then evaluate the query against it.
     this.document = document;
     ChangeQuery();

     // Nothing asynchronous to do; yield once so the method is a genuine async method.
     await Task.Yield();
 }
        /// <summary>
        /// Gathers statistics about the document (elements, classes, attributes and words)
        /// on background tasks and publishes the results as pie charts.
        /// </summary>
        /// <param name="url">The URL passed along with the document (not read here).</param>
        /// <param name="document">The document to analyse.</param>
        /// <param name="cancel">
        /// Token observed before each background pass is scheduled and after it finishes.
        /// </param>
        /// <returns>A task that completes once all charts have been assigned.</returns>
        /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
        protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
        {
            var elements = new Dictionary<String, Int32>();
            var attributes = new Dictionary<String, Int32>();
            var classes = new Dictionary<String, Int32>();
            var words = new Dictionary<String, Int32>();
            var various = new Dictionary<String, Int32>();

            Status = "Gathering statistics ...";

            // NOTE(review): 'various' is filled but not consumed anywhere in this method - confirm
            // whether a chart/binding for it is missing or the collection can be dropped.
            various.Add("Links", document.Anchors.Length);
            various.Add("Images", document.Images.Length);
            various.Add("Scripts", document.Scripts.Length);
            various.Add("Stylesheets", document.StyleSheets.Length);
            various.Add("Embeds", document.Embeds.Length);
            various.Add("Forms", document.Forms.Length);

            // Forward the token to Task.Run so that an already cancelled request does not
            // schedule the (potentially expensive) tree walk at all.
            await Task.Run(() => Inspect(document.DocumentElement, elements, classes, attributes), cancel);
            cancel.ThrowIfCancellationRequested();

            await Task.Run(() => Words(document.DocumentElement.TextContent.ToCharArray(), words), cancel);
            cancel.ThrowIfCancellationRequested();

            MostElements = CreatePieChart("Most elements", elements);
            MostClasses = CreatePieChart("Most classes", classes);
            MostWords = CreatePieChart("Most words", words);
            MostAttributes = CreatePieChart("Most attributes", attributes);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Initializes a builder that parses the given source into the given document,
        /// applying the supplied options to the document first.
        /// </summary>
        /// <param name="source">The source manager providing the markup.</param>
        /// <param name="document">The document that receives the parsed content.</param>
        /// <param name="options">
        /// Options assigned to the document; an optional OnError handler is subscribed as well.
        /// </param>
        DocumentBuilder(SourceManager source, HTMLDocument document, DocumentOptions options)
        {
            document.Options = options;

            parser = new HtmlParser(document, source);
            parser.ErrorOccurred += ParseErrorOccurred;

            // Forward parse errors to the caller-supplied handler, if there is one.
            var onError = options.OnError;

            if (onError != null)
            {
                parser.ErrorOccurred += onError;
            }
        }
        /// <summary>
        /// Renders the body of the document into the root container.
        /// </summary>
        /// <param name="url">The URL of the document; stored for later use.</param>
        /// <param name="document">The document whose body is rendered.</param>
        /// <param name="cancel">Cancellation token (not observed here).</param>
        /// <returns>A task that completes after a single yield.</returns>
        protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
        {
            this.url = url;
            Status = "Rendering ...";
            Reset();

            // Resolve the target collection first, then render the body into it.
            var blocks = root.Blocks;
            var body = document.Body;
            blocks.Add(RenderBox(body));

            await Task.Yield();
        }
        /// <summary>
        /// Creates a new document. An <c>HTMLDocument</c> is created when the namespace equals
        /// <c>Namespaces.Html</c>, otherwise a plain <c>Document</c>.
        /// </summary>
        /// <param name="namespaceURI">The namespace that selects the document type.</param>
        /// <param name="qualifiedName">
        /// The node name to assign to the document; when null the default node name is kept.
        /// </param>
        /// <param name="doctype">
        /// The document type node to append; may be null, in which case nothing is appended.
        /// </param>
        /// <returns>The newly created document.</returns>
        public Document CreateDocument(String namespaceURI, String qualifiedName, DocumentType doctype)
        {
            Document doc;

            if (Namespaces.Html == namespaceURI)
                doc = new HTMLDocument();
            else
                doc = new Document();

            // The DOM allows createDocument to be called without a doctype; only append
            // a doctype that was actually supplied instead of passing null to AppendChild.
            if (doctype != null)
                doc.AppendChild(doctype);

            doc.NodeName = qualifiedName ?? doc.NodeName;
            return doc;
        }
        /// <summary>
        /// Rebuilds the node collection from the child nodes of the document.
        /// </summary>
        /// <param name="url">The URL passed along with the document (not read here).</param>
        /// <param name="document">The document whose child nodes are turned into view models.</param>
        /// <param name="cancel">Cancellation token (not observed here).</param>
        /// <returns>A task that completes after a single yield.</returns>
        protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
        {
            nodes.Clear();
            Status = "Constructing tree ...";

            // Each created view model is attached to the collection and then added to it.
            foreach (var viewModel in TreeNodeViewModel.SelectFrom(document.ChildNodes))
            {
                viewModel.Parent = nodes;
                nodes.Add(viewModel);
            }

            await Task.Yield();
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Parses the given HTML source with <c>DocumentBuilder.Html</c> and keeps the
 /// resulting document for the following test steps.
 /// </summary>
 /// <param name="source">The HTML source code to parse.</param>
 /// <returns>The current instance, to allow call chaining.</returns>
 public CssSelectorTest LoadFromSource(String source)
 {
     var parsed = DocumentBuilder.Html(source);
     _doc = parsed;
     return this;
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds a list of nodes according to 8.4 Parsing HTML fragments.
        /// </summary>
        /// <param name="sourceCode">The string to use as source code.</param>
        /// <param name="context">
        /// The optional context node. Without a context the child nodes of the builder's
        /// result document are returned.
        /// </param>
        /// <returns>A list of parsed nodes.</returns>
        public static NodeList HtmlFragment(String sourceCode, Node context = null)
        {
            var manager = new SourceManager(sourceCode);
            var document = new HTMLDocument();
            var builder = new DocumentBuilder(manager, document);

            // No context: hand back the children of the result document.
            if (context == null)
            {
                return builder.HtmlResult.ChildNodes;
            }

            // The fragment inherits a non-default quirks mode from the context's owner document.
            var owner = context.OwnerDocument;

            if (owner != null && owner.QuirksMode != QuirksMode.Off)
            {
                document.QuirksMode = owner.QuirksMode;
            }

            // Spec note: an implementation that does not report errors could use the PLAINTEXT
            // state instead of the RAWTEXT and script data states, since no appropriate end tag
            // token exists in the fragment case and far fewer state transitions are involved.
            var fragmentParser = (HtmlParser)builder.parser;
            fragmentParser.SwitchToFragment(context);

            return builder.HtmlResult.DocumentElement.ChildNodes;
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Initializes a builder that parses the given source into the given document.
 /// </summary>
 /// <param name="source">The source manager providing the markup.</param>
 /// <param name="document">The document that receives the parsed content.</param>
 DocumentBuilder(SourceManager source, HTMLDocument document)
 {
     // Create the parser and route its parse errors through this builder.
     parser = new HtmlParser(document, source);
     parser.ErrorOccurred += ParseErrorOccurred;
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds a list of nodes according to 8.4 Parsing HTML fragments.
        /// </summary>
        /// <param name="sourceCode">The string to use as source code.</param>
        /// <param name="context">[Optional] The context node to use.</param>
        /// <param name="options">
        /// [Optional] Options to use for the document generation. When omitted, options with
        /// scripting disabled are used.
        /// </param>
        /// <returns>A list of parsed nodes.</returns>
        public static NodeList HtmlFragment(String sourceCode, Node context = null, DocumentOptions options = null)
        {
            var manager = new SourceManager(sourceCode);
            var document = new HTMLDocument();

            // Scripting is disabled for HTML fragments by default (security reasons).
            if (options == null)
            {
                options = new DocumentOptions(scripting: false);
            }

            var builder = new DocumentBuilder(manager, document, options);

            // No context: hand back the children of the result document.
            if (context == null)
            {
                return builder.HtmlResult.ChildNodes;
            }

            // The fragment inherits a non-default quirks mode from the context's owner document.
            var owner = context.OwnerDocument;

            if (owner != null && owner.QuirksMode != QuirksMode.Off)
            {
                document.QuirksMode = owner.QuirksMode;
            }

            var fragmentParser = (HtmlParser)builder.parser;
            fragmentParser.SwitchToFragment(context);

            return fragmentParser.Result.DocumentElement.ChildNodes;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// With scripting enabled, a noscript element whose content starts a comment that
        /// contains another noscript start tag must end at the first closing noscript tag;
        /// the remainder ends up as text in the body.
        /// </summary>
        public void TreeNoScriptWithNoScriptCommentInside()
        {
            var document = new HTMLDocument();
            var parser = new HtmlParser(document, @"<noscript><!--<noscript></noscript>--></noscript>");
            document.Options = new DocumentOptions(scripting: true);
            parser.Parse();

            // <html>
            var html = document.ChildNodes[0];
            Assert.AreEqual(2, html.ChildNodes.Length);
            Assert.AreEqual(0, html.Attributes.Length);
            Assert.AreEqual("html", html.NodeName);
            Assert.AreEqual(NodeType.Element, html.NodeType);

            // <html> / <head>
            var head = html.ChildNodes[0];
            Assert.AreEqual(1, head.ChildNodes.Length);
            Assert.AreEqual(0, head.Attributes.Length);
            Assert.AreEqual("head", head.NodeName);
            Assert.AreEqual(NodeType.Element, head.NodeType);

            // <html> / <head> / <noscript>
            var headNoScript = head.ChildNodes[0];
            Assert.AreEqual(1, headNoScript.ChildNodes.Length);
            Assert.AreEqual(0, headNoScript.Attributes.Length);
            Assert.AreEqual("noscript", headNoScript.NodeName);
            Assert.AreEqual(NodeType.Element, headNoScript.NodeType);

            // The noscript content is kept as raw text.
            var headNoScriptText = headNoScript.ChildNodes[0];
            Assert.AreEqual(NodeType.Text, headNoScriptText.NodeType);
            Assert.AreEqual("<!--<noscript>", headNoScriptText.TextContent);

            // <html> / <body>
            var body = html.ChildNodes[1];
            Assert.AreEqual(1, body.ChildNodes.Length);
            Assert.AreEqual(0, body.Attributes.Length);
            Assert.AreEqual("body", body.NodeName);
            Assert.AreEqual(NodeType.Element, body.NodeType);

            // The trailing comment end is plain text inside the body.
            var bodyText = body.ChildNodes[0];
            Assert.AreEqual(NodeType.Text, bodyText.NodeType);
            Assert.AreEqual("-->", bodyText.TextContent);
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Parses the quirks mode asset into the document used by the tests.
 /// </summary>
 public void Setup()
 {
     var parsed = DocumentBuilder.Html(Assets.QuirksMode);
     document = parsed;
 }
Ejemplo n.º 13
0
 /// <summary>
 /// Creates a fresh HTML document and loads it from the given URL.
 /// </summary>
 /// <param name="url">The URL that hosts the HTML content.</param>
 /// <returns>The value returned by the document's <c>Load</c> call.</returns>
 public static HTMLDocument LoadFromURL(String url)
 {
     return new HTMLDocument().Load(url);
 }
Ejemplo n.º 14
0
        /// <summary>
        /// With scripting enabled, a noscript element after the doctype keeps an iframe start
        /// tag as raw text, and the text following the noscript element lands in the body.
        /// </summary>
        public void ScriptNoScriptAfterDoctypeWithIFrameContentAndTextAfter()
        {
            var document = new HTMLDocument();
            var parser = new HtmlParser(document, "<!doctype html><noscript><iframe></noscript>X");
            document.Options = new DocumentOptions(scripting: true);
            parser.Parse();

            // <!doctype html>
            var doctype = document.ChildNodes[0] as DocumentType;
            Assert.IsNotNull(doctype);
            Assert.AreEqual(NodeType.DocumentType, doctype.NodeType);
            Assert.AreEqual(@"html", doctype.Name);

            // <html>
            var html = document.ChildNodes[1];
            Assert.AreEqual(2, html.ChildNodes.Length);
            Assert.AreEqual(0, html.Attributes.Length);
            Assert.AreEqual("html", html.NodeName);
            Assert.AreEqual(NodeType.Element, html.NodeType);

            // <html> / <head>
            var head = html.ChildNodes[0];
            Assert.AreEqual(1, head.ChildNodes.Length);
            Assert.AreEqual(0, head.Attributes.Length);
            Assert.AreEqual("head", head.NodeName);
            Assert.AreEqual(NodeType.Element, head.NodeType);

            // <html> / <head> / <noscript>
            var headNoScript = head.ChildNodes[0];
            Assert.AreEqual(1, headNoScript.ChildNodes.Length);
            Assert.AreEqual(0, headNoScript.Attributes.Length);
            Assert.AreEqual("noscript", headNoScript.NodeName);
            Assert.AreEqual(NodeType.Element, headNoScript.NodeType);

            // The iframe start tag is kept as raw text.
            var headNoScriptText = headNoScript.ChildNodes[0];
            Assert.AreEqual(NodeType.Text, headNoScriptText.NodeType);
            Assert.AreEqual("<iframe>", headNoScriptText.TextContent);

            // <html> / <body>
            var body = html.ChildNodes[1];
            Assert.AreEqual(1, body.ChildNodes.Length);
            Assert.AreEqual(0, body.Attributes.Length);
            Assert.AreEqual("body", body.NodeName);
            Assert.AreEqual(NodeType.Element, body.NodeType);

            // The trailing "X" is the only body content.
            var bodyText = body.ChildNodes[0];
            Assert.AreEqual(NodeType.Text, bodyText.NodeType);
            Assert.AreEqual("X", bodyText.TextContent);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// With scripting enabled, a comment start inside a noscript element does not swallow
        /// the closing tag: the first noscript ends up in the head with raw text content, and
        /// the following text plus the second noscript element are placed in the body.
        /// </summary>
        public void ScriptNoScriptWithCommentStartAndTextInsideBeforeClosing()
        {
            var document = new HTMLDocument();
            var parser = new HtmlParser(document, "<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>");
            document.Options = new DocumentOptions(scripting: true);
            parser.Parse();

            // <!doctype html>
            var doctype = document.ChildNodes[0] as DocumentType;
            Assert.IsNotNull(doctype);
            Assert.AreEqual(NodeType.DocumentType, doctype.NodeType);
            Assert.AreEqual(@"html", doctype.Name);

            // <html>
            var html = document.ChildNodes[1];
            Assert.AreEqual(2, html.ChildNodes.Length);
            Assert.AreEqual(0, html.Attributes.Length);
            Assert.AreEqual("html", html.NodeName);
            Assert.AreEqual(NodeType.Element, html.NodeType);

            // <html> / <head>
            var head = html.ChildNodes[0];
            Assert.AreEqual(1, head.ChildNodes.Length);
            Assert.AreEqual(0, head.Attributes.Length);
            Assert.AreEqual("head", head.NodeName);
            Assert.AreEqual(NodeType.Element, head.NodeType);

            // <html> / <head> / <noscript>
            var headNoScript = head.ChildNodes[0];
            Assert.AreEqual(1, headNoScript.ChildNodes.Length);
            Assert.AreEqual(0, headNoScript.Attributes.Length);
            Assert.AreEqual("noscript", headNoScript.NodeName);
            Assert.AreEqual(NodeType.Element, headNoScript.NodeType);

            // The comment start is kept as raw text.
            var headNoScriptText = headNoScript.ChildNodes[0];
            Assert.AreEqual(NodeType.Text, headNoScriptText.NodeType);
            Assert.AreEqual("<!--", headNoScriptText.TextContent);

            // <html> / <body>
            var body = html.ChildNodes[1];
            Assert.AreEqual(2, body.ChildNodes.Length);
            Assert.AreEqual(0, body.Attributes.Length);
            Assert.AreEqual("body", body.NodeName);
            Assert.AreEqual(NodeType.Element, body.NodeType);

            // <body> / "X"
            var bodyText = body.ChildNodes[0];
            Assert.AreEqual(NodeType.Text, bodyText.NodeType);
            Assert.AreEqual("X", bodyText.TextContent);

            // <body> / <noscript>
            var bodyNoScript = body.ChildNodes[1];
            Assert.AreEqual(1, bodyNoScript.ChildNodes.Length);
            Assert.AreEqual(0, bodyNoScript.Attributes.Length);
            Assert.AreEqual("noscript", bodyNoScript.NodeName);
            Assert.AreEqual(NodeType.Element, bodyNoScript.NodeType);

            // The comment end is kept as raw text of the second noscript element.
            var bodyNoScriptText = bodyNoScript.ChildNodes[0];
            Assert.AreEqual(NodeType.Text, bodyNoScriptText.NodeType);
            Assert.AreEqual("-->", bodyNoScriptText.TextContent);
        }
Ejemplo n.º 16
0
 /// <summary>
 /// Processes the given document. Derived classes provide the implementation.
 /// </summary>
 /// <param name="url">The URL passed along with the document.</param>
 /// <param name="document">The HTML document to work with.</param>
 /// <param name="cancel">A token that implementations may observe to abort their work.</param>
 /// <returns>A task representing the asynchronous operation.</returns>
 protected abstract Task Use(Uri url, HTMLDocument document, CancellationToken cancel);
        /// <summary>
        /// Replaces the list of known stylesheets with the ones of the given document.
        /// </summary>
        /// <param name="url">The URL of the document; stored for later use.</param>
        /// <param name="document">The document whose stylesheets are listed.</param>
        /// <param name="cancel">Cancellation token (not observed here).</param>
        /// <returns>A task that completes after a single yield.</returns>
        protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
        {
            local = url;
            Selected = null;
            source.Clear();
            Status = "Looking for stylesheets ...";

            // Copy every stylesheet of the document into the bound collection.
            var sheets = document.StyleSheets;

            for (var index = 0; index < sheets.Length; index++)
            {
                source.Add(sheets[index]);
            }

            await Task.Yield();
        }
Ejemplo n.º 18
0
 /// <summary>
 /// Records the most recently loaded document together with its address and
 /// advances the state counter.
 /// </summary>
 /// <param name="document">The document that has been loaded.</param>
 /// <param name="url">The address as text; also added to the settings' URL list.</param>
 /// <param name="uri">The address as a <c>Uri</c>.</param>
 void UpdateRecent(HTMLDocument document, String url, Uri uri)
 {
     recentAddress = url;
     SettingsViewModel.Instance.AddUrl(url);

     recentDocument = document;
     recentUrl = uri;

     // Bump the counter and mirror the new value into the current state.
     state = ++recentState;
 }
Ejemplo n.º 19
0
 /// <summary>
 /// Parses the selectors web page asset into the document used by the tests.
 /// </summary>
 public void Setup()
 {
     var parsed = DocumentBuilder.Html(Assets.SelectorsWebpage);
     document = parsed;
 }
Ejemplo n.º 20
0
        /// <summary>
        /// With scripting enabled, the content of a noscript element inside a paragraph is
        /// kept as raw text, while the following span is parsed as a regular element.
        /// </summary>
        public void TreeParagraphWithTightAttributesAndNoScriptTagScriptingEnabled()
        {
            var document = new HTMLDocument();
            var parser = new HtmlParser(document, @"<p id=""status""><noscript><strong>A</strong></noscript><span>B</span></p>");
            document.Options = new DocumentOptions(scripting: true);
            parser.Parse();

            // <html>
            var html = document.ChildNodes[0];
            Assert.AreEqual(2, html.ChildNodes.Length);
            Assert.AreEqual(0, html.Attributes.Length);
            Assert.AreEqual("html", html.NodeName);
            Assert.AreEqual(NodeType.Element, html.NodeType);

            // <html> / <head>
            var head = html.ChildNodes[0];
            Assert.AreEqual(0, head.ChildNodes.Length);
            Assert.AreEqual(0, head.Attributes.Length);
            Assert.AreEqual("head", head.NodeName);
            Assert.AreEqual(NodeType.Element, head.NodeType);

            // <html> / <body>
            var body = html.ChildNodes[1];
            Assert.AreEqual(1, body.ChildNodes.Length);
            Assert.AreEqual(0, body.Attributes.Length);
            Assert.AreEqual("body", body.NodeName);
            Assert.AreEqual(NodeType.Element, body.NodeType);

            // <body> / <p id="status">
            var paragraph = body.ChildNodes[0];
            Assert.AreEqual(2, paragraph.ChildNodes.Length);
            Assert.AreEqual(1, paragraph.Attributes.Length);
            Assert.AreEqual("p", paragraph.NodeName);
            Assert.AreEqual(NodeType.Element, paragraph.NodeType);
            Assert.AreEqual("status", paragraph.Attributes["id"].Value);

            // <p> / <noscript>
            var noScript = paragraph.ChildNodes[0];
            Assert.AreEqual(1, noScript.ChildNodes.Length);
            Assert.AreEqual(0, noScript.Attributes.Length);
            Assert.AreEqual("noscript", noScript.NodeName);
            Assert.AreEqual(NodeType.Element, noScript.NodeType);

            // The markup inside noscript is kept as raw text.
            var noScriptText = noScript.ChildNodes[0];
            Assert.AreEqual(NodeType.Text, noScriptText.NodeType);
            Assert.AreEqual("<strong>A</strong>", noScriptText.TextContent);

            // <p> / <span>
            var span = paragraph.ChildNodes[1];
            Assert.AreEqual(1, span.ChildNodes.Length);
            Assert.AreEqual(0, span.Attributes.Length);
            Assert.AreEqual("span", span.NodeName);
            Assert.AreEqual(NodeType.Element, span.NodeType);

            // <span> / "B"
            var spanText = span.ChildNodes[0];
            Assert.AreEqual(NodeType.Text, spanText.NodeType);
            Assert.AreEqual("B", spanText.TextContent);
        }
Ejemplo n.º 21
0
 /// <summary>
 /// Downloads the content behind the given URL and parses it into the document
 /// used by the following test steps. Progress is written to the console.
 /// </summary>
 /// <param name="url">
 /// A valid URL e.g. http://www.w3.org/TR/2001/CR-css3-selectors-20011113/.
 /// </param>
 /// <returns>The current instance.</returns>
 public CssSelectorTest LoadFromUrl(string url)
 {
     Console.Write("Loading ");
     Console.Write(url);
     Console.Write(" . . . ");
     _url = url;

     // The client, the response and the content stream hold network resources; release
     // them deterministically once parsing is done instead of leaving them to the finalizer.
     // NOTE(review): assumes DocumentBuilder.Html(Stream) has fully consumed the stream
     // when it returns - confirm.
     // The method is synchronous by contract, hence the blocking .Result calls.
     using (var client = new HttpClient())
     using (var response = client.GetAsync(url).Result)
     using (var content = response.Content.ReadAsStreamAsync().Result)
     {
         _doc = DocumentBuilder.Html(content);
     }

     Console.WriteLine("loaded !");
     return this;
 }
Ejemplo n.º 22
0
        /// <summary>
        /// Initializes the HTML parser for the given document, reading its input
        /// from the given source manager.
        /// </summary>
        /// <param name="document">The document instance to be constructed.</param>
        /// <param name="source">The source to tokenize.</param>
        internal HtmlParser(HTMLDocument document, SourceManager source)
        {
            // Initial tree construction state.
            doc = document;
            started = false;
            frameset = true;
            insert = HtmlTreeMode.Initial;
            open = new List<Element>();
            formatting = new List<Element>();

            // Tokenizer errors are re-raised as errors of this parser.
            tokenizer = new HtmlTokenizer(source);
            tokenizer.ErrorOccurred += (sender, args) =>
            {
                if (ErrorOccurred != null)
                {
                    ErrorOccurred(this, args);
                }
            };
        }
Ejemplo n.º 23
0
 /// <summary>
 /// A title assigned to a fresh document must be returned unchanged by the getter.
 /// </summary>
 public void HtmlCustomTitleGeneration()
 {
     var expectedTitle = "My Title";
     var document = new HTMLDocument();

     document.Title = expectedTitle;

     Assert.AreEqual(expectedTitle, document.Title);
 }
Ejemplo n.º 24
0
 /// <summary>
 /// The document's Head property must return the head element that was appended
 /// to the html root element.
 /// </summary>
 public void HtmlHasRightHeadElement()
 {
     var document = new HTMLDocument();

     // Build document -> html -> head by hand.
     var html = new HTMLHtmlElement();
     document.AppendChild(html);
     var expectedHead = new HTMLHeadElement();
     html.AppendChild(expectedHead);

     Assert.AreEqual(expectedHead, document.Head);
 }
Ejemplo n.º 25
0
 /// <summary>
 /// Wraps the given document in a DOM node view model and exposes it as the root.
 /// </summary>
 /// <param name="url">The URL passed along with the document (not read here).</param>
 /// <param name="document">The document to wrap.</param>
 /// <param name="cancel">Cancellation token (not observed here).</param>
 /// <returns>A task that completes after a single yield.</returns>
 protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
 {
     Status = "Constructing the DOM ...";

     var rootModel = new DOMNodeViewModel(document);
     Root = rootModel;

     await Task.Yield();
 }
Ejemplo n.º 26
0
        /// <summary>
        /// Generates the unit tests from the given overview document by walking the direct
        /// children of its body: named anchors ("valid", "invalid", "not-wf", "error") start
        /// a new section, and tables following such an anchor are inspected into that section.
        /// </summary>
        /// <param name="document">The document to inspect.</param>
        /// <returns>The test container filled with the entries that were found.</returns>
        public static XMLConfTest GenerateFromHtml(HTMLDocument document)
        {
            var body = document.Body;
            var current = Mode.None;
            var result = new XMLConfTest();
            var children = body.ChildNodes;

            for (var index = 0; index < children.Length; index++)
            {
                var node = children[index];

                if (node is HTMLAnchorElement && node.Attributes["name"] != null)
                {
                    String section = node.Attributes["name"].Value;

                    if (section == "valid")
                        current = Mode.Valid;
                    else if (section == "invalid")
                        current = Mode.Invalid;
                    else if (section == "not-wf")
                        current = Mode.NotWf;
                    else if (section == "error")
                        current = Mode.Error;
                    else
                        continue; // Unknown anchor: keep the current section untouched.

                    // NOTE(review): Add presumably throws if the same section anchor occurs
                    // twice - confirm against the type of 'tests'.
                    result.tests.Add(current, new List<Entry>());
                    continue;
                }

                // Tables only count once a section has been opened.
                if (current != Mode.None && node is HTMLTableElement)
                {
                    Inspect((HTMLTableElement)node, result.tests[current]);
                }
            }

            return result;
        }
Ejemplo n.º 27
0
 /// <summary>
 /// Creates a new HTML document and copies the node properties, the document
 /// properties and the embedded / scripting settings of this document to it.
 /// </summary>
 /// <param name="deep">Passed on to the property copy helpers.</param>
 /// <returns>The newly created document.</returns>
 public override Node CloneNode(Boolean deep = true)
 {
     var clone = new HTMLDocument();

     CopyProperties(this, clone, deep);
     CopyDocumentProperties(this, clone, deep);

     // HTML document specific settings.
     clone.embedded = embedded;
     clone.scripting = scripting;

     return clone;
 }
Ejemplo n.º 28
0
 /// <summary>
 /// The document's Body property must return the body element that was appended
 /// to the html root element.
 /// </summary>
 public void HtmlHasRightBodyElement()
 {
     var document = new HTMLDocument();

     // Build document -> html -> body by hand.
     var html = new HTMLHtmlElement();
     document.AppendChild(html);
     var expectedBody = new HTMLBodyElement();
     html.AppendChild(expectedBody);

     Assert.AreEqual(expectedBody, document.Body);
 }
Ejemplo n.º 29
0
 /// <summary>
 /// Does nothing with the document; only yields once.
 /// </summary>
 /// <param name="url">The URL passed along with the document (not read here).</param>
 /// <param name="document">The document (not read here).</param>
 /// <param name="cancel">Cancellation token (not observed here).</param>
 /// <returns>A task that completes after a single yield.</returns>
 protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
 {
     // Intentionally empty apart from the yield.
     await Task.Yield();
 }
Ejemplo n.º 30
0
 /// <summary>
 /// Creates a new HTML document and copies the node properties and the
 /// document properties of this document to it.
 /// </summary>
 /// <param name="deep">Passed on to the property copy helpers.</param>
 /// <returns>The newly created document.</returns>
 public override Node CloneNode(Boolean deep = true)
 {
     var clone = new HTMLDocument();

     CopyProperties(this, clone, deep);
     CopyDocumentProperties(this, clone, deep);

     return clone;
 }