/// <summary>
/// Stores the given document, resets the state brush to light gray and
/// re-evaluates the current query against the new document.
/// </summary>
/// <param name="url">The address of the document (not used here).</param>
/// <param name="document">The document that becomes the query target.</param>
/// <param name="cancel">The cancellation token (not observed here).</param>
/// <returns>A task that completes after yielding once.</returns>
protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
{
    // Neutral color until the query has been evaluated again.
    State = Brushes.LightGray;

    this.document = document;
    ChangeQuery();

    // Nothing asynchronous to wait for; yield so the method is truly async.
    await Task.Yield();
}
/// <summary>
/// Gathers statistics about the given document and publishes them as
/// pie charts (elements, classes, words and attributes).
/// </summary>
/// <param name="url">The address of the document (not used here).</param>
/// <param name="document">The document to analyze.</param>
/// <param name="cancel">Token checked after each background step.</param>
/// <returns>A task that completes once all charts have been assigned.</returns>
protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
{
    var elements = new Dictionary<String, Int32>();
    var attributes = new Dictionary<String, Int32>();
    var classes = new Dictionary<String, Int32>();
    var words = new Dictionary<String, Int32>();

    Status = "Gathering statistics ...";

    // NOTE(review): this dictionary is populated but never read in this method -
    // confirm whether a chart / binding for it is missing.
    var various = new Dictionary<String, Int32>
    {
        { "Links", document.Anchors.Length },
        { "Images", document.Images.Length },
        { "Scripts", document.Scripts.Length },
        { "Stylesheets", document.StyleSheets.Length },
        { "Embeds", document.Embeds.Length },
        { "Forms", document.Forms.Length }
    };

    // Walk the tree off the calling thread, then honor a pending cancellation.
    await Task.Run(() => Inspect(document.DocumentElement, elements, classes, attributes));
    cancel.ThrowIfCancellationRequested();

    // Same pattern for the word count over the document's text content.
    await Task.Run(() => Words(document.DocumentElement.TextContent.ToCharArray(), words));
    cancel.ThrowIfCancellationRequested();

    MostElements = CreatePieChart("Most elements", elements);
    MostClasses = CreatePieChart("Most classes", classes);
    MostWords = CreatePieChart("Most words", words);
    MostAttributes = CreatePieChart("Most attributes", attributes);
}
/// <summary>
/// Initializes a builder that parses the given source into the given
/// document, applying the supplied options.
/// </summary>
/// <param name="source">The source manager providing the markup.</param>
/// <param name="document">The document that receives the options and the parsed content.</param>
/// <param name="options">The options assigned to the document; its error callback (if any) is subscribed as well.</param>
DocumentBuilder(SourceManager source, HTMLDocument document, DocumentOptions options)
{
    // The options are assigned before the parser is created.
    document.Options = options;

    parser = new HtmlParser(document, source);
    parser.ErrorOccurred += ParseErrorOccurred;

    // Forward parse errors to the caller-provided handler when there is one.
    var onError = options.OnError;

    if (onError != null)
        parser.ErrorOccurred += onError;
}
/// <summary>
/// Renders the body of the given document into the root container.
/// </summary>
/// <param name="url">The address of the document; stored in the instance.</param>
/// <param name="document">The document whose body is rendered.</param>
/// <param name="cancel">The cancellation token (not observed here).</param>
/// <returns>A task that completes after yielding once.</returns>
protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
{
    this.url = url;
    Status = "Rendering ...";
    Reset();

    // Fetch the target collection first, then render and append the body.
    var blocks = root.Blocks;
    blocks.Add(RenderBox(document.Body));

    await Task.Yield();
}
/// <summary>
/// Creates a new document for the given namespace.
/// </summary>
/// <param name="namespaceURI">
/// The namespace of the document; the HTML namespace yields an
/// <c>HTMLDocument</c>, anything else a plain <c>Document</c>.
/// </param>
/// <param name="qualifiedName">
/// The name assigned to the document's <c>NodeName</c>; when null the
/// existing node name is kept.
/// </param>
/// <param name="doctype">
/// The document type to append as first child, or null for a document
/// without a doctype.
/// </param>
/// <returns>The newly created document.</returns>
public Document CreateDocument(String namespaceURI, String qualifiedName, DocumentType doctype)
{
    Document doc;

    if (Namespaces.Html == namespaceURI)
        doc = new HTMLDocument();
    else
        doc = new Document();

    // The doctype is optional (DOM createDocument only appends a non-null
    // doctype), so do not hand a null node to AppendChild.
    if (doctype != null)
        doc.AppendChild(doctype);

    doc.NodeName = qualifiedName ?? doc.NodeName;
    return doc;
}
/// <summary>
/// Rebuilds the tree view from the child nodes of the given document.
/// </summary>
/// <param name="url">The address of the document (not used here).</param>
/// <param name="document">The document whose child nodes populate the tree.</param>
/// <param name="cancel">The cancellation token (not observed here).</param>
/// <returns>A task that completes after yielding once.</returns>
protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
{
    // Start from an empty tree.
    nodes.Clear();
    Status = "Constructing tree ...";

    foreach (var treeNode in TreeNodeViewModel.SelectFrom(document.ChildNodes))
    {
        // Link the node to its container before inserting it.
        treeNode.Parent = nodes;
        nodes.Add(treeNode);
    }

    await Task.Yield();
}
/// <summary>
/// Parses the given markup and keeps the resulting document for later use.
/// </summary>
/// <param name="source">The HTML source code to parse.</param>
/// <returns>This instance, so that calls can be chained.</returns>
public CssSelectorTest LoadFromSource(String source)
{
    var parsed = DocumentBuilder.Html(source);
    _doc = parsed;

    return this;
}
/// <summary>
/// Parses the given source as an HTML fragment (see 8.4 Parsing HTML
/// fragments) and returns the resulting nodes.
/// </summary>
/// <param name="sourceCode">The markup to parse.</param>
/// <param name="context">
/// The optional context node. Without a context the children of the
/// parsed document are returned.
/// </param>
/// <returns>The list of parsed nodes.</returns>
public static NodeList HtmlFragment(String sourceCode, Node context = null)
{
    var source = new SourceManager(sourceCode);
    var doc = new HTMLDocument();
    var db = new DocumentBuilder(source, doc);

    // No context: nothing fragment-specific to set up.
    if (context == null)
        return db.HtmlResult.ChildNodes;

    // Inherit a non-default quirks mode from the context's document.
    var owner = context.OwnerDocument;

    if (owner != null && owner.QuirksMode != QuirksMode.Off)
        doc.QuirksMode = owner.QuirksMode;

    // Note: For performance reasons, an implementation that does not report errors and
    // that uses the actual state machine described in the specification directly could
    // use the PLAINTEXT state instead of the RAWTEXT and script data states where those
    // are mentioned. Except for rules regarding parse errors, they are equivalent, since
    // there is no appropriate end tag token in the fragment case, yet they involve far
    // fewer state transitions.
    var fragmentParser = (HtmlParser)db.parser;
    fragmentParser.SwitchToFragment(context);

    return db.HtmlResult.DocumentElement.ChildNodes;
}
/// <summary>
/// Initializes a builder that parses the given source into the given document.
/// </summary>
/// <param name="source">The source manager providing the markup.</param>
/// <param name="document">The document that receives the parsed content.</param>
DocumentBuilder(SourceManager source, HTMLDocument document)
{
    parser = new HtmlParser(document, source);

    // Parse errors raised by the parser are handled by this builder.
    parser.ErrorOccurred += ParseErrorOccurred;
}
/// <summary>
/// Parses the given source as an HTML fragment (see 8.4 Parsing HTML
/// fragments) and returns the resulting nodes.
/// </summary>
/// <param name="sourceCode">The markup to parse.</param>
/// <param name="context">[Optional] The context node to use.</param>
/// <param name="options">
/// [Optional] Options for the document generation. When omitted, options
/// with scripting disabled are used.
/// </param>
/// <returns>The list of parsed nodes.</returns>
public static NodeList HtmlFragment(String sourceCode, Node context = null, DocumentOptions options = null)
{
    var source = new SourceManager(sourceCode);
    var doc = new HTMLDocument();

    // Disable scripting for HTML fragments (security reasons) unless the
    // caller explicitly passed options.
    if (options == null)
        options = new DocumentOptions(scripting: false);

    var db = new DocumentBuilder(source, doc, options);

    // No context: return the children of the parsed document.
    if (context == null)
        return db.HtmlResult.ChildNodes;

    // Inherit a non-default quirks mode from the context's document.
    var owner = context.OwnerDocument;

    if (owner != null && owner.QuirksMode != QuirksMode.Off)
        doc.QuirksMode = owner.QuirksMode;

    var fragmentParser = (HtmlParser)db.parser;
    fragmentParser.SwitchToFragment(context);

    return fragmentParser.Result.DocumentElement.ChildNodes;
}
/// <summary>
/// With scripting enabled, the content of a noscript element is expected to
/// end up as text: the first closing tag terminates the element and the
/// remaining "--&gt;" lands in the body.
/// </summary>
public void TreeNoScriptWithNoScriptCommentInside()
{
    var document = new HTMLDocument();
    var htmlParser = new HtmlParser(document, @"<noscript><!--<noscript></noscript>--></noscript>");
    document.Options = new DocumentOptions(scripting: true);
    htmlParser.Parse();

    // <html>
    var html = document.ChildNodes[0];
    Assert.AreEqual(2, html.ChildNodes.Length);
    Assert.AreEqual(0, html.Attributes.Length);
    Assert.AreEqual("html", html.NodeName);
    Assert.AreEqual(NodeType.Element, html.NodeType);

    // <html><head>
    var head = html.ChildNodes[0];
    Assert.AreEqual(1, head.ChildNodes.Length);
    Assert.AreEqual(0, head.Attributes.Length);
    Assert.AreEqual("head", head.NodeName);
    Assert.AreEqual(NodeType.Element, head.NodeType);

    // <html><head><noscript>
    var noScript = head.ChildNodes[0];
    Assert.AreEqual(1, noScript.ChildNodes.Length);
    Assert.AreEqual(0, noScript.Attributes.Length);
    Assert.AreEqual("noscript", noScript.NodeName);
    Assert.AreEqual(NodeType.Element, noScript.NodeType);

    // Raw text inside the noscript element.
    var noScriptText = noScript.ChildNodes[0];
    Assert.AreEqual(NodeType.Text, noScriptText.NodeType);
    Assert.AreEqual("<!--<noscript>", noScriptText.TextContent);

    // <html><body>
    var body = html.ChildNodes[1];
    Assert.AreEqual(1, body.ChildNodes.Length);
    Assert.AreEqual(0, body.Attributes.Length);
    Assert.AreEqual("body", body.NodeName);
    Assert.AreEqual(NodeType.Element, body.NodeType);

    // Leftover text after the noscript element was closed.
    var bodyText = body.ChildNodes[0];
    Assert.AreEqual(NodeType.Text, bodyText.NodeType);
    Assert.AreEqual("-->", bodyText.TextContent);
}
/// <summary>
/// Parses the quirks-mode asset into the document used by the tests.
/// </summary>
public void Setup()
{
    var parsed = DocumentBuilder.Html(Assets.QuirksMode);
    document = parsed;
}
/// <summary>
/// Creates a fresh HTML document and loads it from the given URL.
/// </summary>
/// <param name="url">The address hosting the HTML content.</param>
/// <returns>The result of loading the document from the URL.</returns>
public static HTMLDocument LoadFromURL(String url)
{
    return new HTMLDocument().Load(url);
}
/// <summary>
/// With scripting enabled, an iframe start tag inside noscript is expected
/// to be kept as text, while the trailing "X" ends up in the body.
/// </summary>
public void ScriptNoScriptAfterDoctypeWithIFrameContentAndTextAfter()
{
    var document = new HTMLDocument();
    var htmlParser = new HtmlParser(document, "<!doctype html><noscript><iframe></noscript>X");
    document.Options = new DocumentOptions(scripting: true);
    htmlParser.Parse();

    // <!doctype html>
    var docType = document.ChildNodes[0] as DocumentType;
    Assert.IsNotNull(docType);
    Assert.AreEqual(NodeType.DocumentType, docType.NodeType);
    Assert.AreEqual(@"html", docType.Name);

    // <html>
    var html = document.ChildNodes[1];
    Assert.AreEqual(2, html.ChildNodes.Length);
    Assert.AreEqual(0, html.Attributes.Length);
    Assert.AreEqual("html", html.NodeName);
    Assert.AreEqual(NodeType.Element, html.NodeType);

    // <html><head>
    var head = html.ChildNodes[0];
    Assert.AreEqual(1, head.ChildNodes.Length);
    Assert.AreEqual(0, head.Attributes.Length);
    Assert.AreEqual("head", head.NodeName);
    Assert.AreEqual(NodeType.Element, head.NodeType);

    // <html><head><noscript>
    var noScript = head.ChildNodes[0];
    Assert.AreEqual(1, noScript.ChildNodes.Length);
    Assert.AreEqual(0, noScript.Attributes.Length);
    Assert.AreEqual("noscript", noScript.NodeName);
    Assert.AreEqual(NodeType.Element, noScript.NodeType);

    // Raw text inside the noscript element.
    var noScriptText = noScript.ChildNodes[0];
    Assert.AreEqual(NodeType.Text, noScriptText.NodeType);
    Assert.AreEqual("<iframe>", noScriptText.TextContent);

    // <html><body>
    var body = html.ChildNodes[1];
    Assert.AreEqual(1, body.ChildNodes.Length);
    Assert.AreEqual(0, body.Attributes.Length);
    Assert.AreEqual("body", body.NodeName);
    Assert.AreEqual(NodeType.Element, body.NodeType);

    // Text following the noscript element.
    var bodyText = body.ChildNodes[0];
    Assert.AreEqual(NodeType.Text, bodyText.NodeType);
    Assert.AreEqual("X", bodyText.TextContent);
}
/// <summary>
/// With scripting enabled, a comment start inside noscript is expected to be
/// kept as text; the following "X" and a second noscript element (holding
/// "--&gt;" as text) end up in the body.
/// </summary>
public void ScriptNoScriptWithCommentStartAndTextInsideBeforeClosing()
{
    var document = new HTMLDocument();
    var htmlParser = new HtmlParser(document, "<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>");
    document.Options = new DocumentOptions(scripting: true);
    htmlParser.Parse();

    // <!doctype html>
    var docType = document.ChildNodes[0] as DocumentType;
    Assert.IsNotNull(docType);
    Assert.AreEqual(NodeType.DocumentType, docType.NodeType);
    Assert.AreEqual(@"html", docType.Name);

    // <html>
    var html = document.ChildNodes[1];
    Assert.AreEqual(2, html.ChildNodes.Length);
    Assert.AreEqual(0, html.Attributes.Length);
    Assert.AreEqual("html", html.NodeName);
    Assert.AreEqual(NodeType.Element, html.NodeType);

    // <html><head>
    var head = html.ChildNodes[0];
    Assert.AreEqual(1, head.ChildNodes.Length);
    Assert.AreEqual(0, head.Attributes.Length);
    Assert.AreEqual("head", head.NodeName);
    Assert.AreEqual(NodeType.Element, head.NodeType);

    // <html><head><noscript>
    var headNoScript = head.ChildNodes[0];
    Assert.AreEqual(1, headNoScript.ChildNodes.Length);
    Assert.AreEqual(0, headNoScript.Attributes.Length);
    Assert.AreEqual("noscript", headNoScript.NodeName);
    Assert.AreEqual(NodeType.Element, headNoScript.NodeType);

    // Raw text inside the first noscript element.
    var headNoScriptText = headNoScript.ChildNodes[0];
    Assert.AreEqual(NodeType.Text, headNoScriptText.NodeType);
    Assert.AreEqual("<!--", headNoScriptText.TextContent);

    // <html><body>
    var body = html.ChildNodes[1];
    Assert.AreEqual(2, body.ChildNodes.Length);
    Assert.AreEqual(0, body.Attributes.Length);
    Assert.AreEqual("body", body.NodeName);
    Assert.AreEqual(NodeType.Element, body.NodeType);

    // Text between the two noscript elements.
    var bodyText = body.ChildNodes[0];
    Assert.AreEqual(NodeType.Text, bodyText.NodeType);
    Assert.AreEqual("X", bodyText.TextContent);

    // <html><body><noscript>
    var bodyNoScript = body.ChildNodes[1];
    Assert.AreEqual(1, bodyNoScript.ChildNodes.Length);
    Assert.AreEqual(0, bodyNoScript.Attributes.Length);
    Assert.AreEqual("noscript", bodyNoScript.NodeName);
    Assert.AreEqual(NodeType.Element, bodyNoScript.NodeType);

    // Raw text inside the second noscript element.
    var bodyNoScriptText = bodyNoScript.ChildNodes[0];
    Assert.AreEqual(NodeType.Text, bodyNoScriptText.NodeType);
    Assert.AreEqual("-->", bodyNoScriptText.TextContent);
}
/// <summary>
/// Processes the given document; every derived class supplies its own
/// implementation.
/// </summary>
/// <param name="url">The address associated with the document.</param>
/// <param name="document">The HTML document to work with.</param>
/// <param name="cancel">A token that implementations may observe to abort their work.</param>
/// <returns>A task representing the (possibly asynchronous) processing.</returns>
protected abstract Task Use(Uri url, HTMLDocument document, CancellationToken cancel);
/// <summary>
/// Replaces the listed stylesheets with those of the given document and
/// clears the current selection.
/// </summary>
/// <param name="url">The address of the document; stored in the instance.</param>
/// <param name="document">The document whose stylesheets are listed.</param>
/// <param name="cancel">The cancellation token (not observed here).</param>
/// <returns>A task that completes after yielding once.</returns>
protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
{
    local = url;
    Selected = null;
    source.Clear();
    Status = "Looking for stylesheets ...";

    // Copy the sheets one by one, in document order.
    for (var index = 0; index < document.StyleSheets.Length; index++)
        source.Add(document.StyleSheets[index]);

    await Task.Yield();
}
/// <summary>
/// Records the most recently loaded document together with its address,
/// adds the address to the settings and advances the state counter.
/// </summary>
/// <param name="document">The document that was loaded.</param>
/// <param name="url">The address as text; also passed to the settings.</param>
/// <param name="uri">The address as URI.</param>
void UpdateRecent(HTMLDocument document, String url, Uri uri)
{
    recentAddress = url;
    SettingsViewModel.Instance.AddUrl(url);

    recentDocument = document;
    recentUrl = uri;

    // Advance the counter and mirror the new value into the current state.
    state = ++recentState;
}
/// <summary>
/// Parses the selectors web page asset into the document used by the tests.
/// </summary>
public void Setup()
{
    var parsed = DocumentBuilder.Html(Assets.SelectorsWebpage);
    document = parsed;
}
/// <summary>
/// With scripting enabled, markup inside a noscript element within a
/// paragraph is expected to be kept as text, while the sibling span is
/// parsed as a regular element.
/// </summary>
public void TreeParagraphWithTightAttributesAndNoScriptTagScriptingEnabled()
{
    var document = new HTMLDocument();
    var htmlParser = new HtmlParser(document, @"<p id=""status""><noscript><strong>A</strong></noscript><span>B</span></p>");
    document.Options = new DocumentOptions(scripting: true);
    htmlParser.Parse();

    // <html>
    var html = document.ChildNodes[0];
    Assert.AreEqual(2, html.ChildNodes.Length);
    Assert.AreEqual(0, html.Attributes.Length);
    Assert.AreEqual("html", html.NodeName);
    Assert.AreEqual(NodeType.Element, html.NodeType);

    // <html><head>
    var head = html.ChildNodes[0];
    Assert.AreEqual(0, head.ChildNodes.Length);
    Assert.AreEqual(0, head.Attributes.Length);
    Assert.AreEqual("head", head.NodeName);
    Assert.AreEqual(NodeType.Element, head.NodeType);

    // <html><body>
    var body = html.ChildNodes[1];
    Assert.AreEqual(1, body.ChildNodes.Length);
    Assert.AreEqual(0, body.Attributes.Length);
    Assert.AreEqual("body", body.NodeName);
    Assert.AreEqual(NodeType.Element, body.NodeType);

    // <html><body><p id="status">
    var paragraph = body.ChildNodes[0];
    Assert.AreEqual(2, paragraph.ChildNodes.Length);
    Assert.AreEqual(1, paragraph.Attributes.Length);
    Assert.AreEqual("p", paragraph.NodeName);
    Assert.AreEqual(NodeType.Element, paragraph.NodeType);
    Assert.AreEqual("status", paragraph.Attributes["id"].Value);

    // <p><noscript>
    var noScript = paragraph.ChildNodes[0];
    Assert.AreEqual(1, noScript.ChildNodes.Length);
    Assert.AreEqual(0, noScript.Attributes.Length);
    Assert.AreEqual("noscript", noScript.NodeName);
    Assert.AreEqual(NodeType.Element, noScript.NodeType);

    // Raw text inside the noscript element.
    var noScriptText = noScript.ChildNodes[0];
    Assert.AreEqual(NodeType.Text, noScriptText.NodeType);
    Assert.AreEqual("<strong>A</strong>", noScriptText.TextContent);

    // <p><span>
    var span = paragraph.ChildNodes[1];
    Assert.AreEqual(1, span.ChildNodes.Length);
    Assert.AreEqual(0, span.Attributes.Length);
    Assert.AreEqual("span", span.NodeName);
    Assert.AreEqual(NodeType.Element, span.NodeType);

    // Text inside the span.
    var spanText = span.ChildNodes[0];
    Assert.AreEqual(NodeType.Text, spanText.NodeType);
    Assert.AreEqual("B", spanText.TextContent);
}
/// <summary>
/// Downloads the content behind the given URL, parses it and keeps the
/// resulting document for later use. Progress is written to the console.
/// </summary>
/// <param name="url">
/// A valid URL e.g. http://www.w3.org/TR/2001/CR-css3-selectors-20011113/.
/// </param>
/// <returns>This instance, so that calls can be chained.</returns>
public CssSelectorTest LoadFromUrl(string url)
{
    Console.Write("Loading ");
    Console.Write(url);
    Console.Write(" . . . ");
    _url = url;

    // The client, the response and the content stream hold network
    // resources; release them deterministically once parsing is done.
    // NOTE(review): this relies on DocumentBuilder.Html consuming the
    // stream before returning - confirm.
    using (var client = new HttpClient())
    using (var response = client.GetAsync(url).Result)
    using (var content = response.Content.ReadAsStreamAsync().Result)
    {
        _doc = DocumentBuilder.Html(content);
    }

    Console.WriteLine("loaded !");
    return this;
}
/// <summary>
/// Initializes the HTML parser for the given document, reading its input
/// from the given source manager.
/// </summary>
/// <param name="document">The document instance to be constructed.</param>
/// <param name="source">The source to tokenize.</param>
internal HtmlParser(HTMLDocument document, SourceManager source)
{
    tokenizer = new HtmlTokenizer(source);

    // Re-raise tokenizer errors as errors of this parser.
    tokenizer.ErrorOccurred += (sender, args) =>
    {
        if (ErrorOccurred != null)
            ErrorOccurred(this, args);
    };

    // Initial parser state.
    doc = document;
    insert = HtmlTreeMode.Initial;
    started = false;
    frameset = true;
    open = new List<Element>();
    formatting = new List<Element>();
}
/// <summary>
/// A title assigned to a fresh document must be returned unchanged.
/// </summary>
public void HtmlCustomTitleGeneration()
{
    const String expected = "My Title";

    var document = new HTMLDocument { Title = expected };

    Assert.AreEqual(expected, document.Title);
}
/// <summary>
/// The document's Head property must return the head element that was
/// appended to the root element.
/// </summary>
public void HtmlHasRightHeadElement()
{
    var document = new HTMLDocument();
    var html = new HTMLHtmlElement();
    var expectedHead = new HTMLHeadElement();

    // Build document -> html -> head, top down.
    document.AppendChild(html);
    html.AppendChild(expectedHead);

    Assert.AreEqual(expectedHead, document.Head);
}
/// <summary>
/// Wraps the given document in a DOM node view model and exposes it as root.
/// </summary>
/// <param name="url">The address of the document (not used here).</param>
/// <param name="document">The document to wrap.</param>
/// <param name="cancel">The cancellation token (not observed here).</param>
/// <returns>A task that completes after yielding once.</returns>
protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
{
    Status = "Constructing the DOM ...";

    var rootNode = new DOMNodeViewModel(document);
    Root = rootNode;

    await Task.Yield();
}
/// <summary>
/// Builds the test collection from the given overview document by walking
/// the direct children of its body: named anchors select the current
/// section, tables following an anchor are inspected into that section.
/// </summary>
/// <param name="document">The document to inspect.</param>
/// <returns>The test collection filled from the document.</returns>
public static XMLConfTest GenerateFromHtml(HTMLDocument document)
{
    var body = document.Body;
    var mode = Mode.None;
    var test = new XMLConfTest();

    for (var index = 0; index < body.ChildNodes.Length; index++)
    {
        var child = body.ChildNodes[index];

        // Attributes are only touched for anchor elements (short-circuit).
        if (child is HTMLAnchorElement && child.Attributes["name"] != null)
        {
            String section = child.Attributes["name"].Value;

            if (section == "valid")
                mode = Mode.Valid;
            else if (section == "invalid")
                mode = Mode.Invalid;
            else if (section == "not-wf")
                mode = Mode.NotWf;
            else if (section == "error")
                mode = Mode.Error;
            else
                continue; // Unknown anchor: keep the current section.

            // A recognized anchor opens a new, empty section.
            test.tests.Add(mode, new List<Entry>());
            continue;
        }

        // Tables only count once a section has been opened.
        if (mode == Mode.None || !(child is HTMLTableElement))
            continue;

        var entries = test.tests[mode];
        Inspect((HTMLTableElement)child, entries);
    }

    return test;
}
/// <summary>
/// Creates a copy of this HTML document, including its embedded and
/// scripting flags.
/// </summary>
/// <param name="deep">Passed on to the copy helpers; defaults to true.</param>
/// <returns>The new document holding the copied state.</returns>
public override Node CloneNode(Boolean deep = true)
{
    var clone = new HTMLDocument();

    // Generic node state first, then the document-specific state.
    CopyProperties(this, clone, deep);
    CopyDocumentProperties(this, clone, deep);

    // HTML-document-specific flags.
    clone.embedded = embedded;
    clone.scripting = scripting;

    return clone;
}
/// <summary>
/// The document's Body property must return the body element that was
/// appended to the root element.
/// </summary>
public void HtmlHasRightBodyElement()
{
    var document = new HTMLDocument();
    var html = new HTMLHtmlElement();
    var expectedBody = new HTMLBodyElement();

    // Build document -> html -> body, top down.
    document.AppendChild(html);
    html.AppendChild(expectedBody);

    Assert.AreEqual(expectedBody, document.Body);
}
/// <summary>
/// Does nothing with the document; only yields once.
/// </summary>
/// <param name="url">The address of the document (not used here).</param>
/// <param name="document">The document (not used here).</param>
/// <param name="cancel">The cancellation token (not observed here).</param>
/// <returns>A task that completes after yielding once.</returns>
protected override async Task Use(Uri url, HTMLDocument document, CancellationToken cancel)
{
    // Intentionally empty apart from the yield.
    await Task.Yield();
}
/// <summary>
/// Creates a copy of this HTML document.
/// </summary>
/// <param name="deep">Passed on to the copy helpers; defaults to true.</param>
/// <returns>The new document holding the copied state.</returns>
public override Node CloneNode(Boolean deep = true)
{
    var clone = new HTMLDocument();

    // Generic node state first, then the document-specific state.
    CopyProperties(this, clone, deep);
    CopyDocumentProperties(this, clone, deep);

    return clone;
}