Example #1
0
        /// <summary>
        /// Parses the lookup tables embedded in the page and stores them in
        /// <c>App.dates</c>, <c>App.labs</c> and <c>App.sports</c>.
        /// </summary>
        /// <remarks>
        /// Each table is read from the text of one <c>div</c> under <c>body</c>
        /// (2nd, 3rd and 4th child respectively), formatted as <c>index:value</c>
        /// entries separated by <c>;</c>. The three lists are replaced on every call;
        /// when a <c>div</c> is not found its list stays empty.
        /// </remarks>
        /// <param name="html">HTML to parse as a body fragment.</param>
        /// <exception cref="FormatException">
        /// An entry has a non-numeric index, or a date entry is not in <c>yyyy-MM-dd</c> form.
        /// </exception>
        /// <exception cref="OverflowException">An entry index does not fit in a 16-bit integer.</exception>
        static void extractTables(String html)
        {
            Document doc = Dcsoup.ParseBodyFragment(html, "");

            App.dates  = new List <DateTime>();
            App.labs   = new List <string>();
            App.sports = new List <string>();

            System.Diagnostics.Debug.WriteLine("Dates:");
            readIndexedValues(doc, "body > div:nth-child(2)",
                value => App.dates.Add(DateTime.ParseExact(value, "yyyy-MM-dd", CultureInfo.InvariantCulture)));

            System.Diagnostics.Debug.WriteLine("Labs:");
            readIndexedValues(doc, "body > div:nth-child(3)", value => App.labs.Add(value));

            System.Diagnostics.Debug.WriteLine("Sports:");
            readIndexedValues(doc, "body > div:nth-child(4)", value => App.sports.Add(value));
        }

        /// <summary>
        /// Reads the <c>index:value</c> entries from the text of the first element matching
        /// <paramref name="selector"/> and hands each value to <paramref name="store"/>.
        /// </summary>
        /// <param name="doc">Parsed document to search.</param>
        /// <param name="selector">CSS selector of the element holding the entries.</param>
        /// <param name="store">Callback invoked with the text after the last <c>:</c> of each entry.</param>
        static void readIndexedValues(Document doc, string selector, Action<string> store)
        {
            Supremes.Nodes.Element table = doc.Select(selector).First;
            if (table == null)
            {
                return;
            }

            foreach (string entry in table.Text.Split(';'))
            {
                // Skip the empty piece left by a trailing ';' as well as whitespace-only pieces,
                // which would otherwise fail the index conversion below.
                if (string.IsNullOrWhiteSpace(entry))
                {
                    continue;
                }

                string[] parts = entry.Split(':');

                // The index itself is not stored (values are kept in document order);
                // converting it still rejects entries whose prefix is not a 16-bit integer.
                Convert.ToInt16(parts[0]);

                store(parts[parts.Length - 1]);
                System.Diagnostics.Debug.WriteLine(entry);
            }
        }
Example #2
0
 /// <summary>
 /// Creates a selector for a trimmed, non-empty query and a non-null root element.
 /// </summary>
 /// <param name="query">selector query; it is trimmed before being validated and parsed</param>
 /// <param name="root">element stored as the root of this selector</param>
 private Selector(string query, Element root)
 {
     Validate.NotNull(query);

     // Emptiness is checked on the trimmed text, so a whitespace-only query is rejected.
     string trimmedQuery = query.Trim();
     Validate.NotEmpty(trimmedQuery);

     Validate.NotNull(root);

     this.evaluator = QueryParser.Parse(trimmedQuery);
     this.root = root;
 }
Example #3
0
 /// <summary>
 /// Tests whether any element returned by <c>element.GetAllElements()</c>, other than
 /// <paramref name="element"/> itself, matches the wrapped evaluator.
 /// </summary>
 /// <param name="root">root passed through to the wrapped evaluator</param>
 /// <param name="element">element whose other elements are tested</param>
 /// <returns>true as soon as one candidate matches; false if none does</returns>
 public override bool Matches(Element root, Element element)
 {
     foreach (Element candidate in element.GetAllElements())
     {
         // The element itself is excluded from the candidates.
         bool isOtherElement = candidate != element;
         if (isOtherElement && evaluator.Matches(root, candidate))
         {
             return true;
         }
     }

     return false;
 }
Example #4
0
 /// <summary>
 /// Tests the element against the first <c>num</c> entries of <c>evaluators</c>.
 /// </summary>
 /// <param name="root">root passed through to each evaluator</param>
 /// <param name="element">element under test</param>
 /// <returns>false at the first evaluator that does not match; true if all of them match</returns>
 public override bool Matches(Element root, Element element)
 {
     int index = 0;
     while (index < num)
     {
         Evaluator current = evaluators[index];
         bool matched = current.Matches(root, element);
         if (!matched)
         {
             return false;
         }
         index++;
     }

     return true;
 }
Example #5
0
 /// <summary>
 /// Tests whether any ancestor of <paramref name="element"/> that lies below
 /// <paramref name="root"/> matches the wrapped evaluator.
 /// </summary>
 /// <param name="root">ancestor at which the upward walk stops; root itself is not tested</param>
 /// <param name="element">element whose ancestors are tested</param>
 /// <returns>
 /// true if an ancestor matches; false if none does, if <paramref name="element"/> is the root,
 /// or if the top of the tree is reached without meeting <paramref name="root"/>
 /// </returns>
 public override bool Matches(Element root, Element element)
 {
     if (root == element)
     {
         return false;
     }

     // Walk up until root is reached. The null check also ends the walk at the top of the
     // tree, so an element that is not a descendant of root yields false instead of
     // dereferencing a null parent.
     Element parent = element.Parent;
     while (parent != null && parent != root)
     {
         if (evaluator.Matches(root, parent))
         {
             return true;
         }
         parent = parent.Parent;
     }

     return false;
 }
Example #6
0
        public void HandlesBaseUri()
        {
            // Checks AbsUrl for a relative and an absolute attribute value against
            // an empty base URI, a usable base URI and a base URI with an unknown protocol.
            const string absoluteHref = "http://bar/qux";

            Tag anchorTag = Tag.ValueOf("a");
            Attributes attribs = new Attributes();
            attribs["relHref"] = "/foo";
            attribs["absHref"] = absoluteHref;

            // Empty base URI: relative value cannot be resolved, absolute value is returned as-is.
            Element withoutBase = new Element(anchorTag, "", attribs);
            Assert.AreEqual("", withoutBase.AbsUrl("relHref"));
            Assert.AreEqual(absoluteHref, withoutBase.AbsUrl("absHref"));

            // Usable base URI: relative value is resolved against it, absolute value wins,
            // and an attribute that is not present gives an empty string.
            Element withValidBase = new Element(anchorTag, "http://foo/", attribs);
            Assert.AreEqual("http://foo/foo", withValidBase.AbsUrl("relHref"));
            Assert.AreEqual(absoluteHref, withValidBase.AbsUrl("absHref"));
            Assert.AreEqual("", withValidBase.AbsUrl("noval"));

            // Base URI with an unknown protocol: only the absolute value can be returned.
            Element withBrokenBase = new Element(anchorTag, "wtf://no-such-protocol/", attribs);
            Assert.AreEqual(absoluteHref, withBrokenBase.AbsUrl("absHref"));
            Assert.AreEqual("", withBrokenBase.AbsUrl("relHref"));
        }
Example #7
0
 /// <summary>
 /// Creates a new element for the given tag name, using this element's base URI,
 /// and prepends it as a child of this element.
 /// </summary>
 /// <param name="tagName">the name of the tag, e.g. <c>div</c></param>
 /// <returns>
 /// the newly created element, so that content can be added to it, e.g.:
 /// <c>parent.PrependElement("h1").Attr("id", "header").Text("Welcome");</c>
 /// </returns>
 public Element PrependElement(string tagName)
 {
     Element created = new Element(Nodes.Tag.ValueOf(tagName), BaseUri);
     PrependChild(created);
     return created;
 }
Example #8
0
 /// <summary>
 /// Tests whether the element has a parent that is not a <c>Document</c>
 /// and has no sibling elements.
 /// </summary>
 /// <param name="root">not used by this check</param>
 /// <param name="element">element under test</param>
 /// <returns>true if both conditions hold</returns>
 public override bool Matches(Element root, Element element)
 {
     Element parent = element.Parent;
     if (parent == null || parent is Document)
     {
         return false;
     }

     return element.SiblingElements.Count == 0;
 }
Example #9
0
 /// <summary>
 /// Finds the elements that match a CSS selector query.
 /// </summary>
 /// <param name="query">CSS selector</param>
 /// <param name="root">root element to descend into</param>
 /// <returns>the matching elements; empty if there are none</returns>
 public static Elements Select(string query, Element root)
 {
     Supremes.Select.Selector selector = new Supremes.Select.Selector(query, root);
     return selector.Select();
 }
Example #10
0
 /// <summary>
 /// Builds a list of elements by traversing the tree from <paramref name="root"/>
 /// with an accumulator that tests elements against the evaluator.
 /// </summary>
 /// <param name="eval">Evaluator to test elements against</param>
 /// <param name="root">root of the tree to traverse</param>
 /// <returns>list of matches; empty if none</returns>
 public static Elements Collect(Evaluator eval, Element root)
 {
     Elements matches = new Elements();

     // The accumulator receives the list and fills it during the traversal.
     Collector.Accumulator accumulator = new Collector.Accumulator(root, matches, eval);
     NodeTraversor traversor = new NodeTraversor(accumulator);
     traversor.Traverse(root);

     return matches;
 }
Example #11
0
 /// <summary>
 /// Tests whether the supplied attribute is allowed by this whitelist for the given tag.
 /// </summary>
 /// <remarks>
 /// When the attribute is not listed for the tag itself, the check is repeated once
 /// against the <c>:all</c> pseudo tag.
 /// </remarks>
 /// <param name="tagName">tag to consider allowing the attribute in</param>
 /// <param name="el">element under test, used for the protocol check</param>
 /// <param name="attr">attribute under test</param>
 /// <returns>true if allowed</returns>
 internal bool IsSafeAttribute(string tagName, Element el, Nodes.Attribute attr)
 {
     Whitelist.TagName tag = Whitelist.TagName.ValueOf(tagName);
     Whitelist.AttributeKey key = Whitelist.AttributeKey.ValueOf(attr.Key);

     bool listedForTag = attributes.ContainsKey(tag) && attributes[tag].Contains(key);
     if (listedForTag)
     {
         if (!protocols.ContainsKey(tag))
         {
             // attribute is listed and the tag has no protocol restrictions
             return true;
         }

         IDictionary<Whitelist.AttributeKey, ICollection<Whitelist.Protocol>> attrProts = protocols[tag];
         if (!attrProts.ContainsKey(key))
         {
             // no protocol restriction defined for this particular attribute
             return true;
         }

         return TestValidProtocol(el, attr, attrProts[key]);
     }

     // attribute not listed for this tag: fall back to the :all tag, unless already there
     if (tagName.Equals(":all"))
     {
         return false;
     }
     return IsSafeAttribute(":all", el, attr);
 }
Example #12
0
 /// <summary>
 /// Tests whether <c>pattern</c> finds a match anywhere in the element's own text.
 /// </summary>
 /// <param name="root">not used by this check</param>
 /// <param name="element">element whose <c>OwnText</c> is searched</param>
 /// <returns>true if the pattern matches</returns>
 public override bool Matches(Element root, Element element)
 {
     string ownText = element.OwnText;
     return pattern.IsMatch(ownText);
 }
Example #13
0
 /// <summary>
 /// Tests whether the element has the class stored in <c>className</c>.
 /// </summary>
 /// <param name="root">not used by this check</param>
 /// <param name="element">element under test</param>
 /// <returns>the result of <c>element.HasClass(className)</c></returns>
 public override bool Matches(Element root, Element element)
 {
     bool hasClass = element.HasClass(className);
     return hasClass;
 }
Example #14
0
 /// <summary>
 /// Tests whether the lower-cased own text of the element contains <c>searchText</c>.
 /// </summary>
 /// <param name="root">not used by this check</param>
 /// <param name="element">element whose <c>OwnText</c> is searched</param>
 /// <returns>true if the lower-cased own text contains <c>searchText</c></returns>
 public override bool Matches(Element root, Element element)
 {
     // NOTE(review): ToLower() uses the current culture; presumably searchText is
     // lower-cased the same way where it is assigned — confirm before changing either side.
     string loweredOwnText = element.OwnText.ToLower();
     return loweredOwnText.Contains(searchText);
 }
Example #15
0
 /// <summary>
 /// Tests whether every child node of the element is a <c>Comment</c>,
 /// an <c>XmlDeclaration</c> or a <c>DocumentType</c>.
 /// </summary>
 /// <param name="root">not used by this check</param>
 /// <param name="element">element whose child nodes are inspected</param>
 /// <returns>false at the first child of any other kind; true otherwise, including when there are no children</returns>
 public override bool Matches(Element root, Element element)
 {
     foreach (Node child in element.ChildNodes)
     {
         bool ignorable = child is Comment || child is XmlDeclaration || child is DocumentType;
         if (!ignorable)
         {
             return false;
         }
     }

     return true;
 }
Example #16
0
 /// <summary>
 /// Tests whether the element is the only child of its parent with its tag.
 /// </summary>
 /// <param name="root">not used by this check</param>
 /// <param name="element">element under test</param>
 /// <returns>
 /// true if exactly one child of the parent has a tag equal to the element's tag;
 /// false if the element has no parent or its parent is a <c>Document</c>
 /// </returns>
 public override bool Matches(Element root, Element element)
 {
     Element parent = element.Parent;
     if (parent == null || parent is Document)
     {
         return false;
     }

     // Count the children that share the element's tag.
     int sameTagCount = 0;
     foreach (Element sibling in parent.Children)
     {
         if (sibling.Tag.Equals(element.Tag))
         {
             sameTagCount++;
         }
     }

     return sameTagCount == 1;
 }
Example #17
0
 /// <summary>
 /// Returns the position of an element within a list, as reported by <c>IList.IndexOf</c>.
 /// </summary>
 /// <param name="search">element to look for; validated as non-null</param>
 /// <param name="elements">list to search; validated as non-null</param>
 /// <returns>the value returned by <c>elements.IndexOf(search)</c></returns>
 private static int IndexInList(Element search, IList<Element> elements)
 {
     Validate.NotNull(search);
     Validate.NotNull(elements);

     // IndexOf compares entries using the Equals() method
     int position = elements.IndexOf(search);
     return position;
 }
Example #18
0
 /// <summary>
 /// Stores the root, the target list and the evaluator in this accumulator.
 /// </summary>
 /// <param name="root">root element to store</param>
 /// <param name="elements">element list to store</param>
 /// <param name="eval">evaluator to store</param>
 internal Accumulator(Element root, Elements elements, Evaluator eval)
 {
     this.eval = eval;
     this.elements = elements;
     this.root = root;
 }
Example #19
0
 /// <summary>
 /// Parses a fragment of HTML into a list of nodes using a new <c>HtmlTreeBuilder</c>.
 /// </summary>
 /// <remarks>
 /// The context element, if supplied, supplies parsing context. Parse errors are not
 /// tracked: the builder is given <c>ParseErrorList.NoTracking()</c>.
 /// </remarks>
 /// <param name="fragmentHtml">the fragment of HTML to parse</param>
 /// <param name="context">
 /// (optional) the element that this HTML fragment is being parsed for (i.e. for inner HTML).
 /// This provides stack context (for implicit element creation).
 /// </param>
 /// <param name="baseUri">
 /// base URI of document (i.e. original fetch location), for resolving relative URLs.
 /// </param>
 /// <returns>
 /// list of nodes parsed from the input HTML. Note that the context element, if supplied, is not modified.
 /// </returns>
 public static IReadOnlyList<Node> ParseFragment(string fragmentHtml, Element context, string baseUri)
 {
     HtmlTreeBuilder builder = new HtmlTreeBuilder();
     ParseErrorList untrackedErrors = ParseErrorList.NoTracking();

     IReadOnlyList<Node> nodes = builder.ParseFragment(fragmentHtml, context, baseUri, untrackedErrors);
     return nodes;
 }
Example #20
0
        /// <summary>
        /// Walks the rows of the marks table and fills a <c>User</c> from each run of eleven
        /// <c>td</c> cells: name, eta, lab, then four (presence, sport) pairs.
        /// </summary>
        /// <remarks>
        /// Rows are fetched one at a time with <c>tr:nth-child(i)</c>, starting at 1, until a row
        /// is not found. Name and eta come from the cell text; all other fields come from the
        /// cell's <c>value</c> attribute.
        /// </remarks>
        /// <param name="html">HTML to parse as a body fragment.</param>
        static void extratMarks(String html)
        {
            Document doc = Dcsoup.ParseBodyFragment(html, "");

            // NOTE(review): the populated users are never stored or returned from here —
            // confirm whether the User constructor registers them elsewhere.
            User currentUser = new User(false);

            // NOTE(review): the column counter is not reset per row, so a row that does not
            // hold exactly eleven td cells shifts the fields of the following rows — confirm intent.
            int column = 0;

            int rowNumber = 1;
            while (true)
            {
                Supremes.Nodes.Element row = doc.Select("body > div.content > div.scrolling-content > table > tbody > tr:nth-child(" + rowNumber + ")").First;
                if (row == null)
                {
                    break;
                }

                Elements cells = row.GetElementsByTag("td");
                foreach (Supremes.Nodes.Element cell in cells)
                {
                    switch (column)
                    {
                        case 0:
                        {
                            // First cell of a record starts a new user.
                            currentUser = new User(true);
                            string name = cell.Text;
                            currentUser.name = name;
                            System.Diagnostics.Debug.WriteLine(name);
                            break;
                        }

                        case 1:
                        {
                            currentUser.eta = Convert.ToInt16(cell.Text);
                            break;
                        }

                        case 2:
                        {
                            currentUser.lab = Convert.ToInt32(cell.Attr("value"));
                            break;
                        }

                        default:
                        {
                            // Columns 3..10 alternate presence (odd) and sport (even);
                            // each consecutive pair shares one slot 0..3.
                            int slot = (column - 3) / 2;
                            int value = Convert.ToInt32(cell.Attr("value"));
                            if (column % 2 == 1)
                            {
                                currentUser.presences[slot] = (value == 1);
                            }
                            else
                            {
                                currentUser.sports[slot] = value;
                            }
                            break;
                        }
                    }

                    // Column 10 is the last field of a record; wrap around to start the next one.
                    column = (column == 10) ? 0 : column + 1;
                }

                rowNumber++;
            }
        }
Example #21
0
 /// <summary>
 /// Tests whether the attribute's URL value starts with one of the allowed protocols.
 /// </summary>
 /// <remarks>
 /// The value tested is the absolute URL reported by <c>el.AbsUrl</c>; when that is empty the
 /// raw attribute value is tested instead. Unless <c>preserveRelativeLinks</c> is set, the
 /// attribute is overwritten with the value that was tested.
 /// </remarks>
 /// <param name="el">element owning the attribute, used to resolve the absolute URL</param>
 /// <param name="attr">attribute under test; its value may be rewritten as described above</param>
 /// <param name="protocols">allowed protocols, compared as <c>protocol + ":"</c> prefixes</param>
 /// <returns>true if the value starts with one of the protocols</returns>
 private bool TestValidProtocol(Element el, Nodes.Attribute attr, ICollection<Whitelist.Protocol> protocols)
 {
     // try to resolve relative urls to abs, and optionally update the attribute so output html has abs.
     string value = el.AbsUrl(attr.Key);
     if (value.Length == 0)
     {
         // if it could not be made abs, run as-is to allow custom unknown protocols
         value = attr.Value;
     }
     if (!preserveRelativeLinks)
     {
         attr.Value = value;
     }
     foreach (Whitelist.Protocol protocol in protocols)
     {
         string prot = protocol.ToString() + ":";
         // Lower-case with the invariant culture: a culture-sensitive ToLower() maps 'I' to a
         // dotless 'ı' under Turkish-style cultures, so e.g. "MAILTO:" would never match "mailto:".
         if (value.ToLowerInvariant().StartsWith(prot, StringComparison.Ordinal))
         {
             return true;
         }
     }
     return false;
 }
Example #22
0
        public void OrphanNodeReturnsNullForSiblingElements()
        {
            // Two freshly created <p> elements that were never attached to a parent,
            // one viewed through the Node API and one through the Element API.
            Node orphanNode = new Element(Tag.ValueOf("p"), "");
            Element orphanElement = new Element(Tag.ValueOf("p"), "");

            // Node-level sibling accessors
            Assert.AreEqual(0, orphanNode.SiblingIndex);
            Assert.AreEqual(0, orphanNode.SiblingNodes.Count);
            Assert.IsNull(orphanNode.PreviousSibling);
            Assert.IsNull(orphanNode.NextSibling);

            // Element-level sibling accessors
            Assert.AreEqual(0, orphanElement.SiblingElements.Count);
            Assert.IsNull(orphanElement.PreviousElementSibling);
            Assert.IsNull(orphanElement.NextElementSibling);
        }
Example #23
0
 /// <summary>
 /// Tests whether the stored <c>id</c> equals the element's <c>Id</c>.
 /// </summary>
 /// <param name="root">not used by this check</param>
 /// <param name="element">element under test</param>
 /// <returns>the result of <c>id.Equals(element.Id)</c></returns>
 public override bool Matches(Element root, Element element)
 {
     bool sameId = id.Equals(element.Id);
     return sameId;
 }
Example #24
0
        // Merges the contents of every element with the given tag into the first one,
        // removes the duplicates, and makes sure the surviving element is a child of <html>.

        private void NormaliseStructure(string tag, Element htmlEl)
        {
            Elements matches = this.GetElementsByTag(tag);

            // the first match is kept; it is expected to exist by the time this runs
            Element master = matches.First;

            if (matches.Count > 1)
            {
                // collect the children of every duplicate, detaching each duplicate as we go
                List<Node> orphans = new List<Node>();
                int index = 1;
                while (index < matches.Count)
                {
                    Node duplicate = matches[index];
                    orphans.AddRange(duplicate.ChildNodes);
                    duplicate.Remove();
                    index++;
                }

                // re-home the collected children under the surviving element, in order
                for (int i = 0; i < orphans.Count; i++)
                {
                    master.AppendChild(orphans[i]);
                }
            }

            // ensure parented by <html>
            bool ownedByHtml = master.Parent.Equals(htmlEl);
            if (!ownedByHtml)
            {
                htmlEl.AppendChild(master);
            }
        }
Example #25
0
        public void After()
        {
            // Checks that After() inserts an element, and then parsed HTML,
            // immediately after the selected <b> element.
            Document doc = Dcsoup.Parse("<p>One <b>two</b> three</p>");

            Element emphasis = new Element(Tag.ValueOf("em"), "");
            emphasis.AppendText("four");

            // Element overload
            Element bold = doc.Select("b").First;
            bold.After(emphasis);
            Assert.AreEqual("<p>One <b>two</b><em>four</em> three</p>", doc.Body.Html);

            // HTML string overload: the new content lands between <b> and the earlier insertion
            bold = doc.Select("b").First;
            bold.After("<i>five</i>");
            Assert.AreEqual("<p>One <b>two</b><i>five</i><em>four</em> three</p>", doc.Body.Html);
        }
Example #26
0
 /// <summary>
 /// Tests whether the element is the root; when the root is a <c>Document</c>,
 /// its first child (<c>root.Child(0)</c>) is used as the root instead.
 /// </summary>
 /// <param name="root">root of the search, or a document whose first child is the root</param>
 /// <param name="element">element under test</param>
 /// <returns>true if the element is that root element</returns>
 public override bool Matches(Element root, Element element)
 {
     Element effectiveRoot;
     if (root is Document)
     {
         effectiveRoot = root.Child(0);
     }
     else
     {
         effectiveRoot = root;
     }

     return element == effectiveRoot;
 }
Example #27
0
        // Moves the non-blank text nodes that are direct children of the given element to the
        // front of Body, each followed by a single-space text node. Does not recurse.

        private void NormaliseTextNodes(Element element)
        {
            // Snapshot the nodes to move first, so the child list is not changed while it is read.
            List<Node> movable = new List<Node>();
            foreach (Node child in element.childNodes)
            {
                if (child is TextNode && !((TextNode)child).IsBlank)
                {
                    movable.Add(child);
                }
            }

            // Prepending in reverse keeps the moved nodes in their original relative order.
            movable.Reverse();
            foreach (Node textNode in movable)
            {
                element.RemoveChild(textNode);
                Body.PrependChild(new TextNode(" ", string.Empty));
                Body.PrependChild(textNode);
            }
        }
Example #28
0
 /// <summary>
 /// Appends a form control element to the <c>elements</c> collection of this form.
 /// </summary>
 /// <param name="element">form control to add</param>
 /// <returns>this form element, so that calls can be chained</returns>
 public FormElement AddElement(Element element)
 {
     this.elements.Add(element);

     return this;
 }
Example #29
0
            internal override bool Process(Token t, HtmlTreeBuilder tb)
            {
                switch (t.type)
                {
                    case TokenType.Character:
                    {
                        Token.Character c = t.AsCharacter();
                        if (c.GetData().Equals(HtmlTreeBuilderState.nullString))
                        {
                            // todo confirm that check
                            tb.Error(this);
                            return false;
                        }
                        else if (tb.FramesetOk() && HtmlTreeBuilderState.IsWhitespace(c))
                        {
                            // don't check if whitespace if frames already closed
                            tb.ReconstructFormattingElements();
                            tb.Insert(c);
                        }
                        else
                        {
                            tb.ReconstructFormattingElements();
                            tb.Insert(c);
                            tb.FramesetOk(false);
                        }
                        break;
                    }

                    case TokenType.Comment:
                    {
                        tb.Insert(t.AsComment());
                        break;
                    }

                    case TokenType.Doctype:
                    {
                        tb.Error(this);
                        return false;
                    }

                    case TokenType.StartTag:
                    {
                        Token.StartTag startTag = t.AsStartTag();
                        string name = startTag.Name();
                        if (name.Equals("html"))
                        {
                            tb.Error(this);
                            // merge attributes onto real html
                            Element html = tb.GetStack().First.Value;
                            foreach (Supremes.Nodes.Attribute attribute in startTag.GetAttributes())
                            {
                                if (!html.HasAttr(attribute.Key))
                                {
                                    html.Attributes.Put(attribute);
                                }
                            }
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyStartToHead))
                        {
                            return tb.Process(t, HtmlTreeBuilderState.InHead);
                        }
                        else if (name.Equals("body"))
                        {
                            tb.Error(this);
                            IList<Element> stack = tb.GetStack();
                            if (stack.Count == 1 || (stack.Count > 2 && !stack[1].NodeName.Equals("body")))
                            {
                                // only in fragment case
                                return false;
                            }
                            else
                            {
                                // ignore
                                tb.FramesetOk(false);
                                Element body = stack[1];
                                foreach (Supremes.Nodes.Attribute attribute in startTag.GetAttributes())
                                {
                                    if (!body.HasAttr(attribute.Key))
                                    {
                                        body.Attributes.Put(attribute);
                                    }
                                }
                            }
                        }
                        else if (name.Equals("frameset"))
                        {
                            tb.Error(this);
                            var stack = tb.GetStack();
                            if (stack.Count == 1 || (stack.Count > 2 && !stack[1].NodeName.Equals("body")))
                            {
                                // only in fragment case
                                return false;
                                // ignore
                            }
                            else if (!tb.FramesetOk())
                            {
                                return false;
                                // ignore frameset
                            }
                            else
                            {
                                Element second = stack[1];
                                if (second.Parent != null)
                                {
                                    second.Remove();
                                }
                                // pop up to html element
                                while (stack.Count > 1)
                                {
                                    stack.RemoveLast();
                                }
                                tb.Insert(startTag);
                                tb.Transition(HtmlTreeBuilderState.InFrameset);
                            }
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyStartPClosers))
                        {
                            if (tb.InButtonScope("p"))
                            {
                                tb.Process(new Token.EndTag("p"));
                            }
                            tb.Insert(startTag);
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.Headings))
                        {
                            if (tb.InButtonScope("p"))
                            {
                                tb.Process(new Token.EndTag("p"));
                            }
                            if (StringUtil.In(tb.CurrentElement().NodeName, HtmlTreeBuilderState.Constants.Headings))
                            {
                                tb.Error(this);
                                tb.Pop();
                            }
                            tb.Insert(startTag);
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyStartPreListing))
                        {
                            if (tb.InButtonScope("p"))
                            {
                                tb.Process(new Token.EndTag("p"));
                            }
                            tb.Insert(startTag);
                            // todo: ignore LF if next token
                            tb.FramesetOk(false);
                        }
                        else if (name.Equals("form"))
                        {
                            if (tb.GetFormElement() != null)
                            {
                                tb.Error(this);
                                return false;
                            }
                            if (tb.InButtonScope("p"))
                            {
                                tb.Process(new Token.EndTag("p"));
                            }
                            tb.InsertForm(startTag, true);
                        }
                        else if (name.Equals("li"))
                        {
                            tb.FramesetOk(false);
                            IList<Element> stack = tb.GetStack();
                            for (int i = stack.Count - 1; i > 0; i--)
                            {
                                Element el = stack[i];
                                if (el.NodeName.Equals("li"))
                                {
                                    tb.Process(new Token.EndTag("li"));
                                    break;
                                }
                                if (tb.IsSpecial(el) && !StringUtil.In(el.NodeName, HtmlTreeBuilderState.Constants.InBodyStartLiBreakers))
                                {
                                    break;
                                }
                            }
                            if (tb.InButtonScope("p"))
                            {
                                tb.Process(new Token.EndTag("p"));
                            }
                            tb.Insert(startTag);
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.DdDt))
                        {
                            tb.FramesetOk(false);
                            IList<Element> stack = tb.GetStack();
                            for (int i = stack.Count - 1; i > 0; i--)
                            {
                                Element el = stack[i];
                                if (StringUtil.In(el.NodeName, HtmlTreeBuilderState.Constants.DdDt))
                                {
                                    tb.Process(new Token.EndTag(el.NodeName));
                                    break;
                                }
                                if (tb.IsSpecial(el) && !StringUtil.In(el.NodeName, HtmlTreeBuilderState.Constants
                                    .InBodyStartLiBreakers))
                                {
                                    break;
                                }
                            }
                            if (tb.InButtonScope("p"))
                            {
                                tb.Process(new Token.EndTag("p"));
                            }
                            tb.Insert(startTag);
                        }
                        else if (name.Equals("plaintext"))
                        {
                            if (tb.InButtonScope("p"))
                            {
                                tb.Process(new Token.EndTag("p"));
                            }
                            tb.Insert(startTag);
                            tb.tokeniser.Transition(TokeniserState.PLAINTEXT);
                            // once in, never gets out
                        }
                        else if (name.Equals("button"))
                        {
                            if (tb.InButtonScope("button"))
                            {
                                // close and reprocess
                                tb.Error(this);
                                tb.Process(new Token.EndTag("button"));
                                tb.Process(startTag);
                            }
                            else
                            {
                                tb.ReconstructFormattingElements();
                                tb.Insert(startTag);
                                tb.FramesetOk(false);
                            }
                        }
                        else if (name.Equals("a"))
                        {
                            if (tb.GetActiveFormattingElement("a") != null)
                            {
                                tb.Error(this);
                                tb.Process(new Token.EndTag("a"));
                                // still on stack?
                                Element remainingA = tb.GetFromStack("a");
                                if (remainingA != null)
                                {
                                    tb.RemoveFromActiveFormattingElements(remainingA);
                                    tb.RemoveFromStack(remainingA);
                                }
                            }
                            tb.ReconstructFormattingElements();
                            Element a = tb.Insert(startTag);
                            tb.PushActiveFormattingElements(a);
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.Formatters))
                        {
                            tb.ReconstructFormattingElements();
                            Element el = tb.Insert(startTag);
                            tb.PushActiveFormattingElements(el);
                        }
                        else if (name.Equals("nobr"))
                        {
                            tb.ReconstructFormattingElements();
                            if (tb.InScope("nobr"))
                            {
                                tb.Error(this);
                                tb.Process(new Token.EndTag("nobr"));
                                tb.ReconstructFormattingElements();
                            }
                            Element el = tb.Insert(startTag);
                            tb.PushActiveFormattingElements(el);
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyStartApplets))
                        {
                            tb.ReconstructFormattingElements();
                            tb.Insert(startTag);
                            tb.InsertMarkerToFormattingElements();
                            tb.FramesetOk(false);
                        }
                        else if (name.Equals("table"))
                        {
                            if (tb.GetDocument().QuirksMode != DocumentQuirksMode.Quirks && tb.InButtonScope("p"))
                            {
                                tb.Process(new Token.EndTag("p"));
                            }
                            tb.Insert(startTag);
                            tb.FramesetOk(false);
                            tb.Transition(HtmlTreeBuilderState.InTable);
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyStartEmptyFormatters))
                        {
                            tb.ReconstructFormattingElements();
                            tb.InsertEmpty(startTag);
                            tb.FramesetOk(false);
                        }
                        else if (name.Equals("input"))
                        {
                            tb.ReconstructFormattingElements();
                            Element el = tb.InsertEmpty(startTag);
                            if (!string.Equals(el.Attr("type"), "hidden", StringComparison.OrdinalIgnoreCase))
                            {
                                tb.FramesetOk(false);
                            }
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyStartMedia))
                        {
                            tb.InsertEmpty(startTag);
                        }
                        else if (name.Equals("hr"))
                        {
                            if (tb.InButtonScope("p"))
                            {
                                tb.Process(new Token.EndTag("p"));
                            }
                            tb.InsertEmpty(startTag);
                            tb.FramesetOk(false);
                        }
                        else if (name.Equals("image"))
                        {
                            if (tb.GetFromStack("svg") == null)
                            {
                                return tb.Process(startTag.Name("img"));
                            }
                            else
                            {
                                // change <image> to <img>, unless in svg
                                tb.Insert(startTag);
                            }
                        }
                        else if (name.Equals("isindex"))
                        {
                            // how much do we care about the early 90s?
                            tb.Error(this);
                            if (tb.GetFormElement() != null)
                            {
                                return false;
                            }
                            tb.tokeniser.AcknowledgeSelfClosingFlag();
                            tb.Process(new Token.StartTag("form"));
                            if (startTag.attributes.ContainsKey("action"))
                            {
                                Element form = tb.GetFormElement();
                                form.Attr("action", startTag.attributes["action"]);
                            }
                            tb.Process(new Token.StartTag("hr"));
                            tb.Process(new Token.StartTag("label"));
                            // hope you like english.
                            string prompt = startTag.attributes.ContainsKey("prompt") ? startTag.attributes["prompt"] : "This is a searchable index. Enter search keywords: ";
                            tb.Process(new Token.Character(prompt));
                            // input
                            Attributes inputAttribs = new Attributes();
                            foreach (Supremes.Nodes.Attribute attr in startTag.attributes)
                            {
                                if (!StringUtil.In(attr.Key, HtmlTreeBuilderState.Constants.InBodyStartInputAttribs))
                                {
                                    inputAttribs.Put(attr);
                                }
                            }
                            inputAttribs["name"] = "isindex";
                            tb.Process(new Token.StartTag("input", inputAttribs));
                            tb.Process(new Token.EndTag("label"));
                            tb.Process(new Token.StartTag("hr"));
                            tb.Process(new Token.EndTag("form"));
                        }
                        else if (name.Equals("textarea"))
                        {
                            tb.Insert(startTag);
                            // todo: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
                            tb.tokeniser.Transition(TokeniserState.Rcdata);
                            tb.MarkInsertionMode();
                            tb.FramesetOk(false);
                            tb.Transition(HtmlTreeBuilderState.Text);
                        }
                        else if (name.Equals("xmp"))
                        {
                            if (tb.InButtonScope("p"))
                            {
                                tb.Process(new Token.EndTag("p"));
                            }
                            tb.ReconstructFormattingElements();
                            tb.FramesetOk(false);
                            HtmlTreeBuilderState.HandleRawtext(startTag, tb);
                        }
                        else if (name.Equals("iframe"))
                        {
                            tb.FramesetOk(false);
                            HtmlTreeBuilderState.HandleRawtext(startTag, tb);
                        }
                        else if (name.Equals("noembed"))
                        {
                            // also handle noscript if script enabled
                            HtmlTreeBuilderState.HandleRawtext(startTag, tb);
                        }
                        else if (name.Equals("select"))
                        {
                            tb.ReconstructFormattingElements();
                            tb.Insert(startTag);
                            tb.FramesetOk(false);
                            HtmlTreeBuilderState state = tb.State();
                            if (state.Equals(HtmlTreeBuilderState.InTable)
                                || state.Equals(HtmlTreeBuilderState.InCaption)
                                || state.Equals(HtmlTreeBuilderState.InTableBody)
                                || state.Equals(HtmlTreeBuilderState.InRow)
                                || state.Equals(HtmlTreeBuilderState.InCell))
                            {
                                tb.Transition(HtmlTreeBuilderState.InSelectInTable);
                            }
                            else
                            {
                                tb.Transition(HtmlTreeBuilderState.InSelect);
                            }
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyStartOptions))
                        {
                            if (tb.CurrentElement().NodeName.Equals("option"))
                            {
                                tb.Process(new Token.EndTag("option"));
                            }
                            tb.ReconstructFormattingElements();
                            tb.Insert(startTag);
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyStartRuby))
                        {
                            if (tb.InScope("ruby"))
                            {
                                tb.GenerateImpliedEndTags();
                                if (!tb.CurrentElement().NodeName.Equals("ruby"))
                                {
                                    tb.Error(this);
                                    tb.PopStackToBefore("ruby");
                                }
                                // i.e. close up to but not include name
                                tb.Insert(startTag);
                            }
                        }
                        else if (name.Equals("math"))
                        {
                            tb.ReconstructFormattingElements();
                            // todo: handle A start tag whose tag name is "math" (i.e. foreign, mathml)
                            tb.Insert(startTag);
                            tb.tokeniser.AcknowledgeSelfClosingFlag();
                        }
                        else if (name.Equals("svg"))
                        {
                            tb.ReconstructFormattingElements();
                            // todo: handle A start tag whose tag name is "svg" (xlink, svg)
                            tb.Insert(startTag);
                            tb.tokeniser.AcknowledgeSelfClosingFlag();
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyStartDrop))
                        {
                            tb.Error(this);
                            return false;
                        }
                        else
                        {
                            tb.ReconstructFormattingElements();
                            tb.Insert(startTag);

                        }
                        break;
                    }

                    case TokenType.EndTag:
                    {
                        Token.EndTag endTag = t.AsEndTag();
                        string name = endTag.Name();
                        if (name.Equals("body"))
                        {
                            if (!tb.InScope("body"))
                            {
                                tb.Error(this);
                                return false;
                            }
                            else
                            {
                                // todo: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, tr, body, html
                                tb.Transition(HtmlTreeBuilderState.AfterBody);
                            }
                        }
                        else if (name.Equals("html"))
                        {
                            bool notIgnored = tb.Process(new Token.EndTag("body"));
                            if (notIgnored)
                            {
                                return tb.Process(endTag);
                            }
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyEndClosers))
                        {
                            if (!tb.InScope(name))
                            {
                                // nothing to close
                                tb.Error(this);
                                return false;
                            }
                            else
                            {
                                tb.GenerateImpliedEndTags();
                                if (!tb.CurrentElement().NodeName.Equals(name))
                                {
                                    tb.Error(this);
                                }
                                tb.PopStackToClose(name);
                            }
                        }
                        else if (name.Equals("form"))
                        {
                            Element currentForm = tb.GetFormElement();
                            tb.SetFormElement(null);
                            if (currentForm == null || !tb.InScope(name))
                            {
                                tb.Error(this);
                                return false;
                            }
                            else
                            {
                                tb.GenerateImpliedEndTags();
                                if (!tb.CurrentElement().NodeName.Equals(name))
                                {
                                    tb.Error(this);
                                }
                                // remove currentForm from stack. will shift anything under up.
                                tb.RemoveFromStack(currentForm);
                            }
                        }
                        else if (name.Equals("p"))
                        {
                            if (!tb.InButtonScope(name))
                            {
                                tb.Error(this);
                                tb.Process(new Token.StartTag(name));
                                // if no p to close, creates an empty <p></p>
                                return tb.Process(endTag);
                            }
                            else
                            {
                                tb.GenerateImpliedEndTags(name);
                                if (!tb.CurrentElement().NodeName.Equals(name))
                                {
                                    tb.Error(this);
                                }
                                tb.PopStackToClose(name);
                            }
                        }
                        else if (name.Equals("li"))
                        {
                            if (!tb.InListItemScope(name))
                            {
                                tb.Error(this);
                                return false;
                            }
                            else
                            {
                                tb.GenerateImpliedEndTags(name);
                                if (!tb.CurrentElement().NodeName.Equals(name))
                                {
                                    tb.Error(this);
                                }
                                tb.PopStackToClose(name);
                            }
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.DdDt))
                        {
                            if (!tb.InScope(name))
                            {
                                tb.Error(this);
                                return false;
                            }
                            else
                            {
                                tb.GenerateImpliedEndTags(name);
                                if (!tb.CurrentElement().NodeName.Equals(name))
                                {
                                    tb.Error(this);
                                }
                                tb.PopStackToClose(name);
                            }
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.Headings))
                        {
                            if (!tb.InScope(HtmlTreeBuilderState.Constants.Headings))
                            {
                                tb.Error(this);
                                return false;
                            }
                            else
                            {
                                tb.GenerateImpliedEndTags(name);
                                if (!tb.CurrentElement().NodeName.Equals(name))
                                {
                                    tb.Error(this);
                                }
                                tb.PopStackToClose(HtmlTreeBuilderState.Constants.Headings);
                            }
                        }
                        else if (name.Equals("sarcasm"))
                        {
                            // *sigh*
                            return this.AnyOtherEndTag(t, tb);
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyEndAdoptionFormatters))
                        {
                            // Adoption Agency Algorithm.
                            // OUTER:
                            for (int i = 0; i < 8; i++)
                            {
                                Element formatEl = tb.GetActiveFormattingElement(name);
                                if (formatEl == null)
                                {
                                    return this.AnyOtherEndTag(t, tb);
                                }
                                else if (!tb.OnStack(formatEl))
                                {
                                    tb.Error(this);
                                    tb.RemoveFromActiveFormattingElements(formatEl);
                                    return true;
                                }
                                else if (!tb.InScope(formatEl.NodeName))
                                {
                                    tb.Error(this);
                                    return false;
                                }
                                else if (tb.CurrentElement() != formatEl)
                                {
                                    tb.Error(this);
                                }
                                Element furthestBlock = null;
                                Element commonAncestor = null;
                                bool seenFormattingElement = false;
                                IList<Element> stack = tb.GetStack();
                                // the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents
                                // run-aways
                                int stackSize = stack.Count;
                                for (int si = 0; si < stackSize && si < 64; si++)
                                {
                                    Element el = stack[si];
                                    if (el == formatEl)
                                    {
                                        commonAncestor = stack[si - 1];
                                        seenFormattingElement = true;
                                    }
                                    else if (seenFormattingElement && tb.IsSpecial(el))
                                    {
                                        furthestBlock = el;
                                        break;
                                    }
                                }
                                if (furthestBlock == null)
                                {
                                    tb.PopStackToClose(formatEl.NodeName);
                                    tb.RemoveFromActiveFormattingElements(formatEl);
                                    return true;
                                }
                                // todo: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
                                // does that mean: int pos of format el in list?
                                Element node = furthestBlock;
                                Element lastNode = furthestBlock;
                                for (int j = 0; j < 3; j++)
                                {
                                    if (tb.OnStack(node))
                                    {
                                        node = tb.AboveOnStack(node);
                                    }
                                    if (!tb.IsInActiveFormattingElements(node))
                                    {
                                        // note no bookmark check
                                        tb.RemoveFromStack(node);
                                        goto INNER_continue;
                                    }
                                    else
                                    {
                                        if (node == formatEl)
                                        {
                                            goto INNER_break;
                                        }
                                    }
                                    Element replacement = new Element(Tag.ValueOf(node.NodeName), tb.GetBaseUri());
                                    tb.ReplaceActiveFormattingElement(node, replacement);
                                    tb.ReplaceOnStack(node, replacement);
                                    node = replacement;
                                    if (lastNode == furthestBlock)
                                    {
                                    }
                                    // todo: move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements.
                                    // not getting how this bookmark both straddles the element above, but is inbetween here...
                                    if (lastNode.Parent != null)
                                    {
                                        lastNode.Remove();
                                    }
                                    node.AppendChild(lastNode);
                                    lastNode = node;
                                INNER_continue: ;
                                }
                            INNER_break: ;
                                if (StringUtil.In(commonAncestor.NodeName, HtmlTreeBuilderState.Constants.InBodyEndTableFosters))
                                {
                                    if (lastNode.Parent!= null)
                                    {
                                        lastNode.Remove();
                                    }
                                    tb.InsertInFosterParent(lastNode);
                                }
                                else
                                {
                                    if (lastNode.Parent != null)
                                    {
                                        lastNode.Remove();
                                    }
                                    commonAncestor.AppendChild(lastNode);
                                }
                                Element adopter = new Element(formatEl.Tag, tb.GetBaseUri());
                                adopter.Attributes.SetAll(formatEl.Attributes);
                                Node[] childNodes = furthestBlock.ChildNodes.ToArray();
                                foreach (Node childNode in childNodes)
                                {
                                    adopter.AppendChild(childNode);
                                }
                                // append will reparent. thus the clone to avoid concurrent mod.
                                furthestBlock.AppendChild(adopter);
                                tb.RemoveFromActiveFormattingElements(formatEl);
                                // todo: insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
                                tb.RemoveFromStack(formatEl);
                                tb.InsertOnStackAfter(furthestBlock, adopter);
                            }
                        }
                        else if (StringUtil.In(name, HtmlTreeBuilderState.Constants.InBodyStartApplets))
                        {
                            if (!tb.InScope("name"))
                            {
                                if (!tb.InScope(name))
                                {
                                    tb.Error(this);
                                    return false;
                                }
                                tb.GenerateImpliedEndTags();
                                if (!tb.CurrentElement().NodeName.Equals(name))
                                {
                                    tb.Error(this);
                                }
                                tb.PopStackToClose(name);
                                tb.ClearFormattingElementsToLastMarker();
                            }
                        }
                        else if (name.Equals("br"))
                        {
                            tb.Error(this);
                            tb.Process(new Token.StartTag("br"));
                            return false;
                        }
                        else
                        {
                            return this.AnyOtherEndTag(t, tb);
                        }
                        break;
                    }

                    case TokenType.EOF:
                    {
                        // todo: error if stack contains something not dd, dt, li, p, tbody, td, tfoot, th, thead, tr, body, html
                        // stop parsing
                        break;
                    }
                }
                return true;
            }
Example #30
0
 /// <summary>
 /// Appends a single space to <paramref name="accum"/> when
 /// <paramref name="element"/> has the tag name "br" and
 /// <c>TextNode.LastCharIsWhitespace(accum)</c> reports false.
 /// </summary>
 /// <param name="element">Element whose tag name is inspected.</param>
 /// <param name="accum">Text accumulator that may receive the space.</param>
 private static void AppendWhitespaceIfBr(Element element, StringBuilder accum)
 {
     // Only <br> elements contribute a separator.
     if (!element.TagName.Equals("br"))
     {
         return;
     }

     // Nothing to add when the accumulator already ends in whitespace.
     if (TextNode.LastCharIsWhitespace(accum))
     {
         return;
     }

     accum.Append(" ");
 }
Example #31
0
 /// <summary>
 /// Builds an <see cref="Element"/> from <paramref name="startTag"/>, inserts
 /// it through <c>InsertNode</c>, and returns it.
 /// </summary>
 /// <remarks>
 /// A start tag that is not self closing has its element appended to the
 /// stack. A self-closing start tag is not pushed; instead the tokeniser's
 /// self-closing flag is acknowledged and, when the tag is not a known one,
 /// the tag itself is marked as self closing.
 /// </remarks>
 /// <param name="startTag">Start tag token supplying the name and attributes.</param>
 /// <returns>The newly created element.</returns>
 internal Element Insert(Token.StartTag startTag)
 {
     // todo: wonder if for xml parsing, should treat all tags as unknown? because it's not html.
     Tag tag = Tag.ValueOf(startTag.Name());
     Element created = new Element(tag, baseUri, startTag.attributes);
     InsertNode(created);

     if (!startTag.IsSelfClosing())
     {
         stack.AddLast(created);
         return created;
     }

     tokeniser.AcknowledgeSelfClosingFlag();
     if (!tag.IsKnown)
     {
         // unknown tag: remember that it is self closing so output can reflect it.
         tag.SetSelfClosing();
     }
     return created;
 }
Example #32
0
 /// <summary>
 /// Walks upward from <paramref name="el"/> through successive
 /// <c>Parent</c> links, adding each ancestor to <paramref name="parents"/>
 /// in nearest-first order. The walk stops at the first missing parent or
 /// at the first parent whose tag name is "#root" (which is not added).
 /// </summary>
 /// <param name="el">Element whose ancestors are collected.</param>
 /// <param name="parents">Collection that receives the ancestors.</param>
 private static void AccumulateParents(Element el, Elements parents)
 {
     for (Element ancestor = el.Parent;
          ancestor != null && !ancestor.TagName.Equals("#root");
          ancestor = ancestor.Parent)
     {
         parents.Add(ancestor);
     }
 }