Esempio n. 1
0
 /// <summary>
 /// Tokenizes an input tag carrying a value-less attribute and checks that
 /// the first attribute of the resulting tag token has the key "required".
 /// </summary>
 public void TokenizationAttributeNameDetection()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("<input required>"));

     var tag = (HtmlTagToken)tokenizer.Get();
     var firstAttribute = tag.Attributes[0];

     Assert.AreEqual("required", firstAttribute.Key);
 }
Esempio n. 2
0
 /// <summary>
 /// Tokenizes an anchor tag written with three single-quoted attributes and
 /// checks that the tag token reports exactly three attributes.
 /// </summary>
 public void TokenizationAttributesDetected()
 {
     var markup = new SourceManager("<a target='_blank' href='http://whatever' title='ho'>");
     var tokenizer = new HtmlTokenizer(markup);

     var tag = (HtmlTagToken)tokenizer.Get();

     Assert.AreEqual(3, tag.Attributes.Count);
 }
Esempio n. 3
0
 /// <summary>
 /// Checks that the input "&lt;?&gt;" is tokenized as a comment token whose
 /// data is the single question mark.
 /// </summary>
 public void TokenizationBogusCommentQuestionMark()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("<?>"));

     var first = tokenizer.Get();

     // The type is asserted before casting so a wrong token kind fails as an assertion.
     Assert.AreEqual(HtmlTokenType.Comment, first.Type);
     var comment = (HtmlCommentToken)first;
     Assert.AreEqual("?", comment.Data);
 }
Esempio n. 4
0
 /// <summary>
 /// Checks that the input "&lt;!&gt;" is tokenized as a comment token with
 /// empty data.
 /// </summary>
 public void TokenizationBogusCommentEmpty()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("<!>"));

     var first = tokenizer.Get();

     // The type is asserted before casting so a wrong token kind fails as an assertion.
     Assert.AreEqual(HtmlTokenType.Comment, first.Type);
     var comment = (HtmlCommentToken)first;
     Assert.AreEqual(string.Empty, comment.Data);
 }
Esempio n. 5
0
 /// <summary>
 /// Checks that the input "&lt;/ &gt;" is tokenized as a comment token whose
 /// data is a single space.
 /// </summary>
 public void TokenizationBogusCommentClosingTag()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("</ >"));

     var first = tokenizer.Get();

     // The type is asserted before casting so a wrong token kind fails as an assertion.
     Assert.AreEqual(HtmlTokenType.Comment, first.Type);
     var comment = (HtmlCommentToken)first;
     Assert.AreEqual(" ", comment.Data);
 }
Esempio n. 6
0
        /// <summary>
        /// Tokenizes a CDATA section with character data acceptance switched on,
        /// concatenates the data of every character token up to EOF and checks
        /// that the result equals the raw text inside the CDATA section.
        /// </summary>
        public void TokenizationCDataCorrectCharacters()
        {
            var tokenizer = new HtmlTokenizer(new SourceManager("<![CDATA[hi mum how <!-- are you doing />]]>"));
            tokenizer.AcceptsCharacterData = true;
            var collected = new StringBuilder();

            // Pull tokens until the EOF token has been seen; the EOF token
            // itself still goes through the type check, as in a do/while.
            for (var more = true; more; )
            {
                HtmlToken current = tokenizer.Get();

                if (current.Type == HtmlTokenType.Character)
                {
                    var character = (HtmlCharacterToken)current;
                    collected.Append(character.Data);
                }

                more = current != HtmlToken.EOF;
            }

            Assert.AreEqual("hi mum how <!-- are you doing />", collected.ToString());
        }
Esempio n. 7
0
 /// <summary>
 /// Checks that spaces between the tag name and the closing angle bracket
 /// do not become part of the tag name: "&lt;i   &gt;" yields the name "i".
 /// </summary>
 public void TokenizationTagSpacesBehind()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("<i   >"));

     var tag = (HtmlTagToken)tokenizer.Get();

     Assert.AreEqual("i", tag.Name);
 }
Esempio n. 8
0
 /// <summary>
 /// Checks that the tag token produced for "&lt;img /&gt;" is flagged as
 /// self-closing.
 /// </summary>
 public void TokenizationTagSelfClosingDetected()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("<img />"));

     var tag = (HtmlTagToken)tokenizer.Get();
     var selfClosing = tag.IsSelfClosing;

     Assert.AreEqual(true, selfClosing);
 }
Esempio n. 9
0
 /// <summary>
 /// Tokenizes a doctype declaration with a custom root name and a SYSTEM
 /// identifier, then checks the token type, the name and the system
 /// identifier, including that neither is flagged as missing.
 /// </summary>
 public void TokenizationUnusualDoctype()
 {
     var markup = new SourceManager("<!DOCTYPE root_element SYSTEM \"DTD_location\">");
     var tokenizer = new HtmlTokenizer(markup);

     var first = tokenizer.Get();

     // The type is asserted before casting so a wrong token kind fails as an assertion.
     Assert.AreEqual(HtmlTokenType.DOCTYPE, first.Type);
     var doctype = (HtmlDoctypeToken)first;

     Assert.IsFalse(doctype.IsNameMissing);
     Assert.AreEqual("root_element", doctype.Name);

     Assert.IsFalse(doctype.IsSystemIdentifierMissing);
     Assert.AreEqual("DTD_location", doctype.SystemIdentifier);
 }
Esempio n. 10
0
 /// <summary>
 /// Checks that a mixed-case tag name is reported in lower case:
 /// "&lt;InpUT&gt;" yields the name "input".
 /// </summary>
 public void TokenizationTagMixedCaseHandling()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("<InpUT>"));

     var tag = (HtmlTagToken)tokenizer.Get();

     Assert.AreEqual("input", tag.Name);
 }
Esempio n. 11
0
 /// <summary>
 /// Checks that "&lt;p&gt;" is tokenized as a start tag token named "p".
 /// </summary>
 public void TokenizationStartTagDetection()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("<p>"));

     var first = tokenizer.Get();

     // The type is asserted before casting so a wrong token kind fails as an assertion.
     Assert.AreEqual(HtmlTokenType.StartTag, first.Type);
     var tag = (HtmlTagToken)first;
     Assert.AreEqual("p", tag.Name);
 }
Esempio n. 12
0
 /// <summary>
 /// Checks that tokenizing an empty input yields a token of type EOF as the
 /// first token.
 /// </summary>
 public void TokenizationFinalEOF()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager(""));

     var first = tokenizer.Get();
     var kind = first.Type;

     Assert.AreEqual(HtmlTokenType.EOF, kind);
 }
Esempio n. 13
0
 /// <summary>
 /// Checks that a lower-case "&lt;!doctype html&gt;" declaration is
 /// tokenized as a DOCTYPE token.
 /// </summary>
 public void TokenizationDoctypeDetected()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("<!doctype html>"));

     var first = tokenizer.Get();
     var kind = first.Type;

     Assert.AreEqual(HtmlTokenType.DOCTYPE, kind);
 }
Esempio n. 14
0
 /// <summary>
 /// Checks that a regular "&lt;!-- ... --&gt;" comment is tokenized as a
 /// comment token.
 /// </summary>
 public void TokenizationCommentDetected()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("<!-- hi my friend -->"));

     var first = tokenizer.Get();
     var kind = first.Type;

     Assert.AreEqual(HtmlTokenType.Comment, kind);
 }
Esempio n. 15
0
 /// <summary>
 /// Tokenizes a CDATA section using the tokenizer's default settings and
 /// checks that the first token is of type Characters.
 /// </summary>
 public void TokenizationCDataDetected()
 {
     var markup = new SourceManager("<![CDATA[hi mum how <!-- are you doing />]]>");
     var tokenizer = new HtmlTokenizer(markup);

     var first = tokenizer.Get();
     var kind = first.Type;

     Assert.AreEqual(HtmlTokenType.Characters, kind);
 }
Esempio n. 16
0
 /// <summary>
 /// Checks that the tag token produced for "&lt;span&gt;" carries the name
 /// "span".
 /// </summary>
 public void TokenizationTagNameDetection()
 {
     var tokenizer = new HtmlTokenizer(new SourceManager("<span>"));

     var tag = (HtmlTagToken)tokenizer.Get();

     Assert.AreEqual("span", tag.Name);
 }
Esempio n. 17
0
        /// <summary>
        /// Tokenizes text containing the "&amp;notin;" character reference,
        /// concatenates the data of all Character and Characters tokens up to
        /// EOF and checks that the reference was replaced by the "∉" symbol.
        /// </summary>
        public void TokenizationCharacterReferenceNotin()
        {
            var input = "I'm &notin; I tell you";
            var tokenizer = new HtmlTokenizer(new SourceManager(input));
            var text = string.Empty;

            // Pull tokens until the EOF token has been seen; the EOF token
            // itself still goes through the type checks, as in a do/while.
            for (var more = true; more; )
            {
                HtmlToken current = tokenizer.Get();

                if (current.Type == HtmlTokenType.Character)
                {
                    var single = (HtmlCharacterToken)current;
                    text += single.Data;
                }
                else if (current.Type == HtmlTokenType.Characters)
                {
                    var run = (HtmlCharactersToken)current;
                    text += new String(run.Data);
                }

                more = current != HtmlToken.EOF;
            }

            Assert.AreEqual("I'm ∉ I tell you", text);
        }
Esempio n. 18
0
        /// <summary>
        /// Initializes a new HTML parser for the given document. A tokenizer is
        /// created over the given source manager, and errors raised by that
        /// tokenizer are re-raised through this parser's ErrorOccurred event.
        /// </summary>
        /// <param name="document">The document instance to be constructed.</param>
        /// <param name="source">The source passed to the tokenizer.</param>
        internal HtmlParser(HTMLDocument document, SourceManager source)
        {
            tokenizer = new HtmlTokenizer(source);

            // Forward tokenizer errors to our own subscribers, reporting this
            // parser (not the tokenizer) as the sender.
            tokenizer.ErrorOccurred += (sender, args) =>
            {
                if (ErrorOccurred == null)
                    return;

                ErrorOccurred(this, args);
            };

            // Initial parser state.
            doc = document;
            started = false;
            frameset = true;
            insert = HtmlTreeMode.Initial;

            // Start with empty element lists.
            open = new List<Element>();
            formatting = new List<Element>();
        }