コード例 #1
0
ファイル: TestParser.cs プロジェクト: itaibh/WebEngineSharp
        // Tokenizes the BasicStructure_HTML4.html resource and verifies that the
        // resulting document exposes the expected <title> text through doc.head.title.
        public void BasicStructure_HTML4()
        {
            string resourcePath = Path.Combine("Resources", "BasicStructure_HTML4.html");

            // The stream wraps an OS file handle; dispose it deterministically so the
            // resource file is not left locked if the tokenizer or the assertion throws.
            using (Stream stream = File.OpenRead(resourcePath))
            {
                HtmlTokenizer tokenizer = new HtmlTokenizer();
                IDocument doc = tokenizer.Tokenize(stream);

                Assert.AreEqual("Basic Structure Testing", doc.head.title, "title mismatch");
            }
        }
コード例 #2
0
        // Takes at most one pending token and hands it to the current insertion mode.
        // Tokens waiting in the re-process queue take priority over the regular queue.
        // The insertion mode returned by the handler becomes the new current mode.
        // Does nothing when there is no current insertion mode or no token to process.
        public void ProcessToken(HtmlTokenizer tokenizer)
        {
            if (CurrentInsertionMode == null)
            {
                return;
            }

            BaseToken pending;
            if (m_ReProcessTokensQueue.Count > 0)
            {
                pending = m_ReProcessTokensQueue.Dequeue();
            }
            else if (m_TokensQueue.Count > 0)
            {
                pending = m_TokensQueue.Dequeue();
            }
            else
            {
                // Both queues are empty; nothing to do.
                return;
            }

            if (pending == null)
            {
                // A null entry was dequeued; it is consumed but not dispatched.
                return;
            }

            CurrentInsertionMode = CurrentInsertionMode.ProcessToken(tokenizer, this, pending, m_Document);
        }
コード例 #3
0
        // Tree construction dispatcher: decides whether a token is handled by the rules
        // of the current insertion mode in HTML content (ActionA) or by the rules for
        // parsing tokens in foreign content (ActionB).
        //
        // Notes carried over from the specification text:
        //  - The next token is the token that is about to be processed by the tree
        //    construction dispatcher (even if the token is subsequently just ignored).
        //  - A node is a MathML text integration point if it is an mi, mo, mn, ms or
        //    mtext element in the MathML namespace.
        //  - A node is an HTML integration point if it is one of:
        //      * an annotation-xml element in the MathML namespace whose start tag token
        //        had an "encoding" attribute whose value was an ASCII case-insensitive
        //        match for "text/html" or "application/xhtml+xml";
        //      * a foreignObject, desc or title element in the SVG namespace.
        //  - Not all tag names mentioned are conformant; many are included to handle
        //    legacy content, but they are still part of the required algorithm.
        //  - The algorithm places no limit on DOM depth or on the length of tag names,
        //    attribute names, attribute values, Text nodes, etc.; practical concerns
        //    will likely force user agents to impose nesting depth constraints.
        public void ProcessToken(HtmlTokenizer tokenizer, BaseToken token)
        {
            INode current = AdjustedCurrentNode;

            // No adjusted current node, or an element in the HTML namespace: HTML rules.
            bool useHtmlRules = current == null || IsNodeInNamespace(current, HTMLNamespace);

            if (!useHtmlRules)
            {
                StartTagToken startTag = token as StartTagToken;
                bool isCharacter = token is CharacterToken;

                // Integration point check: character tokens, and start tags other than
                // mglyph / malignmark, are handled as HTML content.
                if (IsMathMLIntegrationPoint(current))
                {
                    useHtmlRules = isCharacter
                        || (startTag != null
                            && startTag.TagName != "mglyph"
                            && startTag.TagName != "malignmark");
                }

                // MathML annotation-xml element receiving an svg start tag.
                if (!useHtmlRules
                    && IsNodeInNamespace(current, MathMLNamespace)
                    && current.nodeName == "annotation-xml")
                {
                    useHtmlRules = startTag != null && startTag.TagName == "svg";
                }

                // HTML integration point receiving a start tag or a character token.
                if (!useHtmlRules && IsHTMLIntegrationPoint(current))
                {
                    useHtmlRules = startTag != null || isCharacter;
                }

                // End-of-file is always handled by the HTML insertion mode rules.
                if (!useHtmlRules)
                {
                    useHtmlRules = token is EndOfFileToken;
                }
            }

            if (useHtmlRules)
            {
                ActionA(tokenizer, token);
            }
            else
            {
                ActionB(tokenizer, token);
            }
        }
コード例 #4
0
 // Foreign-content branch of the tree construction dispatcher.
 // Currently a stub: the body is empty, so a token routed here is not processed further.
 private void ActionB(HtmlTokenizer tokenizer, BaseToken token)
 {
     // Intended behavior: process the token according to the rules given in the
     // section for parsing tokens in foreign content.
     // TODO: not implemented yet - see section 8.2.5.5,
     // http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
 }
コード例 #5
0
        // HTML-content branch of the tree construction dispatcher: forwards the token
        // to the insertion-mode token processor.
        private void ActionA(HtmlTokenizer tokenizer, BaseToken token)
        {
            // Process the token according to the rules given in the section corresponding
            // to the current insertion mode in HTML content.
            // http://www.w3.org/TR/html5/syntax.html#insertion-mode

            // Enqueue the token first, then ask the processor to run a processing step.
            m_InsertionModeTokenProcessor.EnqueueToken(token);
            m_InsertionModeTokenProcessor.ProcessToken(tokenizer);
        }
コード例 #6
0
 // Convenience overload: wraps a single character in a CharacterToken and
 // forwards it to the token-based ProcessToken overload.
 public void ProcessToken(HtmlTokenizer tokenizer, char c)
 {
     ProcessToken(tokenizer, new CharacterToken { Character = c });
 }