コード例 #1
0
            /// <summary>
            /// Processes a single token for this state by dispatching on <c>t.Type</c>.
            /// </summary>
            /// <param name="t">Token to handle.</param>
            /// <param name="tb">Tree builder that receives insertions and error reports.</param>
            /// <returns>
            /// <c>false</c> for a character token whose data equals <c>NullString</c> and for a doctype token;
            /// the value supplied by <c>HandleStartTag</c> / <c>HandleEndTag</c> when they supply one;
            /// <c>true</c> in every other case.
            /// </returns>
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                bool? handled;

                switch (t.Type)
                {
                    case TokenType.Character:
                    {
                        Token.Character chars = t.AsCharacter();
                        if (chars.Data.Equals(NullString))
                        {
                            // TODO: confirm that check
                            tb.Error(this);
                            return false;
                        }

                        // Both whitespace and non-whitespace text are inserted the same way;
                        // only non-whitespace text additionally clears the frameset-ok flag.
                        bool whitespaceOnly = IsWhitespace(chars);
                        tb.ReconstructFormattingElements();
                        tb.Insert(chars);
                        if (!whitespaceOnly)
                        {
                            tb.FramesetOK = false;
                        }
                        return true;
                    }

                    case TokenType.Comment:
                        tb.Insert(t.AsComment());
                        return true;

                    case TokenType.Doctype:
                        tb.Error(this);
                        return false;

                    case TokenType.StartTag:
                        // A null result means "no explicit verdict", which maps to true.
                        handled = HandleStartTag(t, tb);
                        return handled ?? true;

                    case TokenType.EndTag:
                        handled = HandleEndTag(t, tb);
                        return handled ?? true;

                    case TokenType.EOF:
                        // TODO: error if stack contains something not dd, dt, li, p, tbody, td, tfoot, th, thead, tr, body, html
                        // stop parsing
                        return true;

                    default:
                        return true;
                }
            }
コード例 #2
0
        /// <summary>
        /// Routes a token to the matching insert/close operation based on its type.
        /// Start tags, comments, character data and doctypes are inserted; end tags
        /// close the matching open element; EOF is a no-op; any other type raises
        /// <c>HtmlWarning.UnexpectedTokenType()</c>.
        /// </summary>
        /// <param name="token">Token to handle.</param>
        /// <returns>Always <c>true</c>.</returns>
        public override bool Process(Token token)
        {
            TokenType kind = token.Type;

            if (kind == TokenType.StartTag)
            {
                Insert(token.AsStartTag());
            }
            else if (kind == TokenType.EndTag)
            {
                PopStackToClose(token.AsEndTag());
            }
            else if (kind == TokenType.Comment)
            {
                Insert(token.AsComment());
            }
            else if (kind == TokenType.Character)
            {
                Insert(token.AsCharacter());
            }
            else if (kind == TokenType.Doctype)
            {
                Insert(token.AsDoctype());
            }
            else if (kind != TokenType.EOF)
            {
                // EOF needs no work (could put some normalisation here if desired);
                // everything else is a token type this builder does not expect.
                HtmlWarning.UnexpectedTokenType();
            }

            return true;
        }
コード例 #3
0
            /// <summary>
            /// Buffers character tokens in the builder's pending-table-characters list.
            /// On any other token the buffered characters are flushed, the builder
            /// transitions back to <c>tb.OriginalState</c>, and the token is reprocessed.
            /// </summary>
            /// <param name="t">Token to handle.</param>
            /// <param name="tb">Tree builder holding the pending character list.</param>
            /// <returns>
            /// <c>false</c> for a character token whose data equals <c>NullString</c>;
            /// <c>true</c> after buffering a character token; otherwise the result of
            /// reprocessing <paramref name="t"/> via <c>tb.Process(t)</c>.
            /// </returns>
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (t.Type == TokenType.Character)
                {
                    Token.Character incoming = t.AsCharacter();
                    if (incoming.Data.Equals(NullString))
                    {
                        tb.Error(this);
                        return false;
                    }

                    tb.GetPendingTableCharacters().Add(incoming);
                    return true;
                }

                if (tb.GetPendingTableCharacters().Count > 0)
                {
                    foreach (Token.Character pending in tb.GetPendingTableCharacters())
                    {
                        if (IsWhitespace(pending))
                        {
                            tb.Insert(pending);
                            continue;
                        }

                        // InTable anything else section:
                        tb.Error(this);

                        // Foster inserts are switched on only while the current element is table structure.
                        bool fosterParent = StringUtil.In(tb.CurrentElement.NodeName, "table", "tbody", "tfoot", "thead", "tr");
                        if (fosterParent)
                        {
                            tb.SetFosterInserts(true);
                        }

                        tb.Process(pending, InBody);

                        if (fosterParent)
                        {
                            tb.SetFosterInserts(false);
                        }
                    }

                    tb.NewPendingTableCharacters();
                }

                tb.Transition(tb.OriginalState);
                return tb.Process(t);
            }
コード例 #4
0
        /// <summary>
        /// Tells whether a token is a character token whose data consists solely of
        /// characters accepted by <c>StringUtil.IsWhitespace</c>.
        /// </summary>
        /// <param name="t">Token to inspect.</param>
        /// <returns>
        /// <c>false</c> for non-character tokens or when any character fails the
        /// whitespace test; <c>true</c> otherwise (including empty data).
        /// </returns>
        private static bool IsWhitespace(Token t)
        {
            if (!t.IsCharacter)
            {
                return false;
            }

            string text = t.AsCharacter().Data;

            // TODO: this checks more than spec - "\t", "\n", "\f", "\r", " "
            for (int i = 0; i < text.Length; i++)
            {
                if (!StringUtil.IsWhitespace(text[i]))
                {
                    return false;
                }
            }

            return true;
        }
コード例 #5
0
 // in script, style etc. normally treated as data tags
 /// <summary>
 /// Inserts character tokens as-is. On an end tag the current element is popped
 /// and the builder returns to <c>tb.OriginalState</c>. On EOF an error is
 /// reported, the same pop/transition happens, and the EOF token is reprocessed.
 /// Any other token is ignored.
 /// </summary>
 /// <param name="t">Token to handle.</param>
 /// <param name="tb">Tree builder to operate on.</param>
 /// <returns>The result of reprocessing the token on EOF; <c>true</c> otherwise.</returns>
 public override bool Process(Token t, HtmlTreeBuilder tb)
 {
     if (t.IsCharacter)
     {
         tb.Insert(t.AsCharacter());
         return true;
     }

     if (t.IsEOF)
     {
         tb.Error(this);
         // if current node is script: already started
         tb.Pop();
         tb.Transition(tb.OriginalState);
         return tb.Process(t);
     }

     if (t.IsEndTag)
     {
         // if: An end tag whose tag name is "script" -- scripting nesting level, if evaluating scripts
         tb.Pop();
         tb.Transition(tb.OriginalState);
     }

     return true;
 }
コード例 #6
0
            /// <summary>
            /// Handles a token while option/optgroup/select elements are being built.
            /// Character data and comments are inserted, doctypes are rejected, and
            /// start/end tags are handled by name; tags not listed here are delegated
            /// to <c>AnythingElse</c>.
            /// </summary>
            /// <param name="t">Token to handle.</param>
            /// <param name="tb">Tree builder to operate on.</param>
            /// <returns>
            /// <c>false</c> when the token is rejected; the result of the delegated
            /// call when the token is reprocessed or handed to <c>AnythingElse</c>;
            /// <c>true</c> otherwise.
            /// </returns>
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                switch (t.Type)
                {
                    case TokenType.Character:
                    {
                        Token.Character text = t.AsCharacter();
                        if (text.Data.Equals(NullString))
                        {
                            tb.Error(this);
                            return false;
                        }

                        tb.Insert(text);
                        return true;
                    }

                    case TokenType.Comment:
                        tb.Insert(t.AsComment());
                        return true;

                    case TokenType.Doctype:
                        tb.Error(this);
                        return false;

                    case TokenType.StartTag:
                    {
                        Token.StartTag opening = t.AsStartTag();
                        string tagName = opening.Name;

                        if (tagName.Equals("html"))
                        {
                            return tb.Process(opening, InBody);
                        }

                        if (tagName.Equals("option"))
                        {
                            // Close any open option before starting the new one.
                            tb.Process(new Token.EndTag("option"));
                            tb.Insert(opening);
                            return true;
                        }

                        if (tagName.Equals("optgroup"))
                        {
                            string currentName = tb.CurrentElement.NodeName;
                            if (currentName.Equals("option"))
                            {
                                tb.Process(new Token.EndTag("option"));
                            }
                            else if (currentName.Equals("optgroup"))
                            {
                                tb.Process(new Token.EndTag("optgroup"));
                            }

                            tb.Insert(opening);
                            return true;
                        }

                        if (tagName.Equals("select"))
                        {
                            // A nested <select> start tag is treated as </select>.
                            tb.Error(this);
                            return tb.Process(new Token.EndTag("select"));
                        }

                        if (StringUtil.In(tagName, "input", "keygen", "textarea"))
                        {
                            tb.Error(this);

                            if (!tb.InSelectScope("select"))
                            {
                                return false;    // frag
                            }

                            tb.Process(new Token.EndTag("select"));
                            return tb.Process(opening);
                        }

                        if (tagName.Equals("script"))
                        {
                            return tb.Process(t, InHead);
                        }

                        return AnythingElse(t, tb);
                    }

                    case TokenType.EndTag:
                    {
                        string tagName = t.AsEndTag().Name;

                        if (tagName.Equals("optgroup"))
                        {
                            // An option sitting directly inside an optgroup is closed first.
                            if (tb.CurrentElement.NodeName.Equals("option"))
                            {
                                var parent = tb.AboveOnStack(tb.CurrentElement);
                                if (parent != null && parent.NodeName.Equals("optgroup"))
                                {
                                    tb.Process(new Token.EndTag("option"));
                                }
                            }

                            if (tb.CurrentElement.NodeName.Equals("optgroup"))
                            {
                                tb.Pop();
                            }
                            else
                            {
                                tb.Error(this);
                            }

                            return true;
                        }

                        if (tagName.Equals("option"))
                        {
                            if (tb.CurrentElement.NodeName.Equals("option"))
                            {
                                tb.Pop();
                            }
                            else
                            {
                                tb.Error(this);
                            }

                            return true;
                        }

                        if (tagName.Equals("select"))
                        {
                            if (!tb.InSelectScope(tagName))
                            {
                                tb.Error(this);
                                return false;
                            }

                            tb.PopStackToClose(tagName);
                            tb.ResetInsertionMode();
                            return true;
                        }

                        return AnythingElse(t, tb);
                    }

                    case TokenType.EOF:
                        if (!tb.CurrentElement.NodeName.Equals("html"))
                        {
                            tb.Error(this);
                        }
                        return true;

                    default:
                        return AnythingElse(t, tb);
                }
            }
コード例 #7
0
            /// <summary>
            /// Handles a token around <c>col</c> / <c>colgroup</c> elements. Whitespace
            /// and comments are inserted, a <c>col</c> start tag is inserted as an empty
            /// element, and a <c>colgroup</c> end tag pops the current element and
            /// transitions to <c>InTable</c>. Everything not handled explicitly goes to
            /// <c>AnythingElse</c>.
            /// </summary>
            /// <param name="t">Token to handle.</param>
            /// <param name="tb">Tree builder to operate on.</param>
            /// <returns>
            /// <c>false</c> when a <c>colgroup</c> end tag arrives while the current
            /// element is <c>html</c>; the delegated result where the token is passed
            /// on; <c>true</c> otherwise.
            /// </returns>
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (IsWhitespace(t))
                {
                    tb.Insert(t.AsCharacter());
                    return true;
                }

                switch (t.Type)
                {
                    case TokenType.Comment:
                        tb.Insert(t.AsComment());
                        return true;

                    case TokenType.Doctype:
                        // Reported as an error, but the token still counts as handled.
                        tb.Error(this);
                        return true;

                    case TokenType.StartTag:
                    {
                        Token.StartTag opening = t.AsStartTag();
                        string tagName = opening.Name;

                        if (tagName.Equals("html"))
                        {
                            return tb.Process(t, InBody);
                        }

                        if (!tagName.Equals("col"))
                        {
                            return AnythingElse(t, tb);
                        }

                        tb.InsertEmpty(opening);
                        return true;
                    }

                    case TokenType.EndTag:
                    {
                        if (!t.AsEndTag().Name.Equals("colgroup"))
                        {
                            return AnythingElse(t, tb);
                        }

                        if (tb.CurrentElement.NodeName.Equals("html"))       // frag case
                        {
                            tb.Error(this);
                            return false;
                        }

                        tb.Pop();
                        tb.Transition(InTable);
                        return true;
                    }

                    case TokenType.EOF:
                        // stop parsing when the current element is html (frag case); otherwise delegate
                        return tb.CurrentElement.NodeName.Equals("html") || AnythingElse(t, tb);

                    default:
                        return AnythingElse(t, tb);
                }
            }
コード例 #8
0
            /// <summary>
            /// Handles a token around <c>frameset</c> / <c>frame</c> elements. Whitespace
            /// and comments are inserted; <c>html</c> and <c>noframes</c> start tags are
            /// forwarded to <c>InBody</c> and <c>InHead</c>; a <c>frameset</c> end tag
            /// pops the current element and may transition to <c>AfterFrameset</c>.
            /// Anything else is reported as an error.
            /// </summary>
            /// <param name="t">Token to handle.</param>
            /// <param name="tb">Tree builder to operate on.</param>
            /// <returns>
            /// <c>false</c> when the token is rejected; the forwarded result for
            /// <c>html</c> / <c>noframes</c> start tags; <c>true</c> otherwise.
            /// </returns>
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (IsWhitespace(t))
                {
                    tb.Insert(t.AsCharacter());
                    return true;
                }

                if (t.IsComment)
                {
                    tb.Insert(t.AsComment());
                    return true;
                }

                if (t.IsDoctype)
                {
                    tb.Error(this);
                    return false;
                }

                if (t.IsStartTag)
                {
                    Token.StartTag opening = t.AsStartTag();
                    switch (opening.Name)
                    {
                        case "html":
                            return tb.Process(opening, InBody);

                        case "frameset":
                            tb.Insert(opening);
                            return true;

                        case "frame":
                            tb.InsertEmpty(opening);
                            return true;

                        case "noframes":
                            return tb.Process(opening, InHead);

                        default:
                            tb.Error(this);
                            return false;
                    }
                }

                if (t.IsEndTag && t.AsEndTag().Name.Equals("frameset"))
                {
                    if (tb.CurrentElement.NodeName.Equals("html"))   // frag
                    {
                        tb.Error(this);
                        return false;
                    }

                    tb.Pop();

                    // Leave this state only when not fragment parsing and the new
                    // current element is no longer a frameset.
                    if (!(tb.IsFragmentParsing() || tb.CurrentElement.NodeName.Equals("frameset")))
                    {
                        tb.Transition(AfterFrameset);
                    }

                    return true;
                }

                if (t.IsEOF)
                {
                    if (!tb.CurrentElement.NodeName.Equals("html"))
                    {
                        tb.Error(this);
                    }

                    return true;
                }

                tb.Error(this);
                return false;
            }
コード例 #9
0
            /// <summary>
            /// Handles a token around head-level elements. Whitespace and comments are
            /// inserted, doctypes are rejected, and start/end tags are handled by name.
            /// Tokens not handled explicitly are passed to <c>AnythingElse</c>.
            /// </summary>
            /// <param name="t">Token to handle.</param>
            /// <param name="tb">Tree builder to operate on.</param>
            /// <returns>
            /// <c>false</c> when the token is rejected (doctype, a <c>head</c> start
            /// tag, or an end tag other than <c>head</c>, <c>body</c>, <c>html</c>,
            /// <c>br</c>); the delegated result where the token is passed on to
            /// <c>InBody</c> or <c>AnythingElse</c>; <c>true</c> otherwise.
            /// </returns>
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (IsWhitespace(t))
                {
                    tb.Insert(t.AsCharacter());
                    return(true);
                }

                switch (t.Type)
                {
                case TokenType.Comment:
                    tb.Insert(t.AsComment());
                    break;

                case TokenType.Doctype:
                    tb.Error(this);
                    return(false);

                case TokenType.StartTag:
                    Token.StartTag start = t.AsStartTag();
                    string         name  = start.Name;

                    if (name.Equals("html"))
                    {
                        return(InBody.Process(t, tb));
                    }
                    else if (StringUtil.In(name, "base", "basefont", "bgsound", "command", "link"))
                    {
                        HtmlElement el = tb.InsertEmpty(start);
                        // jsoup special: update base the first time it is seen
                        if (name.Equals("base") && el.HasAttribute("href"))
                        {
                            tb.MaybeSetBaseUri(el);
                        }
                    }
                    else if (name.Equals("meta"))
                    {
                        // The inserted element is not used here, so the return value is discarded.
                        tb.InsertEmpty(start);
                        // TODO: charset switches
                    }
                    else if (name.Equals("title"))
                    {
                        HandleRcData(start, tb);
                    }
                    else if (StringUtil.In(name, "noframes", "style"))
                    {
                        HandleRawtext(start, tb);
                    }
                    else if (name.Equals("noscript"))
                    {
                        // else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
                        tb.Insert(start);
                        tb.Transition(InHeadNoscript);
                    }
                    else if (name.Equals("script"))
                    {
                        // skips some script rules as won't execute them
                        tb.Insert(start);
                        tb.tokeniser.Transition(TokeniserState.ScriptData);
                        // Mark the insertion mode before switching to Text.
                        tb.MarkInsertionMode();
                        tb.Transition(Text);
                    }
                    else if (name.Equals("head"))
                    {
                        tb.Error(this);
                        return(false);
                    }
                    else
                    {
                        return(AnythingElse(t, tb));
                    }
                    break;

                case TokenType.EndTag:
                    Token.EndTag end = t.AsEndTag();
                    name = end.Name;
                    if (name.Equals("head"))
                    {
                        tb.Pop();
                        tb.Transition(AfterHead);
                    }
                    else if (StringUtil.In(name, "body", "html", "br"))
                    {
                        return(AnythingElse(t, tb));
                    }
                    else
                    {
                        tb.Error(this);
                        return(false);
                    }
                    break;

                default:
                    return(AnythingElse(t, tb));
                }
                return(true);
            }
コード例 #10
0
            /// <summary>
            /// Handles a token when the next element is expected to be <c>body</c> or
            /// <c>frameset</c>. Whitespace and comments are inserted; <c>body</c> and
            /// <c>frameset</c> start tags are inserted and switch state; head-level
            /// start tags are processed with <c>InHead</c> while the head element is
            /// temporarily pushed on the stack; remaining tokens go to
            /// <c>AnythingElse</c>.
            /// </summary>
            /// <param name="t">Token to handle.</param>
            /// <param name="tb">Tree builder to operate on.</param>
            /// <returns>
            /// <c>false</c> for a <c>head</c> start tag or an end tag other than
            /// <c>body</c> / <c>html</c>; the forwarded result for an <c>html</c>
            /// start tag; <c>true</c> otherwise (the <c>AnythingElse</c> result is
            /// not propagated).
            /// </returns>
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (IsWhitespace(t))
                {
                    tb.Insert(t.AsCharacter());
                    return true;
                }

                if (t.IsComment)
                {
                    tb.Insert(t.AsComment());
                    return true;
                }

                if (t.IsDoctype)
                {
                    // Reported as an error, but the token still counts as handled.
                    tb.Error(this);
                    return true;
                }

                if (t.IsStartTag)
                {
                    Token.StartTag opening = t.AsStartTag();

                    switch (opening.Name)
                    {
                        case "html":
                            return tb.Process(t, InBody);

                        case "body":
                            tb.Insert(opening);
                            tb.FramesetOK = false;
                            tb.Transition(InBody);
                            return true;

                        case "frameset":
                            tb.Insert(opening);
                            tb.Transition(InFrameset);
                            return true;

                        case "base":
                        case "basefont":
                        case "bgsound":
                        case "link":
                        case "meta":
                        case "noframes":
                        case "script":
                        case "style":
                        case "title":
                        {
                            tb.Error(this);

                            // Put the head element back on the stack just long enough
                            // for InHead to process the tag, then remove it again.
                            HtmlElement headElement = tb.HeadElement;
                            tb.Push(headElement);
                            tb.Process(t, InHead);
                            tb.RemoveFromStack(headElement);
                            return true;
                        }

                        case "head":
                            tb.Error(this);
                            return false;

                        default:
                            AnythingElse(t, tb);
                            return true;
                    }
                }

                if (t.IsEndTag)
                {
                    if (!StringUtil.In(t.AsEndTag().Name, "body", "html"))
                    {
                        tb.Error(this);
                        return false;
                    }

                    AnythingElse(t, tb);
                    return true;
                }

                AnythingElse(t, tb);
                return true;
            }