Ejemplo n.º 1
0
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (t.IsComment)
                {
                    tb.Insert(t.AsComment());
                }
                else if (t.IsDoctype || IsWhitespace(t) ||
                         (t.IsStartTag && t.AsStartTag().Name.Equals("html")))
                {
                    return(tb.Process(t, InBody));
                }
                else if (t.IsEOF)
                {
                    // nice work chuck
                }
                else if (t.IsStartTag && t.AsStartTag().Name.Equals("noframes"))
                {
                    return(tb.Process(t, InHead));
                }
                else
                {
                    tb.Error(this);
                    return(false);
                }

                return(true);
            }
Ejemplo n.º 2
0
        public override bool Process(Token token)
        {
            // start tag, end tag, doctype, comment, character, eof
            switch (token.Type)
            {
            case TokenType.StartTag:
                Insert(token.AsStartTag());
                break;

            case TokenType.EndTag:
                PopStackToClose(token.AsEndTag());
                break;

            case TokenType.Comment:
                Insert(token.AsComment());
                break;

            case TokenType.Character:
                Insert(token.AsCharacter());
                break;

            case TokenType.Doctype:
                Insert(token.AsDoctype());
                break;

            case TokenType.EOF:     // could put some normalisation here if desired
                break;

            default:
                HtmlWarning.UnexpectedTokenType();
                break;
            }
            return(true);
        }
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                switch (t.Type)
                {
                case TokenType.Character:

                    Token.Character c = t.AsCharacter();
                    if (c.Data.Equals(NullString))
                    {
                        // TODO: confirm that check
                        tb.Error(this);
                        return(false);
                    }
                    else if (IsWhitespace(c))
                    {
                        tb.ReconstructFormattingElements();
                        tb.Insert(c);
                    }
                    else
                    {
                        tb.ReconstructFormattingElements();
                        tb.Insert(c);
                        tb.FramesetOK = false;
                    }
                    break;

                case TokenType.Comment:
                    tb.Insert(t.AsComment());
                    break;

                case TokenType.Doctype:
                    tb.Error(this);
                    return(false);

                case TokenType.StartTag:
                    bool?result = HandleStartTag(t, tb);
                    if (result.HasValue)
                    {
                        return(result.Value);
                    }

                    break;

                case TokenType.EndTag:
                    result = HandleEndTag(t, tb);
                    if (result.HasValue)
                    {
                        return(result.Value);
                    }

                    break;

                case TokenType.EOF:
                    // TODO: error if stack contains something not dd, dt, li, p, tbody, td, tfoot, th, thead, tr, body, html
                    // stop parsing
                    break;
                }

                return(true);
            }
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (IsWhitespace(t))
                {
                    return(tb.Process(t, InBody));
                }
                else if (t.IsComment)
                {
                    tb.Insert(t.AsComment()); // into html node
                }
                else if (t.IsDoctype)
                {
                    tb.Error(this);
                    return(false);
                }
                else if (t.IsStartTag && t.AsStartTag().Name.Equals("html"))
                {
                    return(tb.Process(t, InBody));
                }
                else if (t.IsEndTag && t.AsEndTag().Name.Equals("html"))
                {
                    if (tb.IsFragmentParsing())
                    {
                        tb.Error(this);
                        return(false);
                    }
                    else
                    {
                        tb.Transition(AfterAfterBody);
                    }
                }
                else if (t.IsEOF)
                {
                    // chillax! we're done
                }
                else
                {
                    tb.Error(this);
                    tb.Transition(InBody);
                    return(tb.Process(t));
                }

                return(true);
            }
Ejemplo n.º 5
0
 public override bool Process(Token t, HtmlTreeBuilder tb)
 {
     if (IsWhitespace(t))
     {
         return(true);
     }
     else if (t.IsComment)
     {
         tb.Insert(t.AsComment());
     }
     else if (t.IsDoctype)
     {
         tb.Error(this);
         return(false);
     }
     else if (t.IsStartTag &&
              t.AsStartTag().Name.Equals("html"))
     {
         return(InBody.Process(t, tb)); // does not transition
     }
     else if (t.IsStartTag &&
              t.AsStartTag().Name.Equals("head"))
     {
         HtmlElement head = tb.Insert(t.AsStartTag());
         tb.HeadElement = head;
         tb.Transition(InHead);
     }
     else if (t.IsEndTag && (StringUtil.In(t.AsEndTag().Name, "head", "body", "html", "br")))
     {
         tb.Process(new Token.StartTag("head"));
         return(tb.Process(t));
     }
     else if (t.IsEndTag)
     {
         tb.Error(this);
         return(false);
     }
     else
     {
         tb.Process(new Token.StartTag("head"));
         return(tb.Process(t));
     }
     return(true);
 }
Ejemplo n.º 6
0
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (IsWhitespace(t))
                {
                    return(true); // ignore whitespace
                }
                else if (t.IsComment)
                {
                    tb.Insert(t.AsComment());
                }
                else if (t.IsDoctype)
                {
                    // TODO: parse error check on expected doctypes
                    // TODO: quirk state check on doctype ids
                    Token.Doctype d = t.AsDoctype();

                    var doctype = tb.Document.CreateDocumentType(
                        d.Name,
                        d.PublicIdentifier,
                        d.SystemIdentifier
                        );
                    doctype.BaseUri = tb.BaseUri;

                    tb.Document.Append(doctype);
                    if (d.ForceQuirks)
                    {
                        tb.Document.QuirksMode = QuirksMode.Quirks;
                    }

                    tb.Transition(BeforeHtml);
                }
                else
                {
                    // TODO: check not iframe srcdoc
                    tb.Transition(BeforeHtml);
                    return(tb.Process(t)); // re-process token
                }

                return(true);
            }
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (t.IsDoctype)
                {
                    tb.Error(this);
                    return(false);
                }
                else if (t.IsComment)
                {
                    tb.Insert(t.AsComment());
                }
                else if (IsWhitespace(t))
                {
                    return(true); // ignore whitespace
                }
                else if (t.IsStartTag && t.AsStartTag().Name.Equals("html"))
                {
                    tb.Insert(t.AsStartTag());
                    tb.Transition(BeforeHead);
                }
                else if (t.IsEndTag && (StringUtil.In(t.AsEndTag().Name, "head", "body", "html", "br")))
                {
                    return(AnythingElse(t, tb));
                }
                else if (t.IsEndTag)
                {
                    tb.Error(this);
                    return(false);
                }
                else
                {
                    return(AnythingElse(t, tb));
                }

                return(true);
            }
Ejemplo n.º 8
0
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                switch (t.Type)
                {
                case TokenType.Character:
                    Token.Character c = t.AsCharacter();
                    if (c.Data.Equals(NullString))
                    {
                        tb.Error(this);
                        return(false);
                    }
                    else
                    {
                        tb.Insert(c);
                    }
                    break;

                case TokenType.Comment:
                    tb.Insert(t.AsComment());
                    break;

                case TokenType.Doctype:
                    tb.Error(this);
                    return(false);

                case TokenType.StartTag:
                    Token.StartTag start = t.AsStartTag();
                    string         name  = start.Name;
                    if (name.Equals("html"))
                    {
                        return(tb.Process(start, InBody));
                    }

                    else if (name.Equals("option"))
                    {
                        tb.Process(new Token.EndTag("option"));
                        tb.Insert(start);
                    }
                    else if (name.Equals("optgroup"))
                    {
                        if (tb.CurrentElement.NodeName.Equals("option"))
                        {
                            tb.Process(new Token.EndTag("option"));
                        }

                        else if (tb.CurrentElement.NodeName.Equals("optgroup"))
                        {
                            tb.Process(new Token.EndTag("optgroup"));
                        }

                        tb.Insert(start);
                    }
                    else if (name.Equals("select"))
                    {
                        tb.Error(this);
                        return(tb.Process(new Token.EndTag("select")));
                    }
                    else if (StringUtil.In(name, "input", "keygen", "textarea"))
                    {
                        tb.Error(this);

                        if (!tb.InSelectScope("select"))
                        {
                            return(false);    // frag
                        }
                        tb.Process(new Token.EndTag("select"));
                        return(tb.Process(start));
                    }
                    else if (name.Equals("script"))
                    {
                        return(tb.Process(t, InHead));
                    }
                    else
                    {
                        return(AnythingElse(t, tb));
                    }
                    break;

                case TokenType.EndTag:
                    Token.EndTag end = t.AsEndTag();
                    name = end.Name;

                    if (name.Equals("optgroup"))
                    {
                        if (tb.CurrentElement.NodeName.Equals("option") &&
                            tb.AboveOnStack(tb.CurrentElement) != null &&
                            tb.AboveOnStack(tb.CurrentElement).NodeName.Equals("optgroup"))
                        {
                            tb.Process(new Token.EndTag("option"));
                        }

                        if (tb.CurrentElement.NodeName.Equals("optgroup"))
                        {
                            tb.Pop();
                        }
                        else
                        {
                            tb.Error(this);
                        }
                    }
                    else if (name.Equals("option"))
                    {
                        if (tb.CurrentElement.NodeName.Equals("option"))
                        {
                            tb.Pop();
                        }
                        else
                        {
                            tb.Error(this);
                        }
                    }
                    else if (name.Equals("select"))
                    {
                        if (!tb.InSelectScope(name))
                        {
                            tb.Error(this);
                            return(false);
                        }
                        else
                        {
                            tb.PopStackToClose(name);
                            tb.ResetInsertionMode();
                        }
                    }
                    else
                    {
                        return(AnythingElse(t, tb));
                    }
                    break;

                case TokenType.EOF:
                    if (!tb.CurrentElement.NodeName.Equals("html"))
                    {
                        tb.Error(this);
                    }
                    break;

                default:
                    return(AnythingElse(t, tb));
                }

                return(true);
            }
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (IsWhitespace(t))
                {
                    tb.Insert(t.AsCharacter());
                    return(true);
                }

                switch (t.Type)
                {
                case TokenType.Comment:
                    tb.Insert(t.AsComment());
                    break;

                case TokenType.Doctype:
                    tb.Error(this);
                    break;

                case TokenType.StartTag:
                    Token.StartTag startTag = t.AsStartTag();
                    string         name     = startTag.Name;

                    if (name.Equals("html"))
                    {
                        return(tb.Process(t, InBody));
                    }

                    else if (name.Equals("col"))
                    {
                        tb.InsertEmpty(startTag);
                    }

                    else
                    {
                        return(AnythingElse(t, tb));
                    }
                    break;

                case TokenType.EndTag:
                    Token.EndTag endTag = t.AsEndTag();
                    name = endTag.Name;

                    if (name.Equals("colgroup"))
                    {
                        if (tb.CurrentElement.NodeName.Equals("html"))       // frag case
                        {
                            tb.Error(this);
                            return(false);
                        }
                        else
                        {
                            tb.Pop();
                            tb.Transition(InTable);
                        }
                    }
                    else
                    {
                        return(AnythingElse(t, tb));
                    }
                    break;

                case TokenType.EOF:
                    if (tb.CurrentElement.NodeName.Equals("html"))
                    {
                        return(true);    // stop parsing; frag case
                    }
                    else
                    {
                        return(AnythingElse(t, tb));
                    }

                default:
                    return(AnythingElse(t, tb));
                }

                return(true);
            }
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (t.IsCharacter)
                {
                    tb.NewPendingTableCharacters();
                    tb.MarkInsertionMode();
                    tb.Transition(InTableText);
                    return(tb.Process(t));
                }
                else if (t.IsComment)
                {
                    tb.Insert(t.AsComment());
                    return(true);
                }
                else if (t.IsDoctype)
                {
                    tb.Error(this);
                    return(false);
                }
                else if (t.IsStartTag)
                {
                    Token.StartTag startTag = t.AsStartTag();
                    string         name     = startTag.Name;

                    switch (name)
                    {
                    case "caption":
                        tb.ClearStackToTableContext();
                        tb.InsertMarkerToFormattingElements();
                        tb.Insert(startTag);
                        tb.Transition(InCaption);
                        break;

                    case "colgroup":
                        tb.ClearStackToTableContext();
                        tb.Insert(startTag);
                        tb.Transition(InColumnGroup);
                        break;

                    case "col":
                        tb.Process(new Token.StartTag("colgroup"));
                        return(tb.Process(t));

                    case "table":
                        tb.Error(this);
                        bool processed = tb.Process(new Token.EndTag("table"));
                        if (processed)     // only ignored if in fragment
                        {
                            return(tb.Process(t));
                        }
                        break;

                    case "tbody":
                    case "tfoot":
                    case "thead":
                        tb.ClearStackToTableContext();
                        tb.Insert(startTag);
                        tb.Transition(InTableBody);
                        break;

                    case "td":
                    case "th":
                    case "tr":
                        tb.Process(new Token.StartTag("tbody"));
                        return(tb.Process(t));

                    case "style":
                    case "script":
                        return(tb.Process(t, InHead));

                    case "input":
                        if (!startTag.Attributes["type"]
                            .Equals("hidden", StringComparison.OrdinalIgnoreCase))
                        {
                            return(AnythingElse(t, tb));
                        }
                        else
                        {
                            tb.InsertEmpty(startTag);
                        }
                        break;

                    case "form":
                        tb.Error(this);
                        if (tb.FormElement != null)
                        {
                            return(false);
                        }

                        else
                        {
                            HtmlElement form = tb.InsertEmpty(startTag);
                            tb.FormElement = form;
                        }
                        break;

                    default:
                        return(AnythingElse(t, tb));
                    }
                }
                else if (t.IsEndTag)
                {
                    Token.EndTag endTag = t.AsEndTag();
                    string       name   = endTag.Name;

                    switch (name)
                    {
                    case "table":
                        if (!tb.InTableScope(name))
                        {
                            tb.Error(this);
                            return(false);
                        }
                        else
                        {
                            tb.PopStackToClose("table");
                        }
                        tb.ResetInsertionMode();
                        break;

                    case "body":
                    case "caption":
                    case "col":
                    case "colgroup":
                    case "html":
                    case "tbody":
                    case "td":
                    case "tfoot":
                    case "th":
                    case "thead":
                    case "tr":
                        tb.Error(this);
                        return(false);

                    default:
                        return(AnythingElse(t, tb));
                    }
                }
                else if (t.IsEOF)
                {
                    if (tb.CurrentElement.NodeName.Equals("html"))
                    {
                        tb.Error(this);
                    }

                    return(true); // stops parsing
                }

                return(AnythingElse(t, tb));
            }
Ejemplo n.º 11
0
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (IsWhitespace(t))
                {
                    tb.Insert(t.AsCharacter());
                }
                else if (t.IsComment)
                {
                    tb.Insert(t.AsComment());
                }
                else if (t.IsDoctype)
                {
                    tb.Error(this);
                    return(false);
                }
                else if (t.IsStartTag)
                {
                    Token.StartTag start = t.AsStartTag();
                    switch (start.Name)
                    {
                    case "html":
                        return(tb.Process(start, InBody));

                    case "frameset":
                        tb.Insert(start);
                        break;

                    case "frame":
                        tb.InsertEmpty(start);
                        break;

                    case "noframes":
                        return(tb.Process(start, InHead));

                    default:
                        tb.Error(this);
                        return(false);
                    }
                }
                else if (t.IsEndTag && t.AsEndTag().Name.Equals("frameset"))
                {
                    if (tb.CurrentElement.NodeName.Equals("html"))   // frag
                    {
                        tb.Error(this);
                        return(false);
                    }
                    else
                    {
                        tb.Pop();
                        if (!tb.IsFragmentParsing() && !tb.CurrentElement.NodeName.Equals("frameset"))
                        {
                            tb.Transition(AfterFrameset);
                        }
                    }
                }
                else if (t.IsEOF)
                {
                    if (!tb.CurrentElement.NodeName.Equals("html"))
                    {
                        tb.Error(this);
                        return(true);
                    }
                }
                else
                {
                    tb.Error(this);
                    return(false);
                }

                return(true);
            }
Ejemplo n.º 12
0
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (IsWhitespace(t))
                {
                    tb.Insert(t.AsCharacter());
                    return(true);
                }

                switch (t.Type)
                {
                case TokenType.Comment:
                    tb.Insert(t.AsComment());
                    break;

                case TokenType.Doctype:
                    tb.Error(this);
                    return(false);

                case TokenType.StartTag:
                    Token.StartTag start = t.AsStartTag();
                    string         name  = start.Name;

                    if (name.Equals("html"))
                    {
                        return(InBody.Process(t, tb));
                    }
                    else if (StringUtil.In(name, "base", "basefont", "bgsound", "command", "link"))
                    {
                        HtmlElement el = tb.InsertEmpty(start);
                        // jsoup special: update base the frist time it is seen
                        if (name.Equals("base") && el.HasAttribute("href"))
                        {
                            tb.MaybeSetBaseUri(el);
                        }
                    }
                    else if (name.Equals("meta"))
                    {
                        HtmlElement meta = tb.InsertEmpty(start);
                        // TODO: charset switches
                    }
                    else if (name.Equals("title"))
                    {
                        HandleRcData(start, tb);
                    }
                    else if (StringUtil.In(name, "noframes", "style"))
                    {
                        HandleRawtext(start, tb);
                    }
                    else if (name.Equals("noscript"))
                    {
                        // else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
                        tb.Insert(start);
                        tb.Transition(InHeadNoscript);
                    }
                    else if (name.Equals("script"))
                    {
                        // skips some script rules as won't execute them
                        tb.Insert(start);
                        tb.tokeniser.Transition(TokeniserState.ScriptData);
                        tb.MarkInsertionMode();
                        tb.Transition(Text);
                    }
                    else if (name.Equals("head"))
                    {
                        tb.Error(this);
                        return(false);
                    }
                    else
                    {
                        return(AnythingElse(t, tb));
                    }
                    break;

                case TokenType.EndTag:
                    Token.EndTag end = t.AsEndTag();
                    name = end.Name;
                    if (name.Equals("head"))
                    {
                        tb.Pop();
                        tb.Transition(AfterHead);
                    }
                    else if (StringUtil.In(name, "body", "html", "br"))
                    {
                        return(AnythingElse(t, tb));
                    }
                    else
                    {
                        tb.Error(this);
                        return(false);
                    }
                    break;

                default:
                    return(AnythingElse(t, tb));
                }
                return(true);
            }
            public override bool Process(Token t, HtmlTreeBuilder tb)
            {
                if (IsWhitespace(t))
                {
                    tb.Insert(t.AsCharacter());
                }
                else if (t.IsComment)
                {
                    tb.Insert(t.AsComment());
                }
                else if (t.IsDoctype)
                {
                    tb.Error(this);
                }
                else if (t.IsStartTag)
                {
                    Token.StartTag startTag = t.AsStartTag();
                    string         name     = startTag.Name;

                    switch (name)
                    {
                    case "html":
                        return(tb.Process(t, InBody));

                    case "body":
                        tb.Insert(startTag);
                        tb.FramesetOK = false;
                        tb.Transition(InBody);
                        break;

                    case "frameset":
                        tb.Insert(startTag);
                        tb.Transition(InFrameset);
                        break;

                    case "base":
                    case "basefont":
                    case "bgsound":
                    case "link":
                    case "meta":
                    case "noframes":
                    case "script":
                    case "style":
                    case "title":
                        tb.Error(this);

                        HtmlElement head = tb.HeadElement;
                        tb.Push(head);
                        tb.Process(t, InHead);
                        tb.RemoveFromStack(head);
                        break;

                    case "head":
                        tb.Error(this);
                        return(false);

                    default:
                        AnythingElse(t, tb);
                        break;
                    }
                }
                else if (t.IsEndTag)
                {
                    if (StringUtil.In(t.AsEndTag().Name, "body", "html"))
                    {
                        AnythingElse(t, tb);
                    }
                    else
                    {
                        tb.Error(this);
                        return(false);
                    }
                }
                else
                {
                    AnythingElse(t, tb);
                }
                return(true);
            }