public override void Read(Tokeniser t, CharacterReader r)
            {
                // Consumes exactly one character and dispatches on it.
                char next = r.Consume();

                if (next == '\t' || next == '\n' || next == '\f' || next == ' ')
                {
                    // Whitespace is skipped; no transition happens.
                    return;
                }

                if (next == '/')
                {
                    t.Transition(SelfClosingStartTag);
                }
                else if (next == '=')
                {
                    t.Transition(BeforeAttributeValue);
                }
                else if (next == '>')
                {
                    t.EmitTagPending();
                    t.Transition(Data);
                }
                else if (next == nullChar)
                {
                    // The null character is reported and replaced in the attribute name.
                    t.Error(this);
                    t.tagPending.AppendAttributeName(replacementChar);
                    t.Transition(AttributeName);
                }
                else if (next == eof)
                {
                    t.EofError(this);
                    t.Transition(Data);
                }
                else if (next == '"' || next == '\'' || next == '<')
                {
                    // Reported as an error, but the character still starts a new attribute name.
                    t.Error(this);
                    t.tagPending.NewAttribute();
                    t.tagPending.AppendAttributeName(next);
                    t.Transition(AttributeName);
                }
                else
                {
                    // A-Z, anything else: start a new attribute and let AttributeName re-read the char.
                    t.tagPending.NewAttribute();
                    r.Unconsume();
                    t.Transition(AttributeName);
                }
            }
예제 #2
0
 /// <summary>
 /// When <c>r.Matches('-')</c> holds, emits '-' and advance-transitions to
 /// <c>ScriptDataEscapedDashDash</c>; otherwise transitions to <c>ScriptData</c>.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (!r.Matches('-'))
     {
         t.Transition(ScriptData);
         return;
     }

     t.Emit('-');
     t.AdvanceTransition(ScriptDataEscapedDashDash);
 }
 /// <summary>
 /// When the reader matches a letter, creates a pending tag via <c>CreateTagPending(false)</c>
 /// and transitions to <c>ScriptDataEndTagName</c>; otherwise emits the literal "&lt;/" and
 /// transitions to <c>ScriptData</c>.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (!r.MatchesLetter())
     {
         t.Emit("</");
         t.Transition(ScriptData);
         return;
     }

     t.CreateTagPending(false);
     t.Transition(ScriptDataEndTagName);
 }
예제 #4
0
            /// <summary>
            /// Builds a bogus comment token from the text returned by <c>r.ConsumeTo('&gt;')</c>
            /// (after stepping the reader back by one character), emits it and advance-transitions
            /// to <c>Data</c>. Null characters in the comment text are reported as errors and
            /// stored as <c>replacementChar</c>.
            /// </summary>
            /// <param name="t">Tokeniser that receives the comment token, errors and the transition.</param>
            /// <param name="r">Reader supplying the comment text.</param>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                // TODO: handle bogus comment starting from eof. when does that trigger?
                // rewind to capture char that lead us here
                r.Unconsume();
                Token.Comment comment = new Token.Comment();

                // Copy char by char so that each null character can be swapped for the
                // replacement character instead of leaking into the comment data.
                foreach (char ch in r.ConsumeTo('>'))
                {
                    if (ch == nullChar)
                    {
                        t.Error(this);
                        comment.data.Append(replacementChar);
                    }
                    else
                    {
                        comment.data.Append(ch);
                    }
                }

                comment.IsBogus = true;

                t.Emit(comment);
                t.AdvanceTransition(Data);
            }
 /// <summary>
 /// When <c>r.Matches('/')</c> holds, creates a temp buffer and advance-transitions to
 /// <c>RawtextEndTagOpen</c>; otherwise emits '&lt;' and transitions to <c>Rawtext</c>.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (!r.Matches('/'))
     {
         t.Emit('<');
         t.Transition(Rawtext);
         return;
     }

     t.CreateTempBuffer();
     t.AdvanceTransition(RawtextEndTagOpen);
 }
 /// <summary>
 /// When <c>r.Matches('/')</c> holds, emits '/', creates a temp buffer and advance-transitions
 /// to <c>ScriptDataDoubleEscapeEnd</c>; otherwise transitions to <c>ScriptDataDoubleEscaped</c>.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (!r.Matches('/'))
     {
         t.Transition(ScriptDataDoubleEscaped);
         return;
     }

     t.Emit('/');
     t.CreateTempBuffer();
     t.AdvanceTransition(ScriptDataDoubleEscapeEnd);
 }
예제 #7
0
        /// <summary>
        /// Resets the parsing state for a new input: creates the document, the character reader,
        /// the tokeniser and an empty container stack, and records the error collection and base URI.
        /// </summary>
        /// <param name="input">Markup to parse; must not be null.</param>
        /// <param name="baseUri">Base URI passed to <c>CreateDocument</c> and stored on this instance.</param>
        /// <param name="errors">Error collection stored on this instance and handed to the tokeniser.</param>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        protected virtual void InitialiseParse(string input, Uri baseUri, HtmlParseErrorCollection errors)
        {
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }

            doc = CreateDocument(baseUri);
            reader = new CharacterReader(input);
            this.errors = errors;

            // The tokeniser is built on the reader created just above.
            tokeniser = new Tokeniser(reader, errors);
            stack = new DescendableLinkedList<DomContainer>();
            this.baseUri = baseUri;
        }
예제 #8
0
            // from before attribute name
            /// <summary>
            /// Appends the lower-cased run of characters up to the next delimiter to the pending
            /// attribute name, then consumes the following character and dispatches on it.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                string rawName = r.ConsumeToAny('\t', '\n', '\f', ' ', '/', '=', '>', nullChar, '"', '\'', '<');
                string loweredName = rawName.ToLowerInvariant();
                t.tagPending.AppendAttributeName(loweredName);

                char stop = r.Consume();

                if (stop == '\t' || stop == '\n' || stop == '\f' || stop == ' ')
                {
                    t.Transition(AfterAttributeName);
                }
                else if (stop == '/')
                {
                    t.Transition(SelfClosingStartTag);
                }
                else if (stop == '=')
                {
                    t.Transition(BeforeAttributeValue);
                }
                else if (stop == '>')
                {
                    t.EmitTagPending();
                    t.Transition(Data);
                }
                else if (stop == nullChar)
                {
                    t.Error(this);
                    t.tagPending.AppendAttributeName(replacementChar);
                }
                else if (stop == eof)
                {
                    t.EofError(this);
                    t.Transition(Data);
                }
                else if (stop == '"' || stop == '\'' || stop == '<')
                {
                    // Reported as an error, yet the character is kept as part of the name.
                    t.Error(this);
                    t.tagPending.AppendAttributeName(stop);
                }

                // No fallback branch: any other character was already taken by ConsumeToAny.
            }
 /// <summary>
 /// When the reader matches a letter, creates a pending tag via <c>CreateTagPending(false)</c>,
 /// seeds its name with the lower-cased current character, copies the character as-is into the
 /// data buffer and advance-transitions to <c>ScriptDataEscapedEndTagName</c>. Otherwise emits
 /// "&lt;/" and transitions to <c>ScriptDataEscaped</c>.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (!r.MatchesLetter())
     {
         t.Emit("</");
         t.Transition(ScriptDataEscaped);
         return;
     }

     t.CreateTagPending(false);

     char letter = r.Current;
     t.tagPending.AppendTagName(char.ToLowerInvariant(letter));
     t.dataBuffer.Append(letter);
     t.AdvanceTransition(ScriptDataEscapedEndTagName);
 }
예제 #10
0
            // from & in data

            /// <summary>
            /// Asks the tokeniser to consume a character reference. Emits the resolved text when
            /// one is returned, or a literal '&amp;' when the result is null, then transitions
            /// to <c>Data</c>.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                string resolved = t.ConsumeCharacterReference(null, false);

                if (resolved != null)
                {
                    t.Emit(resolved);
                }
                else
                {
                    t.Emit('&');
                }

                t.Transition(Data);
            }
예제 #11
0
            /// <summary>
            /// Consumes one character. Whitespace transitions to
            /// <c>BeforeDoctypeSystemIdentifier</c>; a quote reports an error and enters the matching
            /// quoted system-identifier state; every other input sets force-quirks on the pending
            /// doctype and emits it.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                char c = r.Consume();

                if (c == '\t' || c == '\n' || c == '\f' || c == ' ')
                {
                    t.Transition(BeforeDoctypeSystemIdentifier);
                    return;
                }

                if (c == '"')
                {
                    t.Error(this);
                    // system id empty
                    t.Transition(DoctypeSystemIdentifier_doubleQuoted);
                    return;
                }

                if (c == '\'')
                {
                    t.Error(this);
                    // system id empty
                    t.Transition(DoctypeSystemIdentifier_singleQuoted);
                    return;
                }

                // Remaining inputs ('>', eof, anything else) all force quirks and emit the doctype.
                bool atEof = c == eof;
                if (atEof)
                {
                    t.EofError(this);
                }
                else
                {
                    t.Error(this);
                }

                t.doctypePending.forceQuirks = true;
                t.EmitDoctypePending();

                if (c == '>' || atEof)
                {
                    t.Transition(Data);
                }

                // NOTE(review): for any other character no transition is made, so this state
                // handles the next character too and may emit the doctype again - confirm intended.
            }
            /// <summary>
            /// Appends the run of characters up to the next single quote, '&amp;' or null character
            /// to the pending attribute value, then consumes the delimiter and dispatches on it.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                string chunk = r.ConsumeToAny('\'', '&', nullChar);
                if (chunk.Length != 0)
                {
                    t.tagPending.AppendAttributeValue(chunk);
                }

                char stop = r.Consume();

                if (stop == '\'')
                {
                    t.Transition(AfterAttributeValue_quoted);
                }
                else if (stop == '&')
                {
                    // An unresolved reference (null result) is kept as a literal ampersand.
                    string reference = t.ConsumeCharacterReference('\'', true);
                    if (reference != null)
                    {
                        t.tagPending.AppendAttributeValue(reference);
                    }
                    else
                    {
                        t.tagPending.AppendAttributeValue('&');
                    }
                }
                else if (stop == nullChar)
                {
                    t.Error(this);
                    t.tagPending.AppendAttributeValue(replacementChar);
                }
                else if (stop == eof)
                {
                    t.EofError(this);
                    t.Transition(Data);
                }

                // No fallback branch: any other character was already taken by ConsumeToAny.
            }
예제 #13
0
            /// <summary>
            /// Accumulates letters into the pending tag name (lower-cased) and the data buffer
            /// (as-is). Once a non-letter is reached, an appropriate end tag followed by whitespace,
            /// '/' or '>' proceeds as a tag; every other situation is delegated to
            /// <c>AnythingElse</c>.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                if (r.MatchesLetter())
                {
                    string letters = r.ConsumeLetterSequence();
                    t.tagPending.AppendTagName(letters.ToLowerInvariant());
                    t.dataBuffer.Append(letters);
                    return;
                }

                if (!t.IsAppropriateEndTagToken() || r.IsEmpty)
                {
                    AnythingElse(t, r);
                    return;
                }

                char c = r.Consume();

                if (c == '\t' || c == '\n' || c == '\f' || c == ' ')
                {
                    t.Transition(BeforeAttributeName);
                }
                else if (c == '/')
                {
                    t.Transition(SelfClosingStartTag);
                }
                else if (c == '>')
                {
                    t.EmitTagPending();
                    t.Transition(Data);
                }
                else
                {
                    // Keep the consumed character in the buffer so AnythingElse emits it too.
                    t.dataBuffer.Append(c);
                    AnythingElse(t, r);
                }
            }
            /// <summary>
            /// Appends letter runs (lower-cased) to the pending doctype name; for any other
            /// character, consumes it and dispatches on it.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                if (r.MatchesLetter())
                {
                    string letters = r.ConsumeLetterSequence();
                    t.doctypePending.name.Append(letters.ToLowerInvariant());
                    return;
                }

                char c = r.Consume();

                if (c == '>')
                {
                    t.EmitDoctypePending();
                    t.Transition(Data);
                }
                else if (c == '\t' || c == '\n' || c == '\f' || c == ' ')
                {
                    t.Transition(AfterDoctypeName);
                }
                else if (c == nullChar)
                {
                    t.Error(this);
                    t.doctypePending.name.Append(replacementChar);
                }
                else if (c == eof)
                {
                    t.EofError(this);
                    t.doctypePending.forceQuirks = true;
                    t.EmitDoctypePending();
                    t.Transition(Data);
                }
                else
                {
                    // Non-letter characters are appended to the name unchanged.
                    t.doctypePending.name.Append(c);
                }
            }
 /// <summary>
 /// A letter starts a fresh temp buffer with its lower-cased form, emits "&lt;" plus the
 /// letter and advance-transitions to <c>ScriptDataDoubleEscapeStart</c>. A '/' starts a fresh
 /// temp buffer and advance-transitions to <c>ScriptDataEscapedEndTagOpen</c>. Anything else
 /// emits '&lt;' and transitions to <c>ScriptDataEscaped</c>.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (r.MatchesLetter())
     {
         t.CreateTempBuffer();

         char letter = r.Current;
         t.dataBuffer.Append(char.ToLowerInvariant(letter));
         t.Emit("<" + letter);
         t.AdvanceTransition(ScriptDataDoubleEscapeStart);
         return;
     }

     if (r.Matches('/'))
     {
         t.CreateTempBuffer();
         t.AdvanceTransition(ScriptDataEscapedEndTagOpen);
         return;
     }

     t.Emit('<');
     t.Transition(ScriptDataEscaped);
 }
예제 #16
0
            /// <summary>
            /// Looks at the current character without consuming it: a null character is reported,
            /// skipped and emitted as <c>replacementChar</c>; eof emits the EOF token; anything
            /// else emits the text returned by <c>r.ConsumeTo(nullChar)</c>.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                char current = r.Current;

                if (current == nullChar)
                {
                    t.Error(this);
                    r.Advance();
                    t.Emit(replacementChar);
                }
                else if (current == eof)
                {
                    t.Emit(Token.EOF.Instance);
                }
                else
                {
                    string text = r.ConsumeTo(nullChar);
                    t.Emit(text);
                }
            }
            /// <summary>
            /// Consumes one character: '/' creates a temp buffer and transitions to
            /// <c>ScriptDataEndTagOpen</c>; '!' emits "&lt;!" and transitions to
            /// <c>ScriptDataEscapeStart</c>; anything else emits "&lt;", puts the character back
            /// and transitions to <c>ScriptData</c>.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                char c = r.Consume();

                if (c == '/')
                {
                    t.CreateTempBuffer();
                    t.Transition(ScriptDataEndTagOpen);
                }
                else if (c == '!')
                {
                    t.Emit("<!");
                    t.Transition(ScriptDataEscapeStart);
                }
                else
                {
                    t.Emit("<");
                    r.Unconsume();
                    t.Transition(ScriptData);
                }
            }
예제 #18
0
            /// <summary>
            /// Consumes one character. On '>' or eof the pending doctype is emitted and the
            /// tokeniser transitions to <c>Data</c>; every other character is ignored.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                char c = r.Consume();

                if (c == '>' || c == eof)
                {
                    t.EmitDoctypePending();
                    t.Transition(Data);
                }

                // Any other character is dropped.
            }
            /// <summary>
            /// Consumes one character and dispatches on it: '>' emits the pending comment; eof
            /// reports an eof error and emits it; every other character is reported as an error
            /// and either extends the comment data or changes state.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                char c = r.Consume();

                if (c == '>')
                {
                    t.EmitCommentPending();
                    t.Transition(Data);
                    return;
                }

                if (c == eof)
                {
                    t.EofError(this);
                    t.EmitCommentPending();
                    t.Transition(Data);
                    return;
                }

                // Everything below is an error condition.
                t.Error(this);

                if (c == '!')
                {
                    t.Transition(CommentEndBang);
                }
                else if (c == '-')
                {
                    // Extra dash: add it to the data; no transition is made.
                    t.commentPending.data.Append('-');
                }
                else if (c == nullChar)
                {
                    t.commentPending.data.Append("--").Append(replacementChar);
                    t.Transition(Comment);
                }
                else
                {
                    t.commentPending.data.Append("--").Append(c);
                    t.Transition(Comment);
                }
            }
            /// <summary>
            /// Starts a new pending doctype. A letter creates the pending doctype and hands over
            /// to <c>DoctypeName</c> without consuming; otherwise one character is consumed and
            /// dispatched on. Whitespace is ignored.
            /// </summary>
            /// <param name="t">Tokeniser that owns the pending doctype and receives transitions.</param>
            /// <param name="r">Reader supplying the input characters.</param>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                if (r.MatchesLetter())
                {
                    t.CreateDoctypePending();
                    t.Transition(DoctypeName);
                    return;
                }

                char c = r.Consume();

                switch (c)
                {
                case '\t':
                case '\n':
                case '\f':
                case ' ':
                    break;     // ignore whitespace

                case nullChar:
                    t.Error(this);
                    // Create the pending doctype before writing to it, as every other branch
                    // does; otherwise the name is appended to a missing or previous doctype.
                    t.CreateDoctypePending();
                    t.doctypePending.name.Append(replacementChar);
                    t.Transition(DoctypeName);
                    break;

                case eof:
                    t.EofError(this);
                    t.CreateDoctypePending();
                    t.doctypePending.forceQuirks = true;
                    t.EmitDoctypePending();
                    t.Transition(Data);
                    break;

                default:
                    t.CreateDoctypePending();
                    t.doctypePending.name.Append(c);
                    t.Transition(DoctypeName);
                    break;
                }
            }
            /// <summary>
            /// Collects letters into the data buffer (lower-cased) while emitting them unchanged.
            /// On whitespace, '/' or '>' it transitions to <c>ScriptDataEscaped</c> when the buffer
            /// equals "script", otherwise to <c>ScriptDataDoubleEscaped</c>, and emits the
            /// character. Any other character is put back and <c>ScriptDataDoubleEscaped</c> is
            /// entered.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                if (r.MatchesLetter())
                {
                    string letters = r.ConsumeLetterSequence();
                    t.dataBuffer.Append(letters.ToLowerInvariant());
                    t.Emit(letters);
                    return;
                }

                char c = r.Consume();
                bool endsName = c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '/' || c == '>';

                if (!endsName)
                {
                    r.Unconsume();
                    t.Transition(ScriptDataDoubleEscaped);
                    return;
                }

                string bufferedName = t.dataBuffer.ToString();
                if (bufferedName == "script")
                {
                    t.Transition(ScriptDataEscaped);
                }
                else
                {
                    t.Transition(ScriptDataDoubleEscaped);
                }

                t.Emit(c);
            }
예제 #22
0
 // from < in rcdata
 /// <summary>
 /// A '/' creates a temp buffer and advance-transitions to <c>RCDATAEndTagOpen</c>. A letter,
 /// when the remaining input does not contain the appropriate end tag, emits that end tag,
 /// puts the '&lt;' back and transitions to <c>Data</c>. Anything else emits "&lt;" and
 /// transitions to <c>Rcdata</c>.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (r.Matches('/'))
     {
         t.CreateTempBuffer();
         t.AdvanceTransition(RCDATAEndTagOpen);
         return;
     }

     if (r.MatchesLetter() && !r.ContainsIgnoreCase("</" + t.AppropriateEndTagName()))
     {
         // diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than
         // consuming to EOF; break out here
         t.tagPending = new Token.EndTag(t.AppropriateEndTagName());
         t.EmitTagPending();
         r.Unconsume(); // undo "<"
         t.Transition(Data);
         return;
     }

     t.Emit("<");
     t.Transition(Rcdata);
 }
            /// <summary>
            /// On empty input reports an eof error and transitions to <c>Data</c>. Otherwise
            /// consumes one character: '-' is emitted with no transition; '&lt;' transitions to
            /// <c>ScriptDataEscapedLessThanSign</c>; '>' is emitted and leads to
            /// <c>ScriptData</c>; everything else is emitted (null as <c>replacementChar</c>, with
            /// an error) and leads to <c>ScriptDataEscaped</c>.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                if (r.IsEmpty)
                {
                    t.EofError(this);
                    t.Transition(Data);
                    return;
                }

                char c = r.Consume();

                if (c == '-')
                {
                    t.Emit(c);
                }
                else if (c == '<')
                {
                    t.Transition(ScriptDataEscapedLessThanSign);
                }
                else if (c == '>')
                {
                    t.Emit(c);
                    t.Transition(ScriptData);
                }
                else if (c == nullChar)
                {
                    t.Error(this);
                    t.Emit(replacementChar);
                    t.Transition(ScriptDataEscaped);
                }
                else
                {
                    t.Emit(c);
                    t.Transition(ScriptDataEscaped);
                }
            }
예제 #24
0
            /// <summary>
            /// Consumes one character. '>' marks the pending tag as self-closing, emits it and
            /// transitions to <c>Data</c>; eof reports an eof error and transitions to
            /// <c>Data</c>; any other character is an error and is handed back to the reader so
            /// that <c>BeforeAttributeName</c> processes it.
            /// </summary>
            /// <param name="t">Tokeniser that owns the pending tag and receives transitions.</param>
            /// <param name="r">Reader supplying the input characters.</param>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                char c = r.Consume();

                switch (c)
                {
                case '>':
                    t.tagPending.selfClosing = true;
                    t.EmitTagPending();
                    t.Transition(Data);
                    break;

                case eof:
                    t.EofError(this);
                    t.Transition(Data);
                    break;

                default:
                    t.Error(this);
                    // Reconsume: without this the character after a stray '/' is lost,
                    // e.g. the 'h' of "<a /href=x>".
                    r.Unconsume();
                    t.Transition(BeforeAttributeName);
                    break;
                }
            }
            // from < or </ in data, will have start or end tag pending
            /// <summary>
            /// Appends the lower-cased run of characters up to the next delimiter to the pending
            /// tag name, then consumes the delimiter and dispatches on it.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                // previous TagOpen state did NOT consume, will have a letter char in current
                string rawName = r.ConsumeToAny('\t', '\n', '\f', ' ', '/', '>', nullChar);
                t.tagPending.AppendTagName(rawName.ToLowerInvariant());

                char stop = r.Consume();

                if (stop == '\t' || stop == '\n' || stop == '\f' || stop == ' ')
                {
                    t.Transition(BeforeAttributeName);
                }
                else if (stop == '/')
                {
                    t.Transition(SelfClosingStartTag);
                }
                else if (stop == '>')
                {
                    t.EmitTagPending();
                    t.Transition(Data);
                }
                else if (stop == nullChar)
                {
                    // replacement
                    t.tagPending.AppendTagName(replacementStr);
                }
                else if (stop == eof)
                {
                    // should emit pending tag?
                    t.EofError(this);
                    t.Transition(Data);
                }

                // No fallback branch: any other character was already taken by ConsumeToAny.
            }
예제 #26
0
 /// <summary>
 /// Empty input reports an eof error, emits "&lt;/" and transitions to <c>Data</c>. A letter
 /// creates a pending tag via <c>CreateTagPending(false)</c> and transitions to
 /// <c>TagName</c>. A '>' is an error that advance-transitions to <c>Data</c>; anything else
 /// is an error that advance-transitions to <c>BogusComment</c>.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (r.IsEmpty)
     {
         t.EofError(this);
         t.Emit("</");
         t.Transition(Data);
         return;
     }

     if (r.MatchesLetter())
     {
         t.CreateTagPending(false);
         t.Transition(TagName);
         return;
     }

     // Both remaining outcomes are errors; they differ only in the target state.
     bool closesImmediately = r.Matches('>');
     t.Error(this);

     if (closesImmediately)
     {
         t.AdvanceTransition(Data);
     }
     else
     {
         t.AdvanceTransition(BogusComment);
     }
 }
예제 #27
0
 /// <summary>
 /// Tries, in order, to match and consume "--", "DOCTYPE" (ignoring case) and "[CDATA[",
 /// entering <c>CommentStart</c>, <c>Doctype</c> or <c>CdataSection</c> respectively. When
 /// none matches, reports an error and advance-transitions to <c>BogusComment</c>.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (r.MatchConsume("--"))
     {
         t.CreateCommentPending();
         t.Transition(CommentStart);
         return;
     }

     if (r.MatchConsumeIgnoreCase("DOCTYPE"))
     {
         t.Transition(Doctype);
         return;
     }

     if (r.MatchConsume("[CDATA["))
     {
         // TODO: should actually check current namespace, and only non-html allows cdata. until namespace
         // is implemented properly, keep handling as cdata (HtmlCDataSection)
         //} else if (!t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")) {
         t.Transition(CdataSection);
         return;
     }

     t.Error(this);
     t.AdvanceTransition(BogusComment); // advance so this char gets in bogus comment data's rewind
 }
예제 #28
0
            /// <summary>
            /// Looks at the current character without consuming it: '&lt;' advance-transitions to
            /// <c>ScriptDataLessthanSign</c>; a null character is reported, skipped and emitted as
            /// <c>replacementChar</c>; eof emits the EOF token; anything else emits the text
            /// returned by <c>r.ConsumeToAny('&lt;', nullChar)</c>.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                char current = r.Current;

                if (current == '<')
                {
                    t.AdvanceTransition(ScriptDataLessthanSign);
                }
                else if (current == nullChar)
                {
                    t.Error(this);
                    r.Advance();
                    t.Emit(replacementChar);
                }
                else if (current == eof)
                {
                    t.Emit(Token.EOF.Instance);
                }
                else
                {
                    string text = r.ConsumeToAny('<', nullChar);
                    t.Emit(text);
                }
            }
예제 #29
0
 /// <summary>
 /// Fallback path: emits "&lt;/" followed by the current data buffer contents as text and
 /// transitions to <c>ScriptDataEscaped</c>. The reader argument is not used.
 /// </summary>
 private void AnythingElse(Tokeniser t, CharacterReader r)
 {
     string buffered = t.dataBuffer.ToString();
     t.Emit("</" + buffered);
     t.Transition(ScriptDataEscaped);
 }
예제 #30
0
 /// <summary>
 /// Performs one step of this tokeniser state. Implementations visible alongside this
 /// declaration read from <paramref name="r"/> and emit tokens, report errors and change state
 /// through <paramref name="t"/>.
 /// </summary>
 /// <param name="t">Tokeniser driving the state machine.</param>
 /// <param name="r">Character reader over the input.</param>
 public abstract void Read(Tokeniser t, CharacterReader r);