Beispiel #1
0
 public void consume()
 {
     // Walks a three-character input one character at a time, checking position and current char.
     CharacterReader reader = new CharacterReader("one");

     // Nothing consumed yet: positioned on 'o'.
     Assert.AreEqual(0, reader.Position);
     Assert.AreEqual('o', reader.Current());

     // Consuming returns 'o' and moves the position to 1.
     Assert.AreEqual('o', reader.Consume());
     Assert.AreEqual(1, reader.Position);

     // Current() peeks: the position is still 1 afterwards.
     Assert.AreEqual('n', reader.Current());
     Assert.AreEqual(1, reader.Position);

     Assert.AreEqual('n', reader.Consume());
     Assert.AreEqual('e', reader.Consume());
     Assert.IsTrue(reader.IsEmpty());

     // Past the end, Consume() keeps returning EOF and the reader stays empty.
     Assert.AreEqual(CharacterReader.EOF, reader.Consume());
     Assert.IsTrue(reader.IsEmpty());
     Assert.AreEqual(CharacterReader.EOF, reader.Consume());
 }
Beispiel #2
0
        /// <summary>
        /// Utility method to consume reader and unescape entities found within.
        /// </summary>
        /// <param name="inAttribute">Passed through unchanged to <c>ConsumeCharacterReference</c> for every '&amp;' encountered.</param>
        /// <returns>Unescaped string from reader</returns>
        public string UnescapeEntities(bool inAttribute)
        {
            StringBuilder builder = new StringBuilder();

            while (!_reader.IsEmpty())
            {
                // Copy everything up to the next '&' verbatim.
                builder.Append(_reader.ConsumeTo('&'));
                if (!_reader.Matches('&'))
                {
                    continue;
                }

                _reader.Consume();
                char? c = ConsumeCharacterReference(null, inAttribute);
                if (c.HasValue)
                {
                    // Append the char itself; passing the nullable would select Append(object) and box it.
                    builder.Append(c.Value);
                }
                else
                {
                    // No reference was produced: keep the ampersand as literal text.
                    builder.Append('&');
                }
            }
            return builder.ToString();
        }
Beispiel #3
0
 // Data state: collects plain characters until a character reference or a tag begins.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     switch (r.Current())
     {
         case _eof:
             t.Emit(new Token.EOF());
             break;
         case _nullChar:
             // Reported as an error, but the consumed character is emitted as-is (no replacement character).
             t.Error(this);
             t.Emit(r.Consume());
             break;
         case '<':
             t.AdvanceTransition(TagOpen);
             break;
         case '&':
             t.AdvanceTransition(CharacterReferenceInData);
             break;
         default:
             // Emit the whole run up to the next character that needs its own handling.
             string text = r.ConsumeToAny('&', '<', _nullChar);
             t.Emit(text);
             break;
     }
 }
Beispiel #4
0
 public void matchesIgnoreCase()
 {
     CharacterReader reader = new CharacterReader("One Two Three");

     // Single-character prefix: the ignore-case match accepts either case, the plain char match does not.
     Assert.IsTrue(reader.MatchesIgnoreCase("O"));
     Assert.IsTrue(reader.MatchesIgnoreCase("o"));
     Assert.IsTrue(reader.Matches('O'));
     Assert.IsFalse(reader.Matches('o'));

     // Whole input and shorter prefixes, in assorted casing.
     Assert.IsTrue(reader.MatchesIgnoreCase("One Two Three"));
     Assert.IsTrue(reader.MatchesIgnoreCase("ONE two THREE"));
     Assert.IsTrue(reader.MatchesIgnoreCase("One"));
     Assert.IsTrue(reader.MatchesIgnoreCase("one"));

     // After consuming the leading 'O', matching is against "ne Two Three".
     Assert.AreEqual('O', reader.Consume());
     Assert.IsFalse(reader.MatchesIgnoreCase("One"));
     Assert.IsTrue(reader.MatchesIgnoreCase("NE Two Three"));

     // A candidate longer than the remaining input does not match.
     Assert.IsFalse(reader.MatchesIgnoreCase("ne Two Three Four"));

     // Once everything is consumed there is nothing left to match.
     Assert.AreEqual("ne Two Three", reader.ConsumeToEnd());
     Assert.IsFalse(reader.MatchesIgnoreCase("ne"));
 }
Beispiel #5
0
 public void consumeLetterThenDigitSequence()
 {
     CharacterReader reader = new CharacterReader("One12 Two &bar; qux");

     // Letters followed by digits come back as one sequence; the space is left behind.
     Assert.AreEqual("One12", reader.ConsumeLetterThenDigitSequence());
     Assert.AreEqual(' ', reader.Consume());

     // A letters-only run is returned as well; the remainder starts at the following space.
     Assert.AreEqual("Two", reader.ConsumeLetterThenDigitSequence());
     Assert.AreEqual(" &bar; qux", reader.ConsumeToEnd());
 }
Beispiel #6
0
 public void advance()
 {
     CharacterReader reader = new CharacterReader("One Two Three");

     Assert.AreEqual('O', reader.Consume());

     // Advance() moves past 'n' without returning it, so the next consumed char is 'e'.
     reader.Advance();
     Assert.AreEqual('e', reader.Consume());
 }
Beispiel #7
0
 public void consumeToChar()
 {
     CharacterReader reader = new CharacterReader("One Two Three");

     Assert.AreEqual("One ", reader.ConsumeTo('T'));

     // Already sitting on the 'T' of "Two": nothing is consumed.
     Assert.AreEqual("", reader.ConsumeTo('T'));
     Assert.AreEqual('T', reader.Consume());

     Assert.AreEqual("wo ", reader.ConsumeTo('T'));
     Assert.AreEqual('T', reader.Consume());

     // No further 'T' in the input: the rest is consumed to the end.
     Assert.AreEqual("hree", reader.ConsumeTo('T'));
 }
Beispiel #8
0
 public void mark()
 {
     CharacterReader reader = new CharacterReader("one");

     // Mark after the first character has been consumed.
     reader.Consume();
     reader.Mark();

     // Read the rest of the input.
     Assert.AreEqual('n', reader.Consume());
     Assert.AreEqual('e', reader.Consume());
     Assert.IsTrue(reader.IsEmpty());

     // Rewinding returns to the marked spot, so 'n' is read again.
     reader.RewindToMark();
     Assert.AreEqual('n', reader.Consume());
 }
Beispiel #9
0
 // Entered after a tag name (<xxx): skips whitespace and decides how the next attribute, if any, begins.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char next = r.Consume();
     switch (next)
     {
         case '\t': case '\n': case '\r': case '\f': case ' ':
             // whitespace is skipped; the state does not change
             break;
         case '>':
             t.EmitTagPending();
             t.Transition(Data);
             break;
         case '/':
             t.Transition(SelfClosingStartTag);
             break;
         case _eof:
             t.EofError(this);
             t.Transition(Data);
             break;
         case '"': case '\'': case '<': case '=':
             // an error, but the character still becomes the first character of a new attribute name
             t.Error(this);
             t.TagPending.NewAttribute();
             t.TagPending.AppendAttributeName(next);
             t.Transition(AttributeName);
             break;
         case _nullChar:
             // an error; the character is pushed back before switching to AttributeName
             t.Error(this);
             t.TagPending.NewAttribute();
             r.Unconsume();
             t.Transition(AttributeName);
             break;
         default:
             // A-Z and anything else: start a new attribute and push the character back
             t.TagPending.NewAttribute();
             r.Unconsume();
             t.Transition(AttributeName);
             break;
     }
 }
Beispiel #10
0
 // Skips whitespace, then expects the opening quote of a doctype system identifier.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char next = r.Consume();
     switch (next)
     {
         case '\t': case '\n': case '\r': case '\f': case ' ':
             // whitespace is skipped; the state does not change
             break;
         case '\'':
             // opening single quote: continue in the single-quoted system identifier state
             t.Transition(DoctypeSystemIdentifierSingleQuoted);
             break;
         case '"':
             // opening double quote: continue in the double-quoted system identifier state
             t.Transition(DoctypeSystemIdentifierDoubleQuoted);
             break;
         case _eof:
             // input ended inside the doctype: emit what we have, flagged as quirks
             t.EofError(this);
             t.DoctypePending.ForceQuirks = true;
             t.EmitDoctypePending();
             t.Transition(Data);
             break;
         case '>':
             // doctype closed before an identifier appeared: emit it, flagged as quirks
             t.Error(this);
             t.DoctypePending.ForceQuirks = true;
             t.EmitDoctypePending();
             t.Transition(Data);
             break;
         default:
             // anything else is an error: flag quirks and continue in the bogus doctype state
             t.Error(this);
             t.DoctypePending.ForceQuirks = true;
             t.Transition(BogusDoctype);
             break;
     }
 }
Beispiel #11
0
 // Accumulates the doctype name until whitespace, '>' or end of input.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     // A run of letters is appended to the name in one go, lower-cased.
     if (r.MatchesLetter())
     {
         string letters = r.ConsumeLetterSequence();
         t.DoctypePending.Name.Append(letters.ToLowerInvariant());
         return;
     }

     char next = r.Consume();
     switch (next)
     {
         case '\t': case '\n': case '\r': case '\f': case ' ':
             // whitespace ends the name
             t.Transition(AfterDoctypeName);
             break;
         case '>':
             t.EmitDoctypePending();
             t.Transition(Data);
             break;
         case _eof:
             // input ended inside the doctype: emit it flagged as quirks
             t.EofError(this);
             t.DoctypePending.ForceQuirks = true;
             t.EmitDoctypePending();
             t.Transition(Data);
             break;
         case _nullChar:
             // error: the replacement character is stored in place of the consumed one
             t.Error(this);
             t.DoctypePending.Name.Append(_replacementChar);
             break;
         default:
             // any other character is appended to the name unchanged
             t.DoctypePending.Name.Append(next);
             break;
     }
 }
Beispiel #12
0
 // Reads the start of a doctype name: skips leading whitespace, creates the pending doctype
 // token and switches to DoctypeName, or emits a quirks-flagged doctype at end of input.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (r.MatchesLetter())
     {
         // The letter is not consumed here; only the token is created before switching state.
         t.CreateDoctypePending();
         t.Transition(DoctypeName);
         return;
     }
     char c = r.Consume();
     switch (c)
     {
         case '\t':
         case '\n':
         case '\r':
         case '\f':
         case ' ':
             break; // ignore whitespace
         case _nullChar:
             t.Error(this);
             // Create the token first, as every other branch does, so the replacement character
             // is not appended to a missing or previously used doctype token.
             t.CreateDoctypePending();
             t.DoctypePending.Name.Append(_replacementChar);
             t.Transition(DoctypeName);
             break;
         case _eof:
             // Input ended before any name: emit a fresh doctype flagged as quirks.
             t.EofError(this);
             t.CreateDoctypePending();
             t.DoctypePending.ForceQuirks = true;
             t.EmitDoctypePending();
             t.Transition(Data);
             break;
         default:
             // Any other character becomes the first character of the name.
             t.CreateDoctypePending();
             t.DoctypePending.Name.Append(c);
             t.Transition(DoctypeName);
             break;
     }
 }
Beispiel #13
0
 // Decides how a comment continues or ends based on the next character; every continuing
 // branch first appends the literal "--!" to the comment data.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char next = r.Consume();
     switch (next)
     {
         case '>':
             // the comment is complete
             t.EmitCommentPending();
             t.Transition(Data);
             break;
         case _eof:
             // input ended: report it and emit the comment collected so far
             t.EofError(this);
             t.EmitCommentPending();
             t.Transition(Data);
             break;
         case '-':
             t.CommentPending.Data.Append("--!");
             t.Transition(CommentEndDash);
             break;
         case _nullChar:
             // error: the replacement character is stored in place of the consumed one
             t.Error(this);
             t.CommentPending.Data.Append("--!").Append(_replacementChar);
             t.Transition(Comment);
             break;
         default:
             // any other character is kept and the comment continues
             t.CommentPending.Data.Append("--!").Append(next);
             t.Transition(Comment);
             break;
     }
 }
Beispiel #14
0
 // Handles the character that follows '/' inside a tag: '>' completes a self-closing tag,
 // end of input aborts the tag, anything else is an error handed back to BeforeAttributeName.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char c = r.Consume();
     switch (c)
     {
         case '>':
             t.TagPending.IsSelfClosing = true;
             t.EmitTagPending();
             t.Transition(Data);
             break;
         case _eof:
             t.EofError(this);
             t.Transition(Data);
             break;
         default:
             // Unexpected character: report it and push it back so BeforeAttributeName
             // processes it instead of the character being silently dropped.
             t.Error(this);
             r.Unconsume();
             t.Transition(BeforeAttributeName);
             break;
     }
 }
Beispiel #15
0
 // Decides what follows an attribute: more attributes, a self-closing slash, or the end of the tag.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char next = r.Consume();
     switch (next)
     {
         case '>':
             t.EmitTagPending();
             t.Transition(Data);
             break;
         case '/':
             t.Transition(SelfClosingStartTag);
             break;
         case '\t': case '\n': case '\r': case '\f': case ' ':
             // whitespace: go look for the next attribute
             t.Transition(BeforeAttributeName);
             break;
         case _eof:
             t.EofError(this);
             t.Transition(Data);
             break;
         default:
             // anything else is an error; the character is pushed back for BeforeAttributeName
             t.Error(this);
             r.Unconsume();
             t.Transition(BeforeAttributeName);
             break;
     }
 }
Beispiel #16
0
            // Collects letters into the data buffer (lower-cased) while emitting them unchanged; a
            // delimiter then selects the next state depending on whether the buffer reads "script".
            public override void Read(Tokeniser t, CharacterReader r)
            {
                if (r.MatchesLetter())
                {
                    string letters = r.ConsumeLetterSequence();
                    t.DataBuffer.Append(letters.ToLowerInvariant());
                    t.Emit(letters);
                    return;
                }

                char next = r.Consume();
                switch (next)
                {
                    case '\t': case '\n': case '\r': case '\f': case ' ':
                    case '/':
                    case '>':
                        if (!t.DataBuffer.ToString().Equals("script"))
                        {
                            t.Transition(ScriptDataDoubleEscaped);
                        }
                        else
                        {
                            t.Transition(ScriptDataEscaped);
                        }
                        // the delimiter itself is emitted after the state has been switched
                        t.Emit(next);
                        break;
                    default:
                        // not a delimiter: push the character back and stay double-escaped
                        r.Unconsume();
                        t.Transition(ScriptDataDoubleEscaped);
                        break;
                }
            }
Beispiel #17
0
 // Accumulates a single-quoted doctype system identifier until its closing quote.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char next = r.Consume();
     switch (next)
     {
         case '\'':
             // closing quote: the identifier is complete
             t.Transition(AfterDoctypeSystemIdentifier);
             break;
         case '>':
             // tag closed inside the quoted value: error, emit the doctype flagged as quirks
             t.Error(this);
             t.DoctypePending.ForceQuirks = true;
             t.EmitDoctypePending();
             t.Transition(Data);
             break;
         case _eof:
             // input ended inside the quoted value: emit the doctype flagged as quirks
             t.EofError(this);
             t.DoctypePending.ForceQuirks = true;
             t.EmitDoctypePending();
             t.Transition(Data);
             break;
         case _nullChar:
             // error: the replacement character is stored in place of the consumed one
             t.Error(this);
             t.DoctypePending.SystemIdentifier.Append(_replacementChar);
             break;
         default:
             // any other character is part of the identifier
             t.DoctypePending.SystemIdentifier.Append(next);
             break;
     }
 }
Beispiel #18
0
 // After the system identifier: skips whitespace and waits for the closing '>'.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char next = r.Consume();
     switch (next)
     {
         case '>':
             t.EmitDoctypePending();
             t.Transition(Data);
             break;
         case '\t': case '\n': case '\r': case '\f': case ' ':
             // whitespace is skipped; the state does not change
             break;
         case _eof:
             // input ended before '>': emit the doctype flagged as quirks
             t.EofError(this);
             t.DoctypePending.ForceQuirks = true;
             t.EmitDoctypePending();
             t.Transition(Data);
             break;
         default:
             // error, but ForceQuirks is deliberately NOT set in this branch
             t.Error(this);
             t.Transition(BogusDoctype);
             break;
     }
 }
Beispiel #19
0
 // Discards characters until '>' or end of input, then emits the pending doctype.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char next = r.Consume();

     // '>' and end of input are handled identically; every other character is ignored.
     if (next == '>' || next == _eof)
     {
         t.EmitDoctypePending();
         t.Transition(Data);
     }
 }
Beispiel #20
0
        public void nextIndexOfChar()
        {
            string text = "blah blah";
            CharacterReader reader = new CharacterReader(text);

            // -1 is expected for a character that does not occur in the input.
            Assert.AreEqual(-1, reader.NextIndexOf('x'));
            Assert.AreEqual(3, reader.NextIndexOf('h'));

            // Consume up to the first 'h', then the 'h' itself.
            string consumed = reader.ConsumeTo('h');
            Assert.AreEqual("bla", consumed);
            reader.Consume();

            // With " blah" remaining, the expected index of 'l' is 2.
            Assert.AreEqual(2, reader.NextIndexOf('l'));
            Assert.AreEqual(" blah", reader.ConsumeToEnd());

            // Nothing left to search.
            Assert.AreEqual(-1, reader.NextIndexOf('x'));
        }
Beispiel #21
0
            // Entered from '<' or '</' in data with a start or end tag pending: reads the tag name.
            public override void Read(Tokeniser t, CharacterReader r)
            {
                // The previous TagOpen state did not consume, so a letter is expected at the current position.
                // Everything up to the next delimiter is appended to the tag name, lower-cased.
                string namePart = r.ConsumeToAny('\t', '\n', '\r', '\f', ' ', '/', '>', _nullChar);
                t.TagPending.AppendTagName(namePart.ToLowerInvariant());

                char delimiter = r.Consume();
                switch (delimiter)
                {
                    case '>':
                        t.EmitTagPending();
                        t.Transition(Data);
                        break;
                    case '/':
                        t.Transition(SelfClosingStartTag);
                        break;
                    case '\t': case '\n': case '\r': case '\f': case ' ':
                        t.Transition(BeforeAttributeName);
                        break;
                    case _eof: // original note: should Emit pending tag?
                        t.EofError(this);
                        t.Transition(Data);
                        break;
                    case _nullChar:
                        // the replacement string is stored in place of the consumed character
                        t.TagPending.AppendTagName(_replacementStr);
                        break;
                    // no default branch: ConsumeToAny above stops only at the characters handled here or at the end of input
                }
            }
Beispiel #22
0
 public void consumeToString()
 {
     CharacterReader reader = new CharacterReader("One Two Two Four");

     // Stops in front of the first "Two".
     Assert.AreEqual("One ", reader.ConsumeTo("Two"));
     Assert.AreEqual('T', reader.Consume());

     // Stops in front of the second "Two".
     Assert.AreEqual("wo ", reader.ConsumeTo("Two"));
     Assert.AreEqual('T', reader.Consume());

     // "Qux" never occurs, so the rest of the input is returned.
     Assert.AreEqual("wo Four", reader.ConsumeTo("Qux"));
 }
Beispiel #23
0
 // Decides what follows a '<' inside script data: an end tag, an escape start, or plain text.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char next = r.Consume();
     if (next == '/')
     {
         t.CreateTempBuffer();
         t.Transition(ScriptDataEndTagOpen);
     }
     else if (next == '!')
     {
         t.Emit("<!");
         t.Transition(ScriptDataEscapeStart);
     }
     else
     {
         // anything else: emit the '<' as text and push the character back for ScriptData
         t.Emit("<");
         r.Unconsume();
         t.Transition(ScriptData);
     }
 }
Beispiel #24
0
 public void consumeToAny()
 {
     CharacterReader reader = new CharacterReader("One &bar; qux");

     // Stops in front of the first delimiter, which is '&'.
     Assert.AreEqual("One ", reader.ConsumeToAny('&', ';'));
     Assert.IsTrue(reader.Matches('&'));
     Assert.IsTrue(reader.Matches("&bar;"));
     Assert.AreEqual('&', reader.Consume());

     // Next delimiter is ';'.
     Assert.AreEqual("bar", reader.ConsumeToAny('&', ';'));
     Assert.AreEqual(';', reader.Consume());

     // No delimiter left: the remainder is returned.
     Assert.AreEqual(" qux", reader.ConsumeToAny('&', ';'));
 }
Beispiel #25
0
            // Reads the next character and either moves to the dash-dash or less-than state,
            // or returns to ScriptDataEscaped.
            public override void Read(Tokeniser t, CharacterReader r)
            {
                // End of input is checked before anything is consumed.
                if (r.IsEmpty())
                {
                    t.EofError(this);
                    t.Transition(Data);
                    return;
                }

                char next = r.Consume();
                switch (next)
                {
                    case '<':
                        // nothing is emitted for '<' here
                        t.Transition(ScriptDataEscapedLessThanSign);
                        break;
                    case '-':
                        t.Emit(next);
                        t.Transition(ScriptDataEscapedDashDash);
                        break;
                    case _nullChar:
                        // error: the replacement character is emitted in place of the consumed one
                        t.Error(this);
                        t.Emit(_replacementChar);
                        t.Transition(ScriptDataEscaped);
                        break;
                    default:
                        t.Emit(next);
                        t.Transition(ScriptDataEscaped);
                        break;
                }
            }
Beispiel #26
0
 public void matches()
 {
     CharacterReader reader = new CharacterReader("One Two Three");

     // Matching is case-sensitive and works for a char, the whole input, or a prefix.
     Assert.IsTrue(reader.Matches('O'));
     Assert.IsTrue(reader.Matches("One Two Three"));
     Assert.IsTrue(reader.Matches("One"));
     Assert.IsFalse(reader.Matches("one"));

     // After consuming the leading 'O', matching is against "ne Two Three".
     Assert.AreEqual('O', reader.Consume());
     Assert.IsFalse(reader.Matches("One"));
     Assert.IsTrue(reader.Matches("ne Two Three"));

     // A candidate longer than the remaining input does not match.
     Assert.IsFalse(reader.Matches("ne Two Three Four"));

     // Once everything is consumed there is nothing left to match.
     Assert.AreEqual("ne Two Three", reader.ConsumeToEnd());
     Assert.IsFalse(reader.Matches("ne"));
 }
Beispiel #27
0
            // Reads an end tag name; once a delimiter arrives the tag is accepted only if it is the
            // appropriate end tag, otherwise handling is delegated to AnythingElse.
            public override void Read(Tokeniser t, CharacterReader r)
            {
                // A run of letters extends the pending tag name (lower-cased) and the data buffer (as written).
                if (r.MatchesLetter())
                {
                    string letters = r.ConsumeLetterSequence();
                    t.TagPending.AppendTagName(letters.ToLowerInvariant());
                    t.DataBuffer.Append(letters);
                    return;
                }

                // Not the appropriate end tag, or no input left: fall back without consuming.
                if (!t.IsAppropriateEndTagToken() || r.IsEmpty())
                {
                    AnythingElse(t, r);
                    return;
                }

                char next = r.Consume();
                switch (next)
                {
                    case '>':
                        t.EmitTagPending();
                        t.Transition(Data);
                        break;
                    case '/':
                        t.Transition(SelfClosingStartTag);
                        break;
                    case '\t': case '\n': case '\r': case '\f': case ' ':
                        t.Transition(BeforeAttributeName);
                        break;
                    default:
                        // keep the consumed character in the data buffer before falling back
                        t.DataBuffer.Append(next);
                        AnythingElse(t, r);
                        break;
                }
            }
Beispiel #28
0
 public void matchesAny()
 {
     char[] delimiters = { ' ', '\n', '\t' };
     CharacterReader reader = new CharacterReader("One\nTwo\tThree");

     // Positioned on 'O', which is not one of the delimiters.
     Assert.IsFalse(reader.MatchesAny(delimiters));

     // After consuming up to the first delimiter, the reader sits on '\n'.
     Assert.AreEqual("One", reader.ConsumeToAny(delimiters));
     Assert.IsTrue(reader.MatchesAny(delimiters));

     // Past the newline the reader sits on 'T' again.
     Assert.AreEqual('\n', reader.Consume());
     Assert.IsFalse(reader.MatchesAny(delimiters));
 }
Beispiel #29
0
 // Emits the next character and picks the following state; a '-' keeps the current state.
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char next = r.Consume();
     switch (next)
     {
         case _eof:
             t.EofError(this);
             t.Transition(Data);
             break;
         case _nullChar:
             // error: the replacement character is emitted in place of the consumed one
             t.Error(this);
             t.Emit(_replacementChar);
             t.Transition(ScriptDataDoubleEscaped);
             break;
         case '-':
             // emitted, no transition
             t.Emit(next);
             break;
         case '>':
             t.Emit(next);
             t.Transition(ScriptData);
             break;
         case '<':
             t.Emit(next);
             t.Transition(ScriptDataDoubleEscapedLessthanSign);
             break;
         default:
             t.Emit(next);
             t.Transition(ScriptDataDoubleEscaped);
             break;
     }
 }
Beispiel #30
0
        public void unconsume()
        {
            CharacterReader reader = new CharacterReader("one");

            // Unconsume() after one Consume() puts the reader back on 'o'.
            Assert.AreEqual('o', reader.Consume());
            Assert.AreEqual('n', reader.Current());
            reader.Unconsume();
            Assert.AreEqual('o', reader.Current());

            // Read the whole input.
            Assert.AreEqual('o', reader.Consume());
            Assert.AreEqual('n', reader.Consume());
            Assert.AreEqual('e', reader.Consume());
            Assert.IsTrue(reader.IsEmpty());

            // Unconsume() at the end makes the last character available again.
            reader.Unconsume();
            Assert.IsFalse(reader.IsEmpty());
            Assert.AreEqual('e', reader.Current());
            Assert.AreEqual('e', reader.Consume());
            Assert.IsTrue(reader.IsEmpty());

            // After an EOF was consumed, Unconsume() leaves the reader empty with EOF as current.
            Assert.AreEqual(CharacterReader.EOF, reader.Consume());
            reader.Unconsume();
            Assert.IsTrue(reader.IsEmpty());
            Assert.AreEqual(CharacterReader.EOF, reader.Current());
        }
Beispiel #31
0
            // Reads an unquoted attribute value: a run of ordinary characters followed by one delimiter.
            public override void Read(Tokeniser t, CharacterReader r)
            {
                // Everything up to the next delimiter belongs to the value.
                string run = r.ConsumeToAny('\t', '\n', '\r', '\f', ' ', '&', '>', _nullChar, '"', '\'', '<', '=', '`');
                if (run.Length != 0)
                {
                    t.TagPending.AppendAttributeValue(run);
                }

                char next = r.Consume();
                switch (next)
                {
                    case '>':
                        t.EmitTagPending();
                        t.Transition(Data);
                        break;
                    case '\t': case '\n': case '\r': case '\f': case ' ':
                        // whitespace ends the value
                        t.Transition(BeforeAttributeName);
                        break;
                    case '&':
                        // append the character reference if one was produced, otherwise the literal '&'
                        char? decoded = t.ConsumeCharacterReference('>', true);
                        if (decoded == null)
                        {
                            t.TagPending.AppendAttributeValue('&');
                        }
                        else
                        {
                            t.TagPending.AppendAttributeValue(decoded.Value);
                        }
                        break;
                    case _eof:
                        t.EofError(this);
                        t.Transition(Data);
                        break;
                    case _nullChar:
                        // error: the replacement character is stored in place of the consumed one
                        t.Error(this);
                        t.TagPending.AppendAttributeValue(_replacementChar);
                        break;
                    case '"': case '\'': case '<': case '=': case '`':
                        // error, but the character is still kept as part of the value
                        t.Error(this);
                        t.TagPending.AppendAttributeValue(next);
                        break;
                    // no default branch: ConsumeToAny above stops only at the characters handled here or at the end of input
                }

            }