Beispiel #1
0
 public void consume()
 {
     CharacterReader r = new CharacterReader("one");
     Assert.AreEqual(0, r.Position);
     Assert.AreEqual('o', r.Current());
     Assert.AreEqual('o', r.Consume());
     Assert.AreEqual(1, r.Position);
     Assert.AreEqual('n', r.Current());
     Assert.AreEqual(1, r.Position);
     Assert.AreEqual('n', r.Consume());
     Assert.AreEqual('e', r.Consume());
     Assert.IsTrue(r.IsEmpty());
     Assert.AreEqual(CharacterReader.EOF, r.Consume());
     Assert.IsTrue(r.IsEmpty());
     Assert.AreEqual(CharacterReader.EOF, r.Consume());
 }
Beispiel #2
0
 // in data state, gather characters until a character reference or tag is found
 public override void Read(Tokeniser t, CharacterReader r)
 {
     switch (r.Current())
     {
         case '&':
             t.AdvanceTransition(CharacterReferenceInData);
             break;
         case '<':
             t.AdvanceTransition(TagOpen);
             break;
         case _nullChar:
             t.Error(this); // NOT replacement character (oddly?)
             t.Emit(r.Consume());
             break;
         case _eof:
             t.Emit(new Token.EOF());
             break;
         default:
             string data = r.ConsumeToAny('&', '<', _nullChar);
             t.Emit(data);
             break;
     }
 }
Beispiel #3
0
 /// handles data in title, textarea etc
 public override void Read(Tokeniser t, CharacterReader r)
 {
     switch (r.Current())
     {
         case '&':
             t.AdvanceTransition(CharacterReferenceInRcData);
             break;
         case '<':
             t.AdvanceTransition(RcDataLessThanSign);
             break;
         case _nullChar:
             t.Error(this);
             r.Advance();
             t.Emit(_replacementChar);
             break;
         case _eof:
             t.Emit(new Token.EOF());
             break;
         default:
             string data = r.ConsumeToAny('&', '<', _nullChar);
             t.Emit(data);
             break;
     }
 }
Beispiel #4
0
        public void unconsume()
        {
            CharacterReader r = new CharacterReader("one");
            Assert.AreEqual('o', r.Consume());
            Assert.AreEqual('n', r.Current());
            r.Unconsume();
            Assert.AreEqual('o', r.Current());

            Assert.AreEqual('o', r.Consume());
            Assert.AreEqual('n', r.Consume());
            Assert.AreEqual('e', r.Consume());
            Assert.IsTrue(r.IsEmpty());
            r.Unconsume();
            Assert.IsFalse(r.IsEmpty());
            Assert.AreEqual('e', r.Current());
            Assert.AreEqual('e', r.Consume());
            Assert.IsTrue(r.IsEmpty());

            Assert.AreEqual(CharacterReader.EOF, r.Consume());
            r.Unconsume();
            Assert.IsTrue(r.IsEmpty());
            Assert.AreEqual(CharacterReader.EOF, r.Current());
        }
Beispiel #5
0
        public char?ConsumeCharacterReference(char?additionalAllowedCharacter, bool inAttribute)
        {
            if (_reader.IsEmpty())
            {
                return(null);
            }

            if (additionalAllowedCharacter != null && additionalAllowedCharacter == _reader.Current())
            {
                return(null);
            }

            if (_reader.MatchesAny('\t', '\n', '\r', '\f', ' ', '<', '&'))
            {
                return(null);
            }

            _reader.Mark();
            if (_reader.MatchConsume("#"))
            { // numbered
                bool isHexMode = _reader.MatchConsumeIgnoreCase("X");

                string numRef = isHexMode ? _reader.ConsumeHexSequence() : _reader.ConsumeDigitSequence();

                if (numRef.Length == 0)
                { // didn't match anything
                    CharacterReferenceError("Numeric reference with no numerals");
                    _reader.RewindToMark();
                    return(null);
                }

                if (!_reader.MatchConsume(";"))
                {
                    CharacterReferenceError("Missing semicolon"); // missing semi
                }

                int charval = -1;
                try
                {
                    int numbase = isHexMode ? 16 : 10;
                    charval = Convert.ToInt32(numRef, numbase);
                }
                catch (FormatException)
                {
                } // skip
                if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF)
                {
                    CharacterReferenceError("Character outside of valid range");
                    return(ReplacementChar);
                }
                else
                {
                    // todo: implement number replacement table
                    // todo: check for extra illegal unicode points as parse errors
                    return((char)charval);
                }
            }
            else
            { // named
                // get as many letters as possible, and look for matching entities. unconsume backwards till a match is found
                string nameRef    = _reader.ConsumeLetterThenDigitSequence();
                bool   looksLegit = _reader.Matches(';');

                // found if a base named entity without a ;, or an extended entity with the ;.
                bool found = (Entities.IsBaseNamedEntity(nameRef) || (Entities.IsNamedEntity(nameRef) && looksLegit));


                if (!found)
                {
                    _reader.RewindToMark();
                    if (looksLegit)
                    {
                        CharacterReferenceError(string.Format("Invalid named referenece '{0}'", nameRef));
                    }
                    return(null);
                }

                if (inAttribute && (_reader.MatchesLetter() || _reader.MatchesDigit() || _reader.MatchesAny('=', '-', '_')))
                {
                    // don't want that to match
                    _reader.RewindToMark();
                    return(null);
                }

                if (!_reader.MatchConsume(";"))
                {
                    CharacterReferenceError("Missing semicolon"); // missing semi
                }

                return(Entities.GetCharacterByName(nameRef));
            }
        }
Beispiel #6
0
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char c = r.Current();
     switch (c)
     {
         case '-':
             t.Emit(c);
             t.AdvanceTransition(ScriptDataDoubleEscapedDash);
             break;
         case '<':
             t.Emit(c);
             t.AdvanceTransition(ScriptDataDoubleEscapedLessthanSign);
             break;
         case _nullChar:
             t.Error(this);
             r.Advance();
             t.Emit(_replacementChar);
             break;
         case _eof:
             t.EofError(this);
             t.Transition(Data);
             break;
         default:
             string data = r.ConsumeToAny('-', '<', _nullChar);
             t.Emit(data);
             break;
     }
 }
Beispiel #7
0
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (r.MatchesLetter())
     {
         t.CreateTagPending(false);
         t.TagPending.AppendTagName(char.ToLowerInvariant(r.Current()));
         t.DataBuffer.Append(r.Current());
         t.AdvanceTransition(ScriptDataEscapedEndTagName);
     }
     else
     {
         t.Emit("</");
         t.Transition(ScriptDataEscaped);
     }
 }
Beispiel #8
0
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (r.MatchesLetter())
     {
         t.CreateTempBuffer();
         t.DataBuffer.Append(char.ToLowerInvariant(r.Current()));
         t.Emit("<" + r.Current());
         t.AdvanceTransition(ScriptDataDoubleEscapeStart);
     }
     else if (r.Matches('/'))
     {
         t.CreateTempBuffer();
         t.AdvanceTransition(ScriptDataEscapedEndTagOpen);
     }
     else
     {
         t.Emit('<');
         t.Transition(ScriptDataEscaped);
     }
 }
Beispiel #9
0
            public override void Read(Tokeniser t, CharacterReader r)
            {
                if (r.IsEmpty())
                {
                    t.EofError(this);
                    t.Transition(Data);
                    return;
                }

                switch (r.Current())
                {
                    case '-':
                        t.Emit('-');
                        t.AdvanceTransition(ScriptDataEscapedDash);
                        break;
                    case '<':
                        t.AdvanceTransition(ScriptDataEscapedLessThanSign);
                        break;
                    case _nullChar:
                        t.Error(this);
                        r.Advance();
                        t.Emit(_replacementChar);
                        break;
                    default:
                        string data = r.ConsumeToAny('-', '<', _nullChar);
                        t.Emit(data);
                        break;
                }
            }
Beispiel #10
0
 // from < in data
 public override void Read(Tokeniser t, CharacterReader r)
 {
     switch (r.Current())
     {
         case '!':
             t.AdvanceTransition(MarkupDeclarationOpen);
             break;
         case '/':
             t.AdvanceTransition(EndTagOpen);
             break;
         case '?':
             t.AdvanceTransition(BogusComment);
             break;
         default:
             if (r.MatchesLetter())
             {
                 t.CreateTagPending(true);
                 t.Transition(TagName);
             }
             else
             {
                 t.Error(this);
                 t.Emit('<'); // char that got us here
                 t.Transition(Data);
             }
             break;
     }
 }
Beispiel #11
0
 public override void Read(Tokeniser t, CharacterReader r)
 {
     switch (r.Current())
     {
         case _nullChar:
             t.Error(this);
             r.Advance();
             t.Emit(_replacementChar);
             break;
         case _eof:
             t.Emit(new Token.EOF());
             break;
         default:
             string data = r.ConsumeTo(_nullChar);
             t.Emit(data);
             break;
     }
 }
Beispiel #12
0
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char c = r.Current();
     switch (c)
     {
         case '-':
             t.AdvanceTransition(CommentEndDash);
             break;
         case _nullChar:
             t.Error(this);
             r.Advance();
             t.CommentPending.Data.Append(_replacementChar);
             break;
         case _eof:
             t.EofError(this);
             t.EmitCommentPending();
             t.Transition(Data);
             break;
         default:
             t.CommentPending.Data.Append(r.ConsumeToAny('-', _nullChar));
             break;
     }
 }