예제 #1
0
        /// <summary>
        /// Checks that <c>ConsumeLetterThenDigitSequence</c> returns a run of letters
        /// followed by digits ("One12"), returns a letters-only run ("Two"), and leaves
        /// the rest of the input available to <c>ConsumeToEnd</c>.
        /// </summary>
        public void ConsumeLetterThenDigitSequence()
        {
            var reader = new CharacterReader("One12 Two &bar; qux");

            // Letters immediately followed by digits come back as a single token.
            var lettersAndDigits = reader.ConsumeLetterThenDigitSequence();
            Assert.AreEqual("One12", lettersAndDigits);

            // The separating space is not part of the sequence and is still pending.
            var separator = reader.Consume();
            Assert.AreEqual(' ', separator);

            // A run made only of letters is returned as well.
            var lettersOnly = reader.ConsumeLetterThenDigitSequence();
            Assert.AreEqual("Two", lettersOnly);

            // Everything after "Two" is untouched.
            var remainder = reader.ConsumeToEnd();
            Assert.AreEqual(" &bar; qux", remainder);
        }
예제 #2
0
        /// <summary>
        /// Tries to consume a character reference (numeric such as <c>#65;</c> / <c>#x41;</c>,
        /// or named) starting at the reader's current position.
        /// </summary>
        /// <param name="additionalAllowedCharacter">
        /// Optional character that, when it is the reader's current character, makes this
        /// method return <c>null</c> without consuming anything.
        /// </param>
        /// <param name="inAttribute">
        /// When <c>true</c>, a named reference that is immediately followed by a letter,
        /// a digit, '=', '-' or '_' is not treated as a reference; the reader is rewound
        /// and <c>null</c> is returned.
        /// </param>
        /// <returns>
        /// The referenced character; <c>ReplacementChar</c> when a numeric reference is
        /// outside the accepted range (or does not fit in an Int32); or <c>null</c> when
        /// no reference was consumed.
        /// </returns>
        public char? ConsumeCharacterReference(char? additionalAllowedCharacter, bool inAttribute)
        {
            if (_reader.IsEmpty())
            {
                return null;
            }

            if (additionalAllowedCharacter != null && additionalAllowedCharacter == _reader.Current())
            {
                return null;
            }

            if (_reader.MatchesAny('\t', '\n', '\r', '\f', ' ', '<', '&'))
            {
                return null;
            }

            // Remember the position so a failed match can be undone with RewindToMark().
            _reader.Mark();
            if (_reader.MatchConsume("#"))
            { // numbered
                bool isHexMode = _reader.MatchConsumeIgnoreCase("X");

                string numRef = isHexMode ? _reader.ConsumeHexSequence() : _reader.ConsumeDigitSequence();

                if (numRef.Length == 0)
                { // didn't match anything
                    CharacterReferenceError("Numeric reference with no numerals");
                    _reader.RewindToMark();
                    return null;
                }

                if (!_reader.MatchConsume(";"))
                {
                    CharacterReferenceError("Missing semicolon"); // missing semi
                }

                // -1 marks "could not be parsed"; it is only overwritten on a successful conversion.
                int charval = -1;
                try
                {
                    int numbase = isHexMode ? 16 : 10;
                    charval = Convert.ToInt32(numRef, numbase);
                }
                catch (FormatException)
                {
                    // not a valid number: fall through with charval == -1
                }
                catch (OverflowException)
                {
                    // digit run too long for an Int32 (e.g. "#99999999999;"): treat it as
                    // out of range instead of letting the exception escape the tokenizer
                }

                // Convert.ToInt32 with base 16 reads 8-digit values such as "80000000" as
                // two's complement, so any negative result (not just -1) is out of range.
                if (charval < 0 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF)
                {
                    CharacterReferenceError("Character outside of valid range");
                    return ReplacementChar;
                }

                // todo: implement number replacement table
                // todo: check for extra illegal unicode points as parse errors
                // NOTE(review): values above 0xFFFF do not fit in a single char; this cast
                // keeps only the low 16 bits (assuming the default unchecked context).
                return (char)charval;
            }

            // named
            // get as many letters as possible, and look for matching entities. unconsume backwards till a match is found
            string nameRef = _reader.ConsumeLetterThenDigitSequence();
            bool looksLegit = _reader.Matches(';');

            // found if a base named entity without a ;, or an extended entity with the ;.
            bool found = Entities.IsBaseNamedEntity(nameRef) || (Entities.IsNamedEntity(nameRef) && looksLegit);

            if (!found)
            {
                _reader.RewindToMark();
                if (looksLegit)
                {
                    // only report an error when the text was terminated like a real reference
                    CharacterReferenceError(string.Format("Invalid named referenece '{0}'", nameRef));
                }
                return null;
            }

            if (inAttribute && (_reader.MatchesLetter() || _reader.MatchesDigit() || _reader.MatchesAny('=', '-', '_')))
            {
                // don't want that to match
                _reader.RewindToMark();
                return null;
            }

            if (!_reader.MatchConsume(";"))
            {
                CharacterReferenceError("Missing semicolon"); // missing semi
            }

            return Entities.GetCharacterByName(nameRef);
        }