Beispiel #1
0
        /// <summary>
        /// Drives the current tokeniser state until a token has been emitted, then
        /// returns either the buffered character data or the emitted token.
        /// </summary>
        /// <remarks>
        /// When character data is buffered it is returned first as a
        /// <c>Token.Character</c>; the pending token is left in place (the pending
        /// flag stays set) so that the following call hands it out.
        /// </remarks>
        /// <returns>A character token built from the buffer, or the pending token.</returns>
        public Token Read()
        {
            // A self-closing flag that nobody acknowledged is reported once, then reset.
            if (!selfClosingFlagAcknowledged)
            {
                ParseError.SelfClosingTagNotAcknowledged(this);
                selfClosingFlagAcknowledged = true;
            }

            // Keep stepping the state machine until some state emits a token.
            while (!isEmitPending)
            {
                state.Read(this, reader);
            }

            // Nothing buffered: hand out the pending token and clear the pending flag.
            if (charBuffer.Length <= 0)
            {
                isEmitPending = false;
                return emitPending;
            }

            // Buffered characters go out first; the pending token waits for the next read.
            string bufferedText = charBuffer.ToString();
            charBuffer.Remove(0, charBuffer.Length);
            return new Token.Character(bufferedText);
        }
Beispiel #2
0
        /// <summary>
        /// Stores <paramref name="token"/> as the pending token and marks an emit as pending.
        /// </summary>
        /// <remarks>
        /// A start tag is additionally remembered as the last start tag, and a
        /// self-closing start tag clears the "self-closing flag acknowledged" marker.
        /// An end tag that carries attributes is reported as a parse error.
        /// If a token is already pending, a warning is raised before it is overwritten.
        /// </remarks>
        /// <param name="token">The token to hand out on a subsequent read.</param>
        public void Emit(Token token)
        {
            if (isEmitPending)
            {
                HtmlWarning.UnreadTokenPending();
            }

            emitPending   = token;
            isEmitPending = true;

            if (token.Type == TokenType.StartTag)
            {
                Token.StartTag opened = (Token.StartTag)token;
                lastStartTag = opened;

                if (opened.selfClosing)
                {
                    // Cleared here; Read() reports it if it is still unacknowledged.
                    selfClosingFlagAcknowledged = false;
                }

                return;
            }

            // End tags are not supposed to carry attributes.
            if (token.Type == TokenType.EndTag && ((Token.EndTag)token).Attributes.Any())
            {
                ParseError.AttributesPresentOnEndTagError(this);
            }
        }
Beispiel #3
0
 /// <summary>
 /// Forwards to <c>ParseError.UnexpectedlyReachedEof</c>, passing this instance's
 /// error list and reader together with the supplied tokeniser state.
 /// </summary>
 /// <param name="state">The tokeniser state passed through to the error report.</param>
 public void EofError(TokeniserState state) =>
     ParseError.UnexpectedlyReachedEof(errors, reader, state);
Beispiel #4
0
 /// <summary>
 /// Forwards to <c>ParseError.UnexpectedChar</c>, passing this instance's
 /// error list and reader together with the supplied tokeniser state.
 /// </summary>
 /// <param name="state">The tokeniser state passed through to the error report.</param>
 public void Error(TokeniserState state) =>
     ParseError.UnexpectedChar(errors, reader, state);
Beispiel #5
0
        /// <summary>
        /// Attempts to consume a character reference (numeric such as <c>#123;</c> /
        /// <c>#x7B;</c>, or named) from the reader at its current position.
        /// </summary>
        /// <param name="additionalAllowedCharacter">
        /// Optional character; when it equals the reader's current character nothing
        /// is consumed and <c>null</c> is returned.
        /// </param>
        /// <param name="inAttribute">
        /// When <c>true</c>, a matched named reference that is immediately followed by
        /// a letter, a digit, <c>=</c>, <c>-</c> or <c>_</c> is rejected: the reader is
        /// rewound and <c>null</c> is returned.
        /// </param>
        /// <returns>
        /// The decoded text of the reference; <c>replacementStr</c> for a numeric
        /// reference whose value is not a usable code point; or <c>null</c> when no
        /// reference could be consumed (the reader is then back at the marked position
        /// or was never advanced).
        /// </returns>
        public string ConsumeCharacterReference(char? additionalAllowedCharacter, bool inAttribute)
        {
            if (reader.IsEmpty)
            {
                return null;
            }

            if (additionalAllowedCharacter.HasValue && additionalAllowedCharacter.Value == reader.Current)
            {
                return null;
            }

            if (reader.MatchesAny('\t', '\n', '\f', ' ', '<', '&'))
            {
                return null;
            }

            // Remember where we started so a failed attempt can be undone.
            reader.Mark();
            if (reader.MatchConsume("#"))   // numbered
            {
                bool   isHexMode = reader.MatchConsumeIgnoreCase("X");
                string numRef    = isHexMode ? reader.ConsumeHexSequence() : reader.ConsumeDigitSequence();

                if (numRef.Length == 0)   // didn't match anything
                {
                    ParseError.NumericReferenceWithNoNumerals(this);
                    reader.RewindToMark();
                    return null;
                }

                if (!reader.MatchConsume(";"))
                {
                    ParseError.MissingSemicolon(this);
                }

                int charval = -1;
                try
                {
                    int radix = isHexMode ? 16 : 10;
                    charval = Convert.ToInt32(numRef, radix);
                }
                catch (FormatException)
                {
                    // Not parseable as a number: charval stays -1 and is reported below.
                }
                catch (OverflowException)
                {
                    // More digits than fit in an Int32, so necessarily beyond U+10FFFF:
                    // charval stays -1 and is reported below instead of crashing.
                }

                // Any negative value is invalid. Besides the -1 sentinel this covers
                // eight-digit hex input with the high bit set, which Convert.ToInt32
                // yields as a negative two's-complement number and which
                // Char.ConvertFromUtf32 would otherwise reject with an exception.
                bool isSurrogate = charval >= 0xD800 && charval <= 0xDFFF;
                if (charval < 0 || isSurrogate || charval > 0x10FFFF)
                {
                    ParseError.CharOutsideRange(this);
                    return replacementStr;
                }

                // TODO: implement number replacement table
                // TODO: check for extra illegal unicode points as parse errors
                return Char.ConvertFromUtf32(charval);
            }
            else     // named
            {
                // Get as many letters as possible, and look for matching entities.
                // Unconsume backwards one character at a time until a match is found.
                string nameRef     = reader.ConsumeLetterThenDigitSequence();
                string origNameRef = nameRef; // for error reporting. nameRef gets chomped looking for matches

                bool looksLegit = reader.Matches(';');
                bool found      = false;
                while (nameRef.Length > 0 && !found)
                {
                    if (HtmlEncoder.IsNamedEntity(nameRef))
                    {
                        found = true;
                    }
                    else
                    {
                        nameRef = nameRef.Substring(0, nameRef.Length - 1);
                        reader.Unconsume();
                    }
                }

                if (!found)
                {
                    if (looksLegit) // named with semicolon
                    {
                        ParseError.InvalidNamedReference(this, origNameRef);
                    }

                    reader.RewindToMark();
                    return null;
                }

                if (inAttribute && (reader.MatchesLetter() || reader.MatchesDigit() || reader.MatchesAny('=', '-', '_')))
                {
                    // don't want that to match
                    reader.RewindToMark();
                    return null;
                }

                if (!reader.MatchConsume(";"))
                {
                    ParseError.MissingSemicolon(this);
                }

                return HtmlEncoder.GetCharacterByName(nameRef);
            }
        }
Beispiel #6
0
 /// <summary>
 /// Forwards to <c>ParseError.UnexpectedToken</c>, passing this instance's error
 /// list, the reader's current position, the supplied tree-builder state and the
 /// current token.
 /// </summary>
 /// <param name="state">The tree-builder state passed through to the error report.</param>
 public void Error(HtmlTreeBuilderState state) =>
     ParseError.UnexpectedToken(errors, reader.Position, state, currentToken);