/// <summary>
/// Skips whitespace and then dispatches on the first non-whitespace character:
/// '>' emits <c>Token</c>; end of input reports a parse error, sets force-quirks and emits
/// <c>Token</c>; anything else reports a parse error and hands <c>Token</c> to
/// <c>BogusDocTypeState</c>.
/// </summary>
/// <param name="tokenizer">Tokenizer that receives the emitted token.</param>
/// <param name="reader">Input the characters are read from.</param>
/// <returns>The state that should process the following input.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            int c;
            do
            {
                c = Read(reader);
            } while (IsWhitespace(c));

            if (c == '>')
            {
                tokenizer.EmitToken(Token);
                return DataState.Instance;
            }

            if (c == -1)
            {
                // End of input: the token is emitted with force-quirks switched on.
                // NOTE(review): the original author was unsure whether EOF must be reconsumed here.
                ReportParseError();
                Token.ForceQuirks = true;
                tokenizer.EmitToken(Token);
                return DataState.Instance;
            }

            // Any other character is a parse error. Force-quirks is deliberately left untouched.
            // The token must travel with the state switch, otherwise the singleton
            // BogusDocTypeState keeps whatever token it was last given.
            ReportParseError();
            BogusDocTypeState.Instance.Token = Token;
            return BogusDocTypeState.Instance;
        }
Пример #2
0
        /// <summary>
        /// Reads one character and decides how the comment held in <c>Token</c> continues:
        /// '-' moves to <c>CommentStartDashState</c>; '>' and end of input report a parse error
        /// and emit the comment; every other character is appended (NUL as U+FFFD, with a
        /// parse error) before handing the token to <c>CommentState</c>.
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives the emitted comment token.</param>
        /// <param name="reader">Input the character is read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            int next = Read(reader);

            if (next == '-')
            {
                return CommentStartDashState.Instance;
            }

            if (next == '>' || next == -1)
            {
                // Both an early '>' and end of input close the comment with a parse error.
                ReportParseError();
                tokenizer.EmitToken(Token);
                return DataState.Instance;
            }

            if (next == 0)
            {
                // NUL is replaced by the replacement character.
                ReportParseError();
                Token.Comment += '\uFFFD';
            }
            else
            {
                Token.Comment += (char)next;
            }

            CommentState.Instance.Token = Token;
            return CommentState.Instance;
        }
        /// <summary>
        /// Skips whitespace and then looks for the keywords "PUBLIC" or "SYSTEM"
        /// (ASCII case-insensitive) in the next six characters.
        /// '>' emits <c>Token</c>; end of input reports a parse error, sets force-quirks and emits
        /// <c>Token</c>; a keyword hands <c>Token</c> to the matching keyword state; anything else
        /// reports a parse error, sets force-quirks and hands <c>Token</c> to <c>BogusDocTypeState</c>.
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives the emitted token.</param>
        /// <param name="reader">Input the characters are read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            int c;
            do
            {
                c = Read(reader);
            } while (IsWhitespace(c));

            if (c == '>')
            {
                tokenizer.EmitToken(Token);
                return DataState.Instance;
            }

            if (c == -1)
            {
                // Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag to on.
                // Emit that DOCTYPE token. Reconsume the EOF character.
                ReportParseError();
                Token.ForceQuirks = true;
                tokenizer.EmitToken(Token);
                return DataState.Instance;
            }

            // Look ahead: the current character plus up to five more. If fewer are available the
            // remaining slots stay '\0', so neither keyword comparison below can match.
            char[] buffer = new char[6];
            buffer[0] = (char)c;
            ReadBlock(reader, buffer, 1, 5);
            string bufferStr = new string(buffer);

            // "PUBLIC" (ASCII case-insensitive): consume it and continue after the public keyword.
            if (bufferStr.Equals("public", StringComparison.OrdinalIgnoreCase))
            {
                AfterDocTypePublicKeywordState.Instance.Token = Token;
                return AfterDocTypePublicKeywordState.Instance;
            }

            // "SYSTEM" (ASCII case-insensitive): consume it and continue after the system keyword.
            // The token is handed over exactly as in the PUBLIC branch, so the next state works on
            // this DOCTYPE rather than on whatever it was given last.
            if (bufferStr.Equals("system", StringComparison.OrdinalIgnoreCase))
            {
                AfterDocTypeSystemKeywordState.Instance.Token = Token;
                return AfterDocTypeSystemKeywordState.Instance;
            }

            // Otherwise this is a parse error: set force-quirks and switch to the bogus DOCTYPE
            // state. The DOCTYPE token must travel with the switch so it is eventually emitted.
            // NOTE(review): seeking the base stream does not rewind characters the StreamReader has
            // already buffered, and the offset is in bytes rather than characters - confirm that
            // Read/ReadBlock cooperate with this rewind.
            reader.BaseStream.Seek(-6, SeekOrigin.Current);
            ReportParseError();
            Token.ForceQuirks = true;
            BogusDocTypeState.Instance.Token = Token;
            return BogusDocTypeState.Instance;
        }
Пример #4
0
        /// <summary>
        /// Reads one character and either switches state ('&amp;' to
        /// <c>CharacterReferenceInRCDATAState</c>, '&lt;' to <c>RCDATALessThanSignState</c>) or emits
        /// something and stays in this state: an <c>EndOfFileToken</c> at end of input, U+FFFD
        /// (with a parse error) for NUL, or the character itself.
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives emitted characters and tokens.</param>
        /// <param name="reader">Input the character is read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            int current = Read(reader);

            if (current == '&')
            {
                return CharacterReferenceInRCDATAState.Instance;
            }

            if (current == '<')
            {
                return RCDATALessThanSignState.Instance;
            }

            if (current == -1)
            {
                // NOTE(review): this state is returned again after EOF - confirm the tokenizer
                // stops once an EndOfFileToken has been emitted.
                tokenizer.EmitToken(new EndOfFileToken());
            }
            else if (current == 0)
            {
                ReportParseError();
                tokenizer.EmitChar('\uFFFD');
            }
            else
            {
                tokenizer.EmitChar((char)current);
            }

            return this;
        }
Пример #5
0
        /// <summary>
        /// Reads characters until one of them ends this call: '&lt;' switches to
        /// <c>TagOpenState</c>, end of input emits an <c>EndOfFileToken</c> and returns
        /// <c>null</c>, and an ordinary character is emitted and this state is returned.
        /// '&amp;' (delegated to <c>CharacterReferenceInDataState</c>) and NUL (parse error, emitted
        /// unchanged) are handled in place and reading continues.
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives emitted characters and tokens.</param>
        /// <param name="reader">Input the characters are read from.</param>
        /// <returns>The next state, or <c>null</c> once end of input has been emitted.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            while (true)
            {
                int current = Read(reader);

                if (current == '<')
                {
                    return TagOpenState.Instance;
                }

                if (current == -1)
                {
                    tokenizer.EmitToken(new EndOfFileToken());
                    return null;
                }

                if (current == '&')
                {
                    // The character reference is resolved inline; keep reading afterwards.
                    CharacterReferenceInDataState.Instance.Process(tokenizer, reader, null);
                    continue;
                }

                if (current == '\0')
                {
                    // NUL is a parse error, but it is still emitted as-is.
                    ReportParseError();
                    tokenizer.EmitChar('\0');
                    continue;
                }

                tokenizer.EmitChar((char)current);
                return this; // Returning (instead of looping) allows the state to be switched.
            }
        }
        /// <summary>
        /// Reads one character after the current attribute (<c>Token</c>) and decides where the
        /// containing tag continues: whitespace goes to <c>BeforeAttributeNameState</c>, '/' to
        /// <c>SelfClosingStartTagState</c>, '>' emits <c>Token</c>, end of input reports a parse
        /// error, and any other character reports a parse error and is queued for
        /// <c>BeforeAttributeNameState</c> to consume again.
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives the emitted token.</param>
        /// <param name="reader">Input the character is read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            int c = Read(reader);
            if (IsWhitespace(c))
            {
                BeforeAttributeNameState.Instance.Token = Token.ContainingTag;
                return BeforeAttributeNameState.Instance;
            }

            if (c == '/')
            {
                SelfClosingStartTagState.Instance.Token = Token.ContainingTag;
                return SelfClosingStartTagState.Instance;
            }

            if (c == '>')
            {
                tokenizer.EmitToken(Token);
                return DataState.Instance;
            }

            if (c == -1)
            {
                // NOTE(review): the original author flagged that EOF may need to be reconsumed here.
                ReportParseError();
                return DataState.Instance;
            }

            // Unexpected character: parse error, then let the before-attribute-name state see the
            // same character again. It has to be queued on the state that is actually returned;
            // queuing it on BeforeDocTypeNameState would hand it to an unrelated state.
            ReportParseError();
            BeforeAttributeNameState.Instance.Token = Token.ContainingTag;
            BeforeAttributeNameState.Instance.LastConsumedCharacters.Enqueue((char)c);
            return BeforeAttributeNameState.Instance;
        }
        /// <summary>
        /// Reads one character and routes the DOCTYPE held in <c>Token</c>: whitespace goes to
        /// <c>BeforeDocTypePublicIdentifierState</c>; a quote reports a parse error, resets the
        /// public identifier to empty and enters the matching quoted-identifier state; every other
        /// input reports a parse error and sets force-quirks, after which '>' or end of input emit
        /// the token and anything else continues in <c>BogusDocTypeState</c>.
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives the emitted token.</param>
        /// <param name="reader">Input the character is read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            int current = Read(reader);

            if (IsWhitespace(current))
            {
                BeforeDocTypePublicIdentifierState.Instance.Token = Token;
                return BeforeDocTypePublicIdentifierState.Instance;
            }

            switch (current)
            {
                case '"':
                    ReportParseError();
                    Token.PublicIdentifier = string.Empty;
                    DocTypePublicIdentifierQuotedState.InstanceDoubleQuoted.Token = Token;
                    return DocTypePublicIdentifierQuotedState.InstanceDoubleQuoted;

                case '\'':
                    ReportParseError();
                    Token.PublicIdentifier = string.Empty;
                    DocTypePublicIdentifierQuotedState.InstanceSingleQuoted.Token = Token;
                    return DocTypePublicIdentifierQuotedState.InstanceSingleQuoted;
            }

            // Everything that remains is a parse error that forces quirks mode.
            ReportParseError();
            Token.ForceQuirks = true;

            if (current == '>' || current == -1)
            {
                // NOTE(review): for EOF the original author was unsure whether it must be reconsumed.
                tokenizer.EmitToken(Token);
                return DataState.Instance;
            }

            BogusDocTypeState.Instance.Token = Token;
            return BogusDocTypeState.Instance;
        }
Пример #8
0
        /// <summary>
        /// Accumulates characters into <c>Token.Name</c> until the name ends: whitespace hands the
        /// token to <c>AfterDocTypeNameState</c>, '>' emits it, and end of input reports a parse
        /// error, sets force-quirks and emits it. Uppercase ASCII letters are appended in lower
        /// case and NUL is appended as U+FFFD with a parse error.
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives the emitted token.</param>
        /// <param name="reader">Input the characters are read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            while (true)
            {
                int current = Read(reader);

                if (IsWhitespace(current))
                {
                    AfterDocTypeNameState.Instance.Token = Token;
                    return AfterDocTypeNameState.Instance;
                }

                if (current == '>')
                {
                    tokenizer.EmitToken(Token);
                    return DataState.Instance;
                }

                string piece;
                if (base.IsUppercaseAsciiLetter(current))
                {
                    // Names are stored in lower case.
                    piece = Char.ToLower((char)current).ToString();
                }
                else if (current == 0)
                {
                    // Parse error: NUL becomes the replacement character.
                    ReportParseError();
                    piece = "\uFFFD";
                }
                else if (current == -1)
                {
                    // Parse error: input ended inside the name.
                    ReportParseError();
                    Token.ForceQuirks = true;
                    tokenizer.EmitToken(Token);
                    return DataState.Instance;
                }
                else
                {
                    piece = ((char)current).ToString();
                }

                Token.Name += piece;
            }
        }
        /// <summary>
        /// Skips whitespace and then decides how the value of the attribute in <c>Token</c> is
        /// read: a quote enters the matching quoted-value state; '&amp;' is queued for
        /// <c>AttributeValueUnquotedState</c> to consume again; '>' reports a parse error and emits
        /// <c>Token</c>; end of input reports a parse error; any other character starts an unquoted
        /// value (NUL as U+FFFD, and '&lt;', '=' and '`' with a parse error).
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives the emitted token.</param>
        /// <param name="reader">Input the characters are read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            int current = Read(reader);
            while (IsWhitespace(current))
            {
                current = Read(reader);
            }

            switch (current)
            {
                case '"':
                    AttributeValueQuotedState.InstanceDoubleQuoted.Token = Token;
                    return AttributeValueQuotedState.InstanceDoubleQuoted;

                case '\'':
                    AttributeValueQuotedState.InstanceSingleQuoted.Token = Token;
                    return AttributeValueQuotedState.InstanceSingleQuoted;

                case '&':
                    // The ampersand belongs to the unquoted value; queue it so that state sees it.
                    AttributeValueUnquotedState.Instance.Token = Token;
                    AttributeValueUnquotedState.Instance.LastConsumedCharacters.Enqueue((char)current);
                    return AttributeValueUnquotedState.Instance;

                case '>':
                    ReportParseError();
                    tokenizer.EmitToken(Token);
                    return DataState.Instance;

                case -1:
                    // NOTE(review): the original author was unsure whether EOF must be reconsumed.
                    ReportParseError();
                    return DataState.Instance;

                case 0:
                    ReportParseError();
                    Token.AttributeValue += "\uFFFD";
                    break;

                case '<':
                case '=':
                case '`':
                    // Suspicious in an unquoted value, but still taken literally.
                    ReportParseError();
                    Token.AttributeValue += (char)current;
                    break;

                default:
                    Token.AttributeValue += (char)current;
                    break;
            }

            AttributeValueUnquotedState.Instance.Token = Token;
            return AttributeValueUnquotedState.Instance;
        }
        /// <summary>
        /// Skips whitespace, creates a fresh <c>DocTypeToken</c>, registers it with
        /// <c>DocTypeNameState</c> and seeds its name from the first non-whitespace character
        /// (lower-cased if it is an uppercase ASCII letter, U+FFFD with a parse error for NUL).
        /// '>' and end of input instead report a parse error, set force-quirks and emit the token.
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives the emitted token.</param>
        /// <param name="reader">Input the characters are read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            int first = Read(reader);
            while (IsWhitespace(first))
            {
                first = Read(reader);
            }

            // The token is registered up front, before it is known whether a name follows.
            DocTypeToken docType = new DocTypeToken();
            DocTypeNameState.Instance.Token = docType;

            if (base.IsUppercaseAsciiLetter(first))
            {
                docType.Name = Char.ToLower((char)first).ToString();
            }
            else if (first == 0)
            {
                ReportParseError();
                docType.Name = "\uFFFD";
            }
            else if (first == '>' || first == -1)
            {
                // No name at all: parse error, force quirks, emit the token, back to data.
                ReportParseError();
                docType.ForceQuirks = true;
                tokenizer.EmitToken(docType);
                return DataState.Instance;
            }
            else
            {
                docType.Name = ((char)first).ToString();
            }

            return DocTypeNameState.Instance;
        }
Пример #11
0
        /// <summary>
        /// Reads characters to decide whether the comment in <c>Token</c> ends: '>' emits it.
        /// Every other input is a parse error: '-' appends a dash and keeps reading, '!' moves to
        /// <c>CommentEndBangState</c>, end of input emits the comment, and anything else appends
        /// "--" plus the character (U+FFFD for NUL) and returns to <c>CommentState</c>.
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives the emitted comment token.</param>
        /// <param name="reader">Input the characters are read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            while (true)
            {
                int current = Read(reader);

                if (current == '>')
                {
                    tokenizer.EmitToken(Token);
                    return DataState.Instance;
                }

                // Anything other than '>' at this point is a parse error.
                ReportParseError();

                if (current == '-')
                {
                    Token.Comment += "-";
                    continue;
                }

                if (current == -1)
                {
                    // NOTE(review): the original author was unsure whether EOF must be reconsumed.
                    tokenizer.EmitToken(Token);
                    return DataState.Instance;
                }

                if (current == '!')
                {
                    CommentEndBangState.Instance.Token = Token;
                    return CommentEndBangState.Instance;
                }

                // The two dashes turned out to be comment data after all.
                Token.Comment += current == 0 ? "--\uFFFD" : "--" + (char)current;
                CommentState.Instance.Token = Token;
                return CommentState.Instance;
            }
        }
Пример #12
0
        /// <summary>
        /// Reads one character of an end tag name. ASCII letters are appended to
        /// <c>Token.TagName</c> (lower-cased) and to the tokenizer's temporary buffer. Whitespace,
        /// '/' and '>' finish the tag, but only when the tokenizer reports it as an appropriate
        /// end tag. In every other case "&lt;/" plus the buffered characters are emitted as text
        /// and the character is queued for <c>RCDATAState</c> to consume again.
        /// </summary>
        /// <param name="tokenizer">Tokenizer providing the temporary buffer and receiving output.</param>
        /// <param name="reader">Input the character is read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            int current = Read(reader);
            bool isSpace = IsWhitespace(current);

            if (isSpace || current == '/' || current == '>')
            {
                if (tokenizer.IsAppropriateEndTagToken(Token))
                {
                    if (isSpace)
                    {
                        return BeforeAttributeNameState.Instance;
                    }

                    if (current == '/')
                    {
                        return SelfClosingStartTagState.Instance;
                    }

                    tokenizer.EmitToken(Token); // TODO - is this the right token to emit?
                    return DataState.Instance;
                }
            }
            else
            {
                bool isUpper = IsUppercaseAsciiLetter(current);
                if (isUpper || IsLowercaseAsciiLetter(current))
                {
                    char letter = (char)current;
                    // The tag name is lower-cased; the buffer keeps the character as typed.
                    Token.TagName += isUpper ? Char.ToLower(letter) : letter;
                    tokenizer.TemporaryBuffer.Add(letter);
                    return this;
                }
            }

            // Not an end tag after all: replay what was consumed as plain characters.
            tokenizer.EmitChar('<');
            tokenizer.EmitChar('/');
            foreach (char buffered in tokenizer.TemporaryBuffer)
            {
                tokenizer.EmitChar(buffered);
            }

            RCDATAState.Instance.LastConsumedCharacters.Enqueue((char)current);
            return RCDATAState.Instance;
        }
Пример #13
0
 /// <summary>
 /// Emits input characters one by one: NUL is reported as a parse error and emitted as
 /// U+FFFD (reading then continues), an ordinary character is emitted and this state is
 /// returned, and end of input emits an <c>EndOfFileToken</c> and ends processing.
 /// </summary>
 /// <param name="tokenizer">Tokenizer that receives emitted characters and tokens.</param>
 /// <param name="reader">Input the characters are read from.</param>
 /// <returns>This state after an ordinary character, or <c>null</c> at end of input.</returns>
 public override BaseState Process(HtmlTokenizer tokenizer, System.IO.StreamReader reader)
 {
     for (;;){
         int c = Read(reader);
         switch(c)
         {
             case 0:
                 ReportParseError();
                 tokenizer.EmitChar('\uFFFD');
                 break;
             case -1:
                 // A 'break' here would only leave the switch; the loop would then read EOF
                 // again and emit end-of-file tokens forever. Return instead.
                 // NOTE(review): null mirrors what the data state returns at EOF - confirm the
                 // tokenizer treats a null state as "finished".
                 tokenizer.EmitToken(new EndOfFileToken());
                 return null;
             default:
                 tokenizer.EmitChar((char)c);
                 return this;
         }
     }
 }
        /// <summary>
        /// Skips whitespace and then either finishes the tag in <c>Token</c> or starts a new
        /// attribute on it: '/' hands the tag to <c>SelfClosingStartTagState</c>, '>' emits it,
        /// end of input reports a parse error, and any other character becomes the first character
        /// of a new <c>AttributeToken</c> (lower-cased if it is an uppercase ASCII letter, U+FFFD
        /// with a parse error for NUL, and with a parse error for '"', '\'', '&lt;' and '=').
        /// </summary>
        /// <param name="tokenizer">Tokenizer that receives the emitted token.</param>
        /// <param name="reader">Input the characters are read from.</param>
        /// <returns>The state that should process the following input.</returns>
        public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
        {
            int c;
            do
            {
                c = Read(reader);
            } while (IsWhitespace(c));

            if (c == '/')
            {
                // Hand the tag over explicitly, so the self-closing state works on this tag and
                // not on whichever token it was given last.
                SelfClosingStartTagState.Instance.Token = Token;
                return SelfClosingStartTagState.Instance;
            }

            if (c == '>')
            {
                tokenizer.EmitToken(Token);
                return DataState.Instance;
            }

            if (c == -1)
            {
                // NOTE(review): the original author was unsure whether EOF must be reconsumed.
                ReportParseError();
                return DataState.Instance;
            }

            // Everything else starts a new attribute; only its first character differs.
            string firstChar;
            if (base.IsUppercaseAsciiLetter(c))
            {
                firstChar = Char.ToLower((char)c).ToString();
            }
            else if (c == 0)
            {
                ReportParseError();
                firstChar = "\uFFFD";
            }
            else
            {
                if (c == '"' || c == '\'' || c == '<' || c == '=')
                {
                    // Suspicious at the start of an attribute name, but still taken literally.
                    ReportParseError();
                }

                firstChar = ((char)c).ToString();
            }

            AttributeToken attrToken = new AttributeToken() {
                AttributeName = firstChar,
                AttributeValue = string.Empty,
                ContainingTag = Token
            };

            Token.Attributes.Add(attrToken);
            AttributeNameState.Instance.Token = attrToken;
            return AttributeNameState.Instance;
        }