コード例 #1
0
        /// <summary>
        /// Handles a token while the parser is still in the prolog and no
        /// doctype has been seen yet. A DOCTYPE token is converted into a
        /// document type node that is appended to the document; any other
        /// token is forwarded to InMisc.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void BeforeDoctype(XmlToken token)
        {
            if (token.Type != XmlTokenType.DOCTYPE)
            {
                InMisc(token);
                return;
            }

            var doctypeToken = (XmlDoctypeToken)token;
            var node = new DocumentType();
            node.SystemId = doctypeToken.SystemIdentifier;
            node.PublicId = doctypeToken.PublicIdentifier;
            node.TypeDefinitions = tokenizer.DTD;
            node.Name = doctypeToken.Name;
            doc.AppendChild(node);

            // Once the doctype is attached the insertion mode becomes Misc.
            insert = XmlTreeMode.Misc;

            // The external subset is only scanned when a system identifier
            // is present and the document is not standalone.
            if (doctypeToken.IsSystemIdentifierMissing || standalone)
            {
                return;
            }

            ScanExternalSubset(node.SystemId, node.TypeDefinitions);
        }
コード例 #2
0
        /// <summary>
        /// Handles the first character of a comment. The string buffer is
        /// reset and the character decides how tokenization continues.
        /// See http://www.w3.org/TR/REC-xml/#sec-comments.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <returns>The emitted comment token.</returns>
        protected XmlToken CommentStart(Char c)
        {
            _stringBuffer.Clear();

            if (c == Specification.MINUS)
            {
                return CommentDashStart(_src.Next);
            }

            if (c == Specification.NULL)
            {
                // NULL is reported and stored as the replacement character.
                RaiseErrorOccurred(ErrorCode.NULL);
                _stringBuffer.Append(Specification.REPLACEMENT);
                return Comment(_src.Next);
            }

            if (c == Specification.GT)
            {
                // The comment was closed right away; emit it empty.
                RaiseErrorOccurred(ErrorCode.TagClosedWrong);
                return XmlToken.Comment(_stringBuffer.ToString());
            }

            if (c == Specification.EOF)
            {
                // Step back so that the EOF character is read again later.
                RaiseErrorOccurred(ErrorCode.EOF);
                _src.Back();
                return XmlToken.Comment(_stringBuffer.ToString());
            }

            _stringBuffer.Append(c);
            return Comment(_src.Next);
        }
コード例 #3
0
        /// <summary>
        /// Handles a token in the After insertion mode. Comments and
        /// processing instructions are forwarded to InMisc, EOF triggers
        /// the optional validation run and any other token that is not
        /// ignorable is rejected.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void AfterBody(XmlToken token)
        {
            var type = token.Type;

            if (type == XmlTokenType.ProcessingInstruction || type == XmlTokenType.Comment)
            {
                InMisc(token);
            }
            else if (type == XmlTokenType.EOF)
            {
                // Validation only runs when the document options ask for it.
                if (doc.Options.IsValidating && !XmlValidator.Run(doc))
                {
                    throw Errors.Xml(ErrorCode.XmlValidationFailed);
                }
            }
            else if (!token.IsIgnorable)
            {
                throw Errors.Xml(ErrorCode.XmlMissingRoot);
            }
        }
コード例 #4
0
        /// <summary>
        /// Consumes a token by handing it to the handler that belongs to
        /// the current insertion mode. Unknown modes are ignored.
        /// </summary>
        /// <param name="token">The token to consume.</param>
        void Consume(XmlToken token)
        {
            // Read the mode once; the handlers may change it while running.
            var mode = insert;

            if (mode == XmlTreeMode.Initial)
            {
                Initial(token);
            }
            else if (mode == XmlTreeMode.Prolog)
            {
                BeforeDoctype(token);
            }
            else if (mode == XmlTreeMode.Misc)
            {
                InMisc(token);
            }
            else if (mode == XmlTreeMode.Body)
            {
                InBody(token);
            }
            else if (mode == XmlTreeMode.After)
            {
                AfterBody(token);
            }
        }
コード例 #5
0
 /// <summary>
 /// Handles a token that arrives before the doctype. A DOCTYPE token
 /// becomes a document type node, processing instructions and comments
 /// are appended to the document, and any other token that is not
 /// ignorable switches to the Body mode and is processed there.
 /// </summary>
 /// <param name="token">The consumed token.</param>
 void BeforeDoctype(XmlToken token)
 {
     switch (token.Type)
     {
         case XmlTokenType.DOCTYPE:
         {
             var doctypeToken = (XmlDoctypeToken)token;
             var node = new DocumentType();
             node.SystemId = doctypeToken.SystemIdentifier;
             node.PublicId = doctypeToken.PublicIdentifier;
             node.Name = doctypeToken.Name;
             doc.AppendChild(node);
             insert = XmlTreeMode.Body;
             break;
         }

         case XmlTokenType.ProcessingInstruction:
         {
             var piToken = (XmlPIToken)token;
             doc.AppendChild(doc.CreateProcessingInstruction(piToken.Target, piToken.Content));
             break;
         }

         case XmlTokenType.Comment:
         {
             var commentToken = (XmlCommentToken)token;
             doc.AppendChild(doc.CreateComment(commentToken.Data));
             break;
         }

         default:
         {
             // Ignorable tokens are dropped without leaving this mode.
             if (!token.IsIgnorable)
             {
                 insert = XmlTreeMode.Body;
                 InBody(token);
             }

             break;
         }
     }
 }
コード例 #6
0
        /// <summary>
        /// The initial state. An XML declaration is evaluated (standalone
        /// flag, encoding and version); the first other token that is not
        /// ignorable is reported as an error and re-processed in the
        /// prolog state.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        /// <exception cref="ArgumentException">
        /// The declared version is not a number or is 2.0 or greater.
        /// </exception>
        void Initial(XmlToken token)
        {
            if (token.Type == XmlTokenType.Declaration)
            {
                var tok = (XmlDeclarationToken)token;
                standalone = tok.Standalone;

                // The declaration has been consumed, so leave the initial
                // state; a further declaration is then handled (and
                // reported) by the following states instead of silently
                // overwriting the values read here.
                insert = XmlTreeMode.Prolog;

                if (!tok.IsEncodingMissing)
                {
                    SetEncoding(tok.Encoding);
                }

                // The version is always written with '.' as the decimal
                // separator, so it must be parsed culture-independently;
                // otherwise "1.0" is rejected under comma-decimal cultures.
                double ver;
                var parsed = Double.TryParse(
                    tok.Version,
                    System.Globalization.NumberStyles.Float,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out ver);

                if (!parsed || ver >= 2.0)
                {
                    throw new ArgumentException("The given version number is not supported.");
                }
            }
            else if (!token.IsIgnorable)
            {
                RaiseErrorOccurred(ErrorCode.UndefinedMarkupDeclaration);
                insert = XmlTreeMode.Prolog;
                BeforeDoctype(token);
            }
        }
コード例 #7
0
        /// <summary>
        /// Handles the character that follows a "--!" sequence inside a
        /// comment. GT and EOF end the comment without that sequence; in
        /// every other case "--!" is written to the buffer and the comment
        /// goes on.
        /// See http://www.w3.org/TR/REC-xml/#sec-comments.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <returns>The emitted comment token.</returns>
        XmlToken CommentBangEnd(Char c)
        {
            if (c != Specification.MINUS)
            {
                if (c == Specification.GT)
                {
                    return XmlToken.Comment(_stringBuffer.ToString());
                }

                if (c == Specification.NULL)
                {
                    RaiseErrorOccurred(ErrorCode.NULL);
                }
                else if (c == Specification.EOF)
                {
                    // Step back so that the EOF character is read again later.
                    RaiseErrorOccurred(ErrorCode.EOF);
                    _src.Back();
                    return XmlToken.Comment(_stringBuffer.ToString());
                }
            }

            // Every remaining path keeps the "--!" as comment content.
            _stringBuffer.Append(Specification.MINUS);
            _stringBuffer.Append(Specification.MINUS);
            _stringBuffer.Append(Specification.EM);

            if (c == Specification.MINUS)
            {
                return CommentDashEnd(_src.Next);
            }

            // NULL is stored as the replacement character.
            var stored = c == Specification.NULL ? Specification.REPLACEMENT : c;
            _stringBuffer.Append(stored);
            return Comment(_src.Next);
        }
コード例 #8
0
        /// <summary>
        /// Gets the next available token. Once the source has ended only
        /// the EOF token is returned; otherwise the token is read starting
        /// at the current character and the source is advanced afterwards.
        /// </summary>
        /// <returns>The next available token.</returns>
        public XmlToken Get()
        {
            if (!_src.IsEnded)
            {
                var result = Data(_src.Current);
                _src.Advance();
                return result;
            }

            return XmlToken.EOF;
        }
コード例 #9
0
 /// <summary>
 /// Starts a processing instruction. The given character has to be a
 /// valid first character of the target name; it is stored in the
 /// string buffer and the target is read from there on.
 /// See http://www.w3.org/TR/REC-xml/#sec-pi.
 /// </summary>
 /// <param name="c">The next input character.</param>
 /// <returns>The emitted token.</returns>
 /// <exception cref="ArgumentException">
 /// The character cannot start a processing instruction target.
 /// </exception>
 protected XmlToken ProcessingStart(Char c)
 {
     // A PI target is an XML Name, so besides letters '_' and ':' are
     // valid first characters as well. The same name-start check as for
     // element names is used.
     if (c.IsXmlNameStart())
     {
         _stringBuffer.Clear();
         _stringBuffer.Append(c);
         return ProcessingTarget(_src.Next, XmlToken.Processing());
     }

     RaiseErrorOccurred(ErrorCode.AmbiguousOpenTag);
     throw new ArgumentException("Invalid processing instruction.");
 }
コード例 #10
0
        /// <summary>
        /// Looks ahead after a closing square bracket to make sure that
        /// the character data does not contain the sequence ]]&gt;. The
        /// look-ahead is undone before the bracket is emitted.
        /// </summary>
        /// <param name="ch">The character that follows the bracket.</param>
        /// <returns>The character token for the bracket.</returns>
        XmlToken CheckCharacter(Char ch)
        {
            var secondBracket = ch == Specification.SBC;

            // Only a second bracket requires one more character of look-ahead.
            if (secondBracket && _src.Next == Specification.GT)
            {
                throw Errors.Xml(ErrorCode.XmlInvalidCharData);
            }

            if (secondBracket)
            {
                // Undo the additional look-ahead step from above.
                _src.Back();
            }

            // Undo the step that produced ch.
            _src.Back();
            return XmlToken.Character(Specification.SBC);
        }
コード例 #11
0
        /// <summary>
        /// Entry point of the tokenizer for regular content. The character
        /// selects the sub-state; anything without special meaning is
        /// emitted as a character token.
        /// See http://www.w3.org/TR/REC-xml/#sec-logical-struct.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <returns>The emitted token.</returns>
        XmlToken Data(Char c)
        {
            if (c == Specification.AMPERSAND)
            {
                return CharacterReference(_src.Next);
            }

            if (c == Specification.LT)
            {
                return TagOpen(_src.Next);
            }

            if (c == Specification.EOF)
            {
                return XmlToken.EOF;
            }

            if (c == Specification.SBC)
            {
                return CheckCharacter(_src.Next);
            }

            return XmlToken.Character(c);
        }
コード例 #12
0
        /// <summary>
        /// Handles a token in the Misc insertion mode. Comments and
        /// processing instructions are appended to the current node, a
        /// start tag switches to the Body mode and is processed there,
        /// and any other token that is not ignorable is rejected.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void InMisc(XmlToken token)
        {
            var type = token.Type;

            if (type == XmlTokenType.Comment)
            {
                var commentToken = (XmlCommentToken)token;
                var comment = doc.CreateComment(commentToken.Data);
                CurrentNode.AppendChild(comment);
            }
            else if (type == XmlTokenType.ProcessingInstruction)
            {
                var piToken = (XmlPIToken)token;
                var instruction = doc.CreateProcessingInstruction(piToken.Target, piToken.Content);
                CurrentNode.AppendChild(instruction);
            }
            else if (type == XmlTokenType.StartTag)
            {
                insert = XmlTreeMode.Body;
                InBody(token);
            }
            else if (!token.IsIgnorable)
            {
                throw Errors.Xml(ErrorCode.XmlMissingRoot);
            }
        }
コード例 #13
0
        /// <summary>
        /// The initial state. Expects an XML declaration, from which the
        /// standalone flag, the encoding and the version are taken. Any
        /// other token is re-processed in the prolog state.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void Initial(XmlToken token)
        {
            if (token.Type == XmlTokenType.Declaration)
            {
                var tok = (XmlDeclarationToken)token;
                standalone = tok.Standalone;

                // The declaration has been consumed, so leave the initial
                // state. Without this a second declaration would be
                // accepted here and overwrite the values of the first one
                // instead of being handled by the prolog state.
                insert = XmlTreeMode.Prolog;

                if (!tok.IsEncodingMissing)
                {
                    SetEncoding(tok.Encoding);
                }

                if (!CheckVersion(tok.Version))
                {
                    throw Errors.Xml(ErrorCode.XmlDeclarationVersionUnsupported);
                }
            }
            else
            {
                insert = XmlTreeMode.Prolog;
                BeforeDoctype(token);
            }
        }
コード例 #14
0
        /// <summary>
        /// Reads the content of a CDATA section into the string buffer
        /// until the terminating ]]&gt; sequence is found.
        /// See http://www.w3.org/TR/REC-xml/#NT-CData.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <returns>The CDATA token with the collected content.</returns>
        XmlCDataToken CData(Char c)
        {
            _stringBuffer.Clear();

            for (var current = c; ; current = _src.Next)
            {
                if (current == Specification.EOF)
                {
                    throw Errors.Xml(ErrorCode.EOF);
                }

                if (current == Specification.SBC && _src.ContinuesWith("]]>"))
                {
                    // Move over the rest of the terminator.
                    _src.Advance(2);
                    return XmlToken.CData(_stringBuffer.ToString());
                }

                _stringBuffer.Append(current);
            }
        }
コード例 #15
0
        /// <summary>
        /// Handles the character that follows a "--" sequence inside a
        /// comment. GT closes the comment; every other character is
        /// reported as an error and, except for EOF, the comment goes on.
        /// See http://www.w3.org/TR/REC-xml/#sec-comments.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <returns>The emitted comment token.</returns>
        XmlToken CommentEnd(Char c)
        {
            // Additional dashes are consumed in a loop instead of one
            // recursive call per dash, so a long run of dashes in the
            // input cannot exhaust the call stack.
            while (true)
            {
                if (c == Specification.GT)
                {
                    return XmlToken.Comment(stringBuffer.ToString());
                }

                if (c == Specification.NULL)
                {
                    // NULL is stored as the replacement character.
                    RaiseErrorOccurred(ErrorCode.NULL);
                    stringBuffer.Append(Specification.MINUS);
                    stringBuffer.Append(Specification.REPLACEMENT);
                    return Comment(src.Next);
                }

                if (c == Specification.EM)
                {
                    RaiseErrorOccurred(ErrorCode.CommentEndedWithEM);
                    return CommentBangEnd(src.Next);
                }

                if (c != Specification.MINUS)
                {
                    break;
                }

                // One more dash: keep one of them as content and stay in
                // this state for the next character.
                RaiseErrorOccurred(ErrorCode.CommentEndedWithDash);
                stringBuffer.Append(Specification.MINUS);
                c = src.Next;
            }

            if (c == Specification.EOF)
            {
                // Step back so that the EOF character is read again later.
                RaiseErrorOccurred(ErrorCode.EOF);
                src.Back();
                return XmlToken.Comment(stringBuffer.ToString());
            }

            // The "--" did not end the comment, so it is regular content.
            RaiseErrorOccurred(ErrorCode.CommentEndedUnexpected);
            stringBuffer.Append(Specification.MINUS);
            stringBuffer.Append(Specification.MINUS);
            stringBuffer.Append(c);
            return Comment(src.Next);
        }
コード例 #16
0
        /// <summary>
        /// Collects the content of a comment in the string buffer until a
        /// dash or the end of the input is reached.
        /// See http://www.w3.org/TR/REC-xml/#sec-comments.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <returns>The emitted comment token.</returns>
        XmlToken Comment(Char c)
        {
            for (; ; c = src.Next)
            {
                if (c == Specification.MINUS)
                {
                    return CommentDashEnd(src.Next);
                }

                if (c == Specification.EOF)
                {
                    // Step back so that the EOF character is read again later.
                    RaiseErrorOccurred(ErrorCode.EOF);
                    src.Back();
                    return XmlToken.Comment(stringBuffer.ToString());
                }

                if (c == Specification.NULL)
                {
                    // NULL is reported and stored as the replacement character.
                    RaiseErrorOccurred(ErrorCode.NULL);
                    stringBuffer.Append(Specification.REPLACEMENT);
                }
                else
                {
                    stringBuffer.Append(c);
                }
            }
        }
コード例 #17
0
        /// <summary>
        /// Handles a token in the Body insertion mode: elements are
        /// created and closed, comments and processing instructions are
        /// appended to the current node, and misplaced doctypes or
        /// declarations are reported as errors.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        /// <exception cref="ArgumentException">
        /// An end tag appears without an open element or does not match
        /// the name of the current node.
        /// </exception>
        void InBody(XmlToken token)
        {
            switch (token.Type)
            {
            case XmlTokenType.StartTag:
            {
                var tok = (XmlTagToken)token;
                var tag = doc.CreateElement(tok.Name);

                // Attach the element to its parent BEFORE it is pushed on
                // the stack of open elements. The end-tag handling below
                // treats CurrentNode as the top of that stack, so pushing
                // first would append the element to itself.
                CurrentNode.AppendChild(tag);

                if (!tok.IsSelfClosing)
                {
                    open.Add(tag);
                }

                for (int i = 0; i < tok.Attributes.Count; i++)
                {
                    tag.SetAttribute(tok.Attributes[i].Key, tok.Attributes[i].Value);
                }

                break;
            }

            case XmlTokenType.EndTag:
            {
                if (open.Count == 0)
                {
                    throw new ArgumentException("Unexpected end-tag (no current element).");
                }

                var tok = (XmlTagToken)token;

                if (CurrentNode.NodeName != tok.Name)
                {
                    throw new ArgumentException("Mismatched end-tag.");
                }

                open.RemoveAt(open.Count - 1);
                break;
            }

            case XmlTokenType.Comment:
            {
                var tok = (XmlCommentToken)token;
                var com = doc.CreateComment(tok.Data);
                CurrentNode.AppendChild(com);
                break;
            }

            case XmlTokenType.ProcessingInstruction:
            {
                var tok = (XmlPIToken)token;
                var pi  = doc.CreateProcessingInstruction(tok.Target, tok.Content);
                CurrentNode.AppendChild(pi);
                break;
            }

            case XmlTokenType.Character:
            {
                // Character data is not added to the tree yet.
                break;
            }

            case XmlTokenType.EOF:
            {
                // Elements that are still open at the end of the input are
                // reported once and then discarded from the stack.
                if (open.Count != 0)
                {
                    RaiseErrorOccurred(ErrorCode.EOF);
                    open.RemoveRange(0, open.Count);
                }

                break;
            }

            case XmlTokenType.DOCTYPE:
            {
                RaiseErrorOccurred(ErrorCode.DoctypeUnexpected);
                break;
            }

            case XmlTokenType.Declaration:
            {
                RaiseErrorOccurred(ErrorCode.UndefinedMarkupDeclaration);
                break;
            }
            }
        }
コード例 #18
0
        /// <summary>
        /// Called once an &amp; character is being seen. Reads a numeric
        /// (decimal or hexadecimal) or a named reference up to the
        /// terminating semicolon.
        /// </summary>
        /// <param name="c">The next character after the &amp; character.</param>
        /// <returns>The entity token.</returns>
        XmlEntityToken CharacterReference(Char c)
        {
            var buffer = Pool.NewStringBuilder();

            if (c == Specification.NUM)
            {
                c = _src.Next;
                var hex = c == 'x' || c == 'X';

                if (hex)
                {
                    // Skip the 'x' marker and collect the hex digits.
                    c = _src.Next;

                    while (c.IsHex())
                    {
                        buffer.Append(c);
                        c = _src.Next;
                    }
                }
                else
                {
                    while (c.IsDigit())
                    {
                        buffer.Append(c);
                        c = _src.Next;
                    }
                }

                // At least one digit and the terminator are required.
                if (buffer.Length > 0 && c == Specification.SC)
                {
                    return new XmlEntityToken { Value = buffer.ToPool(), IsNumeric = true, IsHex = hex };
                }
            }
            else if (c.IsXmlNameStart())
            {
                do
                {
                    buffer.Append(c);
                    c = _src.Next;
                }
                while (c.IsXmlName());

                if (c == Specification.SC)
                {
                    return new XmlEntityToken { Value = buffer.ToPool() };
                }
            }

            // NOTE(review): ToPool presumably hands the builder back to the
            // pool; it is called here so the buffer is released on the
            // error path as well - confirm against Pool.
            buffer.ToPool();
            throw Errors.Xml(ErrorCode.CharacterReferenceNotTerminated);
        }

        #endregion

        #region Tags

        /// <summary>
        /// Decides what kind of markup starts after an opening angle
        /// bracket: a markup declaration, a processing instruction or XML
        /// declaration, an end tag or a start tag.
        /// See http://www.w3.org/TR/REC-xml/#sec-starttags.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <returns>The emitted token.</returns>
        XmlToken TagOpen(Char c)
        {
            if (c == Specification.EM)
            {
                return MarkupDeclaration(_src.Next);
            }
            else if (c == Specification.QM)
            {
                var first = _src.Next;

                if (!_src.ContinuesWith(Tags.XML, false))
                {
                    return ProcessingStart(first);
                }

                // Move over the rest of the "xml" name.
                _src.Advance(2);
                return DeclarationStart(_src.Next);
            }
            else if (c == Specification.SOLIDUS)
            {
                return TagEnd(_src.Next);
            }
            else if (c.IsXmlNameStart())
            {
                _stringBuffer.Clear();
                _stringBuffer.Append(c);
                return TagName(_src.Next, XmlToken.OpenTag());
            }

            throw Errors.Xml(ErrorCode.XmlInvalidStartTag);
        }

        /// <summary>
        /// Reads an end tag: a name, optional whitespace and the closing
        /// angle bracket.
        /// See http://www.w3.org/TR/REC-xml/#dt-etag.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <returns>The close tag token.</returns>
        XmlToken TagEnd(Char c)
        {
            if (c.IsXmlNameStart())
            {
                _stringBuffer.Clear();
                _stringBuffer.Append(c);

                // Collect the remaining name characters.
                for (c = _src.Next; c.IsXmlName(); c = _src.Next)
                {
                    _stringBuffer.Append(c);
                }

                // Whitespace is allowed between the name and the bracket.
                while (c.IsSpaceCharacter())
                {
                    c = _src.Next;
                }

                if (c == Specification.GT)
                {
                    var closing = XmlToken.CloseTag();
                    closing.Name = _stringBuffer.ToString();
                    return closing;
                }
            }

            throw Errors.Xml(c == Specification.EOF ? ErrorCode.EOF : ErrorCode.XmlInvalidEndTag);
        }

        /// <summary>
        /// Reads the remaining characters of a tag name, stores the name
        /// in the tag token and continues depending on the character that
        /// ended the name.
        /// See http://www.w3.org/TR/REC-xml/#NT-Name.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="tag">The current tag token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken TagName(Char c, XmlTagToken tag)
        {
            for (; c.IsXmlName(); c = _src.Next)
            {
                _stringBuffer.Append(c);
            }

            tag.Name = _stringBuffer.ToString();

            if (c == Specification.EOF)
            {
                throw Errors.Xml(ErrorCode.EOF);
            }

            if (c == Specification.GT)
            {
                return tag;
            }

            if (c.IsSpaceCharacter())
            {
                return AttributeBeforeName(_src.Next, tag);
            }

            if (c == Specification.SOLIDUS)
            {
                return TagSelfClosing(_src.Next, tag);
            }

            throw Errors.Xml(ErrorCode.XmlInvalidName);
        }

        /// <summary>
        /// Marks the tag as self-closing and expects the closing angle
        /// bracket as the next character.
        /// See http://www.w3.org/TR/REC-xml/#d0e2480.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="tag">The current tag token.</param>
        /// <returns>The given tag token.</returns>
        XmlToken TagSelfClosing(Char c, XmlTagToken tag)
        {
            tag.IsSelfClosing = true;

            if (c != Specification.GT)
            {
                throw Errors.Xml(c == Specification.EOF ? ErrorCode.EOF : ErrorCode.XmlInvalidName);
            }

            return tag;
        }

        /// <summary>
        /// Decides which markup declaration follows: a comment, a doctype
        /// or a CDATA section. Anything else is rejected.
        /// See http://www.w3.org/TR/REC-xml/#dt-markup.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <returns>The emitted token.</returns>
        XmlToken MarkupDeclaration(Char c)
        {
            if (_src.ContinuesWith("--"))
            {
                _src.Advance();
                return CommentStart(_src.Next);
            }

            if (_src.ContinuesWith(Tags.DOCTYPE, false))
            {
                _src.Advance(6);
                return Doctype(_src.Next);
            }

            if (!_src.ContinuesWith(CDATA, false))
            {
                throw Errors.Xml(ErrorCode.UndefinedMarkupDeclaration);
            }

            _src.Advance(6);
            return CData(_src.Next);
        }

        #endregion

        #region XML Declaration

        /// <summary>
        /// Starts the XML declaration. If the "xml" name is not followed
        /// by whitespace it is treated as the beginning of a processing
        /// instruction target; otherwise the version attribute is expected.
        /// See http://www.w3.org/TR/REC-xml/#NT-XMLDecl.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationStart(Char c)
        {
            if (c.IsSpaceCharacter())
            {
                // Skip this and every following whitespace character.
                c = _src.Next;

                while (c.IsSpaceCharacter())
                {
                    c = _src.Next;
                }

                if (!_src.ContinuesWith(AttributeNames.VERSION, false))
                {
                    throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
                }

                _src.Advance(6);
                return DeclarationVersionAfterName(_src.Next, XmlToken.Declaration());
            }

            // Not a declaration: the target of a PI merely starts with "xml".
            _stringBuffer.Clear();
            _stringBuffer.Append(Tags.XML);
            return ProcessingTarget(c, XmlToken.Processing());
        }

        /// <summary>
        /// Skips whitespace after the version attribute name and expects
        /// the Specification.EQ character.
        /// See http://www.w3.org/TR/REC-xml/#NT-VersionInfo.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="decl">The current declaration token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationVersionAfterName(Char c, XmlDeclarationToken decl)
        {
            var current = c;

            while (current.IsSpaceCharacter())
            {
                current = _src.Next;
            }

            if (current != Specification.EQ)
            {
                throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
            }

            return DeclarationVersionBeforeValue(_src.Next, decl);
        }

        /// <summary>
        /// Skips whitespace before the version value and expects an
        /// opening quote (Specification.DQ or Specification.SQ).
        /// See http://www.w3.org/TR/REC-xml/#NT-VersionInfo.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="decl">The current declaration token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationVersionBeforeValue(Char c, XmlDeclarationToken decl)
        {
            var quote = c;

            while (quote.IsSpaceCharacter())
            {
                quote = _src.Next;
            }

            if (quote != Specification.DQ && quote != Specification.SQ)
            {
                throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
            }

            // The value is collected in a fresh buffer.
            _stringBuffer.Clear();
            return DeclarationVersionValue(_src.Next, quote, decl);
        }

        /// <summary>
        /// Reads the version value up to the closing quote and stores it
        /// in the declaration token.
        /// See http://www.w3.org/TR/REC-xml/#NT-VersionInfo.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="q">The quote character.</param>
        /// <param name="decl">The current declaration token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationVersionValue(Char c, Char q, XmlDeclarationToken decl)
        {
            for (; c != q; c = _src.Next)
            {
                if (c == Specification.EOF)
                {
                    throw Errors.Xml(ErrorCode.EOF);
                }

                _stringBuffer.Append(c);
            }

            decl.Version = _stringBuffer.ToString();
            var following = _src.Next;

            // Without whitespace only the end of the declaration may follow.
            if (!following.IsSpaceCharacter())
            {
                return DeclarationEnd(following, decl);
            }

            return DeclarationAfterVersion(following, decl);
        }

        /// <summary>
        /// Skips whitespace after the version value and continues with
        /// the encoding attribute, the standalone attribute or the end of
        /// the declaration.
        /// See http://www.w3.org/TR/REC-xml/#NT-VersionNum.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="decl">The current declaration token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationAfterVersion(Char c, XmlDeclarationToken decl)
        {
            for (; c.IsSpaceCharacter(); c = _src.Next)
            {
            }

            if (_src.ContinuesWith(AttributeNames.ENCODING, false))
            {
                _src.Advance(7);
                return DeclarationEncodingAfterName(_src.Next, decl);
            }

            if (_src.ContinuesWith(AttributeNames.STANDALONE, false))
            {
                _src.Advance(9);
                return DeclarationStandaloneAfterName(_src.Next, decl);
            }

            return DeclarationEnd(c, decl);
        }

        /// <summary>
        /// Skips whitespace after the encoding attribute name and expects
        /// the Specification.EQ character.
        /// See http://www.w3.org/TR/REC-xml/#NT-EncodingDecl.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="decl">The current declaration token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationEncodingAfterName(Char c, XmlDeclarationToken decl)
        {
            var current = c;

            while (current.IsSpaceCharacter())
            {
                current = _src.Next;
            }

            if (current != Specification.EQ)
            {
                throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
            }

            return DeclarationEncodingBeforeValue(_src.Next, decl);
        }

        /// <summary>
        /// Skips whitespace before the encoding value, expects an opening
        /// quote and requires the value to start with a letter.
        /// See http://www.w3.org/TR/REC-xml/#NT-EncodingDecl.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="decl">The current declaration token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationEncodingBeforeValue(Char c, XmlDeclarationToken decl)
        {
            var quote = c;

            while (quote.IsSpaceCharacter())
            {
                quote = _src.Next;
            }

            if (quote != Specification.DQ && quote != Specification.SQ)
            {
                throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
            }

            _stringBuffer.Clear();
            var first = _src.Next;

            if (!first.IsLetter())
            {
                throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
            }

            return DeclarationEncodingValue(first, quote, decl);
        }

        /// <summary>
        /// Reads the encoding value up to the closing quote and stores it
        /// in the declaration token. Only ASCII letters and digits, the
        /// dot, the underscore and the minus are accepted.
        /// See http://www.w3.org/TR/REC-xml/#NT-EncodingDecl.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="q">The quote character.</param>
        /// <param name="decl">The current declaration token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationEncodingValue(Char c, Char q, XmlDeclarationToken decl)
        {
            while (true)
            {
                var allowed = c.IsAlphanumericAscii()
                    || c == Specification.DOT
                    || c == Specification.UNDERSCORE
                    || c == Specification.MINUS;

                if (!allowed)
                {
                    throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
                }

                _stringBuffer.Append(c);
                c = _src.Next;

                if (c == q)
                {
                    break;
                }
            }

            decl.Encoding = _stringBuffer.ToString();
            var following = _src.Next;

            // Without whitespace only the end of the declaration may follow.
            if (!following.IsSpaceCharacter())
            {
                return DeclarationEnd(following, decl);
            }

            return DeclarationAfterEncoding(following, decl);
        }

        /// <summary>
        /// Skips whitespace after the encoding value and continues with
        /// the standalone attribute or the end of the declaration.
        /// See http://www.w3.org/TR/REC-xml/#NT-SDDecl.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="decl">The current declaration token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationAfterEncoding(Char c, XmlDeclarationToken decl)
        {
            for (; c.IsSpaceCharacter(); c = _src.Next)
            {
            }

            if (!_src.ContinuesWith(AttributeNames.STANDALONE, false))
            {
                return DeclarationEnd(c, decl);
            }

            _src.Advance(9);
            return DeclarationStandaloneAfterName(_src.Next, decl);
        }

        /// <summary>
        /// Skips whitespace after the standalone attribute name and
        /// expects the Specification.EQ character.
        /// See http://www.w3.org/TR/REC-xml/#NT-SDDecl.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="decl">The current declaration token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationStandaloneAfterName(Char c, XmlDeclarationToken decl)
        {
            var current = c;

            while (current.IsSpaceCharacter())
            {
                current = _src.Next;
            }

            if (current != Specification.EQ)
            {
                throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
            }

            return DeclarationStandaloneBeforeValue(_src.Next, decl);
        }

        /// <summary>
        /// Skips whitespace before the standalone value and expects an
        /// opening quote (Specification.DQ or Specification.SQ).
        /// See http://www.w3.org/TR/REC-xml/#NT-SDDecl.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="decl">The current declaration token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DeclarationStandaloneBeforeValue(Char c, XmlDeclarationToken decl)
        {
            var quote = c;

            while (quote.IsSpaceCharacter())
            {
                quote = _src.Next;
            }

            if (quote != Specification.DQ && quote != Specification.SQ)
            {
                throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
            }

            // The value is collected in a fresh buffer.
            _stringBuffer.Clear();
            return DeclarationStandaloneValue(_src.Next, quote, decl);
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#NT-SDDecl.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="q">The quote character.</param>
        /// <param name="decl">The current declaration token.</param>
        XmlToken DeclarationStandaloneValue(Char c, Char q, XmlDeclarationToken decl)
        {
            // Buffer everything up to the closing quote; running out of input is an error.
            for (; c != q; c = _src.Next)
            {
                if (c == Specification.EOF)
                {
                    throw Errors.Xml(ErrorCode.EOF);
                }

                _stringBuffer.Append(c);
            }

            var value = _stringBuffer.ToString();
            var isYes = value.Equals(YES);

            // Only the two literal values YES and NO are accepted.
            if (!isYes && !value.Equals(NO))
            {
                throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
            }

            decl.Standalone = isYes;
            return DeclarationEnd(_src.Next, decl);
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#NT-XMLDecl.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="decl">The current declaration token.</param>
        XmlDeclarationToken DeclarationEnd(Char c, XmlDeclarationToken decl)
        {
            // Trailing whitespace is allowed before the closing sequence.
            for (; c.IsSpaceCharacter(); c = _src.Next)
            {
            }

            // The declaration must close with Specification.QM directly followed by
            // Specification.GT; the second character is only read when the first matched.
            if (c == Specification.QM && _src.Next == Specification.GT)
            {
                return decl;
            }

            throw Errors.Xml(ErrorCode.XmlDeclarationInvalid);
        }

        #endregion

        #region Doctype

        /// <summary>
        /// See 8.2.4.52 DOCTYPE state
        /// </summary>
        /// <param name="c">The next input character.</param>
        XmlToken Doctype(Char c)
        {
            // The first character handed to this state must be whitespace.
            if (!c.IsSpaceCharacter())
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            return DoctypeNameBefore(_src.Next);
        }

        /// <summary>
        /// See 8.2.4.53 Before DOCTYPE name state
        /// </summary>
        /// <param name="c">The next input character.</param>
        XmlToken DoctypeNameBefore(Char c)
        {
            // Skip any additional whitespace in front of the name.
            for (; c.IsSpaceCharacter(); c = _src.Next)
            {
            }

            // The name has to begin with a valid XML name start character.
            if (!c.IsXmlNameStart())
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            // Seed the buffer with the first name character and create the token
            // that the following states fill in.
            _stringBuffer.Clear();
            _stringBuffer.Append(c);
            var following = _src.Next;
            var doctype   = XmlToken.Doctype();
            return DoctypeName(following, doctype);
        }

        /// <summary>
        /// See 8.2.4.54 DOCTYPE name state
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="doctype">The current doctype token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DoctypeName(Char c, XmlDoctypeToken doctype)
        {
            // Append the remaining name characters to the buffer.
            for (; c.IsXmlName(); c = _src.Next)
            {
                _stringBuffer.Append(c);
            }

            // Store the name and leave the buffer empty for the identifier states.
            doctype.Name = _stringBuffer.ToString();
            _stringBuffer.Clear();

            // The doctype is already complete if it closes right after the name.
            if (c == Specification.GT)
            {
                return doctype;
            }

            if (!c.IsSpaceCharacter())
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            return DoctypeNameAfter(_src.Next, doctype);
        }

        /// <summary>
        /// See 8.2.4.55 After DOCTYPE name state
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="doctype">The current doctype token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DoctypeNameAfter(Char c, XmlDoctypeToken doctype)
        {
            // Skip whitespace after the name.
            for (; c.IsSpaceCharacter(); c = _src.Next)
            {
            }

            // No external identifier and no internal subset.
            if (c == Specification.GT)
            {
                return doctype;
            }

            // External identifier introduced by the PUBLIC keyword.
            if (_src.ContinuesWith(PUBLIC, false))
            {
                _src.Advance(5);
                return DoctypePublic(_src.Next, doctype);
            }

            // External identifier introduced by the SYSTEM keyword.
            if (_src.ContinuesWith(SYSTEM, false))
            {
                _src.Advance(5);
                return DoctypeSystem(_src.Next, doctype);
            }

            // The only remaining legal continuation is an internal subset.
            if (c != Specification.SBO)
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            _src.Advance();
            ScanInternalSubset(doctype);
            return DoctypeAfter(_src.Next, doctype);
        }

        /// <summary>
        /// See 8.2.4.56 After DOCTYPE public keyword state
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="doctype">The current doctype token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DoctypePublic(Char c, XmlDoctypeToken doctype)
        {
            // At least one whitespace character must follow the keyword.
            if (!c.IsSpaceCharacter())
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            // c is known to be whitespace here, so read on until it no longer is.
            do
            {
                c = _src.Next;
            }
            while (c.IsSpaceCharacter());

            // The public identifier has to be quoted.
            if (c != Specification.DQ && c != Specification.SQ)
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            doctype.PublicIdentifier = String.Empty;
            return DoctypePublicIdentifierValue(_src.Next, c, doctype);
        }

        /// <summary>
        /// See 8.2.4.58 DOCTYPE public identifier (double-quoted) state
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="q">The closing character.</param>
        /// <param name="doctype">The current doctype token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DoctypePublicIdentifierValue(Char c, Char q, XmlDoctypeToken doctype)
        {
            // Buffer characters up to the closing quote; each one must pass IsPubidChar.
            for (; c != q; c = _src.Next)
            {
                if (!c.IsPubidChar())
                {
                    throw Errors.Xml(ErrorCode.XmlInvalidPubId);
                }

                _stringBuffer.Append(c);
            }

            // Store the identifier and reset the buffer for the next state.
            doctype.PublicIdentifier = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return DoctypePublicIdentifierAfter(_src.Next, doctype);
        }

        /// <summary>
        /// See 8.2.4.60 After DOCTYPE public identifier state
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="doctype">The current doctype token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DoctypePublicIdentifierAfter(Char c, XmlDoctypeToken doctype)
        {
            // The doctype may close immediately after the public identifier.
            if (c == Specification.GT)
            {
                return doctype;
            }

            // Otherwise whitespace is required before anything else may follow.
            if (!c.IsSpaceCharacter())
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            return DoctypeBetween(_src.Next, doctype);
        }

        /// <summary>
        /// See 8.2.4.61 Between DOCTYPE public and system identifiers state
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="doctype">The current doctype token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DoctypeBetween(Char c, XmlDoctypeToken doctype)
        {
            // Skip whitespace between the public and the system identifier.
            for (; c.IsSpaceCharacter(); c = _src.Next)
            {
            }

            // Closing here means no system identifier was given.
            if (c == Specification.GT)
            {
                return doctype;
            }

            // Anything else must be the opening quote of the system identifier.
            var opensQuote = c == Specification.DQ || c == Specification.SQ;

            if (!opensQuote)
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            doctype.SystemIdentifier = String.Empty;
            return DoctypeSystemIdentifierValue(_src.Next, c, doctype);
        }

        /// <summary>
        /// See 8.2.4.62 After DOCTYPE system keyword state
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="doctype">The current doctype token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DoctypeSystem(Char c, XmlDoctypeToken doctype)
        {
            // At least one whitespace character must follow the keyword.
            if (!c.IsSpaceCharacter())
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            // c is known to be whitespace here, so read on until it no longer is.
            do
            {
                c = _src.Next;
            }
            while (c.IsSpaceCharacter());

            // The system identifier has to be quoted.
            if (c != Specification.DQ && c != Specification.SQ)
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            doctype.SystemIdentifier = String.Empty;
            return DoctypeSystemIdentifierValue(_src.Next, c, doctype);
        }

        /// <summary>
        /// See 8.2.4.64 DOCTYPE system identifier (double-quoted) state
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="q">The quote character.</param>
        /// <param name="doctype">The current doctype token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DoctypeSystemIdentifierValue(Char c, Char q, XmlDoctypeToken doctype)
        {
            // Buffer everything up to the closing quote; running out of input is an error.
            for (; c != q; c = _src.Next)
            {
                if (c == Specification.EOF)
                {
                    throw Errors.Xml(ErrorCode.EOF);
                }

                _stringBuffer.Append(c);
            }

            // Store the identifier and reset the buffer for later states.
            doctype.SystemIdentifier = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return DoctypeSystemIdentifierAfter(_src.Next, doctype);
        }

        /// <summary>
        /// See 8.2.4.66 After DOCTYPE system identifier state
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="doctype">The current doctype token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DoctypeSystemIdentifierAfter(Char c, XmlDoctypeToken doctype)
        {
            // Skip whitespace after the system identifier.
            for (; c.IsSpaceCharacter(); c = _src.Next)
            {
            }

            // Without an internal subset go straight to the finalizer.
            if (c != Specification.SBO)
            {
                return DoctypeAfter(c, doctype);
            }

            // Scan the internal subset, then finish with the character after it.
            _src.Advance();
            ScanInternalSubset(doctype);
            return DoctypeAfter(_src.Next, doctype);
        }

        /// <summary>
        /// The doctype finalizer.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="doctype">The current doctype token.</param>
        /// <returns>The emitted token.</returns>
        XmlToken DoctypeAfter(Char c, XmlDoctypeToken doctype)
        {
            // Trailing whitespace is allowed before the doctype closes.
            for (; c.IsSpaceCharacter(); c = _src.Next)
            {
            }

            // Nothing but the closing character may follow at this point.
            if (c != Specification.GT)
            {
                throw Errors.Xml(ErrorCode.DoctypeInvalid);
            }

            return doctype;
        }

        #endregion

        #region Attributes

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#NT-Attribute.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="tag">The current tag token.</param>
        XmlToken AttributeBeforeName(Char c, XmlTagToken tag)
        {
            // Skip whitespace in front of the next attribute or the end of the tag.
            for (; c.IsSpaceCharacter(); c = _src.Next)
            {
            }

            // The tag may end here, either self-closing or as a regular tag.
            if (c == Specification.SOLIDUS)
            {
                return TagSelfClosing(_src.Next, tag);
            }

            if (c == Specification.GT)
            {
                return tag;
            }

            if (c == Specification.EOF)
            {
                throw Errors.Xml(ErrorCode.EOF);
            }

            // Everything else has to start an attribute name.
            if (!c.IsXmlNameStart())
            {
                throw Errors.Xml(ErrorCode.XmlInvalidAttribute);
            }

            _stringBuffer.Clear();
            _stringBuffer.Append(c);
            return AttributeName(_src.Next, tag);
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#NT-Attribute.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="tag">The current tag token.</param>
        XmlToken AttributeName(Char c, XmlTagToken tag)
        {
            // Collect the remaining characters of the attribute name.
            while (c.IsXmlName())
            {
                _stringBuffer.Append(c);
                c = _src.Next;
            }

            var name = _stringBuffer.ToString();

            // An attribute name may appear only once per tag. Compare against the
            // recorded names instead of the stored values, so that an earlier
            // attribute with an empty value (a="" a="x") is detected as well.
            for (var i = 0; i < tag.Attributes.Count; i++)
            {
                if (name.Equals(tag.Attributes[i].Key))
                {
                    throw Errors.Xml(ErrorCode.XmlUniqueAttribute);
                }
            }

            tag.AddAttribute(name);

            // Whitespace is allowed between the name and the equals character.
            while (c.IsSpaceCharacter())
            {
                c = _src.Next;
            }

            if (c == Specification.EQ)
            {
                return(AttributeBeforeValue(_src.Next, tag));
            }

            // An attribute without a value is not allowed.
            throw Errors.Xml(ErrorCode.XmlInvalidAttribute);
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#NT-Attribute.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="tag">The current tag token.</param>
        XmlToken AttributeBeforeValue(Char c, XmlTagToken tag)
        {
            // Skip whitespace in front of the quoted value.
            for (; c.IsSpaceCharacter(); c = _src.Next)
            {
            }

            var opensQuote = c == Specification.DQ || c == Specification.SQ;

            // Attribute values must always be quoted.
            if (!opensQuote)
            {
                throw Errors.Xml(ErrorCode.XmlInvalidAttribute);
            }

            // Start from an empty buffer; c is passed on as the terminating quote.
            _stringBuffer.Clear();
            return AttributeValue(_src.Next, c, tag);
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#NT-Attribute.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="q">The quote character.</param>
        /// <param name="tag">The current tag token.</param>
        XmlToken AttributeValue(Char c, Char q, XmlTagToken tag)
        {
            // Read up to the closing quote, one character per iteration.
            for (; c != q; c = _src.Next)
            {
                // Characters that can never be part of an attribute value.
                if (c == Specification.EOF)
                {
                    throw Errors.Xml(ErrorCode.EOF);
                }

                if (c == Specification.LT)
                {
                    throw Errors.Xml(ErrorCode.XmlLtInAttributeValue);
                }

                if (c != Specification.AMPERSAND)
                {
                    _stringBuffer.Append(c);
                    continue;
                }

                // A reference: tokenize it, resolve it and append the replacement text.
                var reference = CharacterReference(_src.Next);
                _stringBuffer.Append(GetEntity(reference));
            }

            tag.SetAttributeValue(_stringBuffer.ToString());
            return AttributeAfterValue(_src.Next, tag);
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#NT-Attribute.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="tag">The current tag token.</param>
        XmlToken AttributeAfterValue(Char c, XmlTagToken tag)
        {
            // Whitespace: another attribute or the end of the tag may follow.
            if (c.IsSpaceCharacter())
            {
                return AttributeBeforeName(_src.Next, tag);
            }

            // The tag is being closed as a self-closing tag.
            if (c == Specification.SOLIDUS)
            {
                return TagSelfClosing(_src.Next, tag);
            }

            // Any character other than the closing one is invalid directly after a value.
            if (c != Specification.GT)
            {
                throw Errors.Xml(ErrorCode.XmlInvalidAttribute);
            }

            return tag;
        }

        #endregion

        #region Processing Instruction

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#sec-pi.
        /// </summary>
        /// <param name="c">The next input character.</param>
        XmlToken ProcessingStart(Char c)
        {
            // The target has to begin with a valid XML name start character.
            if (!c.IsXmlNameStart())
            {
                throw Errors.Xml(ErrorCode.XmlInvalidPI);
            }

            // Seed the buffer with the first target character and create the token.
            _stringBuffer.Clear();
            _stringBuffer.Append(c);
            var following = _src.Next;
            var pi        = XmlToken.Processing();
            return ProcessingTarget(following, pi);
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#sec-pi.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="pi">The processing instruction token.</param>
        XmlToken ProcessingTarget(Char c, XmlPIToken pi)
        {
            // Append the remaining target characters to the buffer.
            for (; c.IsXmlName(); c = _src.Next)
            {
                _stringBuffer.Append(c);
            }

            pi.Target = _stringBuffer.ToString();
            _stringBuffer.Clear();

            // A target equal to Tags.XML (in any casing) is rejected.
            if (String.Equals(pi.Target, Tags.XML, StringComparison.OrdinalIgnoreCase))
            {
                throw Errors.Xml(ErrorCode.XmlInvalidPI);
            }

            // Instruction without content: Specification.QM must be directly
            // followed by Specification.GT.
            if (c == Specification.QM)
            {
                if (_src.Next == Specification.GT)
                {
                    return pi;
                }

                throw Errors.Xml(ErrorCode.XmlInvalidPI);
            }

            // Otherwise whitespace has to separate the target from the content.
            if (!c.IsSpaceCharacter())
            {
                throw Errors.Xml(ErrorCode.XmlInvalidPI);
            }

            return ProcessingContent(_src.Next, pi);
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#sec-pi.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="pi">The processing instruction token.</param>
        XmlToken ProcessingContent(Char c, XmlPIToken pi)
        {
            while (c != Specification.EOF)
            {
                // Ordinary content character: buffer it and move on.
                if (c != Specification.QM)
                {
                    _stringBuffer.Append(c);
                    c = _src.Next;
                    continue;
                }

                // Specification.QM only ends the instruction when Specification.GT
                // comes right after it.
                c = _src.Next;

                if (c == Specification.GT)
                {
                    pi.Content = _stringBuffer.ToString();
                    return pi;
                }

                // Not the terminator: keep the character as content. The character
                // read after it is examined by the next iteration.
                _stringBuffer.Append(Specification.QM);
            }

            // The input ended before the instruction was closed.
            throw Errors.Xml(ErrorCode.EOF);
        }

        #endregion

        #region Comments

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#sec-comments.
        /// </summary>
        /// <param name="c">The next input character.</param>
        XmlToken CommentStart(Char c)
        {
            // Comment content is collected in the shared buffer, so empty it first.
            _stringBuffer.Clear();

            var comment = Comment(c);
            return comment;
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#sec-comments.
        /// </summary>
        /// <param name="c">The next input character.</param>
        XmlToken Comment(Char c)
        {
            // Buffer comment content for as long as the characters pass IsXmlChar.
            for (; c.IsXmlChar(); c = _src.Next)
            {
                // A dash might begin the end sequence; let the dash state decide.
                if (c == Specification.MINUS)
                {
                    return CommentDash(_src.Next);
                }

                _stringBuffer.Append(c);
            }

            // A character that fails IsXmlChar was reached before the comment closed.
            throw Errors.Xml(ErrorCode.XmlInvalidComment);
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#sec-comments.
        /// </summary>
        /// <param name="c">The next input character.</param>
        XmlToken CommentDash(Char c)
        {
            // A second dash in a row: the comment has to end now.
            if (c == Specification.MINUS)
            {
                return(CommentEnd(_src.Next));
            }

            // The dash consumed by the comment state was a lone dash and therefore
            // ordinary content. It has not been buffered yet, so append it here -
            // otherwise it would silently vanish from the comment data.
            _stringBuffer.Append(Specification.MINUS);
            return(Comment(c));
        }

        /// <summary>
        /// More http://www.w3.org/TR/REC-xml/#sec-comments.
        /// </summary>
        /// <param name="c">The next input character.</param>
        XmlToken CommentEnd(Char c)
        {
            // After two dashes only the closing character is acceptable.
            if (c != Specification.GT)
            {
                throw Errors.Xml(ErrorCode.XmlInvalidComment);
            }

            var data = _stringBuffer.ToString();
            return XmlToken.Comment(data);
        }

        #endregion

        #region Helpers

        /// <summary>
        /// Scans the internal subset, i.e. the DTD in [] of the current source.
        /// </summary>
        /// <param name="doctype">The doctype which contains the subset.</param>
        void ScanInternalSubset(XmlDoctypeToken doctype)
        {
            // The DTD parser works on the tokenizer's DTD container and source.
            var parser = new DtdParser(_dtd, _src)
            {
                IsInternal = true
            };

            // Forward errors raised by the DTD parser through this tokenizer.
            parser.ErrorOccurred += (sender, args) => RaiseErrorOccurred(sender, args);
            parser.Parse();

            // Store the text of the parse result on the doctype token.
            doctype.InternalSubset = parser.Result.Text;
        }

        #endregion
    }
}
コード例 #19
0
ファイル: DtdTokenizer.cs プロジェクト: Rajbandi/AngleSharp
        /// <summary>
        /// Converts a general XML token into the corresponding DTD token.
        /// Only processing instructions, comments and the end-of-file token
        /// are accepted; any other token raises an ArgumentException.
        /// </summary>
        /// <param name="xmlToken">The XML token to convert.</param>
        /// <returns>The DTD token wrapping or replacing the given token.</returns>
        DtdToken Rework(XmlToken xmlToken)
        {
            var instruction = xmlToken as XmlPIToken;

            if (instruction != null)
            {
                return new DtdPIToken(instruction);
            }

            var comment = xmlToken as XmlCommentToken;

            if (comment != null)
            {
                return new DtdCommentToken(comment);
            }

            if (xmlToken is XmlEndOfFileToken)
            {
                return DtdToken.EOF;
            }

            throw new ArgumentException("The received token is not valid for a DTD.");
        }
コード例 #20
0
        /// <summary>
        /// In the body state - no doctypes and declarations allowed.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void InBody(XmlToken token)
        {
            switch (token.Type)
            {
                // Tokens that must not show up while inside the root element.
                case XmlTokenType.EOF:
                    throw Errors.Xml(ErrorCode.EOF);

                case XmlTokenType.DOCTYPE:
                    throw Errors.Xml(ErrorCode.XmlDoctypeAfterContent);

                case XmlTokenType.Declaration:
                    throw Errors.Xml(ErrorCode.XmlDeclarationMisplaced);

                // Comments and processing instructions are handled by the misc state.
                case XmlTokenType.Comment:
                case XmlTokenType.ProcessingInstruction:
                    InMisc(token);
                    break;

                // Textual content is appended to the current node.
                case XmlTokenType.Character:
                {
                    var characters = (XmlCharacterToken)token;
                    CurrentNode.AppendText(characters.Data);
                    break;
                }

                case XmlTokenType.CData:
                {
                    var section = (XmlCDataToken)token;
                    CurrentNode.AppendText(section.Data);
                    break;
                }

                case XmlTokenType.Entity:
                {
                    var reference = (XmlEntityToken)token;
                    var text      = tokenizer.GetEntity(reference);
                    CurrentNode.AppendText(text);
                    break;
                }

                case XmlTokenType.StartTag:
                {
                    var start   = (XmlTagToken)token;
                    var element = doc.CreateElement(start.Name);
                    CurrentNode.AppendChild(element);

                    if (start.IsSelfClosing)
                    {
                        // A self-closing element with no open elements left:
                        // switch to the after mode.
                        if (open.Count == 0)
                        {
                            insert = XmlTreeMode.After;
                        }
                    }
                    else
                    {
                        open.Add(element);
                    }

                    var attributes = start.Attributes;

                    for (var index = 0; index < attributes.Count; index++)
                    {
                        var attribute = attributes[index];
                        element.SetAttribute(attribute.Key, attribute.Value.Trim());
                    }

                    break;
                }

                case XmlTokenType.EndTag:
                {
                    var end = (XmlTagToken)token;

                    // The end tag has to match the current node's name.
                    if (CurrentNode.NodeName != end.Name)
                    {
                        throw Errors.Xml(ErrorCode.TagClosingMismatch);
                    }

                    open.RemoveAt(open.Count - 1);

                    // No open elements left: switch to the after mode.
                    if (open.Count == 0)
                    {
                        insert = XmlTreeMode.After;
                    }

                    break;
                }
            }
        }
コード例 #21
0
        /// <summary>
        /// Handles a token while in the body state: builds elements, comments
        /// and processing instructions, and reports misplaced doctypes and
        /// declarations as errors.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void InBody(XmlToken token)
        {
            switch (token.Type)
            {
                case XmlTokenType.StartTag:
                {
                    var tok = (XmlTagToken)token;
                    var tag = doc.CreateElement(tok.Name);

                    // Attach the element to its parent BEFORE pushing it onto the
                    // stack of open elements. The end-tag handling below treats
                    // CurrentNode as the most recently opened element, so pushing
                    // first would append the element to itself.
                    // NOTE(review): CurrentNode is defined elsewhere - confirm.
                    CurrentNode.AppendChild(tag);

                    if (!tok.IsSelfClosing)
                        open.Add(tag);

                    for (int i = 0; i < tok.Attributes.Count; i++)
                        tag.SetAttribute(tok.Attributes[i].Key, tok.Attributes[i].Value);

                    break;
                }
                case XmlTokenType.EndTag:
                {
                    if (open.Count == 0)
                        throw new ArgumentException("Unexpected end-tag (no current element).");

                    var tok = (XmlTagToken)token;

                    // The end tag has to match the current node's name.
                    if (CurrentNode.NodeName != tok.Name)
                        throw new ArgumentException("Mismatched end-tag.");

                    open.RemoveAt(open.Count - 1);
                    break;
                }
                case XmlTokenType.Comment:
                {
                    var tok = (XmlCommentToken)token;
                    var com = doc.CreateComment(tok.Data);
                    CurrentNode.AppendChild(com);
                    break;
                }
                case XmlTokenType.ProcessingInstruction:
                {
                    var tok = (XmlPIToken)token;
                    var pi = doc.CreateProcessingInstruction(tok.Target, tok.Content);
                    CurrentNode.AppendChild(pi);
                    break;
                }
                case XmlTokenType.Character:
                {
                    // TODO: character data is not appended to the current node yet.
                    break;
                }
                case XmlTokenType.EOF:
                {
                    // Elements that are still open at the end of the input are
                    // reported as an error and discarded from the stack.
                    if (open.Count != 0)
                    {
                        RaiseErrorOccurred(ErrorCode.EOF);
                        open.RemoveRange(0, open.Count);
                    }
                    break;
                }
                case XmlTokenType.DOCTYPE:
                {
                    RaiseErrorOccurred(ErrorCode.DoctypeUnexpected);
                    break;
                }
                case XmlTokenType.Declaration:
                {
                    RaiseErrorOccurred(ErrorCode.UndefinedMarkupDeclaration);
                    break;
                }
            }
        }
コード例 #22
0
ファイル: XmlParser.cs プロジェクト: rrsc/AngleSharp
        /// <summary>
        /// After the body state - nothing except Comment PI S allowed.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void AfterBody(XmlToken token)
        {
            var type = token.Type;

            // Comments and processing instructions are handled by the misc state.
            if (type == XmlTokenType.ProcessingInstruction || type == XmlTokenType.Comment)
            {
                InMisc(token);
                return;
            }

            // End of input: run the validator if the document asks for validation.
            if (type == XmlTokenType.EOF)
            {
                if (doc.Options.IsValidating && !XmlValidator.Run(doc))
                {
                    throw Errors.Xml(ErrorCode.XmlValidationFailed);
                }

                return;
            }

            // Everything else is only tolerated when it is ignorable.
            if (!token.IsIgnorable)
            {
                throw Errors.Xml(ErrorCode.XmlMissingRoot);
            }
        }
コード例 #23
0
        /// <summary>
        /// The initial state. Processes an XML declaration if one is given;
        /// any other non-ignorable token is reported as an error and handed
        /// to the prolog state.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void Initial(XmlToken token)
        {
            if (token.Type == XmlTokenType.Declaration)
            {
                var tok = (XmlDeclarationToken)token;
                standalone = tok.Standalone;
                double ver;

                if (!tok.IsEncodingMissing)
                    SetEncoding(tok.Encoding);

                // The version always uses '.' as decimal separator, so it has to be
                // parsed with the invariant culture. The culture-sensitive overload
                // misreads "1.0" under cultures that use a decimal comma.
                var parsed = Double.TryParse(
                    tok.Version,
                    System.Globalization.NumberStyles.Float,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out ver);

                // Only versions below 2.0 are supported.
                if (!parsed || ver >= 2.0)
                    throw new ArgumentException("The given version number is not supported.");
            }
            else if (!token.IsIgnorable)
            {
                // No declaration present: report it and continue with the prolog.
                RaiseErrorOccurred(ErrorCode.UndefinedMarkupDeclaration);
                insert = XmlTreeMode.Prolog;
                BeforeDoctype(token);
            }
        }
コード例 #24
0
 /// <summary>
 /// Handles a token in the prolog: a doctype, processing instruction or
 /// comment is appended to the document; any other non-ignorable token
 /// switches to the body state.
 /// </summary>
 /// <param name="token">The consumed token.</param>
 void BeforeDoctype(XmlToken token)
 {
     switch (token.Type)
     {
         case XmlTokenType.DOCTYPE:
         {
             var source = (XmlDoctypeToken)token;
             var node = new DocumentType
             {
                 SystemId = source.SystemIdentifier,
                 PublicId = source.PublicIdentifier,
                 Name = source.Name
             };

             doc.AppendChild(node);
             insert = XmlTreeMode.Body;
             break;
         }

         case XmlTokenType.ProcessingInstruction:
         {
             var source = (XmlPIToken)token;
             doc.AppendChild(doc.CreateProcessingInstruction(source.Target, source.Content));
             break;
         }

         case XmlTokenType.Comment:
         {
             var source = (XmlCommentToken)token;
             doc.AppendChild(doc.CreateComment(source.Data));
             break;
         }

         default:
         {
             // Ignorable tokens are dropped; everything else starts the body.
             if (!token.IsIgnorable)
             {
                 insert = XmlTreeMode.Body;
                 InBody(token);
             }

             break;
         }
     }
 }
コード例 #25
0
 /// <summary>
 /// Consumes a token and processes it.
 /// </summary>
 /// <param name="token">The token to consume.</param>
 void Consume(XmlToken token)
 {
     // Dispatch on the current insertion mode; other modes ignore the token.
     var mode = insert;

     if (mode == XmlTreeMode.Initial)
     {
         Initial(token);
     }
     else if (mode == XmlTreeMode.Prolog)
     {
         BeforeDoctype(token);
     }
     else if (mode == XmlTreeMode.Body)
     {
         InBody(token);
     }
 }
コード例 #26
0
ファイル: XmlParser.cs プロジェクト: rrsc/AngleSharp
        /// <summary>
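        /// In the misc state - comments and processing instructions are appended,
        /// a start tag switches to the body state, and any other token that is
        /// not ignorable is an error.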
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void InMisc(XmlToken token)
        {
            var type = token.Type;

            if (type == XmlTokenType.Comment)
            {
                // Create a comment node and append it to the current node.
                var source  = (XmlCommentToken)token;
                var comment = doc.CreateComment(source.Data);
                CurrentNode.AppendChild(comment);
            }
            else if (type == XmlTokenType.ProcessingInstruction)
            {
                // Create a processing instruction node and append it to the current node.
                var source      = (XmlPIToken)token;
                var instruction = doc.CreateProcessingInstruction(source.Target, source.Content);
                CurrentNode.AppendChild(instruction);
            }
            else if (type == XmlTokenType.StartTag)
            {
                // A start tag: switch to the body state and let it process the tag.
                insert = XmlTreeMode.Body;
                InBody(token);
            }
            else if (!token.IsIgnorable)
            {
                // Any other token is only tolerated when it is ignorable.
                throw Errors.Xml(ErrorCode.XmlMissingRoot);
            }
        }
コード例 #27
0
ファイル: XmlParser.cs プロジェクト: rrsc/AngleSharp
        /// <summary>
        /// The initial state. Expects an XML declaration.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void Initial(XmlToken token)
        {
            // Anything but a declaration is handled by the prolog state.
            if (token.Type != XmlTokenType.Declaration)
            {
                insert = XmlTreeMode.Prolog;
                BeforeDoctype(token);
                return;
            }

            var declaration = (XmlDeclarationToken)token;
            standalone = declaration.Standalone;

            // Apply the declared encoding, if there is one.
            if (!declaration.IsEncodingMissing)
            {
                SetEncoding(declaration.Encoding);
            }

            // Reject documents whose declared version fails the version check.
            if (!CheckVersion(declaration.Version))
            {
                throw Errors.Xml(ErrorCode.XmlDeclarationVersionUnsupported);
            }
        }
コード例 #28
0
ファイル: XmlParser.cs プロジェクト: rrsc/AngleSharp
        /// <summary>
        /// Handles a token in the body state. Start and end tags maintain
        /// the list of open elements, text-like tokens (entities, CDATA
        /// and characters) are appended as text to the current node, and
        /// comments as well as processing instructions are delegated to
        /// <c>InMisc</c>. EOF, DOCTYPE and declaration tokens raise an
        /// error in this state.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void InBody(XmlToken token)
        {
            switch (token.Type)
            {
                case XmlTokenType.StartTag:
                {
                    var startTag = (XmlTagToken)token;
                    var element = doc.CreateElement(startTag.Name);
                    CurrentNode.AppendChild(element);

                    if (startTag.IsSelfClosing)
                    {
                        // A self-closing element with no open elements left
                        // means nothing remains open, so the body is finished.
                        if (open.Count == 0)
                        {
                            insert = XmlTreeMode.After;
                        }
                    }
                    else
                    {
                        open.Add(element);
                    }

                    var attributes = startTag.Attributes;

                    for (var index = 0; index < attributes.Count; index++)
                    {
                        var attribute = attributes[index];
                        element.SetAttribute(attribute.Key, attribute.Value.Trim());
                    }

                    break;
                }

                case XmlTokenType.EndTag:
                {
                    var endTag = (XmlTagToken)token;

                    if (CurrentNode.NodeName != endTag.Name)
                    {
                        throw Errors.Xml(ErrorCode.TagClosingMismatch);
                    }

                    open.RemoveAt(open.Count - 1);

                    // Closing the last open element ends the body.
                    if (open.Count == 0)
                    {
                        insert = XmlTreeMode.After;
                    }

                    break;
                }

                case XmlTokenType.ProcessingInstruction:
                case XmlTokenType.Comment:
                {
                    InMisc(token);
                    break;
                }

                case XmlTokenType.Entity:
                {
                    var entityToken = (XmlEntityToken)token;
                    var resolved = tokenizer.GetEntity(entityToken);
                    CurrentNode.AppendText(resolved);
                    break;
                }

                case XmlTokenType.CData:
                {
                    var cdataToken = (XmlCDataToken)token;
                    CurrentNode.AppendText(cdataToken.Data);
                    break;
                }

                case XmlTokenType.Character:
                {
                    var characterToken = (XmlCharacterToken)token;
                    CurrentNode.AppendText(characterToken.Data);
                    break;
                }

                case XmlTokenType.EOF:
                    throw Errors.Xml(ErrorCode.EOF);

                case XmlTokenType.DOCTYPE:
                    throw Errors.Xml(ErrorCode.XmlDoctypeAfterContent);

                case XmlTokenType.Declaration:
                    throw Errors.Xml(ErrorCode.XmlDeclarationMisplaced);
            }
        }
コード例 #29
0
ファイル: XmlParser.cs プロジェクト: rrsc/AngleSharp
        /// <summary>
        /// Before any doctype - still in the prolog. A DOCTYPE token
        /// creates the document type node, appends it to the document
        /// and switches the insertion mode to misc. All other tokens
        /// are forwarded to <c>InMisc</c>.
        /// </summary>
        /// <param name="token">The consumed token.</param>
        void BeforeDoctype(XmlToken token)
        {
            if (token.Type != XmlTokenType.DOCTYPE)
            {
                InMisc(token);
                return;
            }

            var doctypeToken = (XmlDoctypeToken)token;
            var documentType = new DocumentType
            {
                SystemId = doctypeToken.SystemIdentifier,
                PublicId = doctypeToken.PublicIdentifier,
                TypeDefinitions = tokenizer.DTD,
                Name = doctypeToken.Name
            };

            doc.AppendChild(documentType);
            insert = XmlTreeMode.Misc;

            // The external subset is only scanned when a system identifier
            // was given and the standalone flag is not set.
            if (!doctypeToken.IsSystemIdentifierMissing && !standalone)
            {
                ScanExternalSubset(documentType.SystemId, documentType.TypeDefinitions);
            }
        }