Пример #1
0
            /// <summary>
            /// Reads one HTML tag from <paramref name="tp"/>, which is expected to be
            /// positioned on the '&lt;' that opens the tag. A leading '/' after the
            /// '&lt;' marks the tag as an end tag; the element name and the attribute
            /// text are then read by <see cref="ParseName"/> and
            /// <see cref="ParseAttributes"/>, which update <see cref="Name"/> and
            /// <see cref="AttributesPart"/>.
            /// </summary>
            /// <param name="tp">Parser supplying the characters of the tag.</param>
            /// <returns>
            /// true when the name and the attributes were both parsed; false when the
            /// current character is not '&lt;', when the text ends right after the
            /// opening characters, or when either sub-parser rejects the input. The
            /// parser position may already have advanced when false is returned.
            /// </returns>
            public bool Parse(ITextParser tp)
            {
                // A tag can only begin with '<'
                if (tp.Peek() != '<')
                {
                    return false;
                }
                tp.MoveAhead();

                // "</" introduces an end tag
                var startsWithSlash = tp.Peek() == '/';
                if (startsWithSlash)
                {
                    tp.MoveAhead();
                    IsEndTag = true;
                }

                // Each step runs only if the previous one succeeded
                return !tp.EndOfText && ParseName(tp) && ParseAttributes(tp);
            }
Пример #2
0
        /// <summary>
        /// Gets the name of a named entity that starts at the current
        /// character ('&amp;') and runs up to the terminating ';'.
        /// Only ASCII letters are accepted as part of the name.
        /// </summary>
        /// <param name="tp">Parser positioned on the '&amp;'; its position is not changed.</param>
        /// <returns>
        /// The text between '&amp;' and ';', or null when the run of letters is
        /// empty or is not immediately followed by ';'.
        /// </returns>
        private static string GetNamedEntity(ITextParser tp)
        {
            const string letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

            // The name begins right after the '&'
            var nameStart = tp.Position + 1;

            // Look ahead, one character at a time, past every letter
            var  lookahead = 1;
            char next;
            for (next = tp.Peek(lookahead); letters.IndexOf(next) != -1; next = tp.Peek(lookahead))
            {
                lookahead++;
            }

            // 'next' is now the first non-letter; lookahead 1 was the first name character
            var nameLength = lookahead - 1;

            if (next != ';' || nameLength < 1)
            {
                return null;
            }

            return tp.Substring(nameStart, nameLength);
        }
Пример #3
0
 /// <summary>
 /// Tests whether the text at the current position is a numeric
 /// entity: "&amp;#" followed by digits, or "&amp;#x" / "&amp;#X"
 /// followed by hex digits, up to the next ';'. On entry, tp
 /// should be pointing at '&amp;'. The character check itself is
 /// delegated to EntityContainsValidCharacters.
 /// </summary>
 /// <param name="tp">Parser positioned on the '&amp;'; only look-ahead is used.</param>
 /// <returns>
 /// false when the character after '&amp;' is not '#'; otherwise the
 /// result of the character check for the hex or decimal form.
 /// </returns>
 private static bool EntityIsHexOrDecimal(ITextParser tp)
 {
     // Every numeric entity starts with "&#"
     if (tp.Peek(1) != '#')
     {
         return false;
     }

     // An 'x' or 'X' after the '#' selects the hex form, whose
     // payload starts one character later than the decimal form
     var marker = tp.Peek(2);
     var isHex  = marker == 'x' || marker == 'X';

     return isHex
         ? EntityContainsValidCharacters(tp, 3, hex)
         : EntityContainsValidCharacters(tp, 2, digits);
 }
Пример #4
0
            /// <summary>
            /// Parses the element name from the tag using the given
            /// <see cref="ITextParser"/> <paramref name="tp"/>. The
            /// <see cref="ITextParser"/> position should be set to
            /// the first character of the tag following the "&lt;",
            /// or following the "&lt;/" for end tags.
            /// On success the name is stored lower-cased in <see cref="Name"/>,
            /// <see cref="IsSelfClosingTag"/> is set from IsVoidElement, and the
            /// parser is advanced past the name. On failure the parser position
            /// is left unchanged.
            /// </summary>
            /// <param name="tp">Parser positioned on the first character of the name.</param>
            /// <returns>
            /// true if a syntactically valid name is found: at least one ASCII
            /// letter, digit or ':', not ending in ':', and followed by '&gt;',
            /// '/' or white space.
            /// </returns>
            private bool ParseName(ITextParser tp)
            {
                const string nameCharacters = "abcdefghijklmnopqrstuvwxyz"
                                              + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:";

                var startPos = tp.Position;

                // Measure the run of name characters using look-ahead only,
                // so nothing is consumed if validation fails below.
                var  offset = 0;
                char c;

                while ((c = tp.Peek(offset)) != TextParser.NullChar)
                {
                    if (nameCharacters.IndexOf(c) == -1)
                    {
                        break;
                    }
                    offset++;
                }

                // Did we get any valid characters?
                if (offset < 1)
                {
                    return(false);
                }

                // Does the tag name end properly?
                if (c != '>' && c != '/' && !char.IsWhiteSpace(c))
                {
                    return(false);
                }
                // A trailing ':' would be a namespace prefix without a local name.
                if (tp.Peek(offset - 1) == ':')
                {
                    return(false);
                }

                // Our minimal validation has passed...
                tp.MoveAhead(offset);
                var length = tp.Position - startPos;

                // Names are restricted to ASCII above, so lower-case them with the
                // invariant culture; the culture-sensitive ToLower() would turn 'I'
                // into a dotless 'ı' under Turkish cultures and break name lookups.
                Name = tp.Substring(startPos, length).ToLowerInvariant();

                // Force void elements to be self-closing tags.
                IsSelfClosingTag = IsVoidElement(Name);

                return(true);
            }
Пример #5
0
        /// <summary>
        /// Extracts the text of a delimited block, honouring nested
        /// <paramref name="openChar"/> / <paramref name="closeChar"/> pairs and
        /// ignoring delimiters that appear inside double-quoted text.
        /// The parser is first advanced by one character (presumably past the
        /// opening delimiter - confirm against callers) and is left on the
        /// matching closing delimiter, or at the end of the text if none is found.
        /// </summary>
        /// <param name="parser">Parser to read from.</param>
        /// <param name="openChar">Character that opens a nested block.</param>
        /// <param name="closeChar">Character that closes a block.</param>
        /// <returns>
        /// The result of <c>parser.Extract</c> for the range from the first
        /// character after the initial advance to the final parser position.
        /// </returns>
        string ExtractBlock(
            ITextParser parser,
            char openChar,
            char closeChar)
        {
            // One block is already open when scanning starts
            var nesting = 1;

            parser.MoveAhead();
            var blockStart = parser.Position;

            // The loop iterator steps to the next character after every
            // iteration that does not break out
            for (; !parser.EndOfText; parser.MoveAhead())
            {
                var current = parser.Peek();

                if (current == openChar)
                {
                    // Entering a nested block
                    nesting++;
                    continue;
                }

                if (current == closeChar)
                {
                    // Leaving a block; stop on the one that balances the opener
                    nesting--;
                    if (nesting == 0)
                    {
                        break;
                    }
                    continue;
                }

                if (current == '"')
                {
                    // Skip quoted text so delimiters inside it are not counted
                    ExtractQuote(parser);
                }
            }

            return parser.Extract(blockStart, parser.Position);
        }
Пример #6
0
        /// <summary>
        /// Extracts the contents of a double-quoted run. The parser is first
        /// advanced by one character (presumably past the opening quote -
        /// confirm against callers) and then moved forward until it reaches
        /// the next '"' or the end of the text, where it is left.
        /// </summary>
        /// <param name="parser">Parser to read from.</param>
        /// <returns>
        /// The result of <c>parser.Extract</c> for the range from the first
        /// character after the initial advance to the final parser position.
        /// </returns>
        string ExtractQuote(ITextParser parser)
        {
            parser.MoveAhead();
            var contentStart = parser.Position;

            // Advance until the closing quote or the end of the input
            while (true)
            {
                if (parser.EndOfText || parser.Peek() == '"')
                {
                    break;
                }
                parser.MoveAhead();
            }

            return parser.Extract(contentStart, parser.Position);
        }
Пример #7
0
 /// <summary>
 /// Copies the entity that begins at the current location, which
 /// is assumed to be '&amp;', to the output unchanged. Characters
 /// are consumed and appended up to and including the first ';',
 /// or up to the end of the text when no ';' is found.
 /// </summary>
 /// <param name="tp">Parser positioned at the start of the entity.</param>
 /// <param name="sb">Builder that receives the copied characters.</param>
 private static void AppendEntityToOutput(ITextParser tp, StringBuilder sb)
 {
     var terminatorCopied = false;

     while (!terminatorCopied && !tp.EndOfText)
     {
         var current = tp.Peek();
         sb.Append(current);
         tp.MoveAhead();

         // The ';' itself is part of the output and ends the copy
         terminatorCopied = current == ';';
     }
 }
Пример #8
0
        /// <summary>
        /// Returns true if the substring starting at offset
        /// and continuing to the next ';' contains characters
        /// from "validChars" only. The parser position is not
        /// changed; only look-ahead is used.
        /// </summary>
        /// <param name="tp">Parser to look ahead in.</param>
        /// <param name="offset">Look-ahead offset, relative to the current position, of the first character to test.</param>
        /// <param name="validChars">Characters allowed before the terminating ';'.</param>
        /// <returns>
        /// true when a ';' is reached before any character outside
        /// <paramref name="validChars"/>, including a ';' found directly at
        /// <paramref name="offset"/>; false when a disallowed character or
        /// the end of the text is reached first.
        /// </returns>
        private static bool EntityContainsValidCharacters(ITextParser tp, int offset, string validChars)
        {
            char c;

            while ((c = tp.Peek(offset++)) != TextParser.NullChar)
            {
                // Test for the terminator before validating: otherwise the
                // ';' itself is rejected whenever validChars does not list
                // it, and the method could never return true.
                if (c == ';')
                {
                    return(true);
                }
                if (validChars.IndexOf(c) == -1)
                {
                    return(false);
                }
            }

            // Ran out of text without finding ';'
            return(false);
        }
Пример #9
0
        /// <summary>
        /// Main conversion loop. Walks the input held by <c>tp</c> until the
        /// end of the text and dispatches on the current character:
        /// '&lt;' is handed to GetTag / HandleTag, '&amp;' to
        /// EntityConverter.Convert, a stray '&gt;' is written as "&amp;gt;",
        /// and any other character is copied to <c>xml</c> as is.
        /// Afterwards CloseOpenElements is called with the number of
        /// entries in <c>openElements</c>.
        /// </summary>
        private void Converter()
        {
            while (!tp.EndOfText)
            {
                var current = tp.Peek();

                if (current == '<')
                {
                    // NOTE(review): nothing here advances the parser, so GetTag is
                    // presumably responsible for consuming input even when it
                    // returns null - confirm.
                    var tag = GetTag();
                    if (tag != null)
                    {
                        HandleTag(tag);
                    }
                }
                else if (current == '&')
                {
                    EntityConverter.Convert(tp, xml);
                }
                else
                {
                    // Plain text: escape a bare '>' and copy everything else
                    if (current == '>')
                    {
                        xml.Append("&gt;");
                    }
                    else
                    {
                        xml.Append(current);
                    }
                    tp.MoveAhead();
                }
            }

            CloseOpenElements(openElements.Count);
        }
Пример #10
0
            /// <summary>
            /// Parses attributes from the tag using the given
            /// <see cref="ITextParser"/> <paramref name="tp"/>. The
            /// <see cref="ITextParser"/> position should be set to
            /// the first character of the tag following the element name.
            /// Attribute values are normalized while being copied: single-quoted
            /// and unquoted values are rewritten with double quotes, and a double
            /// quote inside a value is replaced by a numeric character reference.
            /// The result is stored in <see cref="AttributesPart"/>, and
            /// <see cref="IsSelfClosingTag"/> is set when the tag ends in "/&gt;".
            /// </summary>
            /// <param name="tp">Parser positioned just after the element name.</param>
            /// <returns>
            /// true when the closing '&gt;' was found and consumed. If the '&gt;'
            /// follows the name immediately, true is returned without touching
            /// <see cref="AttributesPart"/>. false when a '&lt;' or the end of the
            /// text is reached before the closing '&gt;'.
            /// </returns>
            private bool ParseAttributes(ITextParser tp)
            {
                const char   kDoubleQuote       = '"';
                const char   kSingleQuote       = '\'';
                // Numeric character reference for '"' (U+0022, decimal 34).
                // "&#22;" would denote control character 22, not a quote.
                const string kDoubleQuoteEntity = "&#34;";

                if (tp.Peek() == '>')
                {
                    tp.MoveAhead();
                    return(true);
                }

                var sb = new StringBuilder();

                // Copy current input character
                void Copy()
                {
                    sb.Append(tp.Peek());
                    tp.MoveAhead();
                }

                // Copy input characters until fence character or end of tag
                void CopyTo(char fence)
                {
                    while (!tp.EndOfText)
                    {
                        var c = tp.Peek();
                        if (c == fence || c == '>')
                        {
                            break;
                        }
                        if (c != kDoubleQuote)
                        {
                            sb.Append(c);
                        }
                        else
                        {
                            // A raw double quote would end the rewritten value early
                            sb.Append(kDoubleQuoteEntity);
                        }
                        tp.MoveAhead();
                    }
                }

                // Step over the closing quote, but only if CopyTo really stopped
                // on it. When the quote is missing CopyTo stops on '>', which
                // must stay in place so the tag is still terminated.
                void SkipClosingQuote(char quote)
                {
                    if (tp.Peek() == quote)
                    {
                        tp.MoveAhead();
                    }
                }

                // Copy attributes
                while (!tp.EndOfText)
                {
                    var c = tp.Peek();
                    if (c == '>' || c == '<')
                    {
                        break;
                    }
                    switch (c)
                    {
                    case '=':
                        Copy();
                        c = tp.Peek();
                        if (c == kDoubleQuote)
                        {
                            // Copy double-quoted value
                            Copy();
                            CopyTo(kDoubleQuote);
                            sb.Append(kDoubleQuote);
                            SkipClosingQuote(kDoubleQuote);
                        }
                        else if (c == kSingleQuote)
                        {
                            // Copy single-quoted value, but with double-quotes
                            sb.Append(kDoubleQuote);
                            tp.MoveAhead();
                            CopyTo(kSingleQuote);
                            sb.Append(kDoubleQuote);
                            SkipClosingQuote(kSingleQuote);
                        }
                        else
                        {
                            // Copy unquoted value adding double-quotes
                            sb.Append(kDoubleQuote);
                            CopyTo(' ');
                            sb.Append(kDoubleQuote);
                        }
                        break;

                    default:
                        Copy();
                        break;
                    }
                }

                // The loop also ends on '<' or end of text; neither closes the tag
                if (tp.Peek() != '>')
                {
                    return(false);
                }

                if (tp.CharAt(tp.Position - 1) == '/')
                {
                    IsSelfClosingTag = true;

                    // Drop the '/' of "/>" from the copied text. When the '/' was
                    // swallowed by an unquoted value the buffer ends with the
                    // added closing quote instead, which must be kept.
                    if (sb.Length > 0 && sb[sb.Length - 1] == '/')
                    {
                        sb.Length = sb.Length - 1;
                    }
                }

                AttributesPart = sb.ToString();
                if (AttributesPart.IndexOf('&') != -1)
                {
                    AttributesPart = ResolveEntities(AttributesPart);
                }

                // Consume the closing '>'
                tp.MoveAhead();

                return(true);
            }