Beispiel #1
0
            /// <summary>
            /// Reads a single HTML tag from <paramref name="tp"/>. The parser must be
            /// positioned on the '&lt;' that opens the tag. A leading '/' after the
            /// '&lt;' marks the tag as an end tag (<see cref="IsEndTag"/> is set to
            /// true; it is never reset here). The element name and the attributes are
            /// then read by <see cref="ParseName"/> and <see cref="ParseAttributes"/>,
            /// which populate <see cref="Name"/> and <see cref="AttributesPart"/>.
            /// </summary>
            /// <param name="tp">Text parser positioned at the start of the tag.</param>
            /// <returns>
            /// True when both the name and the attributes were parsed successfully;
            /// false when the input does not start with '&lt;', ends right after the
            /// tag opener, or either sub-parser rejects the input.
            /// </returns>
            public bool Parse(ITextParser tp)
            {
                // Anything that does not open with '<' is not a tag.
                var startsTag = tp.Peek() == '<';
                if (!startsTag)
                {
                    return false;
                }
                tp.MoveAhead();

                // "</" introduces an end tag.
                var hasSlash = tp.Peek() == '/';
                if (hasSlash)
                {
                    tp.MoveAhead();
                    IsEndTag = true;
                }

                // Nothing left to read after the opener.
                if (tp.EndOfText)
                {
                    return false;
                }

                // Attributes are only attempted once a valid name has been read.
                return ParseName(tp) && ParseAttributes(tp);
            }
Beispiel #2
0
        /// <summary>
        /// Main conversion loop. Walks the input held by <c>tp</c> one character
        /// at a time and writes the converted text to <c>xml</c>:
        /// '&lt;' is handed to <c>GetTag</c>/<c>HandleTag</c>, '&amp;' is handed to
        /// <c>EntityConverter.Convert</c>, '&gt;' is written as "&amp;gt;", and
        /// every other character is copied unchanged. Once the input is exhausted,
        /// <c>CloseOpenElements</c> is called with the current count of
        /// <c>openElements</c>.
        /// </summary>
        private void Converter()
        {
            while (!tp.EndOfText)
            {
                var current = tp.Peek();

                if (current == '<')
                {
                    // NOTE(review): this branch never advances tp itself, so GetTag
                    // is presumably responsible for consuming input - confirm.
                    var tag = GetTag();
                    if (tag != null)
                    {
                        HandleTag(tag);
                    }
                }
                else if (current == '&')
                {
                    // The entity converter advances the parser on its own.
                    EntityConverter.Convert(tp, xml);
                }
                else
                {
                    // A stray '>' is escaped; anything else is copied as-is.
                    if (current == '>')
                    {
                        xml.Append("&gt;");
                    }
                    else
                    {
                        xml.Append(current);
                    }
                    tp.MoveAhead();
                }
            }

            CloseOpenElements(openElements.Count);
        }
Beispiel #3
0
        /// <summary>
        /// Extracts the text that follows the character at the current position
        /// up to, but not including, the next double quote (or the end of the
        /// text if no double quote follows). On return the parser is left on
        /// that double quote or at the end of the text.
        /// </summary>
        /// <param name="parser">Parser whose current character is skipped before scanning.</param>
        /// <returns>The text between the skipped character and the stop position.</returns>
        string ExtractQuote(ITextParser parser)
        {
            // Step over the current character (the caller positions us on the opening quote).
            parser.MoveAhead();
            var begin = parser.Position;

            // Scan forward until a double quote or the end of the input.
            for (; !parser.EndOfText; parser.MoveAhead())
            {
                if (parser.Peek() == '"')
                {
                    break;
                }
            }

            return parser.Extract(begin, parser.Position);
        }
Beispiel #4
0
        /// <summary>
        /// Extracts the text of a delimited block, honouring nested delimiters.
        /// The character at the current position is skipped (it counts as the
        /// first <paramref name="openChar"/>), and scanning continues until the
        /// matching <paramref name="closeChar"/> or the end of the text.
        /// Delimiters that occur inside double-quoted text are not counted.
        /// On return the parser is left on the matching close delimiter or at
        /// the end of the text.
        /// </summary>
        /// <param name="parser">Parser positioned on the opening delimiter.</param>
        /// <param name="openChar">Character that opens a nested block.</param>
        /// <param name="closeChar">Character that closes a block.</param>
        /// <returns>The text between the outermost delimiters.</returns>
        string ExtractBlock(
            ITextParser parser,
            char openChar,
            char closeChar)
        {
            // The delimiter we start on already opened one level.
            var nesting = 1;

            parser.MoveAhead();
            var begin = parser.Position;

            for (; !parser.EndOfText; parser.MoveAhead())
            {
                var ch = parser.Peek();

                if (ch == openChar)
                {
                    nesting++;
                }
                else if (ch == closeChar)
                {
                    // Leaving the outermost level ends the block; stay on the delimiter.
                    if (--nesting == 0)
                    {
                        break;
                    }
                }
                else if (ch == '"')
                {
                    // Skip the quoted run so delimiters inside it are ignored;
                    // the result of the extraction itself is not needed.
                    ExtractQuote(parser);
                }
            }

            return parser.Extract(begin, parser.Position);
        }
        /// <summary>
        /// Converts the entity candidate at the current parser position and
        /// appends the result to <paramref name="sb"/>. Three outcomes are
        /// possible: a hex/decimal entity is copied by
        /// <c>AppendEntityToOutput</c>; a named entity recognised by
        /// <c>GetNamedEntity</c> is written by <c>AppendEntityValue</c>; anything
        /// else is not an entity, so only the ampersand is emitted as "&amp;amp;"
        /// and the parser moves past it.
        /// </summary>
        /// <param name="tp">Parser positioned on the '&amp;' character.</param>
        /// <param name="sb">Output buffer that receives the converted text.</param>
        public static void Convert(ITextParser tp, StringBuilder sb)
        {
            // Numeric references are copied by the dedicated helper.
            if (EntityIsHexOrDecimal(tp))
            {
                AppendEntityToOutput(tp, sb);
                return;
            }

            var named = GetNamedEntity(tp);

            if (named == null)
            {
                // Not an entity: escape the lone '&' and leave the rest for the caller.
                sb.Append("&amp;");
                tp.MoveAhead();
                return;
            }

            AppendEntityValue(sb, named);

            // Skip the whole reference; the extra two characters are presumably
            // the leading '&' and the trailing ';'.
            var consumed = named.Length + 2;
            tp.MoveAhead(consumed);
        }
 /// <summary>
 /// Copies characters from the current parser position to
 /// <paramref name="sb"/> up to and including the first ';',
 /// or up to the end of the text if no ';' is found. The
 /// current position is assumed to be on the '&amp;' that
 /// starts the entity.
 /// </summary>
 /// <param name="tp">Parser positioned at the start of the entity.</param>
 /// <param name="sb">Output buffer that receives the copied characters.</param>
 private static void AppendEntityToOutput(ITextParser tp, StringBuilder sb)
 {
     var terminated = false;

     while (!terminated && !tp.EndOfText)
     {
         var ch = tp.Peek();
         sb.Append(ch);
         tp.MoveAhead();

         // The ';' is copied too, then copying stops.
         terminated = ch == ';';
     }
 }
Beispiel #7
0
            /// <summary>
            /// Parses the element name from the tag using the given
            /// <see cref="ITextParser"/> <paramref name="tp"/>. The
            /// <see cref="ITextParser"/> position should be set to
            /// the first character of the tag following the "&lt;",
            /// or following the "&lt;/" for end tags.
            /// On success the parser is advanced past the name,
            /// <see cref="Name"/> receives the lower-cased name and
            /// <see cref="IsSelfClosingTag"/> is set from
            /// <c>IsVoidElement</c>. On failure the parser is not moved.
            /// </summary>
            /// <param name="tp">Parser positioned at the first character of the name.</param>
            /// <returns>True if a syntactically valid name is found.</returns>
            private bool ParseName(ITextParser tp)
            {
                // Only ASCII letters, digits and ':' may appear in a name.
                const string nameCharacters = "abcdefghijklmnopqrstuvwxyz"
                                              + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:";

                var startPos = tp.Position;

                var  offset = 0;
                char c;

                // Look ahead without consuming so a failed parse leaves tp untouched.
                while ((c = tp.Peek(offset)) != TextParser.NullChar)
                {
                    if (nameCharacters.IndexOf(c) == -1)
                    {
                        break;
                    }
                    offset++;
                }

                // Did we get any valid characters?
                if (offset < 1)
                {
                    return(false);
                }

                // Does the tag name end properly?
                if (c != '>' && c != '/' && !Char.IsWhiteSpace(c))
                {
                    return(false);
                }

                // A name may contain ':' but must not end with one.
                if (tp.Peek(offset - 1) == ':')
                {
                    return(false);
                }

                // Our minimal validation has passed...
                tp.MoveAhead(offset);
                var length = tp.Position - startPos;

                // Lower-case with the invariant culture: ToLower() follows the
                // current culture and would, for example, turn "TITLE" into
                // "tıtle" (dotless i) under a Turkish locale.
                Name = tp.Substring(startPos, length).ToLowerInvariant();

                // Force void elements to be self-closing tags.
                IsSelfClosingTag = IsVoidElement(Name);

                return(true);
            }
Beispiel #8
0
            /// <summary>
            /// Parses attributes from the tag using the given
            /// <see cref="ITextParser"/> <paramref name="tp"/>. The
            /// <see cref="ITextParser"/> position should be set to
            /// the first character of the tag following the element name.
            /// Attribute values are normalised to double-quoted form: single-quoted
            /// and unquoted values are re-quoted, and a double quote found inside a
            /// value is replaced by a numeric character reference. The result is
            /// stored in <see cref="AttributesPart"/>. A '/' directly before the
            /// closing '&gt;' (outside any value) sets <see cref="IsSelfClosingTag"/>
            /// and is dropped from the stored text.
            /// </summary>
            /// <param name="tp">Parser positioned just after the element name.</param>
            /// <returns>
            /// True if the tag is terminated by '&gt;' (the parser is then moved past
            /// it); false if a '&lt;' or the end of the text is reached first.
            /// </returns>
            private bool ParseAttributes(ITextParser tp)
            {
                const char   kDoubleQuote       = '"';
                const char   kSingleQuote       = '\'';
                // Numeric character reference for '"' (U+0022, decimal 34).
                // "&#22;" would be decimal 22, a control character.
                const string kDoubleQuoteEntity = "&#34;";

                // No attributes at all.
                if (tp.Peek() == '>')
                {
                    tp.MoveAhead();
                    return(true);
                }

                var sb = new StringBuilder();

                // Copy current input character
                void Copy()
                {
                    sb.Append(tp.Peek());
                    tp.MoveAhead();
                }

                // Copy input characters until fence character or end of tag
                // (and, for unquoted values, until any whitespace). Double
                // quotes inside the value are escaped so the re-quoted output
                // stays well-formed.
                void CopyTo(char fence, bool stopAtWhiteSpace = false)
                {
                    while (!tp.EndOfText)
                    {
                        var c = tp.Peek();
                        if (c == fence || c == '>')
                        {
                            break;
                        }
                        if (stopAtWhiteSpace && Char.IsWhiteSpace(c))
                        {
                            break;
                        }
                        if (c != kDoubleQuote)
                        {
                            sb.Append(c);
                        }
                        else
                        {
                            sb.Append(kDoubleQuoteEntity);
                        }
                        tp.MoveAhead();
                    }
                }

                // Emit the closing double quote and consume the source's closing
                // quote only when it is really there. If the value was cut short
                // by '>' the tag terminator must stay in place for the main loop.
                void CloseQuotedValue(char fence)
                {
                    sb.Append(kDoubleQuote);
                    if (tp.Peek() == fence)
                    {
                        tp.MoveAhead();
                    }
                }

                // Copy attributes
                while (!tp.EndOfText)
                {
                    var c = tp.Peek();
                    if (c == '>' || c == '<')
                    {
                        break;
                    }
                    switch (c)
                    {
                    case '=':
                        Copy();
                        c = tp.Peek();
                        if (c == kDoubleQuote)
                        {
                            // Copy double-quoted value
                            Copy();
                            CopyTo(kDoubleQuote);
                            CloseQuotedValue(kDoubleQuote);
                        }
                        else if (c == kSingleQuote)
                        {
                            // Copy single-quoted value, but with double-quotes
                            sb.Append(kDoubleQuote);
                            tp.MoveAhead();
                            CopyTo(kSingleQuote);
                            CloseQuotedValue(kSingleQuote);
                        }
                        else
                        {
                            // Copy unquoted value adding double-quotes; an
                            // unquoted value ends at any whitespace, not only
                            // at a space character.
                            sb.Append(kDoubleQuote);
                            CopyTo(' ', stopAtWhiteSpace: true);
                            sb.Append(kDoubleQuote);
                        }
                        break;

                    default:
                        Copy();
                        break;
                    }
                }

                if (tp.Peek() != '>')
                {
                    return(false);
                }

                // A self-closing marker is a '/' copied literally as the last
                // character before '>'. Checking the output buffer (rather than
                // the previous input character) keeps a '/' that ends an
                // unquoted value, e.g. href=foo/, from stripping the closing
                // quote that was just appended.
                if (sb.Length > 0 && sb[sb.Length - 1] == '/')
                {
                    IsSelfClosingTag = true;
                    sb.Length        = sb.Length - 1;
                }

                AttributesPart = sb.ToString();
                if (AttributesPart.IndexOf('&') != -1)
                {
                    // ResolveEntities is defined elsewhere; it is only invoked
                    // when the text contains at least one '&'.
                    AttributesPart = ResolveEntities(AttributesPart);
                }

                tp.MoveAhead();

                return(true);
            }