/// <summary>
/// Reads a single HTML tag from <paramref name="tp"/>, filling in
/// <see cref="Name"/> and <see cref="AttributesPart"/> through the name and
/// attribute parsers. The parser must be positioned on the '&lt;' character
/// that opens the tag.
/// </summary>
/// <param name="tp">Text parser positioned at the start of the tag.</param>
/// <returns>
/// true when both the element name and the attributes were parsed;
/// false when the current character is not '&lt;', when the text ends right
/// after the opening characters, or when either sub-parser rejects the tag.
/// </returns>
public bool Parse(ITextParser tp)
{
    // Anything that does not open with '<' is not a tag.
    if (tp.Peek() != '<')
    {
        return false;
    }
    tp.MoveAhead();

    // A '/' directly after '<' marks an end tag. Note that the flag is only
    // ever raised here, never lowered.
    var hasLeadingSlash = tp.Peek() == '/';
    if (hasLeadingSlash)
    {
        tp.MoveAhead();
        IsEndTag = true;
    }

    // Nothing follows "<" or "</": there is no name to read.
    if (tp.EndOfText)
    {
        return false;
    }

    // Attributes are only attempted once the name has been accepted.
    return ParseName(tp) && ParseAttributes(tp);
}
/// <summary>
/// Gets the name of a named entity starting at the current
/// character ('&amp;') and proceeding until ';'.
/// The name must start with an ASCII letter and may continue with
/// ASCII letters or digits, so names such as "frac12" or "sup2"
/// are recognized.
/// </summary>
/// <param name="tp">Text parser positioned on the '&amp;' character.</param>
/// <returns>
/// The entity name without the leading '&amp;' and trailing ';', or null
/// if the text does not have the form of a named entity (empty name,
/// invalid character, or no terminating ';').
/// </returns>
private static string GetNamedEntity(ITextParser tp)
{
    const string kLetters =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const string kDigits = "0123456789";

    // The name starts right after the '&'.
    var startPos = tp.Position + 1;
    var offset = 1;
    char c;

    while (true)
    {
        c = tp.Peek(offset);

        var isLetter = kLetters.IndexOf(c) != -1;

        // Digits are only accepted after the first character of the name.
        var isDigit = offset > 1 && kDigits.IndexOf(c) != -1;

        if (!isLetter && !isDigit)
        {
            break;
        }
        offset++;
    }

    // The first character that is not part of the name must be the ';'.
    if (c != ';')
    {
        return(null);
    }

    // offset points at the ';', and the name started at offset 1.
    var length = offset - 1;
    if (length < 1)
    {
        return(null);
    }

    return(tp.Substring(startPos, length));
}
/// <summary>
/// Tells whether the text at the current position is a numeric character
/// reference, either decimal ("&amp;#...;") or hexadecimal ("&amp;#x...;").
/// On entry, <paramref name="tp"/> should be pointing at '&amp;'.
/// </summary>
/// <param name="tp">Text parser positioned on the '&amp;' character.</param>
/// <returns>
/// false when the character after '&amp;' is not '#'; otherwise the result
/// of validating the characters that follow the "#" or "#x" prefix.
/// </returns>
private static bool EntityIsHexOrDecimal(ITextParser tp)
{
    // Numeric references always carry '#' right after the ampersand.
    if (tp.Peek(1) != '#')
    {
        return false;
    }

    // An 'x' or 'X' after the '#' selects hexadecimal notation; the value
    // characters then begin one position later than in the decimal form.
    var marker = tp.Peek(2);
    var isHexNotation = marker == 'x' || marker == 'X';

    return isHexNotation
        ? EntityContainsValidCharacters(tp, 3, hex)
        : EntityContainsValidCharacters(tp, 2, digits);
}
/// <summary>
/// Parses the element name from the tag using the given
/// <see cref="ITextParser"/> <paramref name="tp"/>. The
/// <see cref="ITextParser"/> position should be set to
/// the first character of the tag following the "&lt;",
/// or following the "&lt;/" for end tags.
/// </summary>
/// <param name="tp">Text parser positioned on the first name character.</param>
/// <returns>
/// true if a syntactically valid name is found. In that case the parser is
/// advanced past the name, <see cref="Name"/> holds the lower-cased name and
/// <see cref="IsSelfClosingTag"/> is set from <c>IsVoidElement</c>.
/// On failure the parser position is left unchanged.
/// </returns>
private bool ParseName(ITextParser tp)
{
    const string nameCharacters =
        "abcdefghijklmnopqrstuvwxyz" +
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:";

    var startPos = tp.Position;
    var offset = 0;
    char c;

    // Look ahead over the run of name characters without consuming them.
    while ((c = tp.Peek(offset)) != TextParser.NullChar)
    {
        if (nameCharacters.IndexOf(c) == -1)
        {
            break;
        }
        offset++;
    }

    // Did we get any valid characters?
    if (offset < 1)
    {
        return(false);
    }

    // Does the tag name end properly?
    if (c != '>' && c != '/' && !Char.IsWhiteSpace(c))
    {
        return(false);
    }

    // A name may contain ':' but must not end with it.
    if (tp.Peek(offset - 1) == ':')
    {
        return(false);
    }

    // Our minimal validation has passed...
    tp.MoveAhead(offset);
    var length = tp.Position - startPos;

    // Use the invariant culture: a culture-sensitive ToLower() would map
    // 'I' to a dotless 'ı' under Turkish cultures and corrupt names such
    // as "TITLE" or "IMG".
    Name = tp.Substring(startPos, length).ToLowerInvariant();

    // Force void elements to be self-closing tags.
    IsSelfClosingTag = IsVoidElement(Name);

    return(true);
}
/// <summary>
/// Extracts the text of a delimited block, honoring nested delimiters.
/// The character under the cursor on entry is skipped and nesting starts
/// at one level (presumably because that character is the opening
/// delimiter). Delimiters inside double-quoted text are not counted.
/// </summary>
/// <param name="parser">Parser to read from.</param>
/// <param name="openChar">Character that opens a nested block.</param>
/// <param name="closeChar">Character that closes a block.</param>
/// <returns>
/// The text between the skipped character and the matching closing
/// delimiter, or up to the end of the text if no match is found. The
/// closing delimiter itself is not consumed.
/// </returns>
string ExtractBlock(
    ITextParser parser,
    char openChar,
    char closeChar)
{
    // One level is already open when scanning begins.
    var nesting = 1;

    parser.MoveAhead();
    var blockStart = parser.Position;

    // Each pass examines one character and then steps to the next one;
    // leaving via break keeps the cursor on the closing delimiter.
    for (; !parser.EndOfText; parser.MoveAhead())
    {
        var current = parser.Peek();

        if (current == openChar)
        {
            // A nested block begins.
            nesting++;
        }
        else if (current == closeChar)
        {
            // A block ends; stop once the outermost one is closed.
            nesting--;
            if (nesting == 0)
            {
                break;
            }
        }
        else if (current == '"')
        {
            // Skip quoted text so delimiters inside it are ignored.
            ExtractQuote(parser);
        }
    }

    return parser.Extract(blockStart, parser.Position);
}
/// <summary>
/// Extracts the contents of a double-quoted string. The character under
/// the cursor on entry is skipped (presumably the opening quote), and
/// scanning stops on the next '"' or at the end of the text.
/// </summary>
/// <param name="parser">Parser to read from.</param>
/// <returns>
/// The text between the skipped character and the stopping position.
/// The closing quote, if any, is not consumed.
/// </returns>
string ExtractQuote(ITextParser parser)
{
    // Step over the first character and remember where the content begins.
    parser.MoveAhead();
    var contentStart = parser.Position;

    while (true)
    {
        // Stop at the end of the text or on the closing quote.
        if (parser.EndOfText || parser.Peek() == '"')
        {
            break;
        }
        parser.MoveAhead();
    }

    return parser.Extract(contentStart, parser.Position);
}
/// <summary>
/// Copies the entity text that begins at the current location, which is
/// assumed to be '&amp;', into <paramref name="sb"/>. Characters are copied
/// up to and including the first ';', or to the end of the text when no
/// ';' is found.
/// </summary>
/// <param name="tp">Text parser positioned at the start of the entity.</param>
/// <param name="sb">Output buffer that receives the copied characters.</param>
private static void AppendEntityToOutput(ITextParser tp, StringBuilder sb)
{
    var terminatorCopied = false;

    while (!terminatorCopied && !tp.EndOfText)
    {
        // Copy the current character and consume it.
        var current = tp.Peek();
        sb.Append(current);
        tp.MoveAhead();

        // The ';' is the last character of the entity.
        terminatorCopied = current == ';';
    }
}
/// <summary>
/// Returns true if the substring starting at <paramref name="offset"/>
/// and continuing to the next ';' is non-empty and contains characters
/// from <paramref name="validChars"/> only.
/// </summary>
/// <param name="tp">Text parser; the scan is relative to its current position.</param>
/// <param name="offset">Look-ahead offset of the first character to check.</param>
/// <param name="validChars">Characters allowed before the terminating ';'.</param>
/// <returns>
/// true when at least one valid character is followed by ';';
/// false when an invalid character is met, when the ';' follows
/// immediately (an empty reference such as "&amp;#;"), or when the text
/// ends before a ';' is found.
/// </returns>
private static bool EntityContainsValidCharacters(ITextParser tp, int offset, string validChars)
{
    var sawValidCharacter = false;
    char c;

    while ((c = tp.Peek(offset++)) != TextParser.NullChar)
    {
        // Test for the terminator first so the result does not depend on
        // whether validChars happens to include ';'. An empty body is
        // rejected because it is not a usable character reference.
        if (c == ';')
        {
            return(sawValidCharacter);
        }

        if (validChars.IndexOf(c) == -1)
        {
            return(false);
        }

        sawValidCharacter = true;
    }

    // Ran out of text without finding the terminating ';'.
    return(false);
}
/// <summary>
/// Main conversion loop. Walks the input one construct at a time until the
/// end of the text: tags are fetched with <c>GetTag</c> and passed to
/// <c>HandleTag</c>, entities are delegated to <c>EntityConverter.Convert</c>,
/// and every other character is appended to the output. When the input is
/// exhausted, all elements still open are closed.
/// </summary>
private void Converter()
{
    while (!tp.EndOfText)
    {
        var current = tp.Peek();

        if (current == '<')
        {
            // Possible tag; a null result means there is nothing to handle.
            var tag = GetTag();
            if (tag != null)
            {
                HandleTag(tag);
            }
        }
        else if (current == '&')
        {
            // Entity handling is delegated, including advancing the parser.
            EntityConverter.Convert(tp, xml);
        }
        else if (current == '>')
        {
            xml.Append(">");
            tp.MoveAhead();
        }
        else
        {
            // Ordinary character: copy it through unchanged.
            xml.Append(current);
            tp.MoveAhead();
        }
    }

    CloseOpenElements(openElements.Count);
}
/// <summary>
/// Parses attributes from the tag using the given
/// <see cref="ITextParser"/> <paramref name="tp"/>. The
/// <see cref="ITextParser"/> position should be set to
/// the first character of the tag following the element name.
/// </summary>
/// <remarks>
/// Attribute text is normalized while it is copied into
/// <see cref="AttributesPart"/>: every value ends up enclosed in double
/// quotes, and a double quote found inside a single-quoted or unquoted
/// value is written as the <c>&amp;quot;</c> entity. A '/' directly before
/// the closing '&gt;' sets <see cref="IsSelfClosingTag"/>. If the result
/// contains '&amp;', it is passed through <c>ResolveEntities</c>.
/// When the tag has no attribute text at all (the next character is
/// already '&gt;'), <see cref="AttributesPart"/> is left untouched.
/// </remarks>
/// <param name="tp">Text parser positioned just after the element name.</param>
/// <returns>
/// true if the closing '&gt;' was found and consumed; false if the text
/// ended or another '&lt;' was met before the tag was closed.
/// </returns>
private bool ParseAttributes(ITextParser tp)
{
    const char kDoubleQuote = '"';
    const char kSingleQuote = '\'';
    const string kDoubleQuoteEntity = "&quot;";

    if (tp.Peek() == '>')
    {
        tp.MoveAhead();
        return(true);
    }

    var sb = new StringBuilder();

    // Copy current input character
    void Copy()
    {
        sb.Append(tp.Peek());
        tp.MoveAhead();
    }

    // Copy input characters until fence character or end of tag.
    // Embedded double quotes are escaped because the value is always
    // emitted inside double quotes.
    void CopyTo(char fence)
    {
        while (!tp.EndOfText)
        {
            var c = tp.Peek();
            if (c == fence || c == '>')
            {
                break;
            }
            if (c != kDoubleQuote)
            {
                sb.Append(c);
            }
            else
            {
                sb.Append(kDoubleQuoteEntity);
            }
            tp.MoveAhead();
        }
    }

    // Emit the closing double quote of a quoted value and consume the
    // closing quote from the input. If the value was unterminated, CopyTo
    // stopped on '>' or at the end of the text; that character must not
    // be consumed here, otherwise the end of the tag would be skipped.
    void CloseQuotedValue(char fence)
    {
        sb.Append(kDoubleQuote);
        if (tp.Peek() == fence)
        {
            tp.MoveAhead();
        }
    }

    // Copy attributes
    while (!tp.EndOfText)
    {
        var c = tp.Peek();
        if (c == '>' || c == '<')
        {
            break;
        }

        switch (c)
        {
            case '=':
                Copy();
                c = tp.Peek();
                if (c == kDoubleQuote)
                {
                    // Copy double-quoted value
                    Copy();
                    CopyTo(kDoubleQuote);
                    CloseQuotedValue(kDoubleQuote);
                }
                else if (c == kSingleQuote)
                {
                    // Copy single-quoted value, but with double-quotes
                    sb.Append(kDoubleQuote);
                    tp.MoveAhead();
                    CopyTo(kSingleQuote);
                    CloseQuotedValue(kSingleQuote);
                }
                else
                {
                    // Copy unquoted value adding double-quotes.
                    // Only a space or '>' ends an unquoted value here.
                    sb.Append(kDoubleQuote);
                    CopyTo(' ');
                    sb.Append(kDoubleQuote);
                }
                break;

            default:
                Copy();
                break;
        }
    }

    if (tp.Peek() != '>')
    {
        return(false);
    }

    if (tp.CharAt(tp.Position - 1) == '/')
    {
        IsSelfClosingTag = true;

        // Drop the trailing '/' from the attribute text, but only when it
        // really is the last character copied. After an unquoted value
        // such as src=a/ the buffer ends with the closing quote instead,
        // and truncating blindly would remove that quote.
        if (sb.Length > 0 && sb[sb.Length - 1] == '/')
        {
            sb.Length = sb.Length - 1;
        }
    }

    AttributesPart = sb.ToString();
    if (AttributesPart.IndexOf('&') != -1)
    {
        AttributesPart = ResolveEntities(AttributesPart);
    }

    tp.MoveAhead();
    return(true);
}