/// <summary>
/// Parse a start tag: a DOCTYPE, a comment, a process instruction, an end tag or an opening tag.
/// </summary>
/// <remarks>
/// For a DOCTYPE or a comment the work is delegated to <c>ParseDoctype</c> or <c>ParseComment</c>.
/// For an end tag the whole tag is consumed up to its '&gt;', the parser goes back to the
/// content state and the current token is cleared before the end tag is returned.
/// For an opening tag or a process instruction only the name is read; the token is kept as
/// the current token and the parser stays in the corresponding tag state.
/// NOTE(review): presumably the caller has already consumed the leading '&lt;' - confirm.
/// </remarks>
/// <exception cref="ParseError">
/// Raised when the tag name does not start with a letter or a digit, when "&lt;!" is not followed
/// by a DOCTYPE or a comment, or when an end tag is not properly closed.
/// </exception>
/// <returns>The parsed token.</returns>
protected ParsedToken ParseStartTag()
{
    _StartTagPosition = _CurrentPosition;
    _TagBuffer = SourceReader.OpenBuffer();

    CharInfo current = ReadChar();

    // "<!" : either a DOCTYPE (a letter follows) or a comment ("--" follows)
    if (current == '!')
    {
        current = ReadChar();
        if (Char.IsLetter(current.AsChar))
        {
            // Give the letter back before delegating to the DOCTYPE parser
            SaveChar(current);
            return ParseDoctype();
        }
        if (current == '-')
        {
            if (ReadChar() != '-')
            {
                throw new ParseError("Comments need to start with '<!--'.", ReadPosition);
            }
            return ParseComment();
        }
        throw new ParseError("Comment or DOCTYPE expected.", ReadPosition);
    }

    // Select the state from the first char and move to the first char of the name
    if (current == '?')
    {
        _State = ParseState.ProcessInstruction;
        // Read the next char, skipping any whitespace before the name
        do
        {
            current = ReadChar(false);
        }
        while (current != CharInfo.EOF && Char.IsWhiteSpace(current.AsChar));
    }
    else if (current == '/')
    {
        _State = ParseState.EndTag;
        current = ReadChar(false);
        if (current == CharInfo.EOF || !Char.IsLetterOrDigit(current.AsChar))
        {
            SaveChar(current);
            throw new ParseError("Invalid tag name. Need to start with an alphanumeric", ReadPosition);
        }
    }
    else
    {
        _State = ParseState.Tag;
    }

    // The name must start with a letter or a digit
    if (current == CharInfo.EOF || !Char.IsLetterOrDigit(current.AsChar))
    {
        throw new ParseError("Invalid tag name. Need to start with an alphanumeric", ReadPosition);
    }

    // Accumulate the name: letters, digits, '.', ':' and '-'
    _CurrentRead = null;
    do
    {
        AddToCurrentRead(current);
        current = ReadChar(false);
    }
    while (current != CharInfo.EOF
        && (Char.IsLetterOrDigit(current.AsChar) || current == '.' || current == ':' || current == '-'));

    // Opening tag or process instruction: only the name is consumed here
    if (_State != ParseState.EndTag)
    {
        // Give back the char that ended the name
        if (current != CharInfo.EOF)
        {
            SaveChar(current);
        }
        if (_State == ParseState.Tag)
        {
            _CurrentToken = ParsedTag.OpenTag(GetCurrentRead(true));
        }
        else
        {
            _CurrentToken = ParsedTag.OpenProcessInstruction(GetCurrentRead(true));
        }
        _CurrentToken.Position = _StartTagPosition;
        return _CurrentToken;
    }

    // End tag: nothing but whitespace is accepted between the name and '>'
    _CurrentToken = ParsedTag.EndTag(GetCurrentRead(true));
    _CurrentToken.Position = _StartTagPosition;
    while (current != CharInfo.EOF && Char.IsWhiteSpace(current.AsChar))
    {
        current = ReadChar(false);
    }
    try
    {
        if (current == CharInfo.EOF)
        {
            throw new ParseError("Unexpected end of stream.", ReadPosition);
        }
        if (IsAttributeNameChar(current.AsChar))
        {
            throw new ParseError("End tag can't contains attribute.", ReadPosition);
        }
        if (current != '>')
        {
            throw new ParseError("Unexpected char. End tag not closed.", ReadPosition);
        }
    }
    catch
    {
        // Resynchronise the stream: skip to the next '<' or '>' before reporting the error.
        // A '<' is given back so it can be read again.
        while (current != CharInfo.EOF && current != '<' && current != '>')
        {
            current = ReadChar(false);
        }
        if (current == '<')
        {
            SaveChar(current);
        }
        throw;
    }

    // The end tag is complete: back to content, no token left pending
    _State = ParseState.Content;
    ResetTagBuffer();
    var closingTag = _CurrentToken;
    _CurrentToken = null;
    return closingTag;
}
/// <summary>
/// Parse a content as raw text.
/// </summary>
/// <remarks>
/// This method is used to parse the content of tags such as script or style.
/// Characters are accumulated as text until an end tag matching <paramref name="tag"/>
/// (case-insensitive comparison) is found. If <paramref name="tag"/> is null or empty then
/// any end tag is accepted. An end tag is recognized as '&lt;', '/', a name and '&gt;', with
/// optional whitespace between each part. The end tag found is stored as the current token
/// for the next parse and is not part of the returned text. Anything that looks like the
/// start of an end tag but is not the expected one is kept as text.
/// If the end of the source is reached first, everything read is returned as text.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Raised when the parser is not in the content state: all tags need to be closed.
/// </exception>
/// <param name="tag">Name of the expected end tag.</param>
/// <returns>The text read, HTML-decoded, positioned where the reading started.</returns>
public ParsedText ParseContentText(String tag)
{
    // Verify
    if (this._State != ParseState.Content)
    {
        throw new InvalidOperationException("Can't read a content in a opened tag.");
    }

    // Read loop
    var start = SourceReader.Position;
    CharInfo c;
    while ((c = ReadChar(false)) != CharInfo.EOF)
    {
        // End detected ?
        if (c == '<')
        {
            var endTagPos = c.Position;
            // Everything consumed while testing the candidate, to restore it as text on failure
            StringBuilder saveTag = new StringBuilder(15);
            saveTag.Append(c.AsChar);
            while ((c = ReadChar(false)) != CharInfo.EOF && Char.IsWhiteSpace(c.AsChar))
            {
                saveTag.Append(c.AsChar);
            }
            if (c == '/')
            {
                // Pass '/'
                saveTag.Append(c.AsChar);
                while ((c = ReadChar(false)) != CharInfo.EOF && Char.IsWhiteSpace(c.AsChar))
                {
                    saveTag.Append(c.AsChar);
                }
                // A '<' can't start a name: leave it unconsumed so it is rescanned below
                if (c != CharInfo.EOF && c != '<')
                {
                    // Pass tag name
                    StringBuilder tagName = new StringBuilder(10);
                    saveTag.Append(c.AsChar);
                    tagName.Append(c.AsChar);
                    while ((c = ReadChar(false)) != CharInfo.EOF && IsAttributeNameChar(c.AsChar))
                    {
                        saveTag.Append(c.AsChar);
                        tagName.Append(c.AsChar);
                    }
                    // Is it the expected end tag ?
                    if (c != CharInfo.EOF
                        && (String.IsNullOrEmpty(tag) || String.Equals(tagName.ToString(), tag, StringComparison.OrdinalIgnoreCase)))
                    {
                        // Give back the char that ended the name: the loop below reads it again
                        SaveChar(c);
                        // Search the closing '>'
                        while ((c = ReadChar(false)) != CharInfo.EOF && Char.IsWhiteSpace(c.AsChar))
                        {
                            saveTag.Append(c.AsChar);
                        }
                        if (c == '>')
                        {
                            // Save the end tag for the next parse
                            _CurrentToken = ParsedTag.EndTag(tagName.ToString());
                            _CurrentToken.Position = endTagPos;
                            // The '>' is consumed: exit the loop
                            break;
                        }
                    }
                }
            }

            // If here then it is not the expected end tag: the consumed chars become text
            var etp = endTagPos;
            foreach (var st in saveTag.ToString())
            {
                AddToCurrentRead(new CharInfo(st, etp++));
            }

            // The source ended inside the candidate: the EOF marker must not be added to the text
            if (c == CharInfo.EOF)
            {
                break;
            }
            // The char that stopped the candidate may itself start the real end tag
            // (e.g. "<</script>"): give it back so the next iteration examines it.
            if (c == '<')
            {
                SaveChar(c);
                continue;
            }
        }
        // Plain text char
        AddToCurrentRead(c);
    }

    // Returns parse result
    LastParsed = new ParsedText() {
        Position = start,
        Text = HEntity.HtmlDecode(GetCurrentRead(true), RemoveUnknownOrInvalidEntities)
    };
    return (ParsedText)LastParsed;
}