/// <summary>
/// Collects the target name of a processing instruction.
/// More http://www.w3.org/TR/REC-xml/#sec-pi.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="pi">The processing instruction token.</param>
/// <returns>The token produced by the content state, or the EOF token.</returns>
protected XmlToken ProcessingTarget(Char c, XmlPIToken pi)
{
    for (; ; c = _src.Next)
    {
        var isSpace = c.IsSpaceCharacter();

        if (isSpace || c == Specification.QM)
        {
            // The target is complete: store it and hand over to the content state.
            pi.Target = _stringBuffer.ToString();
            _stringBuffer.Clear();

            // A space is consumed here; a question mark is passed on unchanged.
            if (isSpace)
            {
                return ProcessingContent(_src.Next, pi);
            }

            return ProcessingContent(c, pi);
        }

        if (c == Specification.NULL)
        {
            // Report the NULL character and buffer the replacement character instead.
            RaiseErrorOccurred(ErrorCode.NULL);
            c = Specification.REPLACEMENT;
        }
        else if (c == Specification.EOF)
        {
            RaiseErrorOccurred(ErrorCode.EOF);
            return XmlToken.EOF;
        }

        _stringBuffer.Append(c);
    }
}
/// <summary>
/// Reads an attribute list declaration and the attribute definitions it contains.
/// More http://www.w3.org/TR/REC-xml/#attdecls.
/// </summary>
/// <param name="c">The next input character.</param>
/// <returns>The token returned by the state that follows the attribute definitions.</returns>
/// <exception cref="ArgumentException">Thrown when the input ends right at the start of the declaration.</exception>
DtdAttributeToken AttributeDeclaration(Char c)
{
    var declaration = new DtdAttributeToken();
    Boolean proceed;

    if (c.IsSpaceCharacter())
    {
        proceed = DeclarationNameBefore(_src.Next, declaration);
    }
    else
    {
        if (c == Specification.EOF)
        {
            throw new ArgumentException("The document ended unexpectedly.");
        }

        // The separating space is missing: report it, then reuse the current character.
        RaiseErrorOccurred(ErrorCode.UndefinedMarkupDeclaration);
        proceed = DeclarationNameBefore(c, declaration);
    }

    c = _src.Current;

    // The flag never changes inside the loop; the loop ends on the first
    // non-space character that does not start a name.
    while (proceed)
    {
        while (c.IsSpaceCharacter())
        {
            c = _src.Next;
        }

        if (!c.IsNameStart())
        {
            break;
        }

        _stringBuffer.Clear();
        declaration.Attributes.Add(AttributeDeclarationName(c));
        c = _src.Current;
    }

    return AttributeDeclarationAfter(c, declaration);
}
/// <summary>
/// Reads the rest of an attribute name, registers it on the tag and expects
/// an equality sign (optionally preceded by spaces) afterwards.
/// More http://www.w3.org/TR/REC-xml/#NT-Attribute.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The token returned by the state that reads the attribute value.</returns>
XmlToken AttributeName(Char c, XmlTagToken tag)
{
    for (; c.IsXmlName(); c = GetNext())
    {
        _stringBuffer.Append(c);
    }

    var attributeName = FlushBuffer();
    var existingValue = tag.GetAttribute(attributeName);

    // NOTE(review): the lookup only reports a duplicate when the stored value is
    // non-empty; an earlier attribute with an empty value may slip through - confirm.
    if (!String.IsNullOrEmpty(existingValue))
    {
        throw XmlParseError.XmlUniqueAttribute.At(GetCurrentPosition());
    }

    tag.AddAttribute(attributeName);

    while (c.IsSpaceCharacter())
    {
        c = GetNext();
    }

    if (c != Symbols.Equality)
    {
        throw XmlParseError.XmlInvalidAttribute.At(GetCurrentPosition());
    }

    return AttributeBeforeValue(GetNext(), tag);
}
/// <summary>
/// Finishes the doctype: skips spaces and requires a closing greater-than sign.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="doctype">The current doctype token.</param>
/// <returns>The emitted token.</returns>
XmlToken DoctypeAfter(Char c, XmlDoctypeToken doctype)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    if (c != Symbols.GreaterThan)
    {
        throw XmlParseError.DoctypeInvalid.At(GetCurrentPosition());
    }

    return doctype;
}
/// <summary>
/// Handles the input after the SYSTEM keyword: at least one space followed
/// by a quote that opens the system identifier is required.
/// See 8.2.4.62 After DOCTYPE system keyword state
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="doctype">The current doctype token.</param>
/// <returns>The emitted token.</returns>
XmlToken DoctypeSystem(Char c, XmlDoctypeToken doctype)
{
    if (!c.IsSpaceCharacter())
    {
        throw XmlParseError.DoctypeInvalid.At(GetCurrentPosition());
    }

    do
    {
        c = GetNext();
    }
    while (c.IsSpaceCharacter());

    var opensIdentifier = c == Symbols.DoubleQuote || c == Symbols.SingleQuote;

    if (!opensIdentifier)
    {
        throw XmlParseError.DoctypeInvalid.At(GetCurrentPosition());
    }

    // The identifier starts out empty; the opening quote is handed on as the terminator.
    doctype.SystemIdentifier = String.Empty;
    var quote = c;
    return DoctypeSystemIdentifierValue(GetNext(), quote, doctype);
}
/// <summary>
/// Decides what follows the doctype name: the end of the doctype, a PUBLIC
/// or SYSTEM keyword, or an opening square bracket.
/// See 8.2.4.55 After DOCTYPE name state
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="doctype">The current doctype token.</param>
/// <returns>The emitted token.</returns>
XmlToken DoctypeNameAfter(Char c, XmlDoctypeToken doctype)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    if (c == Symbols.GreaterThan)
    {
        return doctype;
    }

    // Advance(5) plus the following GetNext() presumably steps over the
    // remainder of the matched keyword - confirm against the Keywords values.
    if (ContinuesWithSensitive(Keywords.Public))
    {
        Advance(5);
        return DoctypePublic(GetNext(), doctype);
    }

    if (ContinuesWithSensitive(Keywords.System))
    {
        Advance(5);
        return DoctypeSystem(GetNext(), doctype);
    }

    if (c == Symbols.SquareBracketOpen)
    {
        Advance();
        return DoctypeAfter(GetNext(), doctype);
    }

    throw XmlParseError.DoctypeInvalid.At(GetCurrentPosition());
}
/// <summary>
/// Skips spaces in front of the doctype name and starts reading the name
/// with its first character.
/// See 8.2.4.53 Before DOCTYPE name state
/// </summary>
/// <param name="c">The next input character.</param>
/// <returns>The token returned by the doctype name state.</returns>
XmlToken DoctypeNameBefore(Char c)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    if (!c.IsXmlNameStart())
    {
        throw XmlParseError.DoctypeInvalid.At(GetCurrentPosition());
    }

    _stringBuffer.Append(c);

    // The next character is read before the doctype token is created.
    var following = GetNext();
    var doctype = NewDoctype();
    return DoctypeName(following, doctype);
}
/// <summary>
/// Expects the end of the XML declaration: optional spaces, a question mark
/// and a greater-than sign.
/// More http://www.w3.org/TR/REC-xml/#NT-XMLDecl.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="decl">The current declaration token.</param>
/// <returns>The given declaration token.</returns>
XmlDeclarationToken DeclarationEnd(Char c, XmlDeclarationToken decl)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    // The second character is only read when the first one is the question mark.
    var isTerminated = c == Symbols.QuestionMark && GetNext() == Symbols.GreaterThan;

    if (!isTerminated)
    {
        throw XmlParseError.XmlDeclarationInvalid.At(GetCurrentPosition());
    }

    return decl;
}
/// <summary>
/// Decides what follows an attribute value: another attribute, a
/// self-closing marker or the end of the tag.
/// More http://www.w3.org/TR/REC-xml/#NT-Attribute.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
XmlToken AttributeAfterValue(Char c, XmlTagToken tag)
{
    if (c.IsSpaceCharacter())
    {
        return AttributeBeforeName(GetNext(), tag);
    }

    if (c == Symbols.Solidus)
    {
        return TagSelfClosing(GetNext(), tag);
    }

    if (c != Symbols.GreaterThan)
    {
        throw XmlParseError.XmlInvalidAttribute.At(GetCurrentPosition());
    }

    return tag;
}
/// <summary>
/// Entry state of a doctype: a space is expected before the name; a missing
/// space and the end of the input are reported as errors.
/// See 8.2.4.52 DOCTYPE state
/// </summary>
/// <param name="c">The next input character.</param>
/// <returns>The emitted token.</returns>
HtmlToken Doctype(Char c)
{
    if (c.IsSpaceCharacter())
    {
        return DoctypeNameBefore(GetNext());
    }

    if (c != Symbols.EndOfFile)
    {
        // No separating space: report it and treat the character as part of the name state.
        RaiseErrorOccurred(HtmlParseError.DoctypeUnexpected);
        return DoctypeNameBefore(c);
    }

    RaiseErrorOccurred(HtmlParseError.EOF);
    Back();
    return NewDoctype(true);
}
/// <summary>
/// Consumes a character reference that follows an ampersand and appends the
/// result to the string buffer. If no reference can be resolved, a plain
/// ampersand is appended instead.
/// See 8.2.4.69 Tokenizing character references
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="allowedCharacter">The additionally allowed character if there is one.</param>
void AppendCharacterReference(Char c, Char allowedCharacter = Symbols.Null)
{
    // These characters mean that the ampersand is not the start of a reference.
    if (c.IsSpaceCharacter() || c == Symbols.LessThan || c == Symbols.EndOfFile || c == Symbols.Ampersand || c == allowedCharacter)
    {
        Back();
        _stringBuffer.Append(Symbols.Ampersand);
        return;
    }

    var entity = default(String);

    if (c == Symbols.Num)
    {
        var exp = 10;
        var num = 0;
        var nums = new List<Int32>();
        c = GetNext();
        var isHex = c == 'x' || c == 'X';

        if (isHex)
        {
            exp = 16;

            while ((c = GetNext()).IsHex())
            {
                nums.Add(c.FromHex());
            }
        }
        else
        {
            while (c.IsDigit())
            {
                nums.Add(c.FromHex());
                c = GetNext();
            }
        }

        // Accumulate from the most significant digit and saturate just above
        // the largest Unicode code point (U+10FFFF). Without the bound a long
        // digit run silently wraps around in 32-bit arithmetic and may end up
        // as a seemingly valid code point. The saturated value is expected to
        // be rejected by Entities.IsInvalidNumber below - TODO confirm.
        for (var i = 0; i < nums.Count; i++)
        {
            num = num * exp + nums[i];

            if (num > 0x10FFFF)
            {
                num = 0x110000;
                break;
            }
        }

        if (nums.Count == 0)
        {
            // No digits at all: rewind (one step more when an 'x' was read)
            // and emit the ampersand as text.
            Back(2);

            if (isHex)
            {
                Back();
            }

            RaiseErrorOccurred(HtmlParseError.CharacterReferenceWrongNumber);
            _stringBuffer.Append(Symbols.Ampersand);
            return;
        }

        if (c != Symbols.Semicolon)
        {
            RaiseErrorOccurred(HtmlParseError.CharacterReferenceSemicolonMissing);
            Back();
        }

        if (Entities.IsInCharacterTable(num))
        {
            RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidCode);
            entity = Entities.GetSymbolFromTable(num);
        }
        else if (Entities.IsInvalidNumber(num))
        {
            RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidNumber);
            entity = Symbols.Replacement.ToString();
        }
        else
        {
            if (Entities.IsInInvalidRange(num))
            {
                RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidRange);
            }

            entity = Entities.Convert(num);
        }
    }
    else
    {
        var consumed = 0;
        var start = InsertionPoint - 1;
        var reference = new Char[31];
        var index = 0;
        var chr = Current;

        // Grow the candidate name one character at a time and remember the
        // last prefix that resolved; "consumed" counts the characters read
        // since that match so they can be given back afterwards.
        do
        {
            if (chr == Symbols.Semicolon || !chr.IsName())
            {
                break;
            }

            reference[index++] = chr;
            var value = new String(reference, 0, index);
            chr = GetNext();
            consumed++;
            value = chr == Symbols.Semicolon ? Entities.GetSymbol(value) : Entities.GetSymbolWithoutSemicolon(value);

            if (value != null)
            {
                consumed = 0;
                entity = value;
            }
        }
        while (chr != Symbols.EndOfFile && index < 31);

        Back(consumed);
        chr = Current;

        if (chr != Symbols.Semicolon)
        {
            // With an additionally allowed character set, a following '=' or
            // ASCII alphanumeric leaves the text untouched: restore the
            // position and emit the ampersand.
            if (allowedCharacter != Symbols.Null && (chr == Symbols.Equality || chr.IsAlphanumericAscii()))
            {
                if (chr == Symbols.Equality)
                {
                    RaiseErrorOccurred(HtmlParseError.CharacterReferenceAttributeEqualsFound);
                }

                InsertionPoint = start;
                _stringBuffer.Append(Symbols.Ampersand);
                return;
            }

            Back();
            RaiseErrorOccurred(HtmlParseError.CharacterReferenceNotTerminated);
        }

        if (entity == null)
        {
            _stringBuffer.Append(Symbols.Ampersand);
            return;
        }
    }

    _stringBuffer.Append(entity);
}
/// <summary>
/// Creates a closing tag token if the buffered name equals the name of the
/// last start tag and the given character terminates a tag name.
/// </summary>
/// <param name="c">The next input character.</param>
/// <returns>The token of the follow-up state, or null if nothing was created.</returns>
HtmlToken CreateIfAppropriate(Char c)
{
    var afterSpace = c.IsSpaceCharacter();
    var afterClose = c == Symbols.GreaterThan;
    var afterSolidus = c == Symbols.Solidus;
    var sameLength = _stringBuffer.Length == _lastStartTag.Length;
    var isDelimited = afterSpace || afterClose || afterSolidus;

    // The string comparison only runs when the cheaper checks have passed.
    if (!sameLength || !isDelimited || !_stringBuffer.ToString().Equals(_lastStartTag, StringComparison.Ordinal))
    {
        return null;
    }

    var tag = NewTagClose();
    _stringBuffer.Clear();
    tag.Name = _lastStartTag;

    if (afterSpace)
    {
        return AttributeBeforeName(tag);
    }

    if (afterSolidus)
    {
        return TagSelfClosing(tag);
    }

    // Only the greater-than sign is left at this point.
    return EmitTag(tag);
}
/// <summary>
/// Reads an unquoted attribute value until a space, a greater-than sign or
/// the end of the input is found.
/// See 8.2.4.40 Attribute value (unquoted) state
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken AttributeUnquotedValue(Char c, HtmlTagToken tag)
{
    for (; ; c = GetNext())
    {
        var closesTag = c == Symbols.GreaterThan;

        if (closesTag || c.IsSpaceCharacter())
        {
            // The value is complete: store it before leaving this state.
            tag.SetAttributeValue(_stringBuffer.ToString());
            _stringBuffer.Clear();

            if (closesTag)
            {
                return EmitTag(tag);
            }

            return AttributeBeforeName(tag);
        }

        if (c == Symbols.Ampersand)
        {
            AppendCharacterReference(GetNext(), Symbols.GreaterThan);
            continue;
        }

        if (c == Symbols.Null)
        {
            RaiseErrorOccurred(HtmlParseError.Null);
            _stringBuffer.Append(Symbols.Replacement);
            continue;
        }

        var isSuspicious = c == Symbols.DoubleQuote || c == Symbols.SingleQuote || c == Symbols.LessThan ||
                           c == Symbols.Equality || c == Symbols.CurvedQuote;

        if (isSuspicious)
        {
            // Reported as an error, but the character is still part of the value.
            RaiseErrorOccurred(HtmlParseError.AttributeValueInvalid);
        }
        else if (c == Symbols.EndOfFile)
        {
            return NewEof();
        }

        _stringBuffer.Append(c);
    }
}
/// <summary>
/// Skips spaces in front of the doctype name and starts the name with the
/// first significant character. A greater-than sign or the end of the input
/// yields a doctype token created with the flag set to true.
/// See 8.2.4.53 Before DOCTYPE name state
/// </summary>
/// <param name="c">The next input character.</param>
/// <returns>The emitted token.</returns>
HtmlToken DoctypeNameBefore(Char c)
{
    while (c.IsSpaceCharacter())
    {
        c = GetNext();
    }

    if (c.IsUppercaseAscii())
    {
        var doctype = NewDoctype(false);
        // Culture-invariant lowering: Char.ToLower uses the current culture,
        // which maps 'I' to a dotless i under Turkish cultures.
        _stringBuffer.Append(Char.ToLowerInvariant(c));
        return DoctypeName(doctype);
    }
    else if (c == Symbols.Null)
    {
        var doctype = NewDoctype(false);
        RaiseErrorOccurred(HtmlParseError.Null);
        _stringBuffer.Append(Symbols.Replacement);
        return DoctypeName(doctype);
    }
    else if (c == Symbols.GreaterThan)
    {
        // The doctype ends before any name was given.
        var doctype = NewDoctype(true);
        _state = HtmlParseMode.PCData;
        RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
        return doctype;
    }
    else if (c == Symbols.EndOfFile)
    {
        var doctype = NewDoctype(true);
        RaiseErrorOccurred(HtmlParseError.EOF);
        Back();
        return doctype;
    }
    else
    {
        var doctype = NewDoctype(false);
        _stringBuffer.Append(c);
        return DoctypeName(doctype);
    }
}
/// <summary>
/// Skips spaces after a buffered name and decides whether the name is a
/// function (an opening round bracket follows) or a plain identifier.
/// 4.4.11. Transform-function-whitespace state
/// </summary>
/// <param name="current">The current character; it is overwritten before it is first read.</param>
/// <returns>A function token or an identifier token built from the buffer.</returns>
CssToken TransformFunctionWhitespace(Char current)
{
    do
    {
        current = Next;

        if (current == Specification.RoundBracketOpen)
        {
            Back();
            return CssKeywordToken.Function(FlushBuffer());
        }
    }
    while (current.IsSpaceCharacter());

    // Neither a bracket nor a space: step back two positions and emit an identifier.
    Back(2);
    return CssKeywordToken.Ident(FlushBuffer());
}
/// <summary>
/// Expects the equality sign (optionally preceded by spaces) that follows
/// the standalone attribute name.
/// More http://www.w3.org/TR/REC-xml/#NT-SDDecl.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="decl">The current declaration token.</param>
/// <returns>The token returned by the state in front of the standalone value.</returns>
XmlToken DeclarationStandaloneAfterName(Char c, XmlDeclarationToken decl)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    if (c != Symbols.Equality)
    {
        throw XmlParseError.XmlDeclarationInvalid.At(GetCurrentPosition());
    }

    return DeclarationStandaloneBeforeValue(GetNext(), decl);
}
/// <summary>
/// Expects the quote (optionally preceded by spaces) that opens the
/// standalone value.
/// More http://www.w3.org/TR/REC-xml/#NT-SDDecl.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="decl">The current declaration token.</param>
/// <returns>The token returned by the standalone value state.</returns>
XmlToken DeclarationStandaloneBeforeValue(Char c, XmlDeclarationToken decl)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    var isQuote = c == Symbols.DoubleQuote || c == Symbols.SingleQuote;

    if (!isQuote)
    {
        throw XmlParseError.XmlDeclarationInvalid.At(GetCurrentPosition());
    }

    // The opening quote is handed on as the terminator of the value.
    var quote = c;
    return DeclarationStandaloneValue(GetNext(), quote, decl);
}
/// <summary>
/// Reads the target name of a processing instruction. The instruction may
/// end right after the target or continue with content after a space.
/// More http://www.w3.org/TR/REC-xml/#sec-pi.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="pi">The processing instruction token.</param>
/// <returns>The emitted token.</returns>
XmlToken ProcessingTarget(Char c, XmlPIToken pi)
{
    for (; c.IsXmlName(); c = GetNext())
    {
        _stringBuffer.Append(c);
    }

    pi.Target = FlushBuffer();

    // A target matching the xml tag name (checked via Isi) is rejected.
    if (pi.Target.Isi(TagNames.Xml))
    {
        throw XmlParseError.XmlInvalidPI.At(GetCurrentPosition());
    }

    if (c == Symbols.QuestionMark)
    {
        // The question mark must be followed directly by the greater-than sign.
        if (GetNext() != Symbols.GreaterThan)
        {
            throw XmlParseError.XmlInvalidPI.At(GetCurrentPosition());
        }

        return pi;
    }

    if (!c.IsSpaceCharacter())
    {
        throw XmlParseError.XmlInvalidPI.At(GetCurrentPosition());
    }

    return ProcessingContent(GetNext(), pi);
}
/// <summary>
/// Entry state of a doctype: a space is required before the name.
/// See 8.2.4.52 DOCTYPE state
/// </summary>
/// <param name="c">The next input character.</param>
/// <returns>The token returned by the state in front of the doctype name.</returns>
XmlToken Doctype(Char c)
{
    if (!c.IsSpaceCharacter())
    {
        throw XmlParseError.DoctypeInvalid.At(GetCurrentPosition());
    }

    return DoctypeNameBefore(GetNext());
}
/// <summary>
/// Reads an end tag: a name, optional spaces and a closing greater-than sign.
/// More http://www.w3.org/TR/REC-xml/#dt-etag.
/// </summary>
/// <param name="c">The next input character.</param>
/// <returns>The created close tag token.</returns>
XmlToken TagEnd(Char c)
{
    if (c.IsXmlNameStart())
    {
        _stringBuffer.Append(c);

        for (c = GetNext(); c.IsXmlName(); c = GetNext())
        {
            _stringBuffer.Append(c);
        }

        for (; c.IsSpaceCharacter(); c = GetNext())
        {
            // Only skipping white space.
        }

        if (c == Symbols.GreaterThan)
        {
            var closeTag = NewCloseTag();
            closeTag.Name = FlushBuffer();
            return closeTag;
        }
    }

    // Running out of input is reported separately from a malformed end tag.
    if (c == Symbols.EndOfFile)
    {
        throw XmlParseError.EOF.At(GetCurrentPosition());
    }

    throw XmlParseError.XmlInvalidEndTag.At(GetCurrentPosition());
}
/// <summary>
/// Reads the rest of the doctype name and stores it on the token.
/// See 8.2.4.54 DOCTYPE name state
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="doctype">The current doctype token.</param>
/// <returns>The emitted token.</returns>
XmlToken DoctypeName(Char c, XmlDoctypeToken doctype)
{
    for (; c.IsXmlName(); c = GetNext())
    {
        _stringBuffer.Append(c);
    }

    doctype.Name = FlushBuffer();

    if (c == Symbols.GreaterThan)
    {
        return doctype;
    }

    if (!c.IsSpaceCharacter())
    {
        throw XmlParseError.DoctypeInvalid.At(GetCurrentPosition());
    }

    return DoctypeNameAfter(GetNext(), doctype);
}
/// <summary>
/// Decides between an XML declaration (a space follows) and a processing
/// instruction whose target merely starts with the xml tag name.
/// More http://www.w3.org/TR/REC-xml/#NT-XMLDecl.
/// </summary>
/// <param name="c">The next input character.</param>
/// <returns>The emitted token.</returns>
XmlToken DeclarationStart(Char c)
{
    if (c.IsSpaceCharacter())
    {
        while (c.IsSpaceCharacter())
        {
            c = GetNext();
        }

        if (!ContinuesWithSensitive(AttributeNames.Version))
        {
            throw XmlParseError.XmlDeclarationInvalid.At(GetCurrentPosition());
        }

        // Advance(6) plus the following GetNext() presumably steps over the
        // remainder of the attribute name - confirm against AttributeNames.Version.
        Advance(6);
        return DeclarationVersionAfterName(GetNext(), NewDeclaration());
    }

    // No space: put the already matched name back into the buffer and
    // continue as an ordinary processing instruction target.
    _stringBuffer.Append(TagNames.Xml);
    return ProcessingTarget(c, NewProcessing());
}
/// <summary>
/// Decides what follows the public identifier: the end of the doctype or,
/// after a space, the part between the identifiers.
/// See 8.2.4.60 After DOCTYPE public identifier state
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="doctype">The current doctype token.</param>
/// <returns>The emitted token.</returns>
XmlToken DoctypePublicIdentifierAfter(Char c, XmlDoctypeToken doctype)
{
    if (c == Symbols.GreaterThan)
    {
        return doctype;
    }

    if (!c.IsSpaceCharacter())
    {
        throw XmlParseError.DoctypeInvalid.At(GetCurrentPosition());
    }

    return DoctypeBetween(GetNext(), doctype);
}
/// <summary>
/// Reads the version value up to the closing quote and stores it on the
/// declaration.
/// More http://www.w3.org/TR/REC-xml/#NT-VersionInfo.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="q">The quote character.</param>
/// <param name="decl">The current declaration token.</param>
/// <returns>The emitted token.</returns>
XmlToken DeclarationVersionValue(Char c, Char q, XmlDeclarationToken decl)
{
    for (; c != q; c = GetNext())
    {
        if (c == Symbols.EndOfFile)
        {
            throw XmlParseError.EOF.At(GetCurrentPosition());
        }

        _stringBuffer.Append(c);
    }

    decl.Version = FlushBuffer();
    c = GetNext();

    // Without a space after the value only the end of the declaration may follow.
    if (!c.IsSpaceCharacter())
    {
        return DeclarationEnd(c, decl);
    }

    return DeclarationAfterVersion(c, decl);
}
/// <summary>
/// Skips spaces after the system identifier, steps over an opening square
/// bracket if present and finishes the doctype.
/// See 8.2.4.66 After DOCTYPE system identifier state
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="doctype">The current doctype token.</param>
/// <returns>The emitted token.</returns>
XmlToken DoctypeSystemIdentifierAfter(Char c, XmlDoctypeToken doctype)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    var opensBracket = c == Symbols.SquareBracketOpen;

    if (opensBracket)
    {
        Advance();
        c = GetNext();
    }

    return DoctypeAfter(c, doctype);
}
/// <summary>
/// Expects the quote (optionally preceded by spaces) that opens the encoding
/// value; the value itself has to start with a letter.
/// More http://www.w3.org/TR/REC-xml/#NT-EncodingDecl.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="decl">The current declaration token.</param>
/// <returns>The token returned by the encoding value state.</returns>
XmlToken DeclarationEncodingBeforeValue(Char c, XmlDeclarationToken decl)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    var isQuote = c == Symbols.DoubleQuote || c == Symbols.SingleQuote;

    if (!isQuote)
    {
        throw XmlParseError.XmlDeclarationInvalid.At(GetCurrentPosition());
    }

    var quote = c;
    var first = GetNext();

    if (!first.IsLetter())
    {
        throw XmlParseError.XmlDeclarationInvalid.At(GetCurrentPosition());
    }

    return DeclarationEncodingValue(first, quote, decl);
}
/// <summary>
/// Skips spaces inside a tag and decides whether the tag ends, self-closes
/// or continues with another attribute name.
/// More http://www.w3.org/TR/REC-xml/#NT-Attribute.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
XmlToken AttributeBeforeName(Char c, XmlTagToken tag)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    if (c == Symbols.Solidus)
    {
        return TagSelfClosing(GetNext(), tag);
    }

    if (c == Symbols.GreaterThan)
    {
        return tag;
    }

    if (c == Symbols.EndOfFile)
    {
        throw XmlParseError.EOF.At(GetCurrentPosition());
    }

    if (!c.IsXmlNameStart())
    {
        throw XmlParseError.XmlInvalidAttribute.At(GetCurrentPosition());
    }

    _stringBuffer.Append(c);
    return AttributeName(GetNext(), tag);
}
/// <summary>
/// Reads the encoding value up to the closing quote. Only ASCII
/// alphanumerics, dots, underscores and minus signs are accepted.
/// More http://www.w3.org/TR/REC-xml/#NT-EncodingDecl.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="q">The quote character.</param>
/// <param name="decl">The current declaration token.</param>
/// <returns>The emitted token.</returns>
XmlToken DeclarationEncodingValue(Char c, Char q, XmlDeclarationToken decl)
{
    do
    {
        var isAllowed = c.IsAlphanumericAscii() || c == Symbols.Dot || c == Symbols.Underscore || c == Symbols.Minus;

        if (!isAllowed)
        {
            throw XmlParseError.XmlDeclarationInvalid.At(GetCurrentPosition());
        }

        _stringBuffer.Append(c);
        c = GetNext();
    }
    while (c != q);

    decl.Encoding = FlushBuffer();
    c = GetNext();

    // Without a space after the value only the end of the declaration may follow.
    if (!c.IsSpaceCharacter())
    {
        return DeclarationEnd(c, decl);
    }

    return DeclarationAfterEncoding(c, decl);
}
/// <summary>
/// Expects the quote (optionally preceded by spaces) that opens an
/// attribute value.
/// More http://www.w3.org/TR/REC-xml/#NT-Attribute.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The token returned by the attribute value state.</returns>
XmlToken AttributeBeforeValue(Char c, XmlTagToken tag)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    var isQuote = c == Symbols.DoubleQuote || c == Symbols.SingleQuote;

    if (!isQuote)
    {
        throw XmlParseError.XmlInvalidAttribute.At(GetCurrentPosition());
    }

    // The opening quote is handed on as the terminator of the value.
    var quote = c;
    return AttributeValue(GetNext(), quote, tag);
}
/// <summary>
/// Skips spaces after the encoding value and continues with either the
/// standalone attribute or the end of the declaration.
/// More http://www.w3.org/TR/REC-xml/#NT-SDDecl.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="decl">The current declaration token.</param>
/// <returns>The emitted token.</returns>
XmlToken DeclarationAfterEncoding(Char c, XmlDeclarationToken decl)
{
    for (; c.IsSpaceCharacter(); c = GetNext())
    {
        // Only skipping white space.
    }

    if (!ContinuesWithSensitive(AttributeNames.Standalone))
    {
        return DeclarationEnd(c, decl);
    }

    // Advance(9) plus the following GetNext() presumably steps over the
    // remainder of the attribute name - confirm against AttributeNames.Standalone.
    Advance(9);
    return DeclarationStandaloneAfterName(GetNext(), decl);
}