/// <summary>
/// 4.4.23. Unicode-range State
/// Consumes the body of a unicode-range token; the caller has already
/// consumed the leading "U+".
/// </summary>
/// <param name="current">
/// The first character after "U+". The caller only enters this state for
/// a hex digit or a question mark.
/// </param>
/// <returns>The unicode-range token.</returns>
CssToken UnicodeRange(Char current)
{
    const int maxDigits = 6;

    // Consume as many hex digits as possible, but no more than six.
    for (int i = 0; i < maxDigits && current.IsHex(); i++)
    {
        _stringBuffer.Append(current);
        current = Next;
    }

    // Fill the remaining places with wildcards. The number of open places
    // is computed once up front: re-evaluating it against the growing
    // buffer would stop the loop after only half of them.
    var missing = maxDigits - _stringBuffer.Length;
    var wildcards = 0;

    while (wildcards < missing && current == Specification.QuestionMark)
    {
        _stringBuffer.Append(current);
        current = Next;
        wildcards++;
    }

    if (wildcards > 0)
    {
        // The cursor sits on the first character that is NOT part of the
        // token; step back so it rests on the last consumed one, as on
        // every other exit of this state.
        Back();
        var range = FlushBuffer();
        var start = range.Replace(Specification.QuestionMark, '0');
        var end = range.Replace(Specification.QuestionMark, 'F');
        return CssToken.Range(start, end);
    }

    // An explicit end is allowed after any number of start digits
    // (e.g. U+0025-00FF), not only after exactly six.
    if (current == Specification.Minus)
    {
        current = Next;

        if (current.IsHex())
        {
            var start = _stringBuffer.ToString();
            _stringBuffer.Clear();

            for (int i = 0; i < maxDigits && current.IsHex(); i++)
            {
                _stringBuffer.Append(current);
                current = Next;
            }

            // Same cursor rule as above, regardless of whether the loop
            // stopped on a non-hex character or after the sixth digit.
            Back();
            var end = FlushBuffer();
            return CssToken.Range(start, end);
        }

        // The minus does not belong to the range: undo the minus and the
        // character read after it.
        Back(2);
        return CssToken.Range(FlushBuffer(), null);
    }

    Back();
    return CssToken.Range(FlushBuffer(), null);
}
/// <summary>
/// Called once an &amp; character is being seen. Reads the reference up
/// to the terminating semicolon and resolves it to its replacement text.
/// </summary>
/// <param name="c">The next character after the &amp; character.</param>
/// <returns>The text the reference stands for.</returns>
String CharacterReference(Char c)
{
    // The reference is collected behind whatever is already buffered and
    // removed again once it has been read.
    var start = _stringBuffer.Length;
    var hex = false;
    var numeric = c == Symbols.Num;

    if (numeric)
    {
        c = GetNext();
        hex = c == 'x' || c == 'X';

        if (hex)
        {
            c = GetNext();

            while (c.IsHex())
            {
                _stringBuffer.Append(c);
                c = GetNext();
            }
        }
        else
        {
            while (c.IsDigit())
            {
                _stringBuffer.Append(c);
                c = GetNext();
            }
        }
    }
    else if (c.IsXmlNameStart())
    {
        do
        {
            _stringBuffer.Append(c);
            c = GetNext();
        }
        while (c.IsXmlName());
    }

    if (c == Symbols.Semicolon && _stringBuffer.Length > start)
    {
        var length = _stringBuffer.Length - start;
        var content = _stringBuffer.ToString(start, length);
        _stringBuffer.Remove(start, length);

        if (numeric)
        {
            // The radix depends on the "x" prefix, not on the reference
            // being numeric: "&#65;" is decimal 65, "&#x41;" is hex 41.
            var number = hex ? content.FromHex() : content.FromDec();

            if (!number.IsValidAsCharRef())
                throw XmlParseError.CharacterReferenceInvalidNumber.At(_position);

            return number.ConvertFromUtf32();
        }
        else
        {
            var entity = _resolver.GetSymbol(content);

            if (String.IsNullOrEmpty(entity))
                throw XmlParseError.CharacterReferenceInvalidCode.At(_position);

            return entity;
        }
    }

    // Either nothing was collected or the semicolon is missing.
    throw XmlParseError.CharacterReferenceNotTerminated.At(GetCurrentPosition());
}
/// <summary>
/// Consumes an escaped character AFTER the solidus has already been
/// consumed.
/// </summary>
/// <param name="current">The first character after the solidus.</param>
/// <returns>
/// The escaped character. For a hex escape this is the text of the
/// denoted code point, or U+FFFD when the number is zero, a surrogate or
/// beyond U+10FFFF.
/// </returns>
String ConsumeEscape(Char current)
{
    if (current.IsHex())
    {
        var escape = new Char[6];
        var length = 0;

        // Collect up to six hex digits.
        while (length < escape.Length)
        {
            escape[length++] = current;
            current = Next;

            if (!current.IsHex())
                break;
        }

        // A single whitespace character terminating the escape belongs to
        // it and is swallowed; anything else is handed back to the stream.
        if (!current.IsSpaceCharacter())
            current = Previous;

        var code = Int32.Parse(new String(escape, 0, length), NumberStyles.HexNumber);

        // Char.ConvertFromUtf32 throws for surrogates and for values above
        // U+10FFFF; such escapes (and zero) yield the replacement character.
        if (code == 0 || code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF))
            return "\uFFFD";

        return Char.ConvertFromUtf32(code);
    }

    return current.ToString();
}
/// <summary>
/// 4.4.1. Data state
/// Dispatches on the given character and either returns a token directly
/// or hands over to a more specific state.
/// </summary>
/// <param name="current">The character to dispatch on.</param>
/// <returns>
/// The produced token; null when the end-of-file character is dispatched.
/// </returns>
CssToken Data(Char current)
{
    switch (current)
    {
        case Specification.LineFeed:
        case Specification.CarriageReturn:
        case Specification.Tab:
        case Specification.Space:
            // Skip the whole run of space characters.
            do { current = Next; }
            while (current.IsSpaceCharacter());

            // When whitespace is ignored, continue with the first
            // character after the run instead of emitting a token.
            if (_ignoreWs)
                return Data(current);

            // Undo the one character read past the run.
            Back();
            return CssSpecialCharacter.Whitespace;

        case Specification.DoubleQuote:
            return StringDQ(Next);

        case Specification.Num:
            return HashStart(Next);

        case Specification.Dollar:
            // "$=" is the suffix match; a lone "$" is a delimiter.
            current = Next;

            if (current == Specification.Equality)
                return CssMatchToken.Suffix;

            return CssToken.Delim(Previous);

        case Specification.SingleQuote:
            return StringSQ(Next);

        case Specification.RoundBracketOpen:
            return CssBracketToken.OpenRound;

        case Specification.RoundBracketClose:
            return CssBracketToken.CloseRound;

        case Specification.Asterisk:
            // "*=" is the substring match; a lone "*" is a delimiter.
            current = Next;

            if (current == Specification.Equality)
                return CssMatchToken.Substring;

            return CssToken.Delim(Previous);

        case Specification.Plus:
        {
            // Look ahead up to two characters and rewind before deciding.
            var c1 = Next;

            if (c1 == Specification.EndOfFile)
            {
                Back();
            }
            else
            {
                var c2 = Next;
                Back(2);

                // "+1" or "+.1" starts a number.
                if (c1.IsDigit() || (c1 == Specification.Dot && c2.IsDigit()))
                    return NumberStart(current);
            }

            return CssToken.Delim(current);
        }

        case Specification.Comma:
            return CssSpecialCharacter.Comma;

        case Specification.Dot:
        {
            // ".5" starts a number; in both cases the look-ahead is undone
            // through Previous, which yields the character to pass on.
            var c = Next;

            if (c.IsDigit())
                return NumberStart(Previous);

            return CssToken.Delim(Previous);
        }

        case Specification.Minus:
        {
            // Look ahead up to two characters and rewind before deciding.
            var c1 = Next;

            if (c1 == Specification.EndOfFile)
            {
                Back();
            }
            else
            {
                var c2 = Next;
                Back(2);

                if (c1.IsDigit() || (c1 == Specification.Dot && c2.IsDigit()))
                    return NumberStart(current);
                else if (c1.IsNameStart())
                    return IdentStart(current);
                else if (c1 == Specification.ReverseSolidus && !c2.IsLineBreak() && c2 != Specification.EndOfFile)
                    return IdentStart(current);
                else if (c1 == Specification.Minus && c2 == Specification.GreaterThan)
                {
                    // "-->": step over the two characters looked at above.
                    Advance(2);

                    if (_ignoreCs)
                        return Data(Next);

                    return CssCommentToken.Close;
                }
            }

            return CssToken.Delim(current);
        }

        case Specification.Solidus:
            // "/*" opens a comment; a lone "/" is a delimiter.
            current = Next;

            if (current == Specification.Asterisk)
                return Comment(Next);

            return CssToken.Delim(Previous);

        case Specification.ReverseSolidus:
            // A solidus followed by a line break or the end of file is
            // reported as an error and emitted as a delimiter.
            current = Next;

            if (current.IsLineBreak() || current == Specification.EndOfFile)
            {
                RaiseErrorOccurred(current == Specification.EndOfFile ? ErrorCode.EOF : ErrorCode.LineBreakUnexpected);
                return CssToken.Delim(Previous);
            }

            return IdentStart(Previous);

        case Specification.Colon:
            return CssSpecialCharacter.Colon;

        case Specification.Semicolon:
            return CssSpecialCharacter.Semicolon;

        case Specification.LessThan:
            // Try to match "<!--". Each failed step undoes its own read
            // via Previous, so the final Previous lands on "<" again.
            current = Next;

            if (current == Specification.ExclamationMark)
            {
                current = Next;

                if (current == Specification.Minus)
                {
                    current = Next;

                    if (current == Specification.Minus)
                    {
                        if (_ignoreCs)
                            return Data(Next);

                        return CssCommentToken.Open;
                    }

                    current = Previous;
                }

                current = Previous;
            }

            return CssToken.Delim(Previous);

        case Specification.At:
            return AtKeywordStart(Next);

        case Specification.SquareBracketOpen:
            return CssBracketToken.OpenSquare;

        case Specification.SquareBracketClose:
            return CssBracketToken.CloseSquare;

        case Specification.Accent:
            // "^=" is the prefix match; a lone "^" is a delimiter.
            current = Next;

            if (current == Specification.Equality)
                return CssMatchToken.Prefix;

            return CssToken.Delim(Previous);

        case Specification.CurlyBracketOpen:
            return CssBracketToken.OpenCurly;

        case Specification.CurlyBracketClose:
            return CssBracketToken.CloseCurly;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return NumberStart(current);

        case 'U':
        case 'u':
            // "U+" followed by a hex digit or "?" is a unicode range;
            // otherwise rewind and treat the letter as an identifier start.
            current = Next;

            if (current == Specification.Plus)
            {
                current = Next;

                if (current.IsHex() || current == Specification.QuestionMark)
                    return UnicodeRange(current);

                current = Previous;
            }

            return IdentStart(Previous);

        case Specification.Pipe:
            // "|=" is the dash match, "||" the column token.
            current = Next;

            if (current == Specification.Equality)
                return CssMatchToken.Dash;
            else if (current == Specification.Pipe)
                return CssToken.Column;

            return CssToken.Delim(Previous);

        case Specification.Tilde:
            // "~=" is the include match; a lone "~" is a delimiter.
            current = Next;

            if (current == Specification.Equality)
                return CssMatchToken.Include;

            return CssToken.Delim(Previous);

        case Specification.EndOfFile:
            return null;

        case Specification.ExclamationMark:
            // "!=" is the not match; a lone "!" is a delimiter.
            current = Next;

            if (current == Specification.Equality)
                return CssMatchToken.Not;

            return CssToken.Delim(Previous);

        default:
            if (current.IsNameStart())
                return IdentStart(current);

            return CssToken.Delim(current);
    }
}
/// <summary>
/// Called once an &amp; character is being seen. Collects the body of the
/// reference in the string buffer and wraps it into an entity token.
/// </summary>
/// <param name="c">The next character after the &amp; character.</param>
/// <returns>The entity token.</returns>
XmlEntityToken CharacterReference(Char c)
{
    if (c == Symbols.Num)
    {
        // Numeric reference: an optional x / X selects hexadecimal digits.
        c = GetNext();
        var hexadecimal = c == 'x' || c == 'X';

        if (hexadecimal)
        {
            c = GetNext();
        }

        while (hexadecimal ? c.IsHex() : c.IsDigit())
        {
            _stringBuffer.Append(c);
            c = GetNext();
        }

        // At least one digit and a closing semicolon are required.
        if (c == Symbols.Semicolon && _stringBuffer.Length > 0)
        {
            return NewEntity(numeric: true, hex: hexadecimal);
        }
    }
    else if (c.IsXmlNameStart())
    {
        // Named reference: name start character followed by name characters.
        _stringBuffer.Append(c);
        c = GetNext();

        while (c.IsXmlName())
        {
            _stringBuffer.Append(c);
            c = GetNext();
        }

        if (c == Symbols.Semicolon)
        {
            return NewEntity();
        }
    }

    throw XmlParseError.CharacterReferenceNotTerminated.At(GetCurrentPosition());
}
/// <summary>
/// Reads an entity reference or a numeric character reference up to the
/// terminating semicolon and resolves it to its replacement text.
/// </summary>
/// <param name="c">The first character of the reference body.</param>
/// <returns>
/// The node value of the entity found in the container, or the text of
/// the referenced code point.
/// </returns>
protected String EReference(Char c)
{
    // NOTE(review): this method reads through both _stream and _src;
    // confirm that they are meant to be the same source.
    var buffer = Pool.NewStringBuilder();

    if (c.IsXmlNameStart())
    {
        do
        {
            buffer.Append(c);
            c = _stream.Next;
        }
        while (c.IsXmlName());

        var temp = buffer.ToPool();

        if (temp.Length > 0 && c == Specification.SC)
        {
            var p = _container.GetEntity(temp);

            if (p != null)
                return p.NodeValue;
        }
    }
    else if (c == Specification.NUM)
    {
        c = _src.Next;
        var hex = c == 'x' || c == 'X';

        // Skip the radix marker.
        if (hex)
            c = _stream.Next;

        while (hex ? c.IsHex() : c.IsDigit())
        {
            buffer.Append(c);
            c = _src.Next;
        }

        var temp = buffer.ToPool();

        if (temp.Length > 0 && c == Specification.SC)
        {
            var num = hex ? temp.FromHex() : temp.FromDec();

            if (num.IsValidAsCharRef())
                return Char.ConvertFromUtf32(num);

            throw Errors.Xml(ErrorCode.CharacterReferenceInvalidNumber);
        }
    }
    else
    {
        // Nothing was read: still hand the builder back before failing,
        // so that every path through this method ends with ToPool().
        buffer.ToPool();
    }

    throw Errors.Xml(ErrorCode.CharacterReferenceNotTerminated);
}
/// <summary>
/// Consumes an escaped character AFTER the solidus has already been
/// consumed.
/// </summary>
/// <param name="current">The first character after the solidus.</param>
/// <returns>
/// The escaped character; the replacement character when a hex escape
/// denotes an invalid code.
/// </returns>
String ConsumeEscape(Char current)
{
    if (current.IsHex())
    {
        var escape = new Char[6];
        var length = 0;

        // Collect up to six hex digits.
        while (length < escape.Length)
        {
            escape[length++] = current;
            current = GetNext();

            if (!current.IsHex())
                break;
        }

        // Any single whitespace character (not only U+0020) terminating
        // the escape is part of it and is swallowed; everything else is
        // handed back to the stream.
        if (!current.IsSpaceCharacter())
            Back();

        var code = Int32.Parse(new String(escape, 0, length), NumberStyles.HexNumber);

        if (!code.IsInvalid())
            return code.ConvertFromUtf32();

        current = Symbols.Replacement;
    }

    return current.ToString();
}
/// <summary>
/// 4.4.1. Data state
/// Records the current position and dispatches on the given character,
/// either creating a token directly or handing over to a more specific
/// state.
/// </summary>
/// <param name="current">The character to dispatch on.</param>
/// <returns>The produced token.</returns>
CssToken Data(Char current)
{
    // Remember where the token starts before anything else is read.
    _position = GetCurrentPosition();

    switch (current)
    {
        case Symbols.FormFeed:
        case Symbols.LineFeed:
        case Symbols.CarriageReturn:
        case Symbols.Tab:
        case Symbols.Space:
            return NewWhitespace(current);

        case Symbols.DoubleQuote:
            return StringDQ();

        case Symbols.Num:
            // In value mode "#" introduces a color literal, otherwise a hash.
            return _valueMode ? ColorLiteral() : HashStart();

        case Symbols.Dollar:
            // "$=" is the suffix match; a lone "$" is a delimiter.
            current = GetNext();

            if (current == Symbols.Equality)
                return NewSuffix();

            return NewDelimiter(GetPrevious());

        case Symbols.SingleQuote:
            return StringSQ();

        case Symbols.RoundBracketOpen:
            return NewOpenRound();

        case Symbols.RoundBracketClose:
            return NewCloseRound();

        case Symbols.Asterisk:
            // "*=" is the substring match; a lone "*" is a delimiter.
            current = GetNext();

            if (current == Symbols.Equality)
                return NewSubstring();

            return NewDelimiter(GetPrevious());

        case Symbols.Plus:
        {
            // Look ahead up to two characters and rewind before deciding.
            var c1 = GetNext();

            if (c1 != Symbols.EndOfFile)
            {
                var c2 = GetNext();
                Back(2);

                // "+1" or "+.1" starts a number.
                if (c1.IsDigit() || (c1 == Symbols.Dot && c2.IsDigit()))
                    return NumberStart(current);
            }
            else
                Back();

            return NewDelimiter(current);
        }

        case Symbols.Comma:
            return NewComma();

        case Symbols.Dot:
        {
            // ".5" starts a number; in both cases the look-ahead is undone
            // through GetPrevious, which yields the character to pass on.
            var c = GetNext();

            if (c.IsDigit())
                return NumberStart(GetPrevious());

            return NewDelimiter(GetPrevious());
        }

        case Symbols.Minus:
        {
            // Look ahead up to two characters and rewind before deciding.
            var c1 = GetNext();

            if (c1 != Symbols.EndOfFile)
            {
                var c2 = GetNext();
                Back(2);

                if (c1.IsDigit() || (c1 == Symbols.Dot && c2.IsDigit()))
                    return NumberStart(current);
                else if (c1.IsNameStart())
                    return IdentStart(current);
                else if (c1 == Symbols.ReverseSolidus && !c2.IsLineBreak() && c2 != Symbols.EndOfFile)
                    return IdentStart(current);
                else if (c1 == Symbols.Minus && c2 == Symbols.GreaterThan)
                {
                    // "-->": step over the two characters looked at above.
                    Advance(2);
                    return NewCloseComment();
                }
            }
            else
                Back();

            return NewDelimiter(current);
        }

        case Symbols.Solidus:
            // "/*" opens a comment; a lone "/" is a delimiter.
            current = GetNext();

            if (current == Symbols.Asterisk)
                return Comment();

            return NewDelimiter(GetPrevious());

        case Symbols.ReverseSolidus:
            // A solidus followed by a line break or the end of file is
            // reported as an error and emitted as a delimiter.
            current = GetNext();

            if (current.IsLineBreak())
            {
                RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
                return NewDelimiter(GetPrevious());
            }
            else if (current == Symbols.EndOfFile)
            {
                RaiseErrorOccurred(CssParseError.EOF);
                return NewDelimiter(GetPrevious());
            }

            return IdentStart(GetPrevious());

        case Symbols.Colon:
            return NewColon();

        case Symbols.Semicolon:
            return NewSemicolon();

        case Symbols.LessThan:
            // Try to match "<!--". Each failed step undoes its own read
            // via GetPrevious, so the final GetPrevious lands on "<" again.
            current = GetNext();

            if (current == Symbols.ExclamationMark)
            {
                current = GetNext();

                if (current == Symbols.Minus)
                {
                    current = GetNext();

                    if (current == Symbols.Minus)
                        return NewOpenComment();

                    current = GetPrevious();
                }

                current = GetPrevious();
            }

            return NewDelimiter(GetPrevious());

        case Symbols.At:
            return AtKeywordStart();

        case Symbols.SquareBracketOpen:
            return NewOpenSquare();

        case Symbols.SquareBracketClose:
            return NewCloseSquare();

        case Symbols.Accent:
            // "^=" is the prefix match; a lone "^" is a delimiter.
            current = GetNext();

            if (current == Symbols.Equality)
                return NewPrefix();

            return NewDelimiter(GetPrevious());

        case Symbols.CurlyBracketOpen:
            return NewOpenCurly();

        case Symbols.CurlyBracketClose:
            return NewCloseCurly();

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return NumberStart(current);

        case 'U':
        case 'u':
            // "U+" followed by a hex digit or "?" is a unicode range;
            // otherwise rewind and treat the letter as an identifier start.
            current = GetNext();

            if (current == Symbols.Plus)
            {
                current = GetNext();

                if (current.IsHex() || current == Symbols.QuestionMark)
                    return UnicodeRange(current);

                current = GetPrevious();
            }

            return IdentStart(GetPrevious());

        case Symbols.Pipe:
            // "|=" is the dash match, "||" the column token.
            current = GetNext();

            if (current == Symbols.Equality)
                return NewDash();
            else if (current == Symbols.Pipe)
                return NewColumn();

            return NewDelimiter(GetPrevious());

        case Symbols.Tilde:
            // "~=" is the include match; a lone "~" is a delimiter.
            current = GetNext();

            if (current == Symbols.Equality)
                return NewInclude();

            return NewDelimiter(GetPrevious());

        case Symbols.EndOfFile:
            return NewEof();

        case Symbols.ExclamationMark:
            // "!=" is the not match; a lone "!" is a delimiter.
            current = GetNext();

            if (current == Symbols.Equality)
                return NewNot();

            return NewDelimiter(GetPrevious());

        default:
            if (current.IsNameStart())
                return IdentStart(current);

            return NewDelimiter(current);
    }
}
/// <summary>
/// 4.4.23. Unicode-range State
/// Consumes the body of a unicode-range token; the caller has already
/// consumed the leading "U+".
/// </summary>
/// <param name="current">
/// The first character after "U+". The caller only enters this state for
/// a hex digit or a question mark.
/// </param>
/// <returns>The unicode-range token.</returns>
CssToken UnicodeRange(Char current)
{
    const int maxDigits = 6;

    // Consume as many hex digits as possible, but no more than six.
    for (int i = 0; i < maxDigits && current.IsHex(); i++)
    {
        _stringBuffer.Append(current);
        current = GetNext();
    }

    // Fill the remaining places with wildcards. The number of open places
    // is computed once up front: re-evaluating it against the growing
    // buffer would stop the loop after only half of them.
    var missing = maxDigits - _stringBuffer.Length;
    var wildcards = 0;

    while (wildcards < missing && current == Symbols.QuestionMark)
    {
        _stringBuffer.Append(current);
        current = GetNext();
        wildcards++;
    }

    if (wildcards > 0)
    {
        // The cursor sits on the first character that is NOT part of the
        // token; step back so it rests on the last consumed one, as on
        // every other exit of this state.
        Back();
        return NewRange(FlushBuffer());
    }

    // An explicit end is allowed after any number of start digits
    // (e.g. U+0025-00FF), not only after exactly six.
    if (current == Symbols.Minus)
    {
        current = GetNext();

        if (current.IsHex())
        {
            var start = _stringBuffer.ToString();
            _stringBuffer.Clear();

            for (int i = 0; i < maxDigits && current.IsHex(); i++)
            {
                _stringBuffer.Append(current);
                current = GetNext();
            }

            // Same cursor rule as above, regardless of whether the loop
            // stopped on a non-hex character or after the sixth digit.
            Back();
            var end = FlushBuffer();
            return NewRange(start, end);
        }

        // The minus does not belong to the range: undo the minus and the
        // character read after it.
        Back(2);
        return NewRange(FlushBuffer());
    }

    Back();
    return NewRange(FlushBuffer());
}
/// <summary>
/// 4.4.1. Data state
/// Dispatches on the given character and either returns a token directly
/// or hands over to a more specific state. Further characters are read
/// from _src.
/// </summary>
/// <param name="current">The character to dispatch on.</param>
/// <returns>
/// The produced token; null when the end-of-file character is dispatched.
/// </returns>
CssToken Data(Char current)
{
    switch (current)
    {
        case Specification.LF:
        case Specification.CR:
        case Specification.TAB:
        case Specification.SPACE:
            // Skip the whole run of space characters.
            do { current = _src.Next; }
            while (current.IsSpaceCharacter());

            // When whitespace is ignored, continue with the first
            // character after the run instead of emitting a token.
            if (_ignoreWs)
                return Data(current);

            // Undo the one character read past the run.
            _src.Back();
            return CssSpecialCharacter.Whitespace;

        case Specification.DQ:
            return StringDQ(_src.Next);

        case Specification.NUM:
            return HashStart(_src.Next);

        case Specification.DOLLAR:
            // "$=" is the suffix match; a lone "$" is a delimiter.
            current = _src.Next;

            if (current == Specification.EQ)
                return CssMatchToken.Suffix;

            return CssToken.Delim(_src.Previous);

        case Specification.SQ:
            return StringSQ(_src.Next);

        case Specification.RBO:
            return CssBracketToken.OpenRound;

        case Specification.RBC:
            return CssBracketToken.CloseRound;

        case Specification.ASTERISK:
            // "*=" is the substring match; a lone "*" is a delimiter.
            current = _src.Next;

            if (current == Specification.EQ)
                return CssMatchToken.Substring;

            return CssToken.Delim(_src.Previous);

        case Specification.PLUS:
        {
            // Look ahead up to two characters and rewind before deciding.
            var c1 = _src.Next;

            if (c1 == Specification.EOF)
            {
                _src.Back();
            }
            else
            {
                var c2 = _src.Next;
                _src.Back(2);

                // "+1" or "+.1" starts a number.
                if (c1.IsDigit() || (c1 == Specification.DOT && c2.IsDigit()))
                    return NumberStart(current);
            }

            return CssToken.Delim(current);
        }

        case Specification.COMMA:
            return CssSpecialCharacter.Comma;

        case Specification.DOT:
        {
            // ".5" starts a number; in both cases the look-ahead is undone
            // through Previous, which yields the character to pass on.
            var c = _src.Next;

            if (c.IsDigit())
                return NumberStart(_src.Previous);

            return CssToken.Delim(_src.Previous);
        }

        case Specification.MINUS:
        {
            // Look ahead up to two characters and rewind before deciding.
            var c1 = _src.Next;

            if (c1 == Specification.EOF)
            {
                _src.Back();
            }
            else
            {
                var c2 = _src.Next;
                _src.Back(2);

                if (c1.IsDigit() || (c1 == Specification.DOT && c2.IsDigit()))
                    return NumberStart(current);
                else if (c1.IsNameStart())
                    return IdentStart(current);
                else if (c1 == Specification.RSOLIDUS && !c2.IsLineBreak() && c2 != Specification.EOF)
                    return IdentStart(current);
                else if (c1 == Specification.MINUS && c2 == Specification.GT)
                {
                    // "-->": step over the two characters looked at above.
                    _src.Advance(2);

                    if (_ignoreCs)
                        return Data(_src.Next);

                    return CssCommentToken.Close;
                }
            }

            return CssToken.Delim(current);
        }

        case Specification.SOLIDUS:
            // "/*" opens a comment; a lone "/" is a delimiter.
            current = _src.Next;

            if (current == Specification.ASTERISK)
                return Comment(_src.Next);

            return CssToken.Delim(_src.Previous);

        case Specification.RSOLIDUS:
            // A solidus followed by a line break or the end of file is
            // reported as an error and emitted as a delimiter.
            current = _src.Next;

            if (current.IsLineBreak() || current == Specification.EOF)
            {
                RaiseErrorOccurred(current == Specification.EOF ? ErrorCode.EOF : ErrorCode.LineBreakUnexpected);
                return CssToken.Delim(_src.Previous);
            }

            return IdentStart(_src.Previous);

        case Specification.COLON:
            return CssSpecialCharacter.Colon;

        case Specification.SC:
            return CssSpecialCharacter.Semicolon;

        case Specification.LT:
            // Try to match "<!--". Each failed step undoes its own read
            // via Previous, so the final Previous lands on "<" again.
            current = _src.Next;

            if (current == Specification.EM)
            {
                current = _src.Next;

                if (current == Specification.MINUS)
                {
                    current = _src.Next;

                    if (current == Specification.MINUS)
                    {
                        if (_ignoreCs)
                            return Data(_src.Next);

                        return CssCommentToken.Open;
                    }

                    current = _src.Previous;
                }

                current = _src.Previous;
            }

            return CssToken.Delim(_src.Previous);

        case Specification.AT:
            return AtKeywordStart(_src.Next);

        case Specification.SBO:
            return CssBracketToken.OpenSquare;

        case Specification.SBC:
            return CssBracketToken.CloseSquare;

        case Specification.ACCENT:
            // "^=" is the prefix match; a lone "^" is a delimiter.
            current = _src.Next;

            if (current == Specification.EQ)
                return CssMatchToken.Prefix;

            return CssToken.Delim(_src.Previous);

        case Specification.CBO:
            return CssBracketToken.OpenCurly;

        case Specification.CBC:
            return CssBracketToken.CloseCurly;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return NumberStart(current);

        case 'U':
        case 'u':
            // "U+" followed by a hex digit or "?" is a unicode range;
            // otherwise rewind and treat the letter as an identifier start.
            current = _src.Next;

            if (current == Specification.PLUS)
            {
                current = _src.Next;

                if (current.IsHex() || current == Specification.QM)
                    return UnicodeRange(current);

                current = _src.Previous;
            }

            return IdentStart(_src.Previous);

        case Specification.PIPE:
            // "|=" is the dash match, "||" the column token.
            current = _src.Next;

            if (current == Specification.EQ)
                return CssMatchToken.Dash;
            else if (current == Specification.PIPE)
                return CssToken.Column;

            return CssToken.Delim(_src.Previous);

        case Specification.TILDE:
            // "~=" is the include match; a lone "~" is a delimiter.
            current = _src.Next;

            if (current == Specification.EQ)
                return CssMatchToken.Include;

            return CssToken.Delim(_src.Previous);

        case Specification.EOF:
            return null;

        case Specification.EM:
            // "!=" is the not match; a lone "!" is a delimiter.
            current = _src.Next;

            if (current == Specification.EQ)
                return CssMatchToken.Not;

            return CssToken.Delim(_src.Previous);

        default:
            if (current.IsNameStart())
                return IdentStart(current);

            return CssToken.Delim(current);
    }
}
/// <summary>
/// Reads the body of a reference up to the terminating semicolon and
/// wraps it into an entity token. The string buffer is cleared first.
/// </summary>
/// <param name="c">The first character of the reference body.</param>
/// <returns>The entity token.</returns>
XmlEntityToken CharacterReference(Char c)
{
    _stringBuffer.Clear();

    if (c == Specification.NUM)
    {
        // Numeric reference: collect every following hex character.
        for (c = _src.Next; c.IsHex(); c = _src.Next)
        {
            _stringBuffer.Append(c);
        }

        // At least one character and a closing semicolon are required.
        if (c == Specification.SC && _stringBuffer.Length > 0)
        {
            var digits = _stringBuffer.ToString();
            return new XmlEntityToken { Value = digits, IsNumeric = true };
        }
    }
    else if (c.IsNameStart())
    {
        // Named reference: name start character followed by name characters.
        _stringBuffer.Append(c);

        for (c = _src.Next; c.IsName(); c = _src.Next)
        {
            _stringBuffer.Append(c);
        }

        if (c == Specification.SC)
        {
            var name = _stringBuffer.ToString();
            return new XmlEntityToken { Value = name };
        }
    }

    throw new ArgumentException("Invalid entity reference.");
}
/// <summary>
/// Called once an &amp; character is being seen. Collects the body of the
/// reference in a pooled builder and wraps it into an entity token.
/// </summary>
/// <param name="c">The next character after the &amp; character.</param>
/// <returns>The entity token.</returns>
XmlEntityToken CharacterReference(Char c)
{
    var buffer = Pool.NewStringBuilder();

    if (c == Specification.NUM)
    {
        // Numeric reference: an optional x / X selects hexadecimal digits.
        c = _src.Next;
        var hex = c == 'x' || c == 'X';

        if (hex)
        {
            c = _src.Next;
        }

        while (hex ? c.IsHex() : c.IsDigit())
        {
            buffer.Append(c);
            c = _src.Next;
        }

        // At least one digit and a closing semicolon are required.
        if (c == Specification.SC && buffer.Length > 0)
        {
            var digits = buffer.ToPool();
            return new XmlEntityToken { Value = digits, IsNumeric = true, IsHex = hex };
        }
    }
    else if (c.IsXmlNameStart())
    {
        // Named reference: name start character followed by name characters.
        buffer.Append(c);

        for (c = _src.Next; c.IsXmlName(); c = _src.Next)
        {
            buffer.Append(c);
        }

        if (c == Specification.SC)
        {
            var name = buffer.ToPool();
            return new XmlEntityToken { Value = name };
        }
    }

    // Failure: ToPool() is still called on the builder before the error
    // is raised; its result is not needed.
    buffer.ToPool();
    throw Errors.Xml(ErrorCode.CharacterReferenceNotTerminated);
}
/// <summary>
/// Consumes an escaped character; the solidus itself has already been
/// consumed by the caller.
/// </summary>
/// <param name="current">The first character after the solidus.</param>
/// <returns>
/// The given character for a non-hex escape; otherwise the text of the
/// code denoted by up to six hex digits, or the replacement character
/// when that code is invalid.
/// </returns>
String ConsumeEscape(Char current)
{
    // Anything that is not a hex digit simply stands for itself.
    if (!current.IsHex())
    {
        return current.ToString();
    }

    var digits = new Char[6];
    var count = 0;

    // The first digit is known to be hex; keep reading while further hex
    // digits follow and there is room left.
    do
    {
        digits[count] = current;
        count++;
        current = GetNext();
    }
    while (count < digits.Length && current.IsHex());

    // A space character right after the digits stays consumed; any other
    // character is handed back.
    if (current.IsSpaceCharacter() == false)
    {
        Back();
    }

    var text = new String(digits, 0, count);
    var code = Int32.Parse(text, NumberStyles.HexNumber);

    if (code.IsInvalid())
    {
        return Symbols.Replacement.ToString();
    }

    return code.ConvertFromUtf32();
}