/// <summary>
/// See 8.2.4.69 Tokenizing character references
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="allowedCharacter">The additionally allowed character if there is one.</param>
/// <returns>
/// The text the reference resolves to; null when the source position was rewound
/// without consuming a reference; or an empty string when the named branch ran
/// to completion without matching any entity.
/// </returns>
String CharacterReference(Char c, Char allowedCharacter = Specification.NULL)
{
    // These characters cannot start a reference: step back and report "nothing consumed".
    if (c.IsSpaceCharacter() || c == Specification.LT || c == Specification.EOF || c == Specification.AMPERSAND || c == allowedCharacter)
    {
        _src.Back();
        return null;
    }

    if (c == Specification.NUM)
    {
        var exp = 10;
        var digits = 0;

        // Accumulate in 64 bits and stop growing once the value no longer fits in an
        // Int32. Unchecked Int32 arithmetic would silently wrap, so an over-long digit
        // run could alias a valid (or negative) code point.
        var code = 0L;

        c = _src.Next;
        var isHex = c == 'x' || c == 'X';

        if (isHex)
        {
            exp = 16;

            while ((c = _src.Next).IsHex())
            {
                var digit = c.FromHex();
                digits++;

                if (code <= Int32.MaxValue)
                {
                    code = code * exp + digit;
                }
            }
        }
        else
        {
            while (c.IsDigit())
            {
                var digit = c.FromHex();
                digits++;

                if (code <= Int32.MaxValue)
                {
                    code = code * exp + digit;
                }

                c = _src.Next;
            }
        }

        if (digits == 0)
        {
            // No digits at all: rewind two positions, plus one more for the hex marker.
            _src.Back(2);

            if (isHex)
            {
                _src.Back();
            }

            RaiseErrorOccurred(ErrorCode.CharacterReferenceWrongNumber);
            return null;
        }

        // Saturate instead of wrapping. NOTE(review): this relies on
        // Entities.IsInvalidNumber rejecting values above the Unicode range - confirm.
        var num = code > Int32.MaxValue ? Int32.MaxValue : (Int32)code;

        if (c != Specification.SC)
        {
            // The terminating semicolon is missing; the character read is not part of the reference.
            RaiseErrorOccurred(ErrorCode.CharacterReferenceSemicolonMissing);
            _src.Back();
        }

        if (Entities.IsInCharacterTable(num))
        {
            RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidCode);
            return Entities.GetSymbolFromTable(num);
        }

        if (Entities.IsInvalidNumber(num))
        {
            RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidNumber);
            return Specification.REPLACEMENT.ToString();
        }

        if (Entities.IsInInvalidRange(num))
        {
            RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidRange);
        }

        return Entities.Convert(num);
    }
    else
    {
        var last = String.Empty;
        var consumed = 0;
        var start = _src.InsertionPoint - 1;
        var reference = new Char[31];
        var index = 0;
        var chr = _src.Current;

        do
        {
            if (chr == Specification.SC || !chr.IsName())
            {
                break;
            }

            reference[index++] = chr;
            var value = new String(reference, 0, index);
            chr = _src.Next;
            consumed++;
            value = chr == Specification.SC ?
                Entities.GetSymbol(value) : Entities.GetSymbolWithoutSemicolon(value);

            if (value != null)
            {
                // Remember the longest match so far; only characters read after it
                // need to be given back.
                consumed = 0;
                last = value;
            }
        }
        // Also stop once the buffer is full; without this bound a run of more than
        // reference.Length name characters would index past the end of the array.
        while (!_src.IsEnded && index < reference.Length);

        // Give back everything read after the last successful match.
        _src.Back(consumed);
        chr = _src.Current;

        if (chr != Specification.SC)
        {
            if (allowedCharacter != Specification.NULL && (chr == Specification.EQ || chr.IsAlphanumericAscii()))
            {
                if (chr == Specification.EQ)
                {
                    RaiseErrorOccurred(ErrorCode.CharacterReferenceAttributeEqualsFound);
                }

                // An additional allowed character is set and the match is followed by
                // '=' or an alphanumeric: restore the saved position and consume nothing.
                _src.InsertionPoint = start;
                return null;
            }

            _src.Back();
            RaiseErrorOccurred(ErrorCode.CharacterReferenceNotTerminated);
        }

        return last;
    }
}
/// <summary>
/// See 8.2.4.69 Tokenizing character references.
/// Appends either the resolved reference text or a literal ampersand
/// to the string buffer.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="allowedCharacter">The additionally allowed character if there is one.</param>
void AppendCharacterReference(Char c, Char allowedCharacter = Symbols.Null)
{
    // These characters cannot start a reference: step back and keep the ampersand as text.
    if (c.IsSpaceCharacter() || c == Symbols.LessThan || c == Symbols.EndOfFile || c == Symbols.Ampersand || c == allowedCharacter)
    {
        Back();
        _stringBuffer.Append(Symbols.Ampersand);
        return;
    }

    var entity = default(String);

    if (c == Symbols.Num)
    {
        var exp = 10;
        var digits = 0;

        // Accumulate in 64 bits and stop growing once the value no longer fits in an
        // Int32. Unchecked Int32 arithmetic would silently wrap, so an over-long digit
        // run could alias a valid (or negative) code point.
        var code = 0L;

        c = GetNext();
        var isHex = c == 'x' || c == 'X';

        if (isHex)
        {
            exp = 16;

            while ((c = GetNext()).IsHex())
            {
                var digit = c.FromHex();
                digits++;

                if (code <= Int32.MaxValue)
                {
                    code = code * exp + digit;
                }
            }
        }
        else
        {
            while (c.IsDigit())
            {
                var digit = c.FromHex();
                digits++;

                if (code <= Int32.MaxValue)
                {
                    code = code * exp + digit;
                }

                c = GetNext();
            }
        }

        if (digits == 0)
        {
            // No digits at all: rewind two positions, plus one more for the hex marker.
            Back(2);

            if (isHex)
            {
                Back();
            }

            RaiseErrorOccurred(HtmlParseError.CharacterReferenceWrongNumber);
            _stringBuffer.Append(Symbols.Ampersand);
            return;
        }

        // Saturate instead of wrapping. NOTE(review): this relies on
        // Entities.IsInvalidNumber rejecting values above the Unicode range - confirm.
        var num = code > Int32.MaxValue ? Int32.MaxValue : (Int32)code;

        if (c != Symbols.Semicolon)
        {
            // The terminating semicolon is missing; the character read is not part of the reference.
            RaiseErrorOccurred(HtmlParseError.CharacterReferenceSemicolonMissing);
            Back();
        }

        if (Entities.IsInCharacterTable(num))
        {
            RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidCode);
            entity = Entities.GetSymbolFromTable(num);
        }
        else if (Entities.IsInvalidNumber(num))
        {
            RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidNumber);
            entity = Symbols.Replacement.ToString();
        }
        else
        {
            if (Entities.IsInInvalidRange(num))
            {
                RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidRange);
            }

            entity = Entities.Convert(num);
        }
    }
    else
    {
        var consumed = 0;
        var start = InsertionPoint - 1;
        var reference = new Char[31];
        var index = 0;
        var chr = Current;

        do
        {
            if (chr == Symbols.Semicolon || !chr.IsName())
            {
                break;
            }

            reference[index++] = chr;
            var value = new String(reference, 0, index);
            chr = GetNext();
            consumed++;
            value = chr == Symbols.Semicolon ?
                Entities.GetSymbol(value) : Entities.GetSymbolWithoutSemicolon(value);

            if (value != null)
            {
                // Remember the longest match so far; only characters read after it
                // need to be given back.
                consumed = 0;
                entity = value;
            }
        }
        // Stop at the end of input or once the name buffer is full.
        while (chr != Symbols.EndOfFile && index < reference.Length);

        // Give back everything read after the last successful match.
        Back(consumed);
        chr = Current;

        if (chr != Symbols.Semicolon)
        {
            if (allowedCharacter != Symbols.Null && (chr == Symbols.Equality || chr.IsAlphanumericAscii()))
            {
                if (chr == Symbols.Equality)
                {
                    RaiseErrorOccurred(HtmlParseError.CharacterReferenceAttributeEqualsFound);
                }

                // An additional allowed character is set and the match is followed by
                // '=' or an alphanumeric: restore the saved position and emit the
                // ampersand as plain text.
                InsertionPoint = start;
                _stringBuffer.Append(Symbols.Ampersand);
                return;
            }

            Back();
            RaiseErrorOccurred(HtmlParseError.CharacterReferenceNotTerminated);
        }

        if (entity == null)
        {
            // Nothing matched: the ampersand is ordinary text.
            _stringBuffer.Append(Symbols.Ampersand);
            return;
        }
    }

    _stringBuffer.Append(entity);
}
/// <summary>
/// Reads a decimal or hexadecimal numeric character reference and
/// resolves it to text.
/// </summary>
/// <param name="c">
/// The character to start from: 'x' or 'X' selects hexadecimal,
/// otherwise it is treated as the first decimal digit candidate.
/// </param>
/// <returns>
/// The resolved text, or null if no digits were found (in which case
/// the source position is rewound).
/// </returns>
private String GetNumericCharacterReference(Char c)
{
    var exp = 10;
    var digits = 0;

    // Accumulate in 64 bits and stop growing once the value no longer fits in an
    // Int32. Unchecked Int32 arithmetic would silently wrap, so an over-long digit
    // run could alias a valid (or negative) code point.
    var code = 0L;

    var isHex = c == 'x' || c == 'X';

    if (isHex)
    {
        exp = 16;

        while ((c = GetNext()).IsHex())
        {
            var digit = c.FromHex();
            digits++;

            if (code <= Int32.MaxValue)
            {
                code = code * exp + digit;
            }
        }
    }
    else
    {
        while (c.IsDigit())
        {
            var digit = c.FromHex();
            digits++;

            if (code <= Int32.MaxValue)
            {
                code = code * exp + digit;
            }

            c = GetNext();
        }
    }

    if (digits == 0)
    {
        // No digits at all: rewind two positions, plus one more for the hex marker.
        Back(2);

        if (isHex)
        {
            Back();
        }

        RaiseErrorOccurred(HtmlParseError.CharacterReferenceWrongNumber);
        return null;
    }

    // Saturate instead of wrapping. NOTE(review): this relies on
    // HtmlEntityService.IsInvalidNumber rejecting values above the Unicode range - confirm.
    var num = code > Int32.MaxValue ? Int32.MaxValue : (Int32)code;

    if (c != Symbols.Semicolon)
    {
        // The terminating semicolon is missing; the character read is not part of the reference.
        RaiseErrorOccurred(HtmlParseError.CharacterReferenceSemicolonMissing);
        Back();
    }

    if (HtmlEntityService.IsInCharacterTable(num))
    {
        RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidCode);
        return HtmlEntityService.GetSymbolFromTable(num);
    }
    else if (HtmlEntityService.IsInvalidNumber(num))
    {
        RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidNumber);
        return Symbols.Replacement.ToString();
    }
    else if (HtmlEntityService.IsInInvalidRange(num))
    {
        // Reported as an error, but the code point is still converted below.
        RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidRange);
    }

    return num.ConvertFromUtf32();
}