コード例 #1
0
ファイル: HtmlTokenizer.cs プロジェクト: rrsc/AngleSharp
        /// <summary>
        /// Consumes a character reference from the source.
        /// See 8.2.4.69 Tokenizing character references.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="allowedCharacter">The additionally allowed character if there is one.</param>
        /// <returns>
        /// Null if <paramref name="c" /> cannot start a reference, if a numeric reference
        /// has no digits, or if a named reference is rejected because of the additionally
        /// allowed character. Otherwise the text of the resolved reference; for a named
        /// reference without any match this is the empty string.
        /// </returns>
        String CharacterReference(Char c, Char allowedCharacter = Specification.NULL)
        {
            // None of these characters can start a reference: step back and report "nothing consumed".
            if (c.IsSpaceCharacter() || c == Specification.LT || c == Specification.EOF || c == Specification.AMPERSAND || c == allowedCharacter)
            {
                _src.Back();
                return null;
            }

            if (c == Specification.NUM)
            {
                // One above the largest Unicode code point (0x10FFFF). The accumulator
                // saturates at this value so that arbitrarily long digit sequences can
                // never wrap around Int32 and yield a bogus (or negative) code point.
                const Int32 saturated = 0x110000;
                var exp = 10;
                var num = 0;
                var nums = new List<Int32>();
                c = _src.Next;
                var isHex = c == 'x' || c == 'X';

                if (isHex)
                {
                    exp = 16;

                    while ((c = _src.Next).IsHex())
                        nums.Add(c.FromHex());
                }
                else
                {
                    while (c.IsDigit())
                    {
                        nums.Add(c.FromHex());
                        c = _src.Next;
                    }
                }

                if (nums.Count == 0)
                {
                    // No digits at all: rewind what was read while looking for
                    // them (one more step if the hex prefix was consumed).
                    _src.Back(2);

                    if (isHex)
                        _src.Back();

                    RaiseErrorOccurred(ErrorCode.CharacterReferenceWrongNumber);
                    return null;
                }

                // Fold the digits most-significant first. While num < saturated the
                // intermediate num * exp + digit stays far below Int32.MaxValue.
                foreach (var digit in nums)
                {
                    num = num * exp + digit;

                    if (num >= saturated)
                    {
                        num = saturated;
                        break;
                    }
                }

                // A missing semicolon is reported, and the terminating character is handed back.
                if (c != Specification.SC)
                {
                    RaiseErrorOccurred(ErrorCode.CharacterReferenceSemicolonMissing);
                    _src.Back();
                }

                if (Entities.IsInCharacterTable(num))
                {
                    RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidCode);
                    return Entities.GetSymbolFromTable(num);
                }

                // NOTE(review): a saturated value relies on IsInvalidNumber rejecting
                // everything above 0x10FFFF, as the HTML specification demands - confirm.
                if (Entities.IsInvalidNumber(num))
                {
                    RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidNumber);
                    return Specification.REPLACEMENT.ToString();
                }

                if (Entities.IsInInvalidRange(num))
                    RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidRange);

                return Entities.Convert(num);
            }
            else
            {
                // NOTE(review): stays empty (not null) when nothing matches - verify
                // that the caller treats an empty result as intended.
                var last = String.Empty;
                // Number of characters read since the most recent successful match.
                var consumed = 0;
                var start = _src.InsertionPoint - 1;
                var reference = new Char[31];
                var index = 0;
                var chr = _src.Current;

                do
                {
                    if (chr == Specification.SC || !chr.IsName())
                        break;

                    reference[index++] = chr;
                    var value = new String(reference, 0, index);
                    chr = _src.Next;
                    consumed++;
                    // The lookup depends on whether the candidate is directly followed by a semicolon.
                    value = chr == Specification.SC ? Entities.GetSymbol(value) : Entities.GetSymbolWithoutSemicolon(value);

                    if (value != null)
                    {
                        consumed = 0;
                        last = value;
                    }
                }
                // Stop once the buffer is full; writing past it would throw an
                // IndexOutOfRangeException for long runs of name characters.
                while (!_src.IsEnded && index < reference.Length);

                // Give back everything read after the last match.
                _src.Back(consumed);
                chr = _src.Current;

                if (chr != Specification.SC)
                {
                    if (allowedCharacter != Specification.NULL && (chr == Specification.EQ || chr.IsAlphanumericAscii()))
                    {
                        if (chr == Specification.EQ)
                            RaiseErrorOccurred(ErrorCode.CharacterReferenceAttributeEqualsFound);

                        // Restore the position remembered on entry and consume nothing.
                        _src.InsertionPoint = start;
                        return null;
                    }

                    _src.Back();
                    RaiseErrorOccurred(ErrorCode.CharacterReferenceNotTerminated);
                }

                return last;
            }
        }
コード例 #2
0
ファイル: HtmlTokenizer.cs プロジェクト: JBTech/AngleSharp
        /// <summary>
        /// Consumes a character reference and appends the result to the string buffer.
        /// If no reference could be resolved a plain ampersand is appended instead.
        /// See 8.2.4.69 Tokenizing character references.
        /// </summary>
        /// <param name="c">The next input character.</param>
        /// <param name="allowedCharacter">The additionally allowed character if there is one.</param>
        void AppendCharacterReference(Char c, Char allowedCharacter = Symbols.Null)
        {
            // None of these characters can start a reference: step back and keep the ampersand literally.
            if (c.IsSpaceCharacter() || c == Symbols.LessThan || c == Symbols.EndOfFile || c == Symbols.Ampersand || c == allowedCharacter)
            {
                Back();
                _stringBuffer.Append(Symbols.Ampersand);
                return;
            }

            var entity = default(String);
            
            if (c == Symbols.Num)
            {
                // One above the largest Unicode code point (0x10FFFF). The accumulator
                // saturates at this value so that arbitrarily long digit sequences can
                // never wrap around Int32 and yield a bogus (or negative) code point.
                const Int32 saturated = 0x110000;
                var exp = 10;
                var num = 0;
                var nums = new List<Int32>();
                c = GetNext();
                var isHex = c == 'x' || c == 'X';

                if (isHex)
                {
                    exp = 16;

                    while ((c = GetNext()).IsHex())
                        nums.Add(c.FromHex());
                }
                else
                {
                    while (c.IsDigit())
                    {
                        nums.Add(c.FromHex());
                        c = GetNext();
                    }
                }

                if (nums.Count == 0)
                {
                    // No digits at all: rewind what was read while looking for
                    // them (one more step if the hex prefix was consumed).
                    Back(2);

                    if (isHex)
                        Back();

                    RaiseErrorOccurred(HtmlParseError.CharacterReferenceWrongNumber);
                    _stringBuffer.Append(Symbols.Ampersand);
                    return;
                }

                // Fold the digits most-significant first. While num < saturated the
                // intermediate num * exp + digit stays far below Int32.MaxValue.
                foreach (var digit in nums)
                {
                    num = num * exp + digit;

                    if (num >= saturated)
                    {
                        num = saturated;
                        break;
                    }
                }

                // A missing semicolon is reported, and the terminating character is handed back.
                if (c != Symbols.Semicolon)
                {
                    RaiseErrorOccurred(HtmlParseError.CharacterReferenceSemicolonMissing);
                    Back();
                }

                if (Entities.IsInCharacterTable(num))
                {
                    RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidCode);
                    entity = Entities.GetSymbolFromTable(num);
                }
                // NOTE(review): a saturated value relies on IsInvalidNumber rejecting
                // everything above 0x10FFFF, as the HTML specification demands - confirm.
                else if (Entities.IsInvalidNumber(num))
                {
                    RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidNumber);
                    entity = Symbols.Replacement.ToString();
                }
                else 
                {
                    if (Entities.IsInInvalidRange(num))
                        RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidRange);

                    entity = Entities.Convert(num);
                }
            }
            else
            {
                // Number of characters read since the most recent successful match.
                var consumed = 0;
                var start = InsertionPoint - 1;
                var reference = new Char[31];
                var index = 0;
                var chr = Current;

                do
                {
                    if (chr == Symbols.Semicolon || !chr.IsName())
                        break;

                    reference[index++] = chr;
                    var value = new String(reference, 0, index);
                    chr = GetNext();
                    consumed++;
                    // The lookup depends on whether the candidate is directly followed by a semicolon.
                    value = chr == Symbols.Semicolon ? Entities.GetSymbol(value) : Entities.GetSymbolWithoutSemicolon(value);

                    if (value != null)
                    {
                        consumed = 0;
                        entity = value;
                    }
                }
                // Bounded by the buffer size so that long runs of name characters cannot overflow it.
                while (chr != Symbols.EndOfFile && index < reference.Length);

                // Give back everything read after the last match.
                Back(consumed);
                chr = Current;

                if (chr != Symbols.Semicolon)
                {
                    if (allowedCharacter != Symbols.Null && (chr == Symbols.Equality || chr.IsAlphanumericAscii()))
                    {
                        if (chr == Symbols.Equality)
                            RaiseErrorOccurred(HtmlParseError.CharacterReferenceAttributeEqualsFound);

                        // Restore the position remembered on entry and keep the ampersand literally.
                        InsertionPoint = start;
                        _stringBuffer.Append(Symbols.Ampersand);
                        return;
                    }

                    Back();
                    RaiseErrorOccurred(HtmlParseError.CharacterReferenceNotTerminated);
                }

                // Nothing matched: the ampersand is ordinary text.
                if (entity == null)
                {
                    _stringBuffer.Append(Symbols.Ampersand);
                    return;
                }
            }

            _stringBuffer.Append(entity);
        }
コード例 #3
0
ファイル: HtmlTokenizer.cs プロジェクト: Wojdav/AngleSharp
        /// <summary>
        /// Reads the digits of a numeric character reference and resolves them.
        /// </summary>
        /// <param name="c">
        /// The first character to examine: 'x' or 'X' selects hexadecimal digits,
        /// anything else is tested as the first decimal digit.
        /// </param>
        /// <returns>
        /// Null if no digit was found. Otherwise the symbol from the character table,
        /// the replacement character for an invalid number, or the string obtained by
        /// converting the number from UTF-32.
        /// </returns>
        private String GetNumericCharacterReference(Char c)
        {
            // One above the largest Unicode code point (0x10FFFF). The accumulator
            // saturates at this value so that arbitrarily long digit sequences can
            // never wrap around Int32 and yield a bogus (or negative) code point.
            const Int32 saturated = 0x110000;
            var exp = 10;
            var num = 0;
            var nums = new List<Int32>();
            var isHex = c == 'x' || c == 'X';

            if (isHex)
            {
                exp = 16;

                while ((c = GetNext()).IsHex())
                {
                    nums.Add(c.FromHex());
                }
            }
            else
            {
                while (c.IsDigit())
                {
                    nums.Add(c.FromHex());
                    c = GetNext();
                }
            }

            if (nums.Count == 0)
            {
                // No digits at all: rewind (one more step if the hex prefix was consumed).
                Back(2);

                if (isHex)
                {
                    Back();
                }

                RaiseErrorOccurred(HtmlParseError.CharacterReferenceWrongNumber);
                return null;
            }

            // Fold the digits most-significant first. While num < saturated the
            // intermediate num * exp + digit stays far below Int32.MaxValue.
            foreach (var digit in nums)
            {
                num = num * exp + digit;

                if (num >= saturated)
                {
                    num = saturated;
                    break;
                }
            }

            // A missing semicolon is reported, and the terminating character is handed back.
            if (c != Symbols.Semicolon)
            {
                RaiseErrorOccurred(HtmlParseError.CharacterReferenceSemicolonMissing);
                Back();
            }

            if (HtmlEntityService.IsInCharacterTable(num))
            {
                RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidCode);
                return HtmlEntityService.GetSymbolFromTable(num);
            }
            // NOTE(review): a saturated value relies on IsInvalidNumber rejecting
            // everything above 0x10FFFF, as the HTML specification demands - confirm.
            else if (HtmlEntityService.IsInvalidNumber(num))
            {
                RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidNumber);
                return Symbols.Replacement.ToString();
            }
            else if (HtmlEntityService.IsInInvalidRange(num))
            {
                // Only reported; the number is still converted below.
                RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidRange);
            }

            return num.ConvertFromUtf32();
        }