Beispiel #1
0
        /*
        If the character reference is being consumed as part of an attribute, and the last character matched is not a U+003B SEMICOLON character (;), and the next character is either a U+003D EQUALS SIGN character (=) or in the range U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9), U+0041 LATIN CAPITAL LETTER A to U+005A LATIN CAPITAL LETTER Z, or U+0061 LATIN SMALL LETTER A to U+007A LATIN SMALL LETTER Z, then, for historical reasons, all the characters that were matched after the U+0026 AMPERSAND character (&) must be unconsumed, and nothing is returned.
        */
        // Expands a named character reference.
        /// <summary>
        /// Consumes a named character reference and returns the text it stands for.
        /// </summary>
        /// <remarks>
        /// The longest run of name characters (plus a directly following semicolon, if any) is read first.
        /// Characters are then removed from the end of that run, unconsuming one input character each time,
        /// until the remainder is a name known to <c>Chars.ExistsNamedChar</c>.
        /// On every <c>null</c> return the tokenizer has been unconsumed back to one character before the
        /// position it had on entry (presumably the '&amp;' - confirm against the caller).
        /// </remarks>
        /// <param name="t">Tokenizer supplying the input; its current input character is treated as the first character of the name.</param>
        /// <returns>
        /// A token carrying the replacement text and the matched name, or <c>null</c> when no known name
        /// is found or when the reference is ignored inside an attribute value.
        /// </returns>
        protected ReferencedCharacterToken ConsumeNamedCharacterReference(Tokenizer t)
        {
            StringBuilder referenceName = new StringBuilder();
            bool semicolonFound = false;

            // Collect the longest run of name characters, starting with the current input character.
            char? lastChar = t.CurrentInputChar;
            while(lastChar.IsNameToken()){
                referenceName.Append(lastChar);
                lastChar = t.ConsumeChar();
            }
            if(lastChar == Chars.SEMICOLON){
                referenceName.Append(lastChar);
                semicolonFound = true;
            } else {
                // Align CurrentInputChar with the end of referenceName.
                t.UnConsume(1);
            }
            string originalString = referenceName.ToString();

            // Longest-prefix match: shorten the candidate from the end until it is a known name.
            string matchResult = null;
            while(referenceName.Length > 0){
                string candidate = referenceName.ToString();
                if(Chars.ExistsNamedChar(candidate)){
                    matchResult = Chars.GetNamedChar(candidate);
                    break;
                }
                referenceName.Remove(referenceName.Length-1, 1);
                // Keep CurrentInputChar aligned with the end of referenceName.
                t.UnConsume(1);
            }

            if(matchResult == null){
                if(semicolonFound){
                    // No known name but a semicolon is present: this is a parse error.
                    // if the characters after the U+0026 AMPERSAND character (&) consist of a sequence of one or more characters in the range ASCII digits, lowercase ASCII letters, and uppercase ASCII letters, followed by a ";" (U+003B) character, then this is a parse error.
                    OnMessageRaised(new UnknownNamedCharacterReferenceWithSemicolonError(originalString));
                } else {
                    // No known name and no semicolon: not a parse error, only a warning.
                    OnMessageRaised(new UnknownNamedCharacterReferenceWithoutSemicolonWarning(originalString));
                }
                // referenceName is empty here; the loop above has already unconsumed every character.
                return null;
            }

            // The semicolon terminates the reference only if it is still part of the matched name.
            // If the match is a shorter prefix (e.g. "not" out of "notit;"), the semicolon was trimmed
            // and unconsumed above, so the reference must be handled as one without a semicolon.
            bool matchEndsWithSemicolon = semicolonFound && referenceName.Length == originalString.Length;
            if(!matchEndsWithSemicolon){
                // Inside an attribute value, a reference without a semicolon is ignored when followed by certain characters.
                if(t.CurrentTokenState is CharacterReferenceInAttributeState && t.NextInputChar.IsSuffixOfIgnoreCharacterReferenceInAttribute()){
                    OnMessageRaised(new IgnoredCharacterReferenceInAttributeWarning(referenceName));
                    t.UnConsume(referenceName.Length);
                    return null;
                }
                // Any other reference without a semicolon is an error, but it is still expanded.
                OnMessageRaised(new NamedCharacterReferenceWithoutSemicolonError(referenceName));
            }

            ReferencedCharacterToken resultToken = new ReferencedCharacterToken(matchResult);
            resultToken.OriginalString = referenceName.ToString();
            return resultToken;
        }
Beispiel #2
0
        // Expands a numeric character reference.
        /// <summary>
        /// Consumes a numeric character reference (decimal, or hexadecimal when introduced by 'x' / 'X')
        /// and returns the text produced by <c>GetNumberedChar</c> for its value.
        /// </summary>
        /// <param name="t">Tokenizer supplying the input; the first character of the reference body is obtained with <c>ConsumeChar()</c>.</param>
        /// <returns>
        /// A token carrying the replacement text and the original spelling (prefix, digits and optional
        /// semicolon), or <c>null</c> when no digits follow.
        /// </returns>
        protected ReferencedCharacterToken ConsumeNumericCharacterReference(Tokenizer t)
        {
            char? c = t.ConsumeChar();
            Predicate<char?> isNumeric;
            System.Globalization.NumberStyles parseStyle = Chars.DecimalParseStyle;
            string prefix = "";
            string suffix = "";
            if(c == 'x' || c == 'X'){
                // Hexadecimal form: remember the marker as written and switch to hex digits.
                prefix = c.ToString();
                isNumeric = Chars.IsHexDigit;
                parseStyle = Chars.HexParseStyle;
                c = t.ConsumeChar();
            } else {
                isNumeric = Chars.IsDigit;
            }
            StringBuilder matchResult = new StringBuilder();
            while(isNumeric(c)){
                matchResult.Append(c);
                c = t.ConsumeChar();
            }
            if(matchResult.Length == 0){
                // NOTE(review): the characters consumed above are not unconsumed on this path - confirm the caller rewinds.
                OnMessageRaised(new EmptyNumericCharacterReferenceError());
                return null;
            }
            string numberString = matchResult.ToString();

            // The digit run comes from untrusted markup and can be arbitrarily long. Parse into a wider
            // type without throwing, and clamp anything that does not fit into a non-negative int to
            // int.MaxValue, so that an oversized reference reaches GetNumberedChar as a very large value
            // (presumably handled there as out of range) instead of raising OverflowException, or
            // wrapping to a negative number in the hexadecimal case.
            long parsedNumber;
            int resultNumber;
            if(long.TryParse(numberString, parseStyle, System.Globalization.CultureInfo.InvariantCulture, out parsedNumber)
                && parsedNumber >= 0 && parsedNumber <= int.MaxValue){
                resultNumber = (int)parsedNumber;
            } else {
                resultNumber = int.MaxValue;
            }

            string result = GetNumberedChar(t, resultNumber);
            if(c == Chars.SEMICOLON){
                suffix += Chars.SEMICOLON;
            } else {
                // The terminating semicolon is missing: report it and give back the character that ended the digit run.
                OnMessageRaised(new NumericCharacterReferenceWithoutSemicolonError());
                t.UnConsume(1);
            }
            string originalString = prefix + numberString + suffix;

            ReferencedCharacterToken resultToken = new ReferencedCharacterToken(result);
            resultToken.OriginalString = originalString;
            return resultToken;
        }