예제 #1
0
        /* Scans the attribute list of a start tag and records each attribute
           on the supplied token.

           On entry, off points to the character following the first
           character of the first attribute name; nameStart is the offset of
           that first character.  buf[off..end) is the input still available
           (end is exclusive).

           For every attribute found, token.appendAttribute() is called with
           the name start/end offsets, the value start/end offsets (the value
           end is the offset of the closing quote) and a flag telling whether
           the value was seen to be already in normalized form.

           Returns TOK.START_TAG_WITH_ATTS when the tag is closed by a BT_GT
           character, or TOK.EMPTY_ELEMENT_WITH_ATTS when it is closed by
           BT_SOL followed by '>'.  In both cases token.TokenEnd is set to
           the offset just past the closing '>' and
           token.checkAttributeUniqueness() has been called.

           Throws:
             PartialCharException   - a multi-byte character is cut off by end
             PartialTokenException  - the input ends before the tag is complete
             InvalidTokenException  - a character is not allowed at its position

           NOTE(review): minBPC is presumably the encoding's minimum number of
           bytes per character; it is defined outside this method - confirm. */
        private TOK scanAtts(int nameStart, byte[] buf, int off, int end,
            ContentToken token)
        {
            /* Offset just past the current attribute name; -1 while the name
               is still being scanned. */
            int NameEnd = -1;
            while (off != end)
            {
                switch (byteType(buf, off))
                {
                    /* Characters that continue the attribute name. */
                    case BT_NMSTRT:
                    case BT_NAME:
                    case BT_MINUS:
                        off += minBPC;
                        break;
                    /* Multi-byte lead bytes: the whole character must be
                       present in the buffer and must be a name character. */
                    case BT_LEAD2:
                        if (end - off < 2)
                            throw new PartialCharException(off);
                        if (!isNameChar2(buf, off))
                            throw new InvalidTokenException(off);
                        off += 2;
                        break;
                    case BT_LEAD3:
                        if (end - off < 3)
                            throw new PartialCharException(off);
                        if (!isNameChar3(buf, off))
                            throw new InvalidTokenException(off);
                        off += 3;
                        break;
                    case BT_LEAD4:
                        if (end - off < 4)
                            throw new PartialCharException(off);
                        if (!isNameChar4(buf, off))
                            throw new InvalidTokenException(off);
                        off += 4;
                        break;
                    /* Whitespace after the name: the name ends here.  Skip
                       any further whitespace; the next non-whitespace
                       character must be BT_EQUALS. */
                    case BT_S:
                    case BT_CR:
                    case BT_LF:
                        NameEnd = off;
                        for (;;)
                        {
                            off += minBPC;
                            if (off == end)
                                throw new PartialTokenException();
                            switch (byteType(buf, off))
                            {
                                case BT_EQUALS:
                                    /* Leave the for loop (a plain break would
                                       only leave this switch). */
                                    goto loop;
                                case BT_S:
                                case BT_LF:
                                case BT_CR:
                                    break;
                                default:
                                    throw new InvalidTokenException(off);
                            }
                        }
                        loop: ;
                        /* fall through */
                        /* C# has no implicit fall-through between non-empty
                           case sections, hence the explicit jump. */
                        goto case BT_EQUALS;
                    case BT_EQUALS:
                    {
                        /* off points to the BT_EQUALS character.  If no
                           whitespace preceded it, the name ends right here. */
                        if (NameEnd < 0)
                            NameEnd = off;
                        /* Byte type of the opening quote (BT_QUOT or BT_APOS);
                           the value ends at the next character of this type. */
                        int open;
                        /* Skip whitespace between '=' and the opening quote. */
                        for (;;)
                        {

                            off += minBPC;
                            if (off == end)
                                throw new PartialTokenException();
                            open = byteType(buf, off);
                            if (open == BT_QUOT || open == BT_APOS)
                                break;
                            switch (open)
                            {
                                case BT_S:
                                case BT_LF:
                                case BT_CR:
                                    break;
                                default:
                                    throw new InvalidTokenException(off);
                            }
                        }
                        /* Step over the opening quote. */
                        off += minBPC;
                        int valueStart = off;
                        /* Stays true only while nothing seen in the value
                           indicates that it still needs normalization. */
                        bool normalized = true;
                        int t;
                        /* in attribute value */
                        for (;;)
                        {
                            if (off == end)
                                throw new PartialTokenException();
                            t = byteType(buf, off);
                            /* Matching closing quote: value is complete and
                               off is left pointing at the quote. */
                            if (t == open)
                                break;
                            switch (t)
                            {
                                case BT_NONXML:
                                case BT_MALFORM:
                                    throw new InvalidTokenException(off);
                                /* Multi-byte characters: require the complete
                                   character, then let checkN() examine it.
                                   NOTE(review): checkN() presumably throws on
                                   an illegal character - confirm. */
                                case BT_LEAD2:
                                    if (end - off < 2)
                                        throw new PartialCharException(off);
                                    check2(buf, off);
                                    off += 2;
                                    break;
                                case BT_LEAD3:
                                    if (end - off < 3)
                                        throw new PartialCharException(off);
                                    check3(buf, off);
                                    off += 3;
                                    break;
                                case BT_LEAD4:
                                    if (end - off < 4)
                                        throw new PartialCharException(off);
                                    check4(buf, off);
                                    off += 4;
                                    break;
                                case BT_AMP:
                                {
                                    /* A reference inside the value: the value
                                       is not in normalized form.  scanRef()
                                       is given the same token, so NameEnd is
                                       saved and restored around the call;
                                       scanning resumes at the TokenEnd that
                                       scanRef() left in the token. */
                                    normalized = false;
                                    int saveNameEnd = token.NameEnd;
                                    scanRef(buf, off + minBPC, end, token);
                                    token.NameEnd = saveNameEnd;
                                    off = token.TokenEnd;
                                    break;
                                }
                                case BT_S:
                                    /* A BT_S character keeps the value
                                       normalized only if it is a literal ' '
                                       that is not the first character of the
                                       value and is not followed by another
                                       ' ' or by the closing quote.  The
                                       look-ahead is skipped when the next
                                       character is not in the buffer. */
                                    if (normalized
                                        && (off == valueStart
                                        || byteToAscii(buf, off) != ' '
                                        || (off + minBPC != end
                                        && (byteToAscii(buf, off + minBPC) == ' '
                                        || byteType(buf, off + minBPC) == open))))
                                        normalized = false;
                                    off += minBPC;
                                    break;
                                case BT_LT:
                                    throw new InvalidTokenException(off);
                                /* CR / LF in the value means it is not in
                                   normalized form; otherwise handled like any
                                   ordinary character. */
                                case BT_LF:
                                case BT_CR:
                                    normalized = false;
                                    /* fall through */
                                    goto default;
                                default:
                                    off += minBPC;
                                    break;
                            }
                        }
                        /* off points to the closing quote, which is therefore
                           the (exclusive) end of the value. */
                        token.appendAttribute(nameStart, NameEnd, valueStart,
                            off,
                            normalized);
                        /* Step over the closing quote. */
                        off += minBPC;
                        if (off == end)
                            throw new PartialTokenException();
                        t = byteType(buf, off);
                        /* The closing quote must be followed by whitespace,
                           BT_GT or BT_SOL; anything else (for example the
                           next attribute name with no separator) is invalid. */
                        switch (t)
                        {
                            case BT_S:
                            case BT_CR:
                            case BT_LF:
                                off += minBPC;
                                if (off == end)
                                    throw new PartialTokenException();
                                t = byteType(buf, off);
                                break;
                            case BT_GT:
                            case BT_SOL:
                                break;
                            default:
                                throw new InvalidTokenException(off);
                        }
                        /* off is now past the closing quote (and past one
                           whitespace character, if there was one); t is the
                           byte type of the character at off. */
                        /* Skip remaining whitespace until either the next
                           attribute name starts or the tag ends. */
                        for (;;)
                        {
                            switch (t)
                            {
                                /* Start of the next attribute name: remember
                                   where it starts, step over its first
                                   character and return to the outer name
                                   scanning loop. */
                                case BT_NMSTRT:
                                    nameStart = off;
                                    off += minBPC;
                                    goto skipToName;
                                case BT_LEAD2:
                                    if (end - off < 2)
                                        throw new PartialCharException(off);
                                    if (byteType2(buf, off) != BT_NMSTRT)
                                        throw new InvalidTokenException(off);
                                    nameStart = off;
                                    off += 2;
                                    goto skipToName;
                                case BT_LEAD3:
                                    if (end - off < 3)
                                        throw new PartialCharException(off);
                                    if (byteType3(buf, off) != BT_NMSTRT)
                                        throw new InvalidTokenException(off);
                                    nameStart = off;
                                    off += 3;
                                    goto skipToName;
                                case BT_LEAD4:
                                    if (end - off < 4)
                                        throw new PartialCharException(off);
                                    if (byteType4(buf, off) != BT_NMSTRT)
                                        throw new InvalidTokenException(off);
                                    nameStart = off;
                                    off += 4;
                                    goto skipToName;
                                case BT_S:
                                case BT_CR:
                                case BT_LF:
                                    break;
                                /* End of a start tag. */
                                case BT_GT:
                                    token.checkAttributeUniqueness(buf);
                                    token.TokenEnd = off + minBPC;
                                    return TOK.START_TAG_WITH_ATTS;
                                /* BT_SOL must be immediately followed by '>'
                                   to close an empty-element tag. */
                                case BT_SOL:
                                    off += minBPC;
                                    if (off == end)
                                        throw new PartialTokenException();
                                    checkCharMatches(buf, off, '>');
                                    token.checkAttributeUniqueness(buf);
                                    token.TokenEnd = off + minBPC;
                                    return TOK.EMPTY_ELEMENT_WITH_ATTS;
                                default:
                                    throw new InvalidTokenException(off);
                            }
                            /* Only the whitespace cases reach this point. */
                            off += minBPC;
                            if (off == end)
                                throw new PartialTokenException();
                            t = byteType(buf, off);
                        }

                        /* A new attribute name has begun: reset NameEnd so
                           the outer loop scans the rest of that name. */
                        skipToName:
                            NameEnd = -1;
                        break;
                    }
                    default:
                        throw new InvalidTokenException(off);
                }
            }
            /* Input ran out while still inside an attribute name. */
            throw new PartialTokenException();
        }