Ejemplo n.º 1
0
        /**
         * <summary>Moves to the next significant token, building on the base tokenizer.</summary>
         * <remarks>Comment tokens returned by the base implementation are skipped. Literal string
         * tokens starting with <c>Keyword.DatePrefix</c> have their value replaced by the result of
         * <c>PdfDate.ToDate</c>; if that conversion raises a <c>ParseException</c>, the token is left
         * as an ordinary literal string.</remarks>
         * <returns>Whether a new (non-comment) token was found.</returns>
         */
        public override bool MoveNext(
            )
        {
            while (base.MoveNext())
            {
                TokenTypeEnum tokenType = TokenType;
                if (tokenType == TokenTypeEnum.Comment)
                {
                    continue; // Comments are ignored.
                }

                if (tokenType == TokenTypeEnum.Literal)
                {
                    string literalToken = (string)Token;
                    // The date prefix is a fixed marker, not linguistic text: compare it ordinally so that
                    // the result does not depend on the current culture.
                    if (literalToken.StartsWith(Keyword.DatePrefix, System.StringComparison.Ordinal)) // Date.
                    {
                        /*
                         * NOTE: Dates are a weak extension to the PostScript language.
                         */
                        try
                        { Token = PdfDate.ToDate(literalToken); }
                        catch (ParseException)
                        { /* NOOP: gently degrade to a common literal. */ }
                    }
                }

                // A significant token has been found.
                return(true);
            }

            // The base tokenizer has no more tokens.
            return(false);
        }
Ejemplo n.º 2
0
        /**
         * <summary>Parse the next token [PDF:1.6:3.1].</summary>
         * <remarks>
         *  Contract:
         *  <list type="bullet">
         *    <item>Preconditions:
         *      <list type="number">
         *        <item>To properly parse the current token, the pointer MUST be just before its start (leading white-space characters are ignored).</item>
         *      </list>
         *    </item>
         *    <item>Postconditions:
         *      <list type="number">
         *        <item id="moveNext_contract_post[0]">When this method terminates, the pointer IS at the last byte of the current token.</item>
         *      </list>
         *    </item>
         *    <item>Invariants:
         *      <list type="number">
         *        <item>The byte-level position of the pointer IS anytime (during token parsing) at the end of the current token (whereas the 'current token' represents the token-level position of the pointer).</item>
         *      </list>
         *    </item>
         *    <item>Side-effects:
         *      <list type="number">
         *        <item>See <see href="#moveNext_contract_post[0]">Postconditions</see>.</item>
         *        <item>When the current stream is exhausted while looking for the start of a token, <c>MoveNextStream()</c> is invoked and scanning goes on with whatever stream it leaves current.</item>
         *      </list>
         *    </item>
         *  </list>
         * </remarks>
         * <returns>Whether a new token was found (false when there is no current stream or no
         * further stream is available).</returns>
         * <exception cref="FileFormatException">A name hexadecimal code, an angle-bracket construct,
         * a hexadecimal string or a literal string is malformed or truncated.</exception>
         */
        public bool MoveNext(
            )
        {
            if (stream == null)
            {
                return(false);
            }

            /*
             * NOTE: It'd be interesting to evaluate an alternative regular-expression-based
             * implementation...
             */
            int c = 0;

            // Skip white-space characters [PDF:1.6:3.1.1].
            while (true)
            {
                c = stream.ReadByte();
                if (c == -1)
                {
                    /* NOTE: Current stream has finished. */
                    // Move to the next stream!
                    MoveNextStream();
                    // No more streams?
                    if (stream == null)
                    {
                        return(false);
                    }
                }
                else if (!IsWhitespace(c)) // Stop at the first non-white-space character.
                {
                    break;
                }
            }

            // Textual content of the token (left null for purely symbolic tokens such as brackets).
            StringBuilder buffer = null;

            token = null;
            // Which character is it?
            switch (c)
            {
            case '/': // Name.
                tokenType = TokenTypeEnum.Name;

                buffer = new StringBuilder();
                while ((c = stream.ReadByte()) != -1)
                {
                    if (IsDelimiter(c) || IsWhitespace(c))
                    {
                        break;
                    }
                    // Is it an hexadecimal code [PDF:1.6:3.2.4]?
                    if (c == '#')
                    {
                        try
                        { c = (GetHex(stream.ReadByte()) << 4) + GetHex(stream.ReadByte()); }
                        catch
                        { throw new FileFormatException("Unexpected EOF (malformed hexadecimal code in name object).", stream.Position); }
                    }

                    buffer.Append((char)c);
                }
                /*
                 * NOTE: The pointer is stepped back only if a terminating byte was actually read;
                 * on end-of-stream nothing was consumed, so stepping back would presumably re-expose
                 * the last byte of this very token to the next call.
                 */
                if (c != -1)
                {
                    stream.Skip(-1); // Recover the first byte after the current token.
                }
                break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '.':
            case '-':
            case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
                switch (c)
                {
                case '.': // Decimal point.
                    tokenType = TokenTypeEnum.Real;
                    break;

                default:                               // Digit or signum.
                    tokenType = TokenTypeEnum.Integer; // By default (it may be real).
                    break;
                }

                // Building the number...
                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }

                    if (c == '.')
                    {
                        // A decimal point promotes the number to real.
                        tokenType = TokenTypeEnum.Real;
                    }
                    else if (c < '0' || c > '9')
                    {
                        break;
                    }
                } while(true);

                // Step back only if a terminating byte was actually consumed (not on end-of-stream).
                if (c != -1)
                {
                    stream.Skip(-1); // Recover the first byte after the current token.
                }

                break;

            case '[': // Array (begin).
                tokenType = TokenTypeEnum.ArrayBegin;

                break;

            case ']': // Array (end).
                tokenType = TokenTypeEnum.ArrayEnd;

                break;

            case '<': // Dictionary (begin) | Hexadecimal string.
                c = stream.ReadByte();
                if (c == -1)
                {
                    throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).", stream.Position);
                }
                // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
                if (c == '<')
                {
                    tokenType = TokenTypeEnum.DictionaryBegin;
                    break;
                }

                // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
                tokenType = TokenTypeEnum.Hex;

                // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
                buffer = new StringBuilder();
                while (c != '>') // NOT string end.
                {
                    buffer.Append((char)c);

                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed hex string).", stream.Position);
                    }
                }

                break;

            case '>': // Dictionary (end).
                c = stream.ReadByte();
                if (c != '>')
                {
                    throw new FileFormatException("Malformed dictionary.", stream.Position);
                }

                tokenType = TokenTypeEnum.DictionaryEnd;

                break;

            case '(': // Literal string.
                tokenType = TokenTypeEnum.Literal;

                buffer = new StringBuilder();
                // Nesting depth of unescaped round brackets; it drops to -1 on the closing bracket of the string.
                int level = 0;
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                    if (c == '(')
                    {
                        level++;
                    }
                    else if (c == ')')
                    {
                        level--;
                    }
                    else if (c == '\\') // Escape sequence.
                    {
                        bool lineBreak = false;
                        c = stream.ReadByte();
                        switch (c)
                        {
                        case 'n':
                            c = '\n';
                            break;

                        case 'r':
                            c = '\r';
                            break;

                        case 't':
                            c = '\t';
                            break;

                        case 'b':
                            c = '\b';
                            break;

                        case 'f':
                            c = '\f';
                            break;

                        case '(':
                        case ')':
                        case '\\':
                            // Escaped bracket/backslash: taken literally (nesting level untouched).
                            break;

                        case '\r':
                            // Escaped end-of-line (CR or CR+LF): line continuation, nothing is appended.
                            lineBreak = true;
                            c         = stream.ReadByte();
                            if (c != '\n')
                            {
                                stream.Skip(-1);
                            }
                            break;

                        case '\n':
                            // Escaped end-of-line (LF): line continuation, nothing is appended.
                            lineBreak = true;
                            break;

                        default:
                        {
                            // Is it outside the octal encoding?
                            if (c < '0' || c > '7')
                            {
                                break;
                            }

                            // Octal [PDF:1.6:3.2.3] (one to three digits).
                            int octal = c - '0';
                            c = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = octal & 0xff; // High-order overflow is discarded.
                            break;
                        }
                        }
                        if (lineBreak)
                        {
                            continue;
                        }
                        if (c == -1)
                        {
                            break;
                        }
                    }
                    else if (c == '\r')
                    {
                        // Unescaped end-of-line (CR or CR+LF) is normalized to a single LF.
                        c = stream.ReadByte();
                        if (c == -1)
                        {
                            break;
                        }
                        if (c != '\n')
                        {
                            c = '\n'; stream.Skip(-1);
                        }
                    }
                    if (level == -1)
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                if (c == -1)
                {
                    throw new FileFormatException("Malformed literal string.", stream.Position);
                }

                break;

            case '%': // Comment.
                tokenType = TokenTypeEnum.Comment;

                buffer = new StringBuilder();
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1 ||
                        IsEOL(c))
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }

                break;

            default: // Keyword.
                tokenType = TokenTypeEnum.Keyword;

                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                } while(!IsDelimiter(c) && !IsWhitespace(c));
                // Step back only if a terminating byte was actually consumed (not on end-of-stream).
                if (c != -1)
                {
                    stream.Skip(-1); // Recover the first byte after the current token.
                }

                break;
            }

            if (buffer != null)
            {
                /*
                 * Here we prepare the current token state.
                 */
                // Which token type?
                switch (tokenType)
                {
                case TokenTypeEnum.Keyword:
                    token = buffer.ToString();
                    // Late recognition.
                    switch ((string)token)
                    {
                    case Keyword.False:
                    case Keyword.True: // Boolean.
                        tokenType = TokenTypeEnum.Boolean;
                        token     = bool.Parse((string)token);
                        break;

                    case Keyword.Null: // Null.
                        tokenType = TokenTypeEnum.Null;
                        token     = null;
                        break;
                    }
                    break;

                case TokenTypeEnum.Comment:
                case TokenTypeEnum.Hex:
                case TokenTypeEnum.Name:
                    token = buffer.ToString();
                    break;

                case TokenTypeEnum.Literal:
                    token = buffer.ToString();
                    // Late recognition (ordinal comparison: the prefix is a fixed marker, not linguistic text).
                    // NOTE(review): PdfDate.ToDate presumably fails on malformed dates -- confirm whether
                    // such literals should degrade to plain strings instead.
                    if (((string)token).StartsWith("D:", StringComparison.Ordinal)) // Date.
                    {
                        tokenType = TokenTypeEnum.Date;
                        token     = PdfDate.ToDate((string)token);
                    }
                    break;

                case TokenTypeEnum.Integer:
                    token = Int32.Parse(
                        buffer.ToString(),
                        NumberStyles.Integer,
                        StandardNumberFormatInfo
                        );
                    break;

                case TokenTypeEnum.Real:
                    // [FIX:1668410] Parsing of float numbers was buggy (localized default number format).
                    token = Single.Parse(
                        buffer.ToString(),
                        NumberStyles.Float,
                        StandardNumberFormatInfo
                        );
                    break;
                }
            }

            return(true);
        }
Ejemplo n.º 3
0
        /**
         * <summary>Parses the next token [PDF:1.6:3.1].</summary>
         * <remarks>
         * <para>To properly parse the current token, the pointer MUST be just before its start
         * (leading white-space characters are ignored). When this method terminates, the pointer IS
         * at the last byte of the current token.</para>
         * <para>When a token begins with a digit, the method looks ahead (calling itself with the
         * <c>multipleTokenParsing</c> flag raised) for the pattern <c>int int 'R'</c>: if matched,
         * the three tokens are merged into a single <c>Reference</c> token; otherwise the pointer is
         * restored just after the first number, which is returned on its own.</para>
         * </remarks>
         * <returns>Whether a new token was found.</returns>
         * <exception cref="FileFormatException">A name, a number, an angle-bracket construct,
         * a hexadecimal string or a literal string is malformed or truncated.</exception>
         */
        public bool MoveNext(
            )
        {
            /*
             * NOTE: It'd be interesting to evaluate an alternative regular-expression-based
             * implementation...
             */
            // Textual content of the token (left null for purely symbolic tokens such as brackets).
            StringBuilder buffer = null;

            token = null;
            int c = 0;

            // Skip white-space characters [PDF:1.6:3.1.1].
            do
            {
                c = stream.ReadByte();
                if (c == -1)
                {
                    return(false);
                }
            } while(IsWhitespace(c)); // Keep going while white-space characters are found.

            // Which character is it?
            switch (c)
            {
            case Symbol.Slash: // Name.
                tokenType = TokenTypeEnum.Name;

                buffer = new StringBuilder();
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed name object).", stream.Position);
                    }
                    if (IsDelimiter(c) || IsWhitespace(c))
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                stream.Skip(-1); // Recover the first byte after the current token.
                break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '.':
            case '-':
            case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
                switch (c)
                {
                case '.': // Decimal point.
                    tokenType = TokenTypeEnum.Real;
                    break;

                case '-':
                case '+':                              // Signum.
                    tokenType = TokenTypeEnum.Integer; // By default (it may be real).
                    break;

                default:                                   // Digit.
                    if (multipleTokenParsing)              // Plain number (multiple token parsing -- see indirect reference search).
                    {
                        tokenType = TokenTypeEnum.Integer; // By default (it may be real).
                    }
                    else // Maybe an indirect reference (postfix notation [PDF:1.6:3.2.9]).
                    {
                        /*
                         * NOTE: We need to identify this pattern:
                         * ref :=  { int int 'R' }
                         */
                        // Enable multiple token parsing!
                        // NOTE: This state MUST be disabled before leaving: the finally clause guarantees it
                        // on every exit path, including exceptions raised by the nested parsing.
                        multipleTokenParsing = true;
                        try
                        {
                            // 1. Object number.
                            // Try the possible object number (re-reading the digit just consumed)!
                            stream.Skip(-1); MoveNext();
                            // Isn't it a valid object number?
                            if (tokenType != TokenTypeEnum.Integer)
                            {
                                return(true);
                            }
                            // Assign object number!
                            int objectNumber = (int)token;
                            // Backup the recovery position!
                            long oldOffset = stream.Position;

                            // 2. Generation number.
                            // Try the possible generation number!
                            /*
                             * NOTE: The outcome of the nested parsing MUST be checked: on end-of-stream no token
                             * is produced (token is null) while the token type still holds the previous value.
                             */
                            if (!MoveNext() || tokenType != TokenTypeEnum.Integer)
                            {
                                // Rollback!
                                stream.Seek(oldOffset);
                                token = objectNumber; tokenType = TokenTypeEnum.Integer;
                                return(true);
                            }
                            // Assign generation number!
                            int generationNumber = (int)token;

                            // 3. Reference keyword.
                            // Try the possible reference keyword!
                            if (!MoveNext() || tokenType != TokenTypeEnum.Reference)
                            {
                                // Rollback!
                                stream.Seek(oldOffset);
                                token = objectNumber; tokenType = TokenTypeEnum.Integer;
                                return(true);
                            }
                            token = new Reference(objectNumber, generationNumber);
                            return(true);
                        }
                        finally
                        {
                            // Disable multiple token parsing!
                            multipleTokenParsing = false;
                        }
                    }
                    break;
                }

                // Building the number...
                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed number object).", stream.Position);
                    }
                    if (c == '.')
                    {
                        // A decimal point promotes the number to real.
                        tokenType = TokenTypeEnum.Real;
                    }
                    else if (c < '0' || c > '9')
                    {
                        break;
                    }
                } while(true);

                stream.Skip(-1); // Recover the first byte after the current token.
                break;

            case Symbol.OpenSquareBracket: // Array (begin).
                tokenType = TokenTypeEnum.ArrayBegin;
                break;

            case Symbol.CloseSquareBracket: // Array (end).
                tokenType = TokenTypeEnum.ArrayEnd;
                break;

            case Symbol.OpenAngleBracket: // Dictionary (begin) | Hexadecimal string.
                c = stream.ReadByte();
                if (c == -1)
                {
                    throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).", stream.Position);
                }
                // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
                if (c == Symbol.OpenAngleBracket)
                {
                    tokenType = TokenTypeEnum.DictionaryBegin;
                    break;
                }

                // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
                tokenType = TokenTypeEnum.Hex;

                // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
                buffer = new StringBuilder();
                while (c != Symbol.CloseAngleBracket) // NOT string end.
                {
                    buffer.Append((char)c);

                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed hex string).", stream.Position);
                    }
                }
                break;

            case Symbol.CloseAngleBracket: // Dictionary (end).
                c = stream.ReadByte();
                if (c != Symbol.CloseAngleBracket)
                {
                    throw new FileFormatException("Malformed dictionary.", stream.Position);
                }

                tokenType = TokenTypeEnum.DictionaryEnd;
                break;

            case Symbol.OpenRoundBracket: // Literal string.
                tokenType = TokenTypeEnum.Literal;

                buffer = new StringBuilder();
                // Nesting depth of unescaped round brackets; it drops to -1 on the closing bracket of the string.
                int level = 0;
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                    if (c == Symbol.OpenRoundBracket)
                    {
                        level++;
                    }
                    else if (c == Symbol.CloseRoundBracket)
                    {
                        level--;
                    }
                    else if (c == '\\') // Escape sequence.
                    {
                        bool lineBreak = false;
                        c = stream.ReadByte();
                        switch (c)
                        {
                        case 'n':
                            c = Symbol.LineFeed;
                            break;

                        case 'r':
                            c = Symbol.CarriageReturn;
                            break;

                        case 't':
                            c = '\t';
                            break;

                        case 'b':
                            c = '\b';
                            break;

                        case 'f':
                            c = '\f';
                            break;

                        case Symbol.OpenRoundBracket:
                        case Symbol.CloseRoundBracket:
                        case '\\':
                            // Escaped bracket/backslash: taken literally (nesting level untouched).
                            break;

                        case Symbol.CarriageReturn:
                            // Escaped end-of-line (CR or CR+LF): line continuation, nothing is appended.
                            lineBreak = true;
                            c         = stream.ReadByte();
                            if (c != Symbol.LineFeed)
                            {
                                stream.Skip(-1);
                            }
                            break;

                        case Symbol.LineFeed:
                            // Escaped end-of-line (LF): line continuation, nothing is appended.
                            lineBreak = true;
                            break;

                        default:
                        {
                            // Is it outside the octal encoding?
                            if (c < '0' || c > '7')
                            {
                                break;
                            }

                            // Octal [PDF:1.6:3.2.3] (one to three digits).
                            int octal = c - '0';
                            c = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = octal & 0xff; // High-order overflow is discarded.
                            break;
                        }
                        }
                        if (lineBreak)
                        {
                            continue;
                        }
                        if (c == -1)
                        {
                            break;
                        }
                    }
                    else if (c == Symbol.CarriageReturn)
                    {
                        // Unescaped end-of-line (CR or CR+LF) is normalized to a single LF.
                        c = stream.ReadByte();
                        if (c == -1)
                        {
                            break;
                        }
                        if (c != Symbol.LineFeed)
                        {
                            c = Symbol.LineFeed; stream.Skip(-1);
                        }
                    }
                    if (level == -1)
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                if (c == -1)
                {
                    throw new FileFormatException("Malformed literal string.", stream.Position);
                }

                break;

            case Symbol.CapitalR: // Indirect reference.
                // NOTE(review): any token starting with 'R' is classified as a reference keyword
                // without inspecting the following bytes -- confirm no other 'R'-leading keyword is expected here.
                tokenType = TokenTypeEnum.Reference;
                break;

            case Symbol.Percent: // Comment [PDF:1.6:3.1.2].
                tokenType = TokenTypeEnum.Comment;

                buffer = new StringBuilder();
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1 ||
                        IsEOL(c))
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                break;

            default: // Keyword object.
                tokenType = TokenTypeEnum.Keyword;

                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                } while(!IsDelimiter(c) && !IsWhitespace(c));
                /*
                 * NOTE: The pointer is stepped back only if a terminating byte was actually read;
                 * on end-of-stream nothing was consumed, so stepping back would presumably re-expose
                 * the last byte of this very keyword to the next call.
                 */
                if (c != -1)
                {
                    stream.Skip(-1); // Recover the first byte after the current token.
                }
                break;
            }

            if (buffer != null)
            {
                /*
                 * Current token initialization.
                 */
                // Which token type?
                switch (tokenType)
                {
                case TokenTypeEnum.Keyword:
                    token = buffer.ToString();
                    // Late recognition.
                    switch ((string)token)
                    {
                    case Keyword.False:
                    case Keyword.True: // Boolean.
                        tokenType = TokenTypeEnum.Boolean;
                        token     = bool.Parse((string)token);
                        break;

                    case Keyword.Null: // Null.
                        tokenType = TokenTypeEnum.Null;
                        token     = null;
                        break;
                    }
                    break;

                case TokenTypeEnum.Comment:
                case TokenTypeEnum.Hex:
                case TokenTypeEnum.Name:
                    token = buffer.ToString();
                    break;

                case TokenTypeEnum.Literal:
                    token = buffer.ToString();
                    // Late recognition (ordinal comparison: the prefix is a fixed marker, not linguistic text).
                    // NOTE(review): PdfDate.ToDate presumably fails on malformed dates -- confirm whether
                    // such literals should degrade to plain strings instead.
                    if (((string)token).StartsWith(Keyword.DatePrefix, StringComparison.Ordinal)) // Date.
                    {
                        tokenType = TokenTypeEnum.Date;
                        token     = PdfDate.ToDate((string)token);
                    }
                    break;

                case TokenTypeEnum.Integer:
                    token = Int32.Parse(
                        buffer.ToString(),
                        NumberStyles.Integer,
                        StandardNumberFormatInfo
                        );
                    break;

                case TokenTypeEnum.Real:
                    // [FIX:1668410] Parsing of float numbers was buggy (localized default number format).
                    token = Single.Parse(
                        buffer.ToString(),
                        NumberStyles.Float,
                        StandardNumberFormatInfo
                        );
                    break;
                }
            }
            return(true);
        }