/**
         * <summary>Moves the pointer to the next token.</summary>
         * <remarks>To properly parse the current token, the pointer MUST be just before its starting
         * (leading whitespaces are ignored). When this method terminates, the pointer IS
         * at the last byte of the current token.</remarks>
         * <returns>Whether a new token was found.</returns>
         */
        public virtual bool MoveNext(
            )
        {
            StringBuilder buffer = null;

            token = null;
            int c = 0;

            // Skip leading white-space characters.
            do
            {
                c = stream.ReadByte();
                if (c == -1)
                {
                    return(false);
                }
            } while(IsWhitespace(c)); // Keep goin' till there's a white-space character...

            // Which character is it?
            switch (c)
            {
            case Symbol.Slash: // Name.
            {
                tokenType = TokenTypeEnum.Name;

                /*
                 * NOTE: As name objects are simple symbols uniquely defined by sequences of characters,
                 * the bytes making up the name are never treated as text, so here they are just
                 * passed through without unescaping.
                 */
                buffer = new StringBuilder();
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break; // NOOP.
                    }
                    if (IsDelimiter(c) || IsWhitespace(c))
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                if (c > -1)
                {
                    stream.Skip(-1);
                }            // Restores the first byte after the current token.
            } break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '.':
            case '-':
            case '+': // Number.
            {
                if (c == '.')
                {
                    tokenType = TokenTypeEnum.Real;
                }
                else // Digit or signum.
                {
                    tokenType = TokenTypeEnum.Integer;
                }                              // By default (it may be real).

                // Building the number...
                buffer = new StringBuilder();
                while (true)
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break; // NOOP.
                    }
                    else if (c == '.')
                    {
                        tokenType = TokenTypeEnum.Real;
                    }
                    else if (c < '0' || c > '9')
                    {
                        break;
                    }
                }
                if (c > -1)
                {
                    stream.Skip(-1);
                }            // Restores the first byte after the current token.
            } break;

            case Symbol.OpenSquareBracket: // Array (begin).
                tokenType = TokenTypeEnum.ArrayBegin;
                break;

            case Symbol.CloseSquareBracket: // Array (end).
                tokenType = TokenTypeEnum.ArrayEnd;
                break;

            case Symbol.OpenAngleBracket: // Dictionary (begin) | Hexadecimal string.
            {
                c = stream.ReadByte();
                if (c == -1)
                {
                    throw new ParseException("Unexpected EOF (isolated opening angle-bracket character).");
                }
                // Is it a dictionary (2nd angle bracket)?
                if (c == Symbol.OpenAngleBracket)
                {
                    tokenType = TokenTypeEnum.DictionaryBegin;
                    break;
                }

                // Hexadecimal string (single angle bracket).
                tokenType = TokenTypeEnum.Hex;

                buffer = new StringBuilder();
                while (c != Symbol.CloseAngleBracket) // NOT string end.
                {
                    if (!IsWhitespace(c))
                    {
                        buffer.Append((char)c);
                    }

                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new ParseException("Unexpected EOF (malformed hex string).");
                    }
                }
            } break;

            case Symbol.CloseAngleBracket: // Dictionary (end).
            {
                c = stream.ReadByte();
                if (c != Symbol.CloseAngleBracket)
                {
                    throw new ParseException("Malformed dictionary.", stream.Position);
                }

                tokenType = TokenTypeEnum.DictionaryEnd;
            } break;

            case Symbol.OpenRoundBracket: // Literal string.
            {
                tokenType = TokenTypeEnum.Literal;

                buffer = new StringBuilder();
                int level = 0;
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                    else if (c == Symbol.OpenRoundBracket)
                    {
                        level++;
                    }
                    else if (c == Symbol.CloseRoundBracket)
                    {
                        level--;
                    }
                    else if (c == '\\')
                    {
                        bool lineBreak = false;
                        c = stream.ReadByte();
                        switch (c)
                        {
                        case 'n':
                            c = Symbol.LineFeed;
                            break;

                        case 'r':
                            c = Symbol.CarriageReturn;
                            break;

                        case 't':
                            c = '\t';
                            break;

                        case 'b':
                            c = '\b';
                            break;

                        case 'f':
                            c = '\f';
                            break;

                        case Symbol.OpenRoundBracket:
                        case Symbol.CloseRoundBracket:
                        case '\\':
                            break;

                        case Symbol.CarriageReturn:
                            lineBreak = true;
                            c         = stream.ReadByte();
                            if (c != Symbol.LineFeed)
                            {
                                stream.Skip(-1);
                            }
                            break;

                        case Symbol.LineFeed:
                            lineBreak = true;
                            break;

                        default:
                        {
                            // Is it outside the octal encoding?
                            if (c < '0' || c > '7')
                            {
                                break;
                            }

                            // Octal.
                            int octal = c - '0';
                            c = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = octal & 0xff;
                            break;
                        }
                        }
                        if (lineBreak)
                        {
                            continue;
                        }
                        if (c == -1)
                        {
                            break;
                        }
                    }
                    else if (c == Symbol.CarriageReturn)
                    {
                        c = stream.ReadByte();
                        if (c == -1)
                        {
                            break;
                        }
                        else if (c != Symbol.LineFeed)
                        {
                            c = Symbol.LineFeed; stream.Skip(-1);
                        }
                    }
                    if (level == -1)
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                if (c == -1)
                {
                    throw new ParseException("Malformed literal string.");
                }
            } break;

            case Symbol.Percent: // Comment.
            {
                tokenType = TokenTypeEnum.Comment;

                buffer = new StringBuilder();
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1 ||
                        IsEOL(c))
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
            } break;

            default: // Keyword.
            {
                tokenType = TokenTypeEnum.Keyword;

                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                } while(!IsDelimiter(c) && !IsWhitespace(c));
                if (c > -1)
                {
                    stream.Skip(-1);
                }            // Restores the first byte after the current token.
            } break;
            }

            if (buffer != null)
            {
                switch (tokenType)
                {
                case TokenTypeEnum.Keyword:
                {
                    token = buffer.ToString();
                    switch ((string)token)
                    {
                    case Keyword.False:
                    case Keyword.True: // Boolean.
                        tokenType = TokenTypeEnum.Boolean;
                        token     = bool.Parse((string)token);
                        break;

                    case Keyword.Null: // Null.
                        tokenType = TokenTypeEnum.Null;
                        token     = null;
                        break;
                    }
                } break;

                case TokenTypeEnum.Name:
                case TokenTypeEnum.Literal:
                case TokenTypeEnum.Hex:
                case TokenTypeEnum.Comment:
                    token = buffer.ToString();
                    break;

                case TokenTypeEnum.Integer:
                    token = Int32.Parse(
                        buffer.ToString(),
                        NumberStyles.Integer,
                        StandardNumberFormatInfo
                        );
                    break;

                case TokenTypeEnum.Real:
                    token = Double.Parse(
                        buffer.ToString(),
                        NumberStyles.Float,
                        StandardNumberFormatInfo
                        );
                    break;
                }
            }
            return(true);
        }
示例#2
0
        /**
         * <summary>Parses the next token [PDF:1.6:3.1].</summary>
         * <remarks>To properly parse the current token, the pointer MUST be just before its starting
         * (leading whitespaces are ignored). When this method terminates, the pointer IS
         * at the last byte of the current token.</remarks>
         * <returns>Whether a new token was found.</returns>
         */
        public bool MoveNext(
            )
        {
            /*
             * NOTE: It'd be interesting to evaluate an alternative regular-expression-based
             * implementation...
             */
            StringBuilder buffer = null;

            token = null;
            int c = 0;

            // Skip white-space characters [PDF:1.6:3.1.1].
            do
            {
                c = stream.ReadByte();
                if (c == -1)
                {
                    return(false);
                }
            } while(IsWhitespace(c)); // Keep goin' till there's a white-space character...

            // Which character is it?
            switch (c)
            {
            case Symbol.Slash: // Name.
                tokenType = TokenTypeEnum.Name;

                buffer = new StringBuilder();
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed name object).", stream.Position);
                    }
                    if (IsDelimiter(c) || IsWhitespace(c))
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                stream.Skip(-1); // Recover the first byte after the current token.
                break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '.':
            case '-':
            case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
                switch (c)
                {
                case '.': // Decimal point.
                    tokenType = TokenTypeEnum.Real;
                    break;

                case '-':
                case '+':                              // Signum.
                    tokenType = TokenTypeEnum.Integer; // By default (it may be real).
                    break;

                default:                                   // Digit.
                    if (multipleTokenParsing)              // Plain number (multiple token parsing -- see indirect reference search).
                    {
                        tokenType = TokenTypeEnum.Integer; // By default (it may be real).
                    }
                    else // Maybe an indirect reference (postfix notation [PDF:1.6:3.2.9]).
                    {
                        /*
                         * NOTE: We need to identify this pattern:
                         * ref :=  { int int 'R' }
                         */
                        // Enable multiple token parsing!
                        // NOTE: This state MUST be disabled before returning.
                        multipleTokenParsing = true;

                        // 1. Object number.
                        // Try the possible object number!
                        stream.Skip(-1); MoveNext();
                        // Isn't it a valid object number?
                        if (tokenType != TokenTypeEnum.Integer)
                        {
                            // Disable multiple token parsing!
                            multipleTokenParsing = false;
                            return(true);
                        }
                        // Assign object number!
                        int objectNumber = (int)token;
                        // Backup the recovery position!
                        long oldOffset = stream.Position;

                        // 2. Generation number.
                        // Try the possible generation number!
                        MoveNext();
                        // Isn't it a valid generation number?
                        if (tokenType != TokenTypeEnum.Integer)
                        {
                            // Rollback!
                            stream.Seek(oldOffset);
                            token = objectNumber; tokenType = TokenTypeEnum.Integer;
                            // Disable multiple token parsing!
                            multipleTokenParsing = false;
                            return(true);
                        }
                        // Assign generation number!
                        int generationNumber = (int)token;

                        // 3. Reference keyword.
                        // Try the possible reference keyword!
                        MoveNext();
                        // Isn't it a valid reference keyword?
                        if (tokenType != TokenTypeEnum.Reference)
                        {
                            // Rollback!
                            stream.Seek(oldOffset);
                            token = objectNumber; tokenType = TokenTypeEnum.Integer;
                            // Disable multiple token parsing!
                            multipleTokenParsing = false;
                            return(true);
                        }
                        token = new Reference(objectNumber, generationNumber);
                        // Disable multiple token parsing!
                        multipleTokenParsing = false;
                        return(true);
                    }
                    break;
                }

                // Building the number...
                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed number object).", stream.Position);
                    }
                    if (c == '.')
                    {
                        tokenType = TokenTypeEnum.Real;
                    }
                    else if (c < '0' || c > '9')
                    {
                        break;
                    }
                } while(true);

                stream.Skip(-1); // Recover the first byte after the current token.
                break;

            case Symbol.OpenSquareBracket: // Array (begin).
                tokenType = TokenTypeEnum.ArrayBegin;
                break;

            case Symbol.CloseSquareBracket: // Array (end).
                tokenType = TokenTypeEnum.ArrayEnd;
                break;

            case Symbol.OpenAngleBracket: // Dictionary (begin) | Hexadecimal string.
                c = stream.ReadByte();
                if (c == -1)
                {
                    throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).", stream.Position);
                }
                // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
                if (c == Symbol.OpenAngleBracket)
                {
                    tokenType = TokenTypeEnum.DictionaryBegin;
                    break;
                }

                // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
                tokenType = TokenTypeEnum.Hex;

                // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
                buffer = new StringBuilder();
                while (c != Symbol.CloseAngleBracket) // NOT string end.
                {
                    buffer.Append((char)c);

                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed hex string).", stream.Position);
                    }
                }
                break;

            case Symbol.CloseAngleBracket: // Dictionary (end).
                c = stream.ReadByte();
                if (c != Symbol.CloseAngleBracket)
                {
                    throw new FileFormatException("Malformed dictionary.", stream.Position);
                }

                tokenType = TokenTypeEnum.DictionaryEnd;
                break;

            case Symbol.OpenRoundBracket: // Literal string.
                tokenType = TokenTypeEnum.Literal;

                buffer = new StringBuilder();
                int level = 0;
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                    if (c == Symbol.OpenRoundBracket)
                    {
                        level++;
                    }
                    else if (c == Symbol.CloseRoundBracket)
                    {
                        level--;
                    }
                    else if (c == '\\')
                    {
                        bool lineBreak = false;
                        c = stream.ReadByte();
                        switch (c)
                        {
                        case 'n':
                            c = Symbol.LineFeed;
                            break;

                        case 'r':
                            c = Symbol.CarriageReturn;
                            break;

                        case 't':
                            c = '\t';
                            break;

                        case 'b':
                            c = '\b';
                            break;

                        case 'f':
                            c = '\f';
                            break;

                        case Symbol.OpenRoundBracket:
                        case Symbol.CloseRoundBracket:
                        case '\\':
                            break;

                        case Symbol.CarriageReturn:
                            lineBreak = true;
                            c         = stream.ReadByte();
                            if (c != Symbol.LineFeed)
                            {
                                stream.Skip(-1);
                            }
                            break;

                        case Symbol.LineFeed:
                            lineBreak = true;
                            break;

                        default:
                        {
                            // Is it outside the octal encoding?
                            if (c < '0' || c > '7')
                            {
                                break;
                            }

                            // Octal [PDF:1.6:3.2.3].
                            int octal = c - '0';
                            c = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = octal & 0xff;
                            break;
                        }
                        }
                        if (lineBreak)
                        {
                            continue;
                        }
                        if (c == -1)
                        {
                            break;
                        }
                    }
                    else if (c == Symbol.CarriageReturn)
                    {
                        c = stream.ReadByte();
                        if (c == -1)
                        {
                            break;
                        }
                        if (c != Symbol.LineFeed)
                        {
                            c = Symbol.LineFeed; stream.Skip(-1);
                        }
                    }
                    if (level == -1)
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                if (c == -1)
                {
                    throw new FileFormatException("Malformed literal string.", stream.Position);
                }

                break;

            case Symbol.CapitalR: // Indirect reference.
                tokenType = TokenTypeEnum.Reference;
                break;

            case Symbol.Percent: // Comment [PDF:1.6:3.1.2].
                tokenType = TokenTypeEnum.Comment;

                buffer = new StringBuilder();
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1 ||
                        IsEOL(c))
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                break;

            default: // Keyword object.
                tokenType = TokenTypeEnum.Keyword;

                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                } while(!IsDelimiter(c) && !IsWhitespace(c));
                stream.Skip(-1); // Recover the first byte after the current token.
                break;
            }

            if (buffer != null)
            {
                /*
                 * Current token initialization.
                 */
                // Wich token type?
                switch (tokenType)
                {
                case TokenTypeEnum.Keyword:
                    token = buffer.ToString();
                    // Late recognition.
                    switch ((string)token)
                    {
                    case Keyword.False:
                    case Keyword.True: // Boolean.
                        tokenType = TokenTypeEnum.Boolean;
                        token     = bool.Parse((string)token);
                        break;

                    case Keyword.Null: // Null.
                        tokenType = TokenTypeEnum.Null;
                        token     = null;
                        break;
                    }
                    break;

                case TokenTypeEnum.Comment:
                case TokenTypeEnum.Hex:
                case TokenTypeEnum.Name:
                    token = buffer.ToString();
                    break;

                case TokenTypeEnum.Literal:
                    token = buffer.ToString();
                    // Late recognition.
                    if (((string)token).StartsWith(Keyword.DatePrefix)) // Date.
                    {
                        tokenType = TokenTypeEnum.Date;
                        token     = PdfDate.ToDate((string)token);
                    }
                    break;

                case TokenTypeEnum.Integer:
                    token = Int32.Parse(
                        buffer.ToString(),
                        NumberStyles.Integer,
                        StandardNumberFormatInfo
                        );
                    break;

                case TokenTypeEnum.Real:
                    // [FIX:1668410] Parsing of float numbers was buggy (localized default number format).
                    token = Single.Parse(
                        buffer.ToString(),
                        NumberStyles.Float,
                        StandardNumberFormatInfo
                        );
                    break;
                }
            }
            return(true);
        }