Beispiel #1
0
        /**
         * <summary>Retrieves the PDF version of the file [PDF:1.6:3.4.1].</summary>
         * <remarks>Seeks the underlying stream to offset 0 and reads a 10-character string from there;
         * the stream position is NOT restored afterwards.</remarks>
         * <returns>The 3 characters which immediately follow the header marker
         * (<code>Keyword.BOF</code>).</returns>
         * <exception cref="PostScriptParseException">Thrown if the read string doesn't begin with the
         * header marker.</exception>
         */
        public string RetrieveVersion(
            )
        {
            IInputStream stream = Stream;

            stream.Seek(0);
            string header = stream.ReadString(10);

            // NOTE: The header marker is a fixed character sequence, so it has to be matched ordinally
            // (the single-argument StartsWith overload applies current-culture comparison rules).
            if (!header.StartsWith(Keyword.BOF, System.StringComparison.Ordinal))
            {
                throw new PostScriptParseException("PDF header not found.", this);
            }

            return(header.Substring(Keyword.BOF.Length, 3));
        }
Beispiel #2
0
        /**
         * <summary>Moves to the next token, merging the indirect reference pattern
         * (<code>int int 'R'</code>) into a single <code>Reference</code> token.</summary>
         * <remarks>When the token found by the base parser is an integer, up to two further tokens are
         * read ahead. If they don't complete the reference pattern, the stream is sought back to the
         * position following the first integer, which is restored as the current token.</remarks>
         * <returns>Whether a new token was found (result of the first base call only).</returns>
         */
        public override bool MoveNext(
            )
        {
            bool moved = base.MoveNext();

            if (moved)
            {
                switch (TokenType)
                {
                case TokenTypeEnum.Integer:
                {
                    /*
                     * NOTE: We need to verify whether indirect reference pattern is applicable:
                     * ref :=  { int int 'R' }
                     */
                    IInputStream stream     = Stream;
                    long         baseOffset = stream.Position; // Backs up the recovery position.

                    // 1. Object number.
                    int objectNumber = (int)Token;
                    // 2. Generation number.
                    /*
                     * NOTE: The look-ahead results MUST be checked: when no further token is available
                     * the token state can't be trusted (NOTE(review): the base implementation isn't
                     * visible here -- presumably it may leave a stale TokenType behind), so we just
                     * fall through to the rollback below.
                     */
                    if (base.MoveNext() &&
                        TokenType == TokenTypeEnum.Integer)
                    {
                        int generationNumber = (int)Token;
                        // 3. Reference keyword.
                        if (base.MoveNext() &&
                            TokenType == TokenTypeEnum.Keyword &&
                            Token.Equals(Keyword.Reference))
                        {
                            Token = new Reference(objectNumber, generationNumber);
                        }
                    }
                    if (!(Token is Reference))
                    {
                        // Rollback!
                        stream.Seek(baseOffset);
                        Token     = objectNumber;
                        TokenType = TokenTypeEnum.Integer;
                    }
                } break;
                }
            }
            return(moved);
        }
 /**
  * <summary>Repositions the pointer of the underlying stream.</summary>
  * <param name="offset">Absolute byte position to move to; it's forwarded unchanged to the
  * underlying stream.</param>
  */
 public void Seek(long offset)
 {
     stream.Seek(offset);
 }
Beispiel #4
0
        /**
         * <summary>Parses the PDF object associated to the current token, adding support for indirect
         * references and stream objects on top of the base implementation.</summary>
         * <remarks>
         * <list type="bullet">
         * <item>A keyword token carrying a <code>Reference</code> yields a <code>PdfReference</code>.</item>
         * <item>A dictionary followed by the begin-stream keyword yields an <code>ObjectStream</code>,
         * an <code>XRefStream</code> or a generic <code>PdfStream</code>, according to its
         * <code>Type</code> entry; in such case the last token read is the one following the stream
         * data.</item>
         * <item>Any other dictionary is returned as is, after seeking the stream back to the position
         * it had when the dictionary was parsed.</item>
         * <item>Everything else is returned as parsed by the base implementation.</item>
         * </list>
         * </remarks>
         * <returns>The parsed object.</returns>
         */
        public override PdfDataObject ParsePdfObject(
            )
        {
            switch (TokenType)
            {
            case TokenTypeEnum.Keyword:
                if (Token is Reference)
                {
                    Reference reference = (Reference)Token;
                    return(new PdfReference(reference.ObjectNumber, reference.GenerationNumber, file));
                }
                break;
            }

            PdfDataObject pdfObject = base.ParsePdfObject();

            if (pdfObject is PdfDictionary)
            {
                IInputStream stream = Stream;
                /*
                 * NOTE: The recovery position is kept as a long, consistently with the stream position
                 * type: narrowing it to int would wrap beyond Int32.MaxValue, making the rollback
                 * below seek to a wrong offset.
                 */
                long oldOffset = stream.Position;
                MoveNext();
                // Is this dictionary the header of a stream object [PDF:1.6:3.2.7]?
                if ((TokenType == TokenTypeEnum.Keyword) &&
                    Token.Equals(Keyword.BeginStream))
                {
                    PdfDictionary streamHeader = (PdfDictionary)pdfObject;

                    // Keep track of current position!

                    /*
                     * NOTE: Indirect reference resolution is an outbound call which affects the stream pointer position,
                     * so we need to recover our current position after it returns.
                     */
                    long position = stream.Position;
                    // Get the stream length!
                    int length = ((PdfInteger)streamHeader.Resolve(PdfName.Length)).IntValue;
                    // Move to the stream data beginning!
                    stream.Seek(position); SkipEOL();

                    // Copy the stream data to the instance!
                    byte[] data = new byte[length];
                    stream.Read(data);

                    MoveNext(); // Postcondition (last token should be 'endstream' keyword).

                    Object       streamType = streamHeader[PdfName.Type];
                    bytes.Buffer streamBody = new bytes.Buffer(data);
                    if (PdfName.ObjStm.Equals(streamType)) // Object stream [PDF:1.6:3.4.6].
                    {
                        return(new ObjectStream(streamHeader, streamBody));
                    }
                    else if (PdfName.XRef.Equals(streamType)) // Cross-reference stream [PDF:1.6:3.4.7].
                    {
                        return(new XRefStream(streamHeader, streamBody));
                    }
                    else // Generic stream.
                    {
                        return(new PdfStream(streamHeader, streamBody));
                    }
                }
                else // Stand-alone dictionary.
                {
                    // Restores postcondition (last token should be the dictionary end).
                    stream.Seek(oldOffset);
                }
            }
            return(pdfObject);
        }
Beispiel #5
0
        /**
         * <summary>Parses the next token [PDF:1.6:3.1].</summary>
         * <remarks>
         * <para>To properly parse the current token, the pointer MUST be just before its starting
         * (leading whitespaces are ignored). When this method terminates, the pointer IS
         * at the last byte of the current token.</para>
         * <para>A token beginning with a digit triggers a look-ahead of up to two further tokens, in
         * order to detect the indirect reference pattern (<code>int int 'R'</code>): on match, the
         * token is a <code>Reference</code>; otherwise the stream is sought back just after the first
         * integer, which becomes the current token.</para>
         * </remarks>
         * <returns>Whether a new token was found (false if only whitespace was left before the end
         * of the stream).</returns>
         * <exception cref="FileFormatException">Thrown on unexpected end of stream inside a name,
         * number, hexadecimal string or literal string, on an isolated opening angle bracket, or on an
         * unpaired closing angle bracket.</exception>
         */
        public bool MoveNext(
            )
        {
            /*
             * NOTE: It'd be interesting to evaluate an alternative regular-expression-based
             * implementation...
             */
            StringBuilder buffer = null;

            token = null;
            int c = 0;

            // Skip white-space characters [PDF:1.6:3.1.1].
            do
            {
                c = stream.ReadByte();
                if (c == -1)
                {
                    return(false);
                }
            } while(IsWhitespace(c)); // Keep goin' till there's a white-space character...

            // Which character is it?
            switch (c)
            {
            case Symbol.Slash: // Name.
                tokenType = TokenTypeEnum.Name;

                buffer = new StringBuilder();
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed name object).", stream.Position);
                    }
                    if (IsDelimiter(c) || IsWhitespace(c))
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                stream.Skip(-1); // Recover the first byte after the current token.
                break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '.':
            case '-':
            case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
                switch (c)
                {
                case '.': // Decimal point.
                    tokenType = TokenTypeEnum.Real;
                    break;

                case '-':
                case '+':                              // Signum.
                    tokenType = TokenTypeEnum.Integer; // By default (it may be real).
                    break;

                default:                                   // Digit.
                    if (multipleTokenParsing)              // Plain number (multiple token parsing -- see indirect reference search).
                    {
                        tokenType = TokenTypeEnum.Integer; // By default (it may be real).
                    }
                    else // Maybe an indirect reference (postfix notation [PDF:1.6:3.2.9]).
                    {
                        /*
                         * NOTE: We need to identify this pattern:
                         * ref :=  { int int 'R' }
                         */
                        // Enable multiple token parsing!
                        /*
                         * NOTE: This state MUST be disabled before leaving, whichever the exit path is
                         * (including exceptions raised by the nested parsing): otherwise any later
                         * call would treat digits as plain numbers only, never detecting references.
                         */
                        multipleTokenParsing = true;
                        try
                        {
                            // 1. Object number.
                            // Try the possible object number!
                            // NOTE: The current digit is pushed back, so a token is certainly found.
                            stream.Skip(-1); MoveNext();
                            // Isn't it a valid object number?
                            if (tokenType != TokenTypeEnum.Integer)
                            {
                                return(true);
                            }
                            // Assign object number!
                            int objectNumber = (int)token;
                            // Backup the recovery position!
                            long oldOffset = stream.Position;

                            // 2. Generation number.
                            /*
                             * NOTE: The look-ahead results MUST be checked: when the end of the stream
                             * is reached, the token is null whilst the token type still keeps its
                             * previous value (integer), so unboxing the token would fail.
                             */
                            if (MoveNext() &&
                                tokenType == TokenTypeEnum.Integer)
                            {
                                // Assign generation number!
                                int generationNumber = (int)token;

                                // 3. Reference keyword.
                                if (MoveNext() &&
                                    tokenType == TokenTypeEnum.Reference)
                                {
                                    token = new Reference(objectNumber, generationNumber);
                                    return(true);
                                }
                            }

                            // Rollback (plain integer)!
                            stream.Seek(oldOffset);
                            token = objectNumber; tokenType = TokenTypeEnum.Integer;
                            return(true);
                        }
                        finally
                        {
                            // Disable multiple token parsing!
                            multipleTokenParsing = false;
                        }
                    }
                    break;
                }

                // Building the number...
                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed number object).", stream.Position);
                    }
                    if (c == '.')
                    {
                        tokenType = TokenTypeEnum.Real;
                    }
                    else if (c < '0' || c > '9')
                    {
                        break;
                    }
                } while(true);

                stream.Skip(-1); // Recover the first byte after the current token.
                break;

            case Symbol.OpenSquareBracket: // Array (begin).
                tokenType = TokenTypeEnum.ArrayBegin;
                break;

            case Symbol.CloseSquareBracket: // Array (end).
                tokenType = TokenTypeEnum.ArrayEnd;
                break;

            case Symbol.OpenAngleBracket: // Dictionary (begin) | Hexadecimal string.
                c = stream.ReadByte();
                if (c == -1)
                {
                    throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).", stream.Position);
                }
                // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
                if (c == Symbol.OpenAngleBracket)
                {
                    tokenType = TokenTypeEnum.DictionaryBegin;
                    break;
                }

                // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
                tokenType = TokenTypeEnum.Hex;

                // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
                buffer = new StringBuilder();
                while (c != Symbol.CloseAngleBracket) // NOT string end.
                {
                    buffer.Append((char)c);

                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed hex string).", stream.Position);
                    }
                }
                break;

            case Symbol.CloseAngleBracket: // Dictionary (end).
                c = stream.ReadByte();
                if (c != Symbol.CloseAngleBracket)
                {
                    throw new FileFormatException("Malformed dictionary.", stream.Position);
                }

                tokenType = TokenTypeEnum.DictionaryEnd;
                break;

            case Symbol.OpenRoundBracket: // Literal string.
                tokenType = TokenTypeEnum.Literal;

                buffer = new StringBuilder();
                int level = 0; // Nesting depth of unescaped round brackets (-1 closes the string).
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                    if (c == Symbol.OpenRoundBracket)
                    {
                        level++;
                    }
                    else if (c == Symbol.CloseRoundBracket)
                    {
                        level--;
                    }
                    else if (c == '\\') // Escape sequence.
                    {
                        bool lineBreak = false;
                        c = stream.ReadByte();
                        switch (c)
                        {
                        case 'n':
                            c = Symbol.LineFeed;
                            break;

                        case 'r':
                            c = Symbol.CarriageReturn;
                            break;

                        case 't':
                            c = '\t';
                            break;

                        case 'b':
                            c = '\b';
                            break;

                        case 'f':
                            c = '\f';
                            break;

                        case Symbol.OpenRoundBracket:
                        case Symbol.CloseRoundBracket:
                        case '\\':
                            break;

                        case Symbol.CarriageReturn: // Escaped line break (CR or CR+LF): nothing is appended.
                            lineBreak = true;
                            c         = stream.ReadByte();
                            if (c != Symbol.LineFeed)
                            {
                                stream.Skip(-1);
                            }
                            break;

                        case Symbol.LineFeed: // Escaped line break (LF): nothing is appended.
                            lineBreak = true;
                            break;

                        default:
                        {
                            // Is it outside the octal encoding?
                            if (c < '0' || c > '7')
                            {
                                break;
                            }

                            // Octal [PDF:1.6:3.2.3].
                            int octal = c - '0';
                            c = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = octal & 0xff;
                            break;
                        }
                        }
                        if (lineBreak)
                        {
                            continue;
                        }
                        if (c == -1)
                        {
                            break;
                        }
                    }
                    else if (c == Symbol.CarriageReturn) // Unescaped line break: CR and CR+LF are appended as LF.
                    {
                        c = stream.ReadByte();
                        if (c == -1)
                        {
                            break;
                        }
                        if (c != Symbol.LineFeed)
                        {
                            c = Symbol.LineFeed; stream.Skip(-1);
                        }
                    }
                    if (level == -1)
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                if (c == -1)
                {
                    throw new FileFormatException("Malformed literal string.", stream.Position);
                }

                break;

            case Symbol.CapitalR: // Indirect reference.
                tokenType = TokenTypeEnum.Reference;
                break;

            case Symbol.Percent: // Comment [PDF:1.6:3.1.2].
                tokenType = TokenTypeEnum.Comment;

                buffer = new StringBuilder();
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1 ||
                        IsEOL(c))
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                break;

            default: // Keyword object.
                tokenType = TokenTypeEnum.Keyword;

                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                } while(!IsDelimiter(c) && !IsWhitespace(c));
                stream.Skip(-1); // Recover the first byte after the current token.
                break;
            }

            if (buffer != null)
            {
                /*
                 * Current token initialization.
                 */
                // Which token type?
                switch (tokenType)
                {
                case TokenTypeEnum.Keyword:
                    token = buffer.ToString();
                    // Late recognition.
                    switch ((string)token)
                    {
                    case Keyword.False:
                    case Keyword.True: // Boolean.
                        tokenType = TokenTypeEnum.Boolean;
                        token     = bool.Parse((string)token);
                        break;

                    case Keyword.Null: // Null.
                        tokenType = TokenTypeEnum.Null;
                        token     = null;
                        break;
                    }
                    break;

                case TokenTypeEnum.Comment:
                case TokenTypeEnum.Hex:
                case TokenTypeEnum.Name:
                    token = buffer.ToString();
                    break;

                case TokenTypeEnum.Literal:
                    token = buffer.ToString();
                    // Late recognition.
                    if (((string)token).StartsWith(Keyword.DatePrefix)) // Date.
                    {
                        tokenType = TokenTypeEnum.Date;
                        token     = PdfDate.ToDate((string)token);
                    }
                    break;

                case TokenTypeEnum.Integer:
                    token = Int32.Parse(
                        buffer.ToString(),
                        NumberStyles.Integer,
                        StandardNumberFormatInfo
                        );
                    break;

                case TokenTypeEnum.Real:
                    // [FIX:1668410] Parsing of float numbers was buggy (localized default number format).
                    token = Single.Parse(
                        buffer.ToString(),
                        NumberStyles.Float,
                        StandardNumberFormatInfo
                        );
                    break;
                }
            }
            return(true);
        }
Beispiel #6
0
 /**
  * <summary>Parses an index from the given stream, after moving its pointer to the given
  * offset.</summary>
  * <param name="stream">Stream to read from; its pointer is moved by this call.</param>
  * <param name="offset">Position passed to the stream's <code>Seek</code> before parsing.</param>
  * <returns>The result of the single-argument <code>Parse</code> overload applied to the
  * repositioned stream.</returns>
  */
 public static Index Parse(
     IInputStream stream,
     int offset
     )
 {
     stream.Seek(offset);

     Index parsed = Parse(stream);
     return(parsed);
 }