Example #1
0
        /// <summary>
        /// Reads an indirect PDF object from the input stream.
        /// </summary>
        /// <param name="pdfObject">Either the instance of a derived type or null. If it is null
        /// an appropriate object is created. The passed instance is only reused when the object
        /// in the file is an array or a dictionary; for every other object type a new instance
        /// is created.</param>
        /// <param name="objectID">The address of the object. Its object and generation number
        /// are assigned to the returned object, even if the numbers found in the file differ.</param>
        /// <param name="includeReferences">If true, specifies that all indirect objects
        /// are included recursively.</param>
        /// <returns>The object that was read, with its object ID set.</returns>
        /// <exception cref="NotImplementedException">The first token of the object body does
        /// not start a supported object type.</exception>
        /// <exception cref="PdfReaderException">A stream follows an object that is not a
        /// dictionary, or an array or dictionary is not followed by the EndObj symbol.</exception>
        public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool includeReferences)
        {
            MoveToObject(objectID);
            int objectNumber     = ReadInteger();
            int generationNumber = ReadInteger();

#if DEBUG
            // The following assertion sometimes failed (see below)
            //Debug.Assert(objectID == new PdfObjectID(objectNumber, generationNumber));
            if (objectID != new PdfObjectID(objectNumber, generationNumber))
            {
                // A special kind of bug? Or is this an undocumented PDF feature?
                // PDF4NET 2.6 provides a sample called 'Unicode', which produces a file 'unicode.pdf'
                // The iref table of this file contains the following entries:
                //    iref
                //    0 148
                //    0000000000 65535 f
                //    0000000015 00000 n
                //    0000000346 00000 n
                //    ....
                //    0000083236 00000 n
                //    0000083045 00000 n
                //    0000083045 00000 n
                //    0000083045 00000 n
                //    0000083045 00000 n
                //    0000080334 00000 n
                //    ....
                // Objects 84, 85, 86, and 87 map to the same dictionary, but all PDF readers I tested
                // ignore this mismatch! The following assertion failed about 50 times with this file.
#if true_
                string message = String.Format("xref entry {0} {1} maps to object {2} {3}.",
                                               objectID.ObjectNumber, objectID.GenerationNumber, objectNumber, generationNumber);
                Debug.Assert(false, message);
#endif
            }
#endif
            // Always use the object ID from the iref table (see above). The numbers read from
            // the file only serve the consistency check in DEBUG builds.
            objectNumber     = objectID.ObjectNumber;
            generationNumber = objectID.GenerationNumber;
#if true_
            Debug.WriteLine(String.Format("obj: {0} {1}", objectNumber, generationNumber));
#endif
            ReadSymbol(Symbol.Obj);

            // Set only for dictionaries, the sole object type that may be followed by a stream.
            bool      checkForStream = false;
            // Set for objects that consist of a single token (null, boolean, number, string, name).
            PdfObject simpleObject   = null;
            Symbol    symbol         = ScanNextToken();
            switch (symbol)
            {
            case Symbol.BeginArray:
                PdfArray array = pdfObject == null ? new PdfArray(this.document) : (PdfArray)pdfObject;
                pdfObject = ReadArray(array, includeReferences);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                break;

            case Symbol.BeginDictionary:
                PdfDictionary newDict = pdfObject == null ? new PdfDictionary(this.document) : (PdfDictionary)pdfObject;
                checkForStream = true;
                pdfObject      = ReadDictionary(newDict, includeReferences);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                break;

            // Acrobat 6 Professional proudly presents: The Null object!
            // Even with a one-digit object number an indirect reference 'x 0 R' to this object is
            // one character larger than the direct use of 'null'. Probably this is the reason why
            // it is true that Acrobat Web Capture 6.0 creates this object, but obviously never
            // creates a reference to it!
            case Symbol.Null:
                simpleObject = new PdfNullObject(this.document);
                break;

            case Symbol.Boolean:
                // Ordinal comparison: the token is a PDF keyword, not text in the user's language,
                // so the result must not depend on the current culture.
                simpleObject = new PdfBooleanObject(this.document,
                    String.Compare(this.lexer.Token, Boolean.TrueString, StringComparison.OrdinalIgnoreCase) == 0);
                break;

            case Symbol.Integer:
                simpleObject = new PdfIntegerObject(this.document, this.lexer.TokenToInteger);
                break;

            case Symbol.UInteger:
                simpleObject = new PdfUIntegerObject(this.document, this.lexer.TokenToUInteger);
                break;

            case Symbol.Real:
                simpleObject = new PdfRealObject(this.document, this.lexer.TokenToReal);
                break;

            case Symbol.String:
                simpleObject = new PdfStringObject(this.document, this.lexer.Token);
                break;

            case Symbol.Name:
                simpleObject = new PdfNameObject(this.document, this.lexer.Token);
                break;

            case Symbol.Keyword:
                // Should not come here anymore
                throw new NotImplementedException("Keyword");

            default:
                // Should not come here anymore
                throw new NotImplementedException("unknown token \"" + symbol + "\"");
            }

            // Single-token objects are complete at this point; only the EndObj symbol remains.
            if (simpleObject != null)
            {
                simpleObject.SetObjectID(objectNumber, generationNumber);
                ReadSymbol(Symbol.EndObj);
                return simpleObject;
            }

            // Arrays and dictionaries: look at what follows the closing bracket.
            symbol = ScanNextToken();
            if (symbol == Symbol.BeginStream)
            {
                // Only a dictionary can own a stream. Report a stream after an array as a
                // reader error instead of failing with an InvalidCastException on the cast below.
                if (!checkForStream)
                {
                    throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
                }
                PdfDictionary dict   = (PdfDictionary)pdfObject;
                int           length = GetStreamLength(dict);
                byte[]        bytes  = this.lexer.ReadStream(length);
#if true_
                if (dict.Elements.GetString("/Filter") == "/FlateDecode")
                {
                    if (dict.Elements["/Subtype"] == null)
                    {
                        try
                        {
                            byte[] decoded = Filtering.FlateDecode.Decode(bytes);
                            if (decoded.Length == 0)
                            {
                                goto End;
                            }
                            string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
                            if (pageContent.Length > 100)
                            {
                                pageContent = pageContent.Substring(pageContent.Length - 100);
                            }
                            pageContent.GetType();
                            bytes = decoded;
                            dict.Elements.Remove("/Filter");
                            dict.Elements.SetInteger("/Length", bytes.Length);
                        }
                        catch
                        {
                        }
                    }
                    End :;
                }
#endif
                dict.Stream = new PdfDictionary.PdfStream(bytes, dict);
                ReadSymbol(Symbol.EndStream);
                symbol = ScanNextToken();
            }
            if (symbol != Symbol.EndObj)
            {
                throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
            }
            return pdfObject;
        }
Example #2
0
        /// <summary>
        /// Reads a PDF object from the input stream.
        /// </summary>
        /// <param name="pdfObject">Either the instance of a derived type or null. If it is null
        /// an appropriate object is created. The passed instance is only reused when the object
        /// being read is an array or a dictionary; for every other object type a new instance
        /// is created.</param>
        /// <param name="objectID">The address of the object. Its object and generation number
        /// are assigned to the returned object, even if the numbers found in the file differ.</param>
        /// <param name="includeReferences">If true, specifies that all indirect objects
        /// are included recursively.</param>
        /// <param name="fromObjecStream">If true, the object is parsed from an object stream.
        /// In that case the reader is not moved to the object's position, and neither the
        /// object number, the generation number, the Obj symbol nor the EndObj symbol is read
        /// or checked.</param>
        /// <returns>The object that was read, with its object ID set.</returns>
        public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool includeReferences, bool fromObjecStream)
        {
#if DEBUG_
            Debug.WriteLine("ReadObject: " + objectID);
            if (objectID.ObjectNumber == 20)
                GetType();
#endif
            int objectNumber = objectID.ObjectNumber;
            int generationNumber = objectID.GenerationNumber;
            if (!fromObjecStream)
            {
                // Position the reader on the object and consume the two numbers that precede
                // the Obj symbol. They are only used by the DEBUG consistency check below.
                MoveToObject(objectID);
                objectNumber = ReadInteger();
                generationNumber = ReadInteger();
            }
#if DEBUG
            // The following assertion sometimes failed (see below)
            //Debug.Assert(objectID == new PdfObjectID(objectNumber, generationNumber));
            if (!fromObjecStream && objectID != new PdfObjectID(objectNumber, generationNumber))
            {
                // A special kind of bug? Or is this an undocumented PDF feature?
                // PDF4NET 2.6 provides a sample called 'Unicode', which produces a file 'unicode.pdf'
                // The iref table of this file contains the following entries:
                //    iref
                //    0 148
                //    0000000000 65535 f
                //    0000000015 00000 n
                //    0000000346 00000 n
                //    ....
                //    0000083236 00000 n
                //    0000083045 00000 n
                //    0000083045 00000 n
                //    0000083045 00000 n
                //    0000083045 00000 n
                //    0000080334 00000 n
                //    ....
                // Objects 84, 85, 86, and 87 map to the same dictionary, but all PDF readers I tested
                // ignore this mismatch! The following assertion failed about 50 times with this file.
#if true_
                string message = String.Format("xref entry {0} {1} maps to object {2} {3}.",
                    objectID.ObjectNumber, objectID.GenerationNumber, objectNumber, generationNumber);
                Debug.Assert(false, message);
#endif
            }
#endif
            // Always use object ID from iref table (see above).
            objectNumber = objectID.ObjectNumber;
            generationNumber = objectID.GenerationNumber;
#if true_
            Debug.WriteLine(String.Format("obj: {0} {1}", objectNumber, generationNumber));
#endif
            if (!fromObjecStream)
                ReadSymbol(Symbol.Obj);

            // Set only for dictionaries, the sole object type that may be followed by a stream.
            bool checkForStream = false;
            // Set for objects that consist of a single token (null, boolean, number, string, name).
            PdfObject simpleObject = null;
            Symbol symbol = ScanNextToken();
            switch (symbol)
            {
                case Symbol.BeginArray:
                    PdfArray array = pdfObject == null ? new PdfArray(_document) : (PdfArray)pdfObject;
                    pdfObject = ReadArray(array, includeReferences);
                    pdfObject.SetObjectID(objectNumber, generationNumber);
                    break;

                case Symbol.BeginDictionary:
                    PdfDictionary newDict = pdfObject == null ? new PdfDictionary(_document) : (PdfDictionary)pdfObject;
                    checkForStream = true;
                    pdfObject = ReadDictionary(newDict, includeReferences);
                    pdfObject.SetObjectID(objectNumber, generationNumber);
                    break;

                // Acrobat 6 Professional proudly presents: The Null object!
                // Even with a one-digit object number an indirect reference «x 0 R» to this object is
                // one character larger than the direct use of «null». Probably this is the reason why
                // it is true that Acrobat Web Capture 6.0 creates this object, but obviously never
                // creates a reference to it!
                case Symbol.Null:
                    simpleObject = new PdfNullObject(_document);
                    break;

                case Symbol.Boolean:
                    simpleObject = new PdfBooleanObject(_document,
                        String.Compare(_lexer.Token, Boolean.TrueString, StringComparison.OrdinalIgnoreCase) == 0);
                    break;

                case Symbol.Integer:
                    simpleObject = new PdfIntegerObject(_document, _lexer.TokenToInteger);
                    break;

                case Symbol.UInteger:
                    simpleObject = new PdfUIntegerObject(_document, _lexer.TokenToUInteger);
                    break;

                case Symbol.Real:
                    simpleObject = new PdfRealObject(_document, _lexer.TokenToReal);
                    break;

                case Symbol.String:
                    simpleObject = new PdfStringObject(_document, _lexer.Token);
                    break;

                case Symbol.Name:
                    simpleObject = new PdfNameObject(_document, _lexer.Token);
                    break;

                case Symbol.Keyword:
                default:
                    // Should not come here anymore.
                    ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
                    break;
            }

            // Single-token objects are complete at this point; outside of an object stream
            // only the EndObj symbol remains to be consumed.
            if (simpleObject != null)
            {
                simpleObject.SetObjectID(objectNumber, generationNumber);
                if (!fromObjecStream)
                    ReadSymbol(Symbol.EndObj);
                return simpleObject;
            }

            symbol = ScanNextToken();
            if (symbol == Symbol.BeginStream)
            {
                // Only a dictionary can own a stream. Report a stream after anything else as a
                // parser error before the cast below can fail with an InvalidCastException.
                // NOTE(review): ThrowParserException presumably always throws - confirm.
                if (!checkForStream)
                    ParserDiagnostics.ThrowParserException(PSSR.UnexpectedToken(_lexer.Token));

                PdfDictionary dict = (PdfDictionary)pdfObject;
                Debug.Assert(checkForStream, "Unexpected stream...");
#if true_
                ReadStream(dict);
#else
                int length = GetStreamLength(dict);
                byte[] bytes = _lexer.ReadStream(length);
#if true_
                if (dict.Elements.GetString("/Filter") == "/FlateDecode")
                {
                    if (dict.Elements["/Subtype"] == null)
                    {
                        try
                        {
                            byte[] decoded = Filtering.FlateDecode.Decode(bytes);
                            if (decoded.Length == 0)
                                goto End;
                            string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
                            if (pageContent.Length > 100)
                                pageContent = pageContent.Substring(pageContent.Length - 100);
                            pageContent.GetType();
                            bytes = decoded;
                            dict.Elements.Remove("/Filter");
                            dict.Elements.SetInteger("/Length", bytes.Length);
                        }
                        catch
                        {
                        }
                    }
                End: ;
                }
#endif
                dict.Stream = new PdfDictionary.PdfStream(bytes, dict);
                ReadSymbol(Symbol.EndStream);
                symbol = ScanNextToken();
#endif
            }
            // Inside an object stream there is no EndObj symbol to verify.
            if (!fromObjecStream && symbol != Symbol.EndObj)
                ParserDiagnostics.ThrowParserException(PSSR.UnexpectedToken(_lexer.Token));
            return pdfObject;
        }