Exemplo n.º 1
0
        /// <summary>
        /// Parses the next value from the reader. When the value is a number that is followed by a
        /// second number, the pair is treated as an indirect reference ("number generation R") and
        /// the corresponding entry is fetched from the pool instead.
        /// </summary>
        /// <param name="reader">The source, positioned at the start of the value.</param>
        /// <param name="baseParser">The parser used to read each individual token.</param>
        /// <param name="pool">The pool the referenced object is retrieved from.</param>
        /// <returns>The parsed value, or the pooled object for an indirect reference.</returns>
        /// <exception cref="InvalidOperationException">
        /// The object number or the generation number of the reference is not an integer.
        /// </exception>
        private static CosBase ParseValue(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
        {
            var numOffset = reader.GetPosition();
            var value     = baseParser.Parse(reader, pool);

            ReadHelper.SkipSpaces(reader);

            // Only a number followed by another number can be the start of an indirect reference.
            if (!(value is ICosNumber) || !ReadHelper.IsDigit(reader))
            {
                return value;
            }

            // Read the generation number which follows the object number.
            var genOffset        = reader.GetPosition();
            var generationNumber = baseParser.Parse(reader, pool);

            ReadHelper.SkipSpaces(reader);
            ReadHelper.ReadExpectedChar(reader, 'R');

            if (!(value is CosInt objectNumber))
            {
                throw new InvalidOperationException("expected number, actual=" + value + " at offset " + numOffset);
            }

            if (!(generationNumber is CosInt generation))
            {
                // Report the offending generation token, not the (valid) object number.
                throw new InvalidOperationException("expected number, actual=" + generationNumber + " at offset " + genOffset);
            }

            var key = new CosObjectKey(objectNumber.AsLong(), generation.AsInt());

            // Dereference the object through the pool.
            return pool.Get(key);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Checks whether the object header built by <c>ObjectHelper.createObjectString</c> for
        /// <paramref name="objectKey"/> is found at <paramref name="offset"/> in the source.
        /// The source position is restored before returning.
        /// </summary>
        /// <param name="source">The source to inspect.</param>
        /// <param name="objectKey">The key whose number and generation are expected at the offset.</param>
        /// <param name="offset">The byte offset to test.</param>
        /// <returns><c>true</c> if the expected object header was found at the offset.</returns>
        private bool checkObjectKeys(IRandomAccessRead source, CosObjectKey objectKey, long offset)
        {
            // there can't be any object at the very beginning of a pdf
            if (offset < MINIMUM_SEARCH_OFFSET)
            {
                return false;
            }

            long   objectNr     = objectKey.Number;
            long   objectGen    = objectKey.Generation;
            long   originOffset = source.GetPosition();
            string objectString = ObjectHelper.createObjectString(objectNr, objectGen);

            try
            {
                source.Seek(offset);

                return ReadHelper.IsString(source, OtherEncodings.StringAsLatin1Bytes(objectString));
            }
            catch (InvalidOperationException)
            {
                // Deliberately swallowed: a read failure here means there is no valid object header.
                return false;
            }
            finally
            {
                // Single place where the original position is restored, on every exit path.
                source.Seek(originOffset);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Resolves the object identified by the given number and generation. A value already held
        /// by the pool is returned directly; otherwise the object is located through the cross
        /// reference table (and, in lenient mode, through the brute force searcher) and parsed.
        /// </summary>
        /// <param name="reader">The source to read the object from.</param>
        /// <param name="objectNumber">The object number.</param>
        /// <param name="objectGeneration">The generation number.</param>
        /// <param name="pool">The object pool; must not be null.</param>
        /// <param name="crossReferenceTable">The cross reference table; required when the pool holds no value.</param>
        /// <param name="bruteForceSearcher">Fallback locator; required in lenient mode when the table has no entry.</param>
        /// <param name="isLenient">Whether to fall back to brute force object locations.</param>
        /// <param name="requireExistingObject">
        /// Whether a missing or compressed (non-positive offset) table entry is an error.
        /// </param>
        /// <returns>The resolved object, or <c>CosNull.Null</c> when no location is known.</returns>
        /// <exception cref="ArgumentNullException">A required collaborator is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="requireExistingObject"/> is set and the entry is missing or compressed.
        /// </exception>
        public CosBase Parse(IRandomAccessRead reader, long objectNumber, int objectGeneration,
                             CosObjectPool pool, CrossReferenceTable crossReferenceTable,
                             BruteForceSearcher bruteForceSearcher,
                             bool isLenient,
                             bool requireExistingObject)
        {
            if (pool == null)
            {
                throw new ArgumentNullException(nameof(pool));
            }

            var key = new CosObjectKey(objectNumber, objectGeneration);

            var pooledValue = pool.GetOrCreateDefault(key).GetObject();

            if (pooledValue != null)
            {
                return pooledValue;
            }

            if (crossReferenceTable == null)
            {
                throw new ArgumentNullException(nameof(crossReferenceTable));
            }

            var offsetOrStreamNumber = TryGet(key, crossReferenceTable.ObjectOffsets);

            if (requireExistingObject && (offsetOrStreamNumber == null || offsetOrStreamNumber <= 0))
            {
                throw new InvalidOperationException("Object must be defined and not compressed: " + key);
            }

            if (isLenient && offsetOrStreamNumber == null)
            {
                // Validated lazily, like the cross reference table: only needed on this path.
                if (bruteForceSearcher == null)
                {
                    throw new ArgumentNullException(nameof(bruteForceSearcher));
                }

                var locations = bruteForceSearcher.GetObjectLocations();

                offsetOrStreamNumber = TryGet(key, locations);

                if (offsetOrStreamNumber != null)
                {
                    // Remember the recovered location in the table.
                    crossReferenceTable.UpdateOffset(key, offsetOrStreamNumber.Value);
                }
            }

            if (offsetOrStreamNumber == null)
            {
                return CosNull.Null;
            }

            // A non-positive entry holds the negated number of the containing object stream.
            var isCompressedStreamObject = offsetOrStreamNumber <= 0;

            if (!isCompressedStreamObject)
            {
                return ParseObjectFromFile(offsetOrStreamNumber.Value, reader, key, pool, isLenient);
            }

            return ParseCompressedStreamObject(reader, -offsetOrStreamNumber.Value, objectNumber, pool, crossReferenceTable, bruteForceSearcher, isLenient);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads the object stored at <paramref name="offset"/>, verifying that its header matches
        /// <paramref name="key"/> and that it is terminated by "endobj".
        /// </summary>
        /// <param name="offset">The byte offset of the object header.</param>
        /// <param name="reader">The source to read from.</param>
        /// <param name="key">The key the header at the offset is expected to carry.</param>
        /// <param name="pool">The pool handed to the base parser.</param>
        /// <param name="isLenientParsing">When set, a missing "endobj" is logged instead of thrown.</param>
        /// <returns>The parsed object (a stream object if the value is followed by "stream").</returns>
        private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader,
                                            CosObjectKey key,
                                            CosObjectPool pool,
                                            bool isLenientParsing)
        {
            reader.Seek(offset);

            var number     = ObjectHelper.ReadObjectNumber(reader);
            var generation = ObjectHelper.ReadGenerationNumber(reader);

            ReadHelper.ReadExpectedString(reader, "obj", true);

            // The header found at the offset has to be the one the table promised.
            if (number != key.Number || generation != key.Generation)
            {
                throw new InvalidOperationException($"Xref for {key} points to object {number} {generation} at {offset}");
            }

            ReadHelper.SkipSpaces(reader);

            var parsed     = baseParser.Parse(reader, pool);
            var terminator = ReadHelper.ReadString(reader);

            if (string.Equals(terminator, "stream"))
            {
                // Step back over the keyword so the stream reader sees it again.
                reader.Rewind(OtherEncodings.StringAsLatin1Bytes(terminator).Length);

                parsed = ReadNormalObjectStream(reader, parsed, offset, isLenientParsing, out terminator);
            }

            if (string.Equals(terminator, "endobj"))
            {
                return parsed;
            }

            var message =
                $"Object ({number}:{generation}) at offset {offset} does not end with \'endobj\' but with \'{terminator}\'";

            if (!isLenientParsing)
            {
                throw new InvalidOperationException(message);
            }

            log.Warn(message);

            return parsed;
        }
Exemplo n.º 5
0
        // Two separately constructed keys with the same number and generation must address
        // the same dictionary entry.
        public void CanLookupInDictionary()
        {
            var storedKey = new CosObjectKey(3, 0);
            var lookupKey = new CosObjectKey(3, 0);

            var offsets = new Dictionary <CosObjectKey, long>();
            offsets.Add(storedKey, 5);

            var found = offsets[lookupKey];

            Assert.Equal(5, found);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Resolves the object an indirect reference points to. A value already held by the object
        /// pool is returned directly; otherwise the object is located via the cross reference table
        /// (and, in lenient mode, the brute force searcher) and parsed.
        /// </summary>
        /// <param name="indirectReference">The reference to resolve.</param>
        /// <param name="reader">The source to read the object from.</param>
        /// <param name="isLenientParsing">Whether unknown objects resolve to <c>CosNull.Null</c> instead of throwing.</param>
        /// <param name="requireExistingObject">Whether a missing or compressed table entry is an error.</param>
        /// <returns>The resolved object.</returns>
        public CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false)
        {
            var key = new CosObjectKey(indirectReference.ObjectNumber, indirectReference.Generation);

            var pooled = objectPool.GetOrCreateDefault(key);

            if (pooled.GetObject() != null)
            {
                return pooled.GetObject();
            }

            var location = TryGet(key, crossReferenceTable.ObjectOffsets);

            var missingOrCompressed = location == null || location <= 0;

            if (requireExistingObject && missingOrCompressed)
            {
                throw new InvalidOperationException("Object must be defined and not compressed: " + key);
            }

            if (location == null && isLenientParsing)
            {
                // Fall back to the locations discovered by scanning the file.
                location = TryGet(key, bruteForceSearcher.GetObjectLocations());

                if (location != null)
                {
                    crossReferenceTable.UpdateOffset(key, location.Value);
                }
            }

            if (location == null)
            {
                if (!isLenientParsing)
                {
                    throw new InvalidOperationException($"Could not locate the object {key.Number} which was not found in the cross reference table.");
                }

                return CosNull.Null;
            }

            // A non-positive entry holds the negated number of the containing object stream.
            if (location <= 0)
            {
                return ParseCompressedStreamObject(reader, -location.Value, indirectReference.ObjectNumber, isLenientParsing);
            }

            return ParseObjectFromFile(location.Value, reader, key, objectPool, isLenientParsing);
        }
Exemplo n.º 7
0
 /// <summary>
 /// Adds an entry to the xref table of the current XRef trailer object, mapping an object key
 /// to a byte offset in the file. An entry that already exists for the key is kept as it is.
 /// </summary>
 /// <param name="objKey">The object key, with id and generation numbers.</param>
 /// <param name="offset">The byte offset in this file.</param>
 public void setXRef(CosObjectKey objKey, long offset)
 {
     // Nothing to do when there is no current trailer object (should not happen), and
     // PDFBOX-3506: an existing entry must not be overwritten by an obsolete entry from a
     // hybrid file (/XRefStm entry).
     if (curXrefTrailerObj == null || curXrefTrailerObj.xrefTable.ContainsKey(objKey))
     {
         return;
     }

     curXrefTrailerObj.xrefTable[objKey] = offset;
 }
Exemplo n.º 8
0
 /// <summary>
 /// Verifies that every non-negative offset in the given table passes <c>checkObjectKeys</c>
 /// for its object key.
 /// </summary>
 /// <param name="reader">The source the offsets refer to.</param>
 /// <param name="xrefOffset">The key-to-offset table to verify; may be null.</param>
 /// <returns>
 /// <c>true</c> if the table is null or all checked offsets are valid; <c>false</c> as soon as
 /// one offset fails the check.
 /// </returns>
 private bool validateXrefOffsets(IRandomAccessRead reader, Dictionary <CosObjectKey, long> xrefOffset)
 {
     if (xrefOffset == null)
     {
         return true;
     }

     foreach (var objectEntry in xrefOffset)
     {
         long objectOffset = objectEntry.Value;

         // a negative offset number represents a object number itself
         // see type 2 entry in xref stream
         if (objectOffset >= 0 && !checkObjectKeys(reader, objectEntry.Key, objectOffset))
         {
             // Stop checking: at least one entry could not be dereferenced.
             return false;
         }
     }

     return true;
 }
Exemplo n.º 9
0
        /// <summary>
        /// Resolves an object stored inside an object stream: parses the containing stream,
        /// registers the objects it holds with the pool and returns the requested one.
        /// </summary>
        /// <param name="reader">The source to read from.</param>
        /// <param name="streamObjectNumber">The object number of the containing stream.</param>
        /// <param name="requestedNumber">The object number of the object to return.</param>
        /// <param name="objectPool">The pool receiving the contained objects.</param>
        /// <param name="crossReferenceTable">The table consulted for the contained objects' entries.</param>
        /// <param name="bruteForceSearcher">Passed through to the parse of the containing stream.</param>
        /// <param name="isLenientParsing">Passed through to the parse of the containing stream.</param>
        /// <returns>The requested object, or <c>CosNull.Null</c> if the stream or the object is not found.</returns>
        private CosBase ParseCompressedStreamObject(IRandomAccessRead reader, long streamObjectNumber, long requestedNumber, CosObjectPool objectPool, CrossReferenceTable crossReferenceTable, BruteForceSearcher bruteForceSearcher, bool isLenientParsing)
        {
            var parsedContainer = Parse(reader, streamObjectNumber, 0, objectPool, crossReferenceTable, bruteForceSearcher,
                                        isLenientParsing, true);

            var stream = parsedContainer as PdfRawStream;

            if (stream == null)
            {
                log.Warn($"Could not find a stream for the object number, defaults to returning CosNull: {streamObjectNumber}");

                return CosNull.Null;
            }

            var containedObjects = objectStreamParser.Parse(stream, objectPool);

            // Table entries of objects held in this stream carry the negated stream number.
            var expectedEntry = -streamObjectNumber;

            foreach (var contained in containedObjects)
            {
                var containedKey  = new CosObjectKey(contained);
                var recordedEntry = TryGet(containedKey, crossReferenceTable.ObjectOffsets);

                if (recordedEntry == null || recordedEntry != expectedEntry)
                {
                    continue;
                }

                // Register the object which the table says lives in this stream.
                objectPool.Get(containedKey).SetObject(contained.GetObject());
            }

            foreach (var candidate in containedObjects)
            {
                if (candidate.GetObjectNumber() == requestedNumber)
                {
                    return candidate;
                }
            }

            log.Error($"Could not find the object {requestedNumber} in the stream for object {streamObjectNumber}. Returning CosNull.");

            return CosNull.Null;
        }
Exemplo n.º 10
0
 /// <summary>
 /// Convenience overload which unpacks the reader, object pool, cross reference table,
 /// brute force searcher and leniency flag from <paramref name="arguments"/> and forwards
 /// them, together with the key's number and generation, to the detailed overload.
 /// </summary>
 /// <param name="arguments">The bundle of parsing collaborators.</param>
 /// <param name="key">The key of the object to resolve.</param>
 /// <param name="requiresExistingObject">Forwarded unchanged to the detailed overload.</param>
 /// <returns>The result of the detailed overload.</returns>
 public CosBase Parse(ParsingArguments arguments, CosObjectKey key, bool requiresExistingObject)
 {
     var reader     = arguments.Reader;
     var number     = key.Number;
     var generation = (int)key.Generation;
     var pool       = arguments.CachingProviders.ObjectPool;
     var table      = arguments.CrossReferenceTable;
     var searcher   = arguments.CachingProviders.BruteForceSearcher;
     var lenient    = arguments.IsLenientParsing;

     return Parse(reader, number, generation, pool, table, searcher, lenient, requiresExistingObject);
 }
Exemplo n.º 11
0
        /// <summary>
        /// Parses an array starting at '[' and collects its items until the closing ']'.
        /// When the base parser yields a <c>CosObject</c> and the two most recently collected
        /// items are integers, they are replaced by the pooled object they reference.
        /// </summary>
        /// <param name="reader">The source, positioned at the opening '['.</param>
        /// <param name="baseParser">The parser used for each array item.</param>
        /// <param name="pool">The pool used to resolve indirect references.</param>
        /// <returns>
        /// The parsed array. Parsing stops early, returning what was collected so far, when
        /// "endobj" or "endstream" is encountered in place of an item.
        /// </returns>
        public COSArray Parse(IRandomAccessRead reader, CosBaseParser baseParser, CosObjectPool pool)
        {
            ReadHelper.ReadExpectedChar(reader, '[');
            var po = new COSArray();

            ReadHelper.SkipSpaces(reader);
            int i;

            while (((i = reader.Peek()) > 0) && ((char)i != ']'))
            {
                CosBase pbo = baseParser.Parse(reader, pool);
                if (pbo is CosObject)
                {
                    // We have to check if the expected values are there or not PDFBOX-385.
                    // The size checks prevent indexing at -1 when fewer than two items
                    // precede the reference marker.
                    if (po.size() > 0 && po.get(po.size() - 1) is CosInt)
                    {
                        var genNumber = (CosInt)po.remove(po.size() - 1);
                        if (po.size() > 0 && po.get(po.size() - 1) is CosInt)
                        {
                            var          number = (CosInt)po.remove(po.size() - 1);
                            CosObjectKey key    = new CosObjectKey(number.AsLong(), genNumber.AsInt());
                            pbo = pool.Get(key);
                        }
                        else
                        {
                            // the object reference is somehow wrong
                            pbo = null;
                        }
                    }
                    else
                    {
                        pbo = null;
                    }
                }
                if (pbo != null)
                {
                    po.add(pbo);
                }
                else
                {
                    // It could be a bad object in the array which is just skipped.

                    // This could also be an "endobj" or "endstream" which means we can assume that
                    // the array has ended. The token is pushed back so the caller can read it.
                    string isThisTheEnd = ReadHelper.ReadString(reader);
                    reader.Unread(OtherEncodings.StringAsLatin1Bytes(isThisTheEnd));
                    if (string.Equals(isThisTheEnd, "endobj") || string.Equals(isThisTheEnd, "endstream"))
                    {
                        return po;
                    }
                }

                ReadHelper.SkipSpaces(reader);
            }
            // read ']'
            reader.Read();
            ReadHelper.SkipSpaces(reader);
            return po;
        }
Exemplo n.º 12
0
        /// <summary>
        /// Checks the offsets of the cross reference table and, if at least one is invalid, merges
        /// the offsets found by a brute force search into a copy of the table's offsets.
        /// Does nothing in non-lenient mode.
        /// </summary>
        /// <param name="reader">The source the offsets refer to.</param>
        /// <param name="xrefTrailerResolver">The cross reference table whose offsets are checked.</param>
        /// <param name="isLenientParsing">Repair is only attempted when this is set.</param>
        /// <remarks>
        /// NOTE(review): the merge operates on a copy created with ToDictionary and nothing in this
        /// method writes the result back to <paramref name="xrefTrailerResolver"/> - confirm whether
        /// that is intended.
        /// </remarks>
        public void checkXrefOffsets(IRandomAccessRead reader, CrossReferenceTable xrefTrailerResolver, bool isLenientParsing)
        {
            // repair mode isn't available in non-lenient mode
            if (!isLenientParsing)
            {
                return;
            }
            Dictionary <CosObjectKey, long> xrefOffset = xrefTrailerResolver.ObjectOffsets.ToDictionary(x => x.Key, x => x.Value);

            if (validateXrefOffsets(reader, xrefOffset))
            {
                return;
            }

            Dictionary <CosObjectKey, long> bfCOSObjectKeyOffsets = getBFCosObjectOffsets(reader);

            if (bfCOSObjectKeyOffsets.Count > 0)
            {
                List <CosObjectKey> objStreams = new List <CosObjectKey>();
                // find all object streams: a negative offset is the negated object stream number
                foreach (var entry in xrefOffset)
                {
                    long offset = entry.Value;
                    if (offset < 0)
                    {
                        CosObjectKey objStream = new CosObjectKey(-offset, 0);
                        if (!objStreams.Contains(objStream))
                        {
                            objStreams.Add(objStream);
                        }
                    }
                }
                // remove all found object streams
                if (objStreams.Count > 0)
                {
                    foreach (CosObjectKey key in objStreams)
                    {
                        if (bfCOSObjectKeyOffsets.ContainsKey(key))
                        {
                            // remove all parsed objects which are part of an object stream
                            //ISet<long> objects = xrefTrailerResolver
                            //    .getContainedObjectNumbers((int)(key.Number));
                            //foreach (long objNr in objects)
                            //{
                            //    CosObjectKey streamObjectKey = new CosObjectKey(objNr, 0);

                            //    if (bfCOSObjectKeyOffsets.TryGetValue(streamObjectKey, out long streamObjectOffset) && streamObjectOffset > 0)
                            //    {
                            //        bfCOSObjectKeyOffsets.Remove(streamObjectKey);
                            //    }
                            //}
                        }
                        else
                        {
                            // remove all objects which are part of an object stream which wasn't found
                            //ISet<long> objects = xrefTrailerResolver
                            //    .getContainedObjectNumbers((int)(key.Number));
                            //foreach (long objNr in objects)
                            //{
                            //    xrefOffset.Remove(new CosObjectKey(objNr, 0));
                            //}
                        }
                    }
                }

                // Overwrite existing entries with the brute force results. Using Add here would
                // throw for every key which is already present in the table.
                foreach (var item in bfCOSObjectKeyOffsets)
                {
                    xrefOffset[item.Key] = item.Value;
                }
            }
        }