/// <summary>
/// Parses one value from <paramref name="reader"/>. When the value is a number that is
/// directly followed by another number, the pair is read as an indirect reference
/// ("number generation R") and the pool entry for that key is returned instead.
/// </summary>
/// <param name="reader">Source positioned at the start of the value.</param>
/// <param name="baseParser">Parser used for the value and, if present, the generation number.</param>
/// <param name="pool">Pool handed to the parser and queried for the referenced object.</param>
/// <returns>The parsed value, or the result of <c>pool.Get</c> for the reference key.</returns>
/// <exception cref="InvalidOperationException">
/// The object number or the generation number of the reference is not a <c>CosInt</c>.
/// </exception>
private static CosBase ParseValue(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
{
    var numOffset = reader.GetPosition();
    var value = baseParser.Parse(reader, pool);
    ReadHelper.SkipSpaces(reader);

    // Proceed only if the given object is a number and the following is a number as well.
    if (!(value is ICosNumber) || !ReadHelper.IsDigit(reader))
    {
        return value;
    }

    // Read the remaining information of the object number.
    var genOffset = reader.GetPosition();
    var generationNumber = baseParser.Parse(reader, pool);
    ReadHelper.SkipSpaces(reader);
    ReadHelper.ReadExpectedChar(reader, 'R');

    if (!(value is CosInt objectNumber))
    {
        throw new InvalidOperationException("expected number, actual=" + value + " at offset " + numOffset);
    }

    if (!(generationNumber is CosInt generation))
    {
        // Report the generation token itself; the object number was already validated above.
        throw new InvalidOperationException("expected number, actual=" + generationNumber + " at offset " + genOffset);
    }

    var key = new CosObjectKey(objectNumber.AsLong(), generation.AsInt());

    // Dereference the object through the pool.
    return pool.Get(key);
}
/// <summary>
/// Checks whether the text produced by <c>ObjectHelper.createObjectString</c> for
/// <paramref name="objectKey"/> is found at <paramref name="offset"/> in the source.
/// The position of <paramref name="source"/> is restored before returning.
/// </summary>
/// <param name="source">Source to inspect.</param>
/// <param name="objectKey">Key whose number and generation form the expected text.</param>
/// <param name="offset">Position at which the object is expected to start.</param>
/// <returns>
/// <c>true</c> when the expected text is found; <c>false</c> when it is not, when
/// <paramref name="offset"/> is below <c>MINIMUM_SEARCH_OFFSET</c>, or when the check
/// raises an <see cref="InvalidOperationException"/>.
/// </returns>
private bool checkObjectKeys(IRandomAccessRead source, CosObjectKey objectKey, long offset)
{
    // There can't be any object at the very beginning of a pdf.
    if (offset < MINIMUM_SEARCH_OFFSET)
    {
        return false;
    }

    long objectNr = objectKey.Number;
    long objectGen = objectKey.Generation;
    long originOffset = source.GetPosition();
    string objectString = ObjectHelper.createObjectString(objectNr, objectGen);

    try
    {
        source.Seek(offset);
        return ReadHelper.IsString(source, OtherEncodings.StringAsLatin1Bytes(objectString));
    }
    catch (InvalidOperationException)
    {
        // Deliberately swallowed: a failure here means there is no valid object number at this offset.
        return false;
    }
    finally
    {
        // Single restore point for every exit path.
        source.Seek(originOffset);
    }
}
/// <summary>
/// Returns the object identified by the given number and generation, using the pooled
/// value when one is already set and otherwise locating it through the cross reference
/// table (and, in lenient mode, the brute force searcher).
/// </summary>
/// <param name="reader">Source the object is read from.</param>
/// <param name="objectNumber">Object number of the requested object.</param>
/// <param name="objectGeneration">Generation of the requested object.</param>
/// <param name="pool">Pool holding already parsed objects; must not be null.</param>
/// <param name="crossReferenceTable">Table of object offsets; must not be null once the pool has no value.</param>
/// <param name="bruteForceSearcher">Fallback source of object locations, consulted only in lenient mode.</param>
/// <param name="isLenient">Whether the brute force fallback may be used.</param>
/// <param name="requireExistingObject">Whether a missing or non-positive table entry is an error.</param>
/// <returns>The object, or <c>CosNull.Null</c> when no location is known for it.</returns>
/// <exception cref="ArgumentNullException"><paramref name="pool"/> or <paramref name="crossReferenceTable"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// <paramref name="requireExistingObject"/> is set and the table has no positive entry for the key.
/// </exception>
public CosBase Parse(IRandomAccessRead reader, long objectNumber, int objectGeneration, CosObjectPool pool, CrossReferenceTable crossReferenceTable, BruteForceSearcher bruteForceSearcher, bool isLenient, bool requireExistingObject)
{
    if (pool == null)
    {
        throw new ArgumentNullException(nameof(pool));
    }

    var key = new CosObjectKey(objectNumber, objectGeneration);

    // A value already stored in the pool wins over any lookup.
    var cached = pool.GetOrCreateDefault(key).GetObject();
    if (cached != null)
    {
        return cached;
    }

    if (crossReferenceTable == null)
    {
        throw new ArgumentNullException(nameof(crossReferenceTable));
    }

    var location = TryGet(key, crossReferenceTable.ObjectOffsets);

    if (requireExistingObject)
    {
        if (location == null || location <= 0)
        {
            throw new InvalidOperationException("Object must be defined and not compressed: " + key);
        }
    }

    if (location == null && isLenient)
    {
        // Fall back to the brute force results and record a hit in the table.
        location = TryGet(key, bruteForceSearcher.GetObjectLocations());

        if (location != null)
        {
            crossReferenceTable.UpdateOffset(key, location.Value);
        }
    }

    if (location == null)
    {
        return CosNull.Null;
    }

    // A non-positive entry is the negated number of the stream that contains the object.
    var storedInObjectStream = location <= 0;
    if (storedInObjectStream)
    {
        return ParseCompressedStreamObject(reader, -location.Value, objectNumber, pool, crossReferenceTable, bruteForceSearcher, isLenient);
    }

    return ParseObjectFromFile(location.Value, reader, key, pool, isLenient);
}
/// <summary>
/// Reads the object located at <paramref name="offset"/>: verifies that its header matches
/// <paramref name="key"/>, parses the body, reads a stream when the body is followed by
/// "stream", and checks for the closing "endobj".
/// </summary>
/// <param name="offset">Position of the object header in the source.</param>
/// <param name="reader">Source to read from.</param>
/// <param name="key">Key the header at <paramref name="offset"/> must match.</param>
/// <param name="pool">Pool passed on to the base parser.</param>
/// <param name="isLenientParsing">When set, a missing "endobj" is logged instead of thrown.</param>
/// <returns>The parsed object, or the result of reading the stream when one follows.</returns>
/// <exception cref="InvalidOperationException">
/// The header does not match <paramref name="key"/>, or the object does not end with
/// "endobj" and <paramref name="isLenientParsing"/> is false.
/// </exception>
private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader, CosObjectKey key, CosObjectPool pool, bool isLenientParsing)
{
    reader.Seek(offset);

    var readNumber = ObjectHelper.ReadObjectNumber(reader);
    var readGeneration = ObjectHelper.ReadGenerationNumber(reader);
    ReadHelper.ReadExpectedString(reader, "obj", true);

    var headerMatchesKey = readNumber == key.Number && readGeneration == key.Generation;
    if (!headerMatchesKey)
    {
        throw new InvalidOperationException($"Xref for {key} points to object {readNumber} {readGeneration} at {offset}");
    }

    ReadHelper.SkipSpaces(reader);

    var parsed = baseParser.Parse(reader, pool);
    var terminator = ReadHelper.ReadString(reader);

    if (string.Equals(terminator, "stream"))
    {
        // Step back over the keyword so the stream reader starts at "stream" itself.
        reader.Rewind(OtherEncodings.StringAsLatin1Bytes(terminator).Length);

        parsed = ReadNormalObjectStream(reader, parsed, offset, isLenientParsing, out terminator);
    }

    if (string.Equals(terminator, "endobj"))
    {
        return parsed;
    }

    var message = $"Object ({readNumber}:{readGeneration}) at offset {offset} does not end with 'endobj' but with '{terminator}'";

    if (!isLenientParsing)
    {
        throw new InvalidOperationException(message);
    }

    log.Warn(message);

    return parsed;
}
/// <summary>
/// Verifies that a value stored under one <c>CosObjectKey</c> can be retrieved from a
/// dictionary using a separately constructed key with the same number and generation.
/// </summary>
public void CanLookupInDictionary()
{
    var lookup = new Dictionary<CosObjectKey, long>();
    lookup.Add(new CosObjectKey(3, 0), 5);

    // A distinct instance built from the same values must locate the entry.
    var equivalentKey = new CosObjectKey(3, 0);
    var found = lookup[equivalentKey];

    Assert.Equal(5, found);
}
/// <summary>
/// Returns the object an indirect reference points to, using the pooled value when one is
/// already set and otherwise locating it through the cross reference table (and, in
/// lenient mode, the brute force searcher).
/// </summary>
/// <param name="indirectReference">Reference supplying the object number and generation.</param>
/// <param name="reader">Source the object is read from.</param>
/// <param name="isLenientParsing">
/// Enables the brute force fallback and makes an unlocatable object yield <c>CosNull.Null</c>.
/// </param>
/// <param name="requireExistingObject">Whether a missing or non-positive table entry is an error.</param>
/// <returns>The object, or <c>CosNull.Null</c> when it cannot be located in lenient mode.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="requireExistingObject"/> is set and the table has no positive entry for the
/// key, or the object cannot be located and <paramref name="isLenientParsing"/> is false.
/// </exception>
public CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false)
{
    var key = new CosObjectKey(indirectReference.ObjectNumber, indirectReference.Generation);

    // A value already stored in the pool wins over any lookup.
    var cached = objectPool.GetOrCreateDefault(key).GetObject();
    if (cached != null)
    {
        return cached;
    }

    var location = TryGet(key, crossReferenceTable.ObjectOffsets);

    if (requireExistingObject)
    {
        if (location == null || location <= 0)
        {
            throw new InvalidOperationException("Object must be defined and not compressed: " + key);
        }
    }

    if (location == null && isLenientParsing)
    {
        // Fall back to the brute force results and record a hit in the table.
        location = TryGet(key, bruteForceSearcher.GetObjectLocations());

        if (location != null)
        {
            crossReferenceTable.UpdateOffset(key, location.Value);
        }
    }

    if (location == null)
    {
        if (!isLenientParsing)
        {
            throw new InvalidOperationException($"Could not locate the object {key.Number} which was not found in the cross reference table.");
        }

        return CosNull.Null;
    }

    // A non-positive entry is the negated number of the stream that contains the object.
    var storedInObjectStream = location <= 0;
    if (storedInObjectStream)
    {
        return ParseCompressedStreamObject(reader, -location.Value, indirectReference.ObjectNumber, isLenientParsing);
    }

    return ParseObjectFromFile(location.Value, reader, key, objectPool, isLenientParsing);
}
/// <summary>
/// Adds an entry to the xref table of the current XRef object, mapping an object key to
/// its byte offset in the file. An entry that already exists for the key is left untouched.
/// </summary>
/// <param name="objKey">The object key, with id and generation numbers.</param>
/// <param name="offset">The byte offset in this file.</param>
public void setXRef(CosObjectKey objKey, long offset)
{
    if (curXrefTrailerObj == null)
    {
        // Should not happen: an entry cannot be added because no XRef start was signalled.
        return;
    }

    var table = curXrefTrailerObj.xrefTable;

    // PDFBOX-3506: check before adding to the map, to avoid entries from the table being
    // overwritten by obsolete entries in hybrid files (/XRefStm entry).
    if (table.ContainsKey(objKey))
    {
        return;
    }

    table[objKey] = offset;
}
/// <summary>
/// Checks every non-negative offset in <paramref name="xrefOffset"/> with
/// <c>checkObjectKeys</c> and stops at the first entry that fails.
/// </summary>
/// <param name="reader">Source the offsets refer to.</param>
/// <param name="xrefOffset">Object keys mapped to their offsets; may be null.</param>
/// <returns>
/// <c>true</c> when <paramref name="xrefOffset"/> is null or every checked entry passes;
/// <c>false</c> as soon as one entry fails.
/// </returns>
private bool validateXrefOffsets(IRandomAccessRead reader, Dictionary<CosObjectKey, long> xrefOffset)
{
    if (xrefOffset == null)
    {
        return true;
    }

    foreach (var objectEntry in xrefOffset)
    {
        long objectOffset = objectEntry.Value;

        // A negative offset number represents an object number itself
        // (see type 2 entry in xref stream), so only non-negative offsets are checked.
        if (objectOffset >= 0 && !checkObjectKeys(reader, objectEntry.Key, objectOffset))
        {
            // Stop checking xref offsets as at least one couldn't be dereferenced.
            return false;
        }
    }

    return true;
}
/// <summary>
/// Loads the stream object numbered <paramref name="streamObjectNumber"/>, parses the objects
/// it contains, stores those the cross reference table attributes to this stream in the
/// pool, and returns the one numbered <paramref name="requestedNumber"/>.
/// </summary>
/// <param name="reader">Source to read from.</param>
/// <param name="streamObjectNumber">Object number of the containing stream (parsed with generation 0).</param>
/// <param name="requestedNumber">Object number to return from the stream contents.</param>
/// <param name="objectPool">Pool updated with the contained objects.</param>
/// <param name="crossReferenceTable">Table consulted to confirm which objects belong to this stream.</param>
/// <param name="bruteForceSearcher">Passed through when parsing the stream object itself.</param>
/// <param name="isLenientParsing">Passed through when parsing the stream object itself.</param>
/// <returns>
/// The matching contained object, or <c>CosNull.Null</c> when the stream object is not a
/// <c>PdfRawStream</c> or contains no object with the requested number.
/// </returns>
private CosBase ParseCompressedStreamObject(IRandomAccessRead reader, long streamObjectNumber, long requestedNumber, CosObjectPool objectPool, CrossReferenceTable crossReferenceTable, BruteForceSearcher bruteForceSearcher, bool isLenientParsing)
{
    var container = Parse(reader, streamObjectNumber, 0, objectPool, crossReferenceTable, bruteForceSearcher, isLenientParsing, true);

    if (!(container is PdfRawStream rawStream))
    {
        log.Warn($"Could not find a stream for the object number, defaults to returning CosNull: {streamObjectNumber}");

        return CosNull.Null;
    }

    var contained = objectStreamParser.Parse(rawStream, objectPool);

    // Register all objects which are referenced to be contained in this object stream:
    // the table records them with the negated stream number.
    var expectedMarker = -streamObjectNumber;
    foreach (var candidate in contained)
    {
        var candidateKey = new CosObjectKey(candidate);
        var recorded = TryGet(candidateKey, crossReferenceTable.ObjectOffsets);

        if (recorded == null || recorded != expectedMarker)
        {
            continue;
        }

        objectPool.Get(candidateKey).SetObject(candidate.GetObject());
    }

    // Return the first contained object carrying the requested number.
    foreach (var candidate in contained)
    {
        if (candidate.GetObjectNumber() == requestedNumber)
        {
            return candidate;
        }
    }

    log.Error($"Could not find the object {requestedNumber} in the stream for object {streamObjectNumber}. Returning CosNull.");

    return CosNull.Null;
}
/// <summary>
/// Convenience overload that unpacks <paramref name="arguments"/> and <paramref name="key"/>
/// and forwards to the overload taking the individual values.
/// </summary>
/// <param name="arguments">Supplies the reader, caching providers, cross reference table and leniency flag.</param>
/// <param name="key">Supplies the object number and generation (the generation is cast to <c>int</c>).</param>
/// <param name="requiresExistingObject">Forwarded unchanged.</param>
/// <returns>The result of the forwarded call.</returns>
public CosBase Parse(ParsingArguments arguments, CosObjectKey key, bool requiresExistingObject)
{
    var reader = arguments.Reader;
    var number = key.Number;
    var generation = (int)key.Generation;
    var providers = arguments.CachingProviders;

    return Parse(
        reader,
        number,
        generation,
        providers.ObjectPool,
        arguments.CrossReferenceTable,
        providers.BruteForceSearcher,
        arguments.IsLenientParsing,
        requiresExistingObject);
}
/// <summary>
/// Parses an array starting at '[' and returns its elements. When the base parser yields a
/// <c>CosObject</c>, the two preceding integer elements are taken as object number and
/// generation and replaced by the pool entry for that key.
/// </summary>
/// <param name="reader">Source positioned at the opening '['.</param>
/// <param name="baseParser">Parser used for each element.</param>
/// <param name="pool">Pool passed to the parser and queried for referenced objects.</param>
/// <returns>
/// The parsed array. Parsing stops at ']' or, after an unusable element, when the next
/// token is "endobj" or "endstream".
/// </returns>
public COSArray Parse(IRandomAccessRead reader, CosBaseParser baseParser, CosObjectPool pool)
{
    ReadHelper.ReadExpectedChar(reader, '[');
    var po = new COSArray();
    ReadHelper.SkipSpaces(reader);

    int i;
    while (((i = reader.Peek()) > 0) && ((char)i != ']'))
    {
        CosBase pbo = baseParser.Parse(reader, pool);

        if (pbo is CosObject)
        {
            // Treated as a corrupt reference unless both preceding integers are present.
            pbo = null;

            // We have to check if the expected values are there or not (PDFBOX-385).
            // The size guards prevent indexing position -1 when fewer than two elements
            // precede the reference marker.
            if (po.size() > 0 && po.get(po.size() - 1) is CosInt)
            {
                var genNumber = (CosInt)po.remove(po.size() - 1);

                if (po.size() > 0 && po.get(po.size() - 1) is CosInt)
                {
                    var number = (CosInt)po.remove(po.size() - 1);
                    var key = new CosObjectKey(number.AsLong(), genNumber.AsInt());
                    pbo = pool.Get(key);
                }
            }
        }

        if (pbo != null)
        {
            po.add(pbo);
        }
        else
        {
            // It could be a bad object in the array which is just skipped.
            // This could also be an "endobj" or "endstream" which means we can assume that
            // the array has ended.
            string isThisTheEnd = ReadHelper.ReadString(reader);
            reader.Unread(OtherEncodings.StringAsLatin1Bytes(isThisTheEnd));

            if (string.Equals(isThisTheEnd, "endobj") || string.Equals(isThisTheEnd, "endstream"))
            {
                return po;
            }
        }

        ReadHelper.SkipSpaces(reader);
    }

    // Read ']'.
    reader.Read();
    ReadHelper.SkipSpaces(reader);

    return po;
}
/// <summary>
/// Checks the offsets of the cross reference table and, when at least one of them fails
/// validation, merges the offsets returned by <c>getBFCosObjectOffsets</c> over a copy of
/// the table's offsets.
/// </summary>
/// <param name="reader">Source the offsets refer to.</param>
/// <param name="xrefTrailerResolver">Cross reference table whose offsets are checked.</param>
/// <param name="isLenientParsing">Repair is only attempted when this is true.</param>
public void checkXrefOffsets(IRandomAccessRead reader, CrossReferenceTable xrefTrailerResolver, bool isLenientParsing)
{
    // Repair mode isn't available in non-lenient mode.
    if (!isLenientParsing)
    {
        return;
    }

    Dictionary<CosObjectKey, long> xrefOffset = xrefTrailerResolver.ObjectOffsets.ToDictionary(x => x.Key, x => x.Value);

    if (validateXrefOffsets(reader, xrefOffset))
    {
        return;
    }

    Dictionary<CosObjectKey, long> bfCOSObjectKeyOffsets = getBFCosObjectOffsets(reader);

    if (bfCOSObjectKeyOffsets.Count == 0)
    {
        return;
    }

    // TODO: object streams are not reconciled yet. The intended step (previously present only
    // as commented-out code) is: for every negative xref offset, treat -offset as the number of
    // an object stream (generation 0); if that stream is among the brute force results, drop
    // brute force entries with a positive offset for the objects it contains, otherwise drop
    // the xref entries of the objects it contains. This needs an equivalent of
    // getContainedObjectNumbers on the cross reference table.

    // Overwrite rather than Add: keys found by brute force normally already exist in the
    // xref copy, and Dictionary.Add throws on a duplicate key.
    foreach (var item in bfCOSObjectKeyOffsets)
    {
        xrefOffset[item.Key] = item.Value;
    }

    // NOTE(review): xrefOffset is a copy created by ToDictionary, so the merged offsets are not
    // written back to xrefTrailerResolver here — confirm whether they should be.
}