/// <summary>
/// Parses the objects contained in an object stream (/ObjStm).
/// </summary>
/// <remarks>
/// The decoded stream data starts with a header of /N integer pairs (object number, then offset),
/// followed by the objects themselves. The offsets are read and discarded; each parsed object is
/// given the object number at the same position in the header and generation number 0.
/// </remarks>
/// <param name="stream">The raw object stream to decode and parse.</param>
/// <param name="pool">The object pool handed through to the base parser.</param>
/// <returns>The objects found in the stream, in the order they were parsed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="InvalidOperationException">The /N entry of the stream dictionary is negative.</exception>
public IReadOnlyList<CosObject> Parse(PdfRawStream stream, CosObjectPool pool)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    // The header length is given by /N; it must be validated before it is used as a list capacity,
    // otherwise a negative value surfaces as an unrelated ArgumentOutOfRangeException from List<T>.
    var numberOfObjects = stream.Dictionary.GetIntOrDefault(CosName.N);

    if (numberOfObjects < 0)
    {
        throw new InvalidOperationException($"/ObjStm (object stream) has an invalid /N entry: {numberOfObjects}");
    }

    var objectNumbers = new List<long>(numberOfObjects);
    var streamObjects = new List<CosObject>(numberOfObjects);

    var bytes = stream.Decode(filterProvider);
    var reader = new RandomAccessBuffer(bytes);

    // Header: one (object number, offset) pair per object.
    for (var i = 0; i < numberOfObjects; i++)
    {
        var objectNumber = ObjectHelper.ReadObjectNumber(reader);

        // The offset is not needed because objects are read sequentially; skip it.
        ReadHelper.ReadLong(reader);

        objectNumbers.Add(objectNumber);
    }

    var objectCounter = 0;
    CosBase cosObject;
    while ((cosObject = baseParser.Parse(reader, pool)) != null)
    {
        // Stop as soon as the stream yields more objects than the header declared.
        if (objectCounter >= objectNumbers.Count)
        {
            log.Error("/ObjStm (object stream) has more objects than /N " + numberOfObjects);
            break;
        }

        var obj = new CosObject(cosObject);
        obj.SetGenerationNumber(0);
        obj.SetObjectNumber(objectNumbers[objectCounter]);
        streamObjects.Add(obj);

        // According to the spec objects within an object stream shall not be enclosed
        // by obj/endobj tags, but there are some pdfs in the wild using those tags,
        // so skip the endobj marker if present.
        if (!reader.IsEof() && reader.Peek() == 'e')
        {
            ReadHelper.ReadLine(reader);
        }

        objectCounter++;
    }

    return streamObjects;
}
/// <summary>
/// Reads the indirect object located at <paramref name="offset"/> and checks that it is the one
/// identified by <paramref name="key"/>.
/// </summary>
/// <param name="offset">The position in <paramref name="reader"/> to seek to before reading.</param>
/// <param name="reader">The source the object is read from.</param>
/// <param name="key">The object number and generation expected at <paramref name="offset"/>.</param>
/// <param name="pool">The object pool handed through to the base parser.</param>
/// <param name="isLenientParsing">
/// When true, a missing 'endobj' terminator is logged as a warning instead of throwing.
/// </param>
/// <returns>The parsed object, or the result of reading its stream when a stream follows it.</returns>
/// <exception cref="InvalidOperationException">
/// The object header does not match <paramref name="key"/>, or the object is not terminated by
/// 'endobj' and <paramref name="isLenientParsing"/> is false.
/// </exception>
private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader, CosObjectKey key, CosObjectPool pool, bool isLenientParsing)
{
    reader.Seek(offset);

    // Object header: "<number> <generation> obj".
    var number = ObjectHelper.ReadObjectNumber(reader);
    var generation = ObjectHelper.ReadGenerationNumber(reader);
    ReadHelper.ReadExpectedString(reader, "obj", true);

    var headerDiffersFromKey = number != key.Number || generation != key.Generation;
    if (headerDiffersFromKey)
    {
        throw new InvalidOperationException($"Xref for {key} points to object {number} {generation} at {offset}");
    }

    ReadHelper.SkipSpaces(reader);

    var result = baseParser.Parse(reader, pool);

    // The token after the object body is either the start of a stream or the terminator.
    var trailingToken = ReadHelper.ReadString(reader);

    if (trailingToken == "stream")
    {
        // Step back over the keyword just consumed before handing over to the stream reader,
        // which reports the token that follows the stream through its out parameter.
        reader.Rewind(OtherEncodings.StringAsLatin1Bytes(trailingToken).Length);
        result = ReadNormalObjectStream(reader, result, offset, isLenientParsing, out trailingToken);
    }

    if (string.Equals(trailingToken, "endobj"))
    {
        return result;
    }

    var message = $"Object ({number}:{generation}) at offset {offset} does not end with \'endobj\' but with \'{trailingToken}\'";

    if (!isLenientParsing)
    {
        throw new InvalidOperationException(message);
    }

    log.Warn(message);

    return result;
}