Пример #1
0
        /// <summary>
        /// Parses the objects contained in an object stream (/ObjStm).
        /// </summary>
        /// <remarks>
        /// The decoded stream starts with a header of /N "object-number offset" pairs, followed by
        /// the object data. Objects are read sequentially after the header; the header offsets are
        /// consumed but not used to position the reader.
        /// </remarks>
        /// <param name="stream">The raw object stream; its dictionary supplies the /N object count.</param>
        /// <param name="pool">The object pool handed through to the base parser.</param>
        /// <returns>
        /// The parsed objects in stream order, each assigned the matching object number from the
        /// header and generation number 0.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public IReadOnlyList<CosObject> Parse(PdfRawStream stream, CosObjectPool pool)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            // Need to first parse the header.
            var numberOfObjects = stream.Dictionary.GetIntOrDefault(CosName.N);

            // /N is read from the file, so it cannot be trusted to be non-negative (a malformed
            // dictionary, or presumably a missing /N entry, can yield a negative value).
            // List<T> throws ArgumentOutOfRangeException for a negative capacity, so clamp the pre-size.
            var capacity = Math.Max(0, numberOfObjects);

            var objectNumbers = new List<long>(capacity);
            var streamObjects = new List<CosObject>(capacity);

            var bytes = stream.Decode(filterProvider);
            var reader = new RandomAccessBuffer(bytes);

            for (var i = 0; i < numberOfObjects; i++)
            {
                var objectNumber = ObjectHelper.ReadObjectNumber(reader);

                // The offset must be consumed to reach the next pair, but its value is not needed.
                ReadHelper.ReadLong(reader);

                objectNumbers.Add(objectNumber);
            }

            var objectCounter = 0;
            CosBase cosObject;

            while ((cosObject = baseParser.Parse(reader, pool)) != null)
            {
                // Check before building the wrapper so no object is allocated just to be discarded.
                if (objectCounter >= objectNumbers.Count)
                {
                    log.Error("/ObjStm (object stream) has more objects than /N " + numberOfObjects);
                    break;
                }

                var obj = new CosObject(cosObject);
                obj.SetGenerationNumber(0);
                obj.SetObjectNumber(objectNumbers[objectCounter]);
                streamObjects.Add(obj);

                // According to the spec objects within an object stream shall not be enclosed
                // by obj/endobj tags, but there are some pdfs in the wild using those tags,
                // so skip the endobj marker if present.
                if (!reader.IsEof() && reader.Peek() == 'e')
                {
                    ReadHelper.ReadLine(reader);
                }

                objectCounter++;
            }

            return streamObjects;
        }
Пример #2
0
        /// <summary>
        /// Reads the indirect object located at <paramref name="offset"/> and checks that it is the
        /// object identified by <paramref name="key"/>.
        /// </summary>
        /// <param name="offset">Position in <paramref name="reader"/> to seek to before reading.</param>
        /// <param name="reader">Source to read the object from.</param>
        /// <param name="key">Expected object number and generation.</param>
        /// <param name="pool">The object pool handed through to the base parser.</param>
        /// <param name="isLenientParsing">
        /// When true, a missing 'endobj' terminator is logged as a warning instead of throwing.
        /// </param>
        /// <returns>The parsed object, or the stream object when the object is followed by 'stream'.</returns>
        /// <exception cref="InvalidOperationException">
        /// The object number/generation read at the offset differs from <paramref name="key"/>, or the
        /// object does not end with 'endobj' and lenient parsing is disabled.
        /// </exception>
        private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader,
                                            CosObjectKey key,
                                            CosObjectPool pool,
                                            bool isLenientParsing)
        {
            reader.Seek(offset);

            // Object header: "<number> <generation> obj".
            var objectNumber = ObjectHelper.ReadObjectNumber(reader);
            var objectGeneration = ObjectHelper.ReadGenerationNumber(reader);
            ReadHelper.ReadExpectedString(reader, "obj", true);

            var headerMatchesKey = objectNumber == key.Number && objectGeneration == key.Generation;
            if (!headerMatchesKey)
            {
                throw new InvalidOperationException($"Xref for {key} points to object {objectNumber} {objectGeneration} at {offset}");
            }

            ReadHelper.SkipSpaces(reader);

            var result = baseParser.Parse(reader, pool);

            // The token after the object is either the terminator or the start of stream data.
            var endObjectKey = ReadHelper.ReadString(reader);

            if (string.Equals(endObjectKey, "stream"))
            {
                // Step back over the keyword just consumed before handing over to the stream reader,
                // which also reports the token that follows the stream.
                reader.Rewind(OtherEncodings.StringAsLatin1Bytes(endObjectKey).Length);

                result = ReadNormalObjectStream(reader, result, offset, isLenientParsing, out endObjectKey);
            }

            if (string.Equals(endObjectKey, "endobj"))
            {
                return result;
            }

            var message =
                $"Object ({objectNumber}:{objectGeneration}) at offset {offset} does not end with \'endobj\' but with \'{endObjectKey}\'";

            if (!isLenientParsing)
            {
                throw new InvalidOperationException(message);
            }

            log.Warn(message);

            return result;
        }