Beispiel #1
0
        /// <summary>
        /// Determines the media box to use for a page.
        /// </summary>
        /// <remarks>
        /// The page's own MediaBox entry is preferred. If the page dictionary has no usable
        /// entry, the value carried by <paramref name="pageTreeMembers"/> is used instead.
        /// In lenient mode a missing or wrongly sized box is replaced by <see cref="MediaBox.A4"/>.
        /// </remarks>
        /// <param name="number">The page number, used only for the error message.</param>
        /// <param name="dictionary">The page dictionary to inspect.</param>
        /// <param name="pageTreeMembers">Values collected from the page tree, including a fallback media box.</param>
        /// <param name="isLenientParsing">Whether to substitute A4 instead of failing on bad or missing data.</param>
        /// <returns>The resolved media box.</returns>
        /// <exception cref="InvalidOperationException">
        /// No media box is available from either source and parsing is not lenient.
        /// </exception>
        private MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
        {
            if (dictionary.TryGet(NameToken.MediaBox, out var boxToken) &&
                DirectObjectFinder.TryGet(boxToken, pdfScanner, out ArrayToken boxArray))
            {
                var hasWrongLength = boxArray.Length != 4;

                if (hasWrongLength && isLenientParsing)
                {
                    log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaboxArrayText(boxArray)}.");

                    return MediaBox.A4;
                }

                // In strict mode the array is converted as-is, whatever its length.
                return new MediaBox(boxArray.ToIntRectangle());

                // Local helper keeps the interpolated log text identical to formatting the array directly.
                string mediaboxArrayText(ArrayToken value) => $"{value}";
            }

            var inherited = pageTreeMembers.MediaBox;

            if (inherited != null)
            {
                return inherited;
            }

            if (!isLenientParsing)
            {
                throw new InvalidOperationException("No mediabox was present for page: " + number);
            }

            return MediaBox.A4;
        }
Beispiel #2
0
        /// <summary>
        /// Builds a <see cref="Page"/> from its page dictionary and the values collected from the page tree.
        /// </summary>
        /// <remarks>
        /// Resource dictionaries from the parent nodes and from the page itself are loaded into the
        /// resource store while the content is parsed and the page is constructed. They are unloaded
        /// again before this method exits, including when an exception is thrown, so the store is
        /// left balanced for the next page.
        /// </remarks>
        /// <param name="number">The page number, used for content parsing and error messages.</param>
        /// <param name="dictionary">The page dictionary.</param>
        /// <param name="pageTreeMembers">Rotation, boxes and parent resources gathered from the page tree.
        /// Its <c>ParentResources</c> queue is drained by this call.</param>
        /// <param name="isLenientParsing">When true, a wrong <c>Type</c> entry is tolerated.</param>
        /// <returns>The constructed page.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The page type is wrong in strict mode, or a content stream could not be found.
        /// </exception>
        /// <exception cref="PdfDocumentFormatException">
        /// The contents array holds an item which is not an indirect reference.
        /// </exception>
        public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers,
                           bool isLenientParsing)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetNameOrDefault(NameToken.Type);

            if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
            {
                throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
            }

            // The page's own Rotate entry overrides the value from the page tree.
            var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);

            if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
            {
                rotation = new PageRotationDegrees(rotateToken.Int);
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
            CropBox  cropBox  = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            // Counts how many resource dictionaries were loaded so exactly that many are unloaded.
            var stackDepth = 0;

            try
            {
                while (pageTreeMembers.ParentResources.Count > 0)
                {
                    var resource = pageTreeMembers.ParentResources.Dequeue();

                    resourceStore.LoadResourceDictionary(resource, isLenientParsing);
                    stackDepth++;
                }

                if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
                {
                    resourceStore.LoadResourceDictionary(resources, isLenientParsing);
                    stackDepth++;
                }

                UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

                PageContent content = default(PageContent);

                if (!dictionary.TryGet(NameToken.Contents, out var contents))
                {
                    // No Contents entry: the content is left at its default value.
                }
                else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
                {
                    var bytes = new List<byte>();

                    for (var i = 0; i < array.Data.Count; i++)
                    {
                        var item = array.Data[i];

                        if (!(item is IndirectReferenceToken obj))
                        {
                            throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                        }

                        var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                        if (contentStream == null)
                        {
                            throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                        }

                        bytes.AddRange(contentStream.Decode(filterProvider));

                        // Separate consecutive streams so tokens at the boundary do not merge.
                        if (i < array.Data.Count - 1)
                        {
                            bytes.Add((byte)'\n');
                        }
                    }

                    content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
                }
                else
                {
                    var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

                    if (contentStream == null)
                    {
                        throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                    }

                    var bytes = contentStream.Decode(filterProvider);

                    content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
                }

                return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
                                new AnnotationProvider(pdfScanner, dictionary, isLenientParsing),
                                pdfScanner);
            }
            finally
            {
                // Runs on success and on failure, so a bad page cannot leave its resources loaded.
                for (var i = 0; i < stackDepth; i++)
                {
                    resourceStore.UnloadResourceDictionary();
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Determines the crop box to use for a page.
        /// </summary>
        /// <remarks>
        /// The page's own CropBox array is used when it has four entries. A CropBox array of any
        /// other length is logged as an error and replaced by the media box bounds. When the page
        /// has no CropBox array, the page tree value is used, or the media box bounds if there is none.
        /// </remarks>
        /// <param name="dictionary">The page dictionary to inspect.</param>
        /// <param name="pageTreeMembers">Values collected from the page tree, including a fallback crop box.</param>
        /// <param name="mediaBox">The page's media box, used as the last fallback.</param>
        /// <returns>The resolved crop box.</returns>
        private CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox)
        {
            if (!dictionary.TryGet(NameToken.CropBox, out var boxToken) ||
                !DirectObjectFinder.TryGet(boxToken, pdfScanner, out ArrayToken boxArray))
            {
                return pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
            }

            if (boxArray.Length == 4)
            {
                return new CropBox(boxArray.ToIntRectangle(pdfScanner));
            }

            log.Error($"The CropBox was the wrong length in the dictionary: {dictionary}. Array was: {boxArray}. Using MediaBox.");

            return new CropBox(mediaBox.Bounds);
        }
Beispiel #4
0
        /// <summary>
        /// Builds a <see cref="Page"/> from its page dictionary and the values collected from the page tree.
        /// </summary>
        /// <remarks>
        /// Resource dictionaries from the parent nodes and from the page itself are loaded into the
        /// resource store while the content is parsed and the page is constructed. They are unloaded
        /// again before this method exits, including when an exception is thrown, so the store is
        /// left balanced for the next page.
        /// </remarks>
        /// <param name="number">The page number, used for content parsing and error messages.</param>
        /// <param name="dictionary">The page dictionary.</param>
        /// <param name="pageTreeMembers">Rotation, boxes and parent resources gathered from the page tree.
        /// Its <c>ParentResources</c> queue is drained by this call.</param>
        /// <param name="clipPaths">Forwarded unchanged to the content parser.</param>
        /// <returns>The constructed page.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
        /// <exception cref="InvalidOperationException">A content stream could not be found.</exception>
        /// <exception cref="PdfDocumentFormatException">
        /// The contents array holds an item which is not an indirect reference.
        /// </exception>
        public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool clipPaths)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetNameOrDefault(NameToken.Type);

            // A wrong type is only reported, never fatal.
            if (type != null && !type.Equals(NameToken.Page))
            {
                log?.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
            CropBox  cropBox  = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            // The page's own Rotate entry overrides the value from the page tree.
            var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);

            if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
            {
                rotation = new PageRotationDegrees(rotateToken.Int);
            }

            // Counts how many resource dictionaries were loaded so exactly that many are unloaded.
            var stackDepth = 0;

            try
            {
                while (pageTreeMembers.ParentResources.Count > 0)
                {
                    var resource = pageTreeMembers.ParentResources.Dequeue();

                    resourceStore.LoadResourceDictionary(resource);
                    stackDepth++;
                }

                if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
                {
                    resourceStore.LoadResourceDictionary(resources);
                    stackDepth++;
                }

                // Apply rotation: when the axes are swapped, both boxes are rebuilt with their coordinates exchanged.
                if (rotation.SwapsAxis)
                {
                    mediaBox = new MediaBox(new PdfRectangle(mediaBox.Bounds.Bottom,
                                                             mediaBox.Bounds.Left,
                                                             mediaBox.Bounds.Top,
                                                             mediaBox.Bounds.Right));
                    cropBox = new CropBox(new PdfRectangle(cropBox.Bounds.Bottom,
                                                           cropBox.Bounds.Left,
                                                           cropBox.Bounds.Top,
                                                           cropBox.Bounds.Right));
                }

                UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

                PageContent content;

                if (!dictionary.TryGet(NameToken.Contents, out var contents))
                {
                    // No Contents entry: build an empty content object rather than leaving it null.
                    content = new PageContent(EmptyArray<IGraphicsStateOperation>.Instance,
                                              EmptyArray<Letter>.Instance,
                                              EmptyArray<PdfPath>.Instance,
                                              EmptyArray<Union<XObjectContentRecord, InlineImage>>.Instance,
                                              EmptyArray<MarkedContentElement>.Instance,
                                              pdfScanner,
                                              filterProvider,
                                              resourceStore);
                }
                else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
                {
                    var bytes = new List<byte>();

                    for (var i = 0; i < array.Data.Count; i++)
                    {
                        var item = array.Data[i];

                        if (!(item is IndirectReferenceToken obj))
                        {
                            throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                        }

                        var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                        if (contentStream == null)
                        {
                            throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                        }

                        bytes.AddRange(contentStream.Decode(filterProvider, pdfScanner));

                        // Separate consecutive streams so tokens at the boundary do not merge.
                        if (i < array.Data.Count - 1)
                        {
                            bytes.Add((byte)'\n');
                        }
                    }

                    content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
                }
                else
                {
                    var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

                    if (contentStream == null)
                    {
                        throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                    }

                    var bytes = contentStream.Decode(filterProvider, pdfScanner);

                    content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
                }

                return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
                                new AnnotationProvider(pdfScanner, dictionary),
                                pdfScanner);
            }
            finally
            {
                // Runs on success and on failure, so a bad page cannot leave its resources loaded.
                for (var i = 0; i < stackDepth; i++)
                {
                    resourceStore.UnloadResourceDictionary();
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Builds a <see cref="Page"/> from its page dictionary and the values collected from the page tree.
        /// </summary>
        /// <remarks>
        /// When the Contents entry is an array, the decoded streams are joined into a single buffer
        /// with a newline between consecutive streams, so that the last token of one stream cannot
        /// run into the first token of the next.
        /// </remarks>
        /// <param name="number">The page number, used for the page and for error messages.</param>
        /// <param name="dictionary">The page dictionary.</param>
        /// <param name="pageTreeMembers">Boxes gathered from the page tree, used as fallbacks.</param>
        /// <param name="isLenientParsing">When true, a wrong <c>Type</c> entry is tolerated.</param>
        /// <returns>The constructed page.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The page type is wrong in strict mode, or a content stream could not be found.
        /// </exception>
        /// <exception cref="PdfDocumentFormatException">
        /// The contents array holds an item which is not an indirect reference.
        /// </exception>
        public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers,
                           bool isLenientParsing)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetNameOrDefault(NameToken.Type);

            if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
            {
                throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
            CropBox  cropBox  = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

            LoadResources(dictionary, isLenientParsing);

            PageContent content = default(PageContent);

            if (!dictionary.TryGet(NameToken.Contents, out var contents))
            {
                // No Contents entry: the content is left at its default value.
            }
            else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
            {
                var bytes = new List<byte>();
                var isFirstStream = true;

                foreach (var item in array.Data)
                {
                    if (!(item is IndirectReferenceToken obj))
                    {
                        throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                    }

                    var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                    if (contentStream == null)
                    {
                        throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                    }

                    // Put a newline between streams; without it "q" ending one stream and "Q"
                    // starting the next would be read as the single token "qQ".
                    if (!isFirstStream)
                    {
                        bytes.Add((byte)'\n');
                    }

                    bytes.AddRange(contentStream.Decode(filterProvider));
                    isFirstStream = false;
                }

                content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
            }
            else
            {
                var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                }

                var bytes = contentStream.Decode(filterProvider);

                content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
            }

            var page = new Page(number, mediaBox, cropBox, content);

            return page;
        }
Beispiel #6
0
        /// <summary>
        /// Determines the crop box to use for a page.
        /// </summary>
        /// <remarks>
        /// Only a CropBox entry stored directly as an array is read here. Any other value falls
        /// through to the page tree crop box, and then to the media box bounds. The first four
        /// array entries are read as integers.
        /// </remarks>
        /// <param name="dictionary">The page dictionary to inspect.</param>
        /// <param name="pageTreeMembers">Values collected from the page tree, including a fallback crop box.</param>
        /// <param name="mediaBox">The page's media box, used as the last fallback.</param>
        /// <returns>The resolved crop box.</returns>
        private static CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox)
        {
            if (!dictionary.TryGet(NameToken.CropBox, out var boxToken) || !(boxToken is ArrayToken corners))
            {
                return pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
            }

            var bounds = new PdfRectangle(corners.GetNumeric(0).Int,
                                          corners.GetNumeric(1).Int,
                                          corners.GetNumeric(2).Int,
                                          corners.GetNumeric(3).Int);

            return new CropBox(bounds);
        }
Beispiel #7
0
        /// <summary>
        /// Determines the crop box to use for a page.
        /// </summary>
        /// <remarks>
        /// When the page dictionary holds a CropBox array, its first four entries are read as
        /// integers. Otherwise the page tree crop box is used, or the media box bounds if there is none.
        /// </remarks>
        /// <param name="dictionary">The page dictionary to inspect.</param>
        /// <param name="pageTreeMembers">Values collected from the page tree, including a fallback crop box.</param>
        /// <param name="mediaBox">The page's media box, used as the last fallback.</param>
        /// <returns>The resolved crop box.</returns>
        private static CropBox GetCropBox(PdfDictionary dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox)
        {
            if (!dictionary.TryGetItemOfType(CosName.CROP_BOX, out COSArray corners))
            {
                return pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
            }

            var bounds = new PdfRectangle(corners.getInt(0),
                                          corners.getInt(1),
                                          corners.getInt(2),
                                          corners.getInt(3));

            return new CropBox(bounds);
        }