Пример #1
0
        /// <summary>
        /// Creates a content stream processor whose initial graphics state is clipped to the crop box.
        /// </summary>
        /// <param name="cropBox">Rectangle used to build the initial clipping path.</param>
        /// <param name="resourceStore">Resource store kept by the processor and handed to the colour space context.</param>
        /// <param name="userSpaceUnit">User space unit stored on the processor.</param>
        /// <param name="rotation">Page rotation stored on the processor.</param>
        /// <param name="pdfScanner">Token scanner; must not be null.</param>
        /// <param name="pageContentParser">Content parser; must not be null.</param>
        /// <param name="filterProvider">Filter provider; must not be null.</param>
        /// <param name="log">Logger stored on the processor.</param>
        /// <param name="clipPaths">Clip-paths flag stored on the processor.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="pdfScanner"/>, <paramref name="pageContentParser"/> or
        /// <paramref name="filterProvider"/> is null.
        /// </exception>
        public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation,
                                      IPdfTokenScanner pdfScanner,
                                      IPageContentParser pageContentParser,
                                      IFilterProvider filterProvider,
                                      ILog log,
                                      bool clipPaths)
        {
            // Mandatory collaborators are validated in declaration order.
            if (pdfScanner == null)
            {
                throw new ArgumentNullException(nameof(pdfScanner));
            }

            if (pageContentParser == null)
            {
                throw new ArgumentNullException(nameof(pageContentParser));
            }

            if (filterProvider == null)
            {
                throw new ArgumentNullException(nameof(filterProvider));
            }

            this.resourceStore = resourceStore;
            this.userSpaceUnit = userSpaceUnit;
            this.rotation = rotation;
            this.pdfScanner = pdfScanner;
            this.pageContentParser = pageContentParser;
            this.filterProvider = filterProvider;
            this.log = log;
            this.clipPaths = clipPaths;

            // Build a rectangular subpath matching the crop box.
            var cropOutline = new PdfSubpath();
            cropOutline.Rectangle(cropBox.BottomLeft.X, cropBox.BottomLeft.Y, cropBox.Width, cropBox.Height);

            // Wrap it in a path flagged as a non-zero-winding clipping path.
            var initialClip = new PdfPath();
            initialClip.Add(cropOutline);
            initialClip.SetClipping(FillingRule.NonZeroWinding);

            // The first graphics state on the stack starts out clipped to that path.
            var initialState = new CurrentGraphicsState
            {
                CurrentClippingPath = initialClip
            };
            graphicsStack.Push(initialState);

            ColorSpaceContext = new ColorSpaceContext(GetCurrentState, resourceStore);
        }
Пример #2
0
 /// <summary>
 /// Creates a content stream processor and seeds its graphics stack with a default graphics state.
 /// </summary>
 /// <param name="cropBox">Crop box of the page; not used by this constructor.</param>
 /// <param name="resourceStore">Resource store stored on the processor.</param>
 /// <param name="userSpaceUnit">User space unit stored on the processor.</param>
 /// <param name="isLenientParsing">Lenient-parsing flag stored on the processor.</param>
 public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing)
 {
     this.isLenientParsing = isLenientParsing;
     this.userSpaceUnit = userSpaceUnit;
     this.resourceStore = resourceStore;

     // The stack must never be empty, so start with a default state.
     var initialState = new CurrentGraphicsState();
     graphicsStack.Push(initialState);
 }
Пример #3
0
        /// <summary>
        /// Reads the <c>UserUnit</c> entry of a page dictionary.
        /// </summary>
        /// <param name="dictionary">The page dictionary to inspect.</param>
        /// <returns>
        /// A <see cref="UserSpaceUnit"/> built from the entry when it is a numeric token;
        /// otherwise <see cref="UserSpaceUnit.Default"/>.
        /// </returns>
        private static UserSpaceUnit GetUserSpaceUnits(DictionaryToken dictionary)
        {
            var fallback = UserSpaceUnit.Default;

            if (!dictionary.TryGet(NameToken.UserUnit, out var rawUserUnit))
            {
                return fallback;
            }

            // NOTE(review): the value is taken through NumericToken.Int, so a fractional
            // UserUnit would presumably lose its fraction - confirm against NumericToken.
            if (rawUserUnit is NumericToken numericUserUnit)
            {
                return new UserSpaceUnit(numericUserUnit.Int);
            }

            return fallback;
        }
Пример #4
0
        /// <summary>
        /// Reads the <c>USER_UNIT</c> entry of a page dictionary.
        /// </summary>
        /// <param name="dictionary">The page dictionary to inspect.</param>
        /// <returns>
        /// A <see cref="UserSpaceUnit"/> built from the entry when it is an <see cref="ICosNumber"/>;
        /// otherwise <see cref="UserSpaceUnit.Default"/>.
        /// </returns>
        private static UserSpaceUnit GetUserSpaceUnits(PdfDictionary dictionary)
        {
            var fallback = UserSpaceUnit.Default;

            if (!dictionary.TryGetValue(CosName.USER_UNIT, out var rawUserUnit))
            {
                return fallback;
            }

            // NOTE(review): the value is taken through AsInt(), so a fractional
            // user unit would presumably lose its fraction - confirm against ICosNumber.
            if (rawUserUnit is ICosNumber numericUserUnit)
            {
                return new UserSpaceUnit(numericUserUnit.AsInt());
            }

            return fallback;
        }
Пример #5
0
 /// <summary>
 /// Creates a content stream processor and seeds its graphics stack with a default graphics state.
 /// </summary>
 /// <param name="cropBox">Crop box of the page; not used by this constructor.</param>
 /// <param name="resourceStore">Resource store stored on the processor.</param>
 /// <param name="userSpaceUnit">User space unit stored on the processor.</param>
 /// <param name="isLenientParsing">Lenient-parsing flag stored on the processor.</param>
 /// <param name="pdfScanner">Token scanner stored on the processor (not null-checked here).</param>
 /// <param name="xObjectFactory">XObject factory stored on the processor (not null-checked here).</param>
 public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing,
                               IPdfTokenScanner pdfScanner,
                               XObjectFactory xObjectFactory)
 {
     // Collaborators.
     this.pdfScanner = pdfScanner;
     this.xObjectFactory = xObjectFactory;
     this.resourceStore = resourceStore;

     // Page-level settings.
     this.userSpaceUnit = userSpaceUnit;
     this.isLenientParsing = isLenientParsing;

     // The stack must never be empty, so start with a default state.
     var initialState = new CurrentGraphicsState();
     graphicsStack.Push(initialState);
 }
Пример #6
0
        /// <summary>
        /// Builds the page with the given number from its page dictionary.
        /// </summary>
        /// <param name="number">The page number, used for the created page and in error messages.</param>
        /// <param name="dictionary">The page dictionary; must not be null.</param>
        /// <param name="pageTreeMembers">Values inherited from the page tree, forwarded to the media/crop box lookups.</param>
        /// <param name="reader">Reader forwarded to resource loading and object parsing.</param>
        /// <param name="isLenientParsing">When true, a wrong <c>Type</c> entry is tolerated instead of throwing.</param>
        /// <returns>The created page; its content is left at the default value when no content object is found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The dictionary type is not <c>Page</c> (strict parsing only), or the content object
        /// could not be parsed as a raw stream.
        /// </exception>
        public Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader,
                           bool isLenientParsing)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetName(CosName.TYPE);

            if (type != null && !type.Equals(CosName.PAGE) && !isLenientParsing)
            {
                throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
            CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

            LoadResources(dictionary, reader, isLenientParsing);

            PageContent content = default(PageContent);

            // NOTE(review): only a Contents entry that is a CosObject is handled; any other
            // form (for example an array of streams) leaves the content at its default - confirm intended.
            if (dictionary.GetItemOrDefault(CosName.CONTENTS) is CosObject contentObject)
            {
                var contentStream = pdfObjectParser.Parse(contentObject.ToIndirectReference(), reader, false) as PdfRawStream;

                if (contentStream == null)
                {
                    throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                }

                var contents = contentStream.Decode(filterProvider);

                var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));

                var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);

                content = context.Process(operations);
            }

            return new Page(number, mediaBox, cropBox, content);
        }
Пример #7
0
 /// <summary>
 /// Creates a content stream processor, seeds its graphics stack with a default graphics state
 /// and creates the colour space context bound to the current state accessor.
 /// </summary>
 /// <param name="cropBox">Crop box of the page; not used by this constructor.</param>
 /// <param name="resourceStore">Resource store stored on the processor.</param>
 /// <param name="userSpaceUnit">User space unit stored on the processor.</param>
 /// <param name="rotation">Page rotation stored on the processor.</param>
 /// <param name="isLenientParsing">Lenient-parsing flag stored on the processor.</param>
 /// <param name="pdfScanner">Token scanner stored on the processor (not null-checked here).</param>
 /// <param name="xObjectFactory">XObject factory stored on the processor (not null-checked here).</param>
 /// <param name="log">Logger stored on the processor.</param>
 public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, bool isLenientParsing,
                               IPdfTokenScanner pdfScanner,
                               XObjectFactory xObjectFactory,
                               ILog log)
 {
     // Collaborators.
     this.log = log;
     this.pdfScanner = pdfScanner;
     this.xObjectFactory = xObjectFactory;
     this.resourceStore = resourceStore;

     // Page-level settings.
     this.rotation = rotation;
     this.userSpaceUnit = userSpaceUnit;
     this.isLenientParsing = isLenientParsing;

     // The stack must never be empty, so start with a default state.
     var initialState = new CurrentGraphicsState();
     graphicsStack.Push(initialState);

     ColorSpaceContext = new ColorSpaceContext(GetCurrentState);
 }
Пример #8
0
 /// <summary>
 /// Creates a content stream processor, seeds its graphics stack with a default graphics state
 /// and creates the colour space context from the current state accessor and the resource store.
 /// </summary>
 /// <param name="cropBox">Crop box of the page; not used by this constructor.</param>
 /// <param name="resourceStore">Resource store stored on the processor and handed to the colour space context.</param>
 /// <param name="userSpaceUnit">User space unit stored on the processor.</param>
 /// <param name="rotation">Page rotation stored on the processor.</param>
 /// <param name="pdfScanner">Token scanner; must not be null.</param>
 /// <param name="pageContentParser">Content parser; must not be null.</param>
 /// <param name="filterProvider">Filter provider; must not be null.</param>
 /// <param name="log">Logger stored on the processor.</param>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="pdfScanner"/>, <paramref name="pageContentParser"/> or
 /// <paramref name="filterProvider"/> is null.
 /// </exception>
 public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation,
                               IPdfTokenScanner pdfScanner,
                               IPageContentParser pageContentParser,
                               IFilterProvider filterProvider,
                               ILog log)
 {
     // Mandatory collaborators are validated in declaration order.
     if (pdfScanner == null)
     {
         throw new ArgumentNullException(nameof(pdfScanner));
     }

     if (pageContentParser == null)
     {
         throw new ArgumentNullException(nameof(pageContentParser));
     }

     if (filterProvider == null)
     {
         throw new ArgumentNullException(nameof(filterProvider));
     }

     this.resourceStore = resourceStore;
     this.userSpaceUnit = userSpaceUnit;
     this.rotation = rotation;
     this.pdfScanner = pdfScanner;
     this.pageContentParser = pageContentParser;
     this.filterProvider = filterProvider;
     this.log = log;

     // The stack must never be empty, so start with a default state.
     var initialState = new CurrentGraphicsState();
     graphicsStack.Push(initialState);

     ColorSpaceContext = new ColorSpaceContext(GetCurrentState, resourceStore);
 }
Пример #9
0
        /// <summary>
        /// Builds the page with the given number from its page dictionary.
        /// </summary>
        /// <remarks>
        /// Resource dictionaries (inherited ones first, then the page's own) are loaded into the
        /// resource store while the page is built and are always unloaded again before this method
        /// exits, including when building the page throws.
        /// </remarks>
        /// <param name="number">The page number, used for the created page and in error messages.</param>
        /// <param name="dictionary">The page dictionary; must not be null.</param>
        /// <param name="pageTreeMembers">Values inherited from the page tree (rotation, parent resources, boxes).</param>
        /// <param name="isLenientParsing">When true, a wrong <c>Type</c> entry is tolerated instead of throwing.</param>
        /// <returns>The created page.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The dictionary type is not <c>Page</c> (strict parsing only), or a content stream could not be found.
        /// </exception>
        /// <exception cref="PdfDocumentFormatException">
        /// The <c>Contents</c> array holds an item that is not an indirect reference.
        /// </exception>
        public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers,
                           bool isLenientParsing)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetNameOrDefault(NameToken.Type);

            if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
            {
                throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
            }

            // The page's own Rotate entry overrides the value inherited from the page tree.
            var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);

            if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
            {
                rotation = new PageRotationDegrees(rotateToken.Int);
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
            CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            // Number of resource dictionaries loaded so far; each one must be unloaded again.
            var stackDepth = 0;

            try
            {
                while (pageTreeMembers.ParentResources.Count > 0)
                {
                    var resource = pageTreeMembers.ParentResources.Dequeue();

                    resourceStore.LoadResourceDictionary(resource, isLenientParsing);
                    stackDepth++;
                }

                if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
                {
                    resourceStore.LoadResourceDictionary(resources, isLenientParsing);
                    stackDepth++;
                }

                UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

                // Stays at its default value when the page has no Contents entry.
                PageContent content = default(PageContent);

                if (dictionary.TryGet(NameToken.Contents, out var contents))
                {
                    if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
                    {
                        var bytes = new List<byte>();

                        for (var i = 0; i < array.Data.Count; i++)
                        {
                            var item = array.Data[i];

                            if (!(item is IndirectReferenceToken obj))
                            {
                                throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                            }

                            var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                            if (contentStream == null)
                            {
                                throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                            }

                            bytes.AddRange(contentStream.Decode(filterProvider));

                            // Separate consecutive streams so tokens cannot merge across the boundary.
                            if (i < array.Data.Count - 1)
                            {
                                bytes.Add((byte)'\n');
                            }
                        }

                        content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
                    }
                    else
                    {
                        var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

                        if (contentStream == null)
                        {
                            throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                        }

                        var bytes = contentStream.Decode(filterProvider);

                        content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
                    }
                }

                return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
                                new AnnotationProvider(pdfScanner, dictionary, isLenientParsing),
                                pdfScanner);
            }
            finally
            {
                // Runs on success and on failure so a broken page cannot leave its
                // resource dictionaries loaded in the shared store.
                for (var i = 0; i < stackDepth; i++)
                {
                    resourceStore.UnloadResourceDictionary();
                }
            }
        }
Пример #10
0
        /// <summary>
        /// Parses raw content stream bytes into operations and processes them with a new
        /// <see cref="ContentStreamProcessor"/>.
        /// </summary>
        /// <param name="pageNumber">Page number passed to the parser and the processor.</param>
        /// <param name="contentBytes">Decoded content stream bytes.</param>
        /// <param name="cropBox">Crop box whose bounds are passed to the processor.</param>
        /// <param name="userSpaceUnit">User space unit passed to the processor.</param>
        /// <param name="rotation">Page rotation passed to the processor.</param>
        /// <param name="isLenientParsing">Lenient-parsing flag passed to the processor.</param>
        /// <returns>The content produced by the processor.</returns>
        private PageContent GetContent(int pageNumber, IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit,
                                       PageRotationDegrees rotation,
                                       bool isLenientParsing)
        {
            var input = new ByteArrayInputBytes(contentBytes);
            var parsedOperations = pageContentParser.Parse(pageNumber, input, log);

            var processor = new ContentStreamProcessor(
                cropBox.Bounds,
                resourceStore,
                userSpaceUnit,
                rotation,
                isLenientParsing,
                pdfScanner,
                pageContentParser,
                filterProvider,
                log);

            var pageContent = processor.Process(pageNumber, parsedOperations);

            return pageContent;
        }
Пример #11
0
        /// <summary>
        /// Builds the page with the given number from its page dictionary.
        /// </summary>
        /// <remarks>
        /// Resource dictionaries (inherited ones first, then the page's own) are loaded into the
        /// resource store while the page is built and are always unloaded again before this method
        /// exits, including when building the page throws.
        /// </remarks>
        /// <param name="number">The page number, used for the created page and in messages.</param>
        /// <param name="dictionary">The page dictionary; must not be null.</param>
        /// <param name="pageTreeMembers">Values inherited from the page tree (rotation, parent resources, boxes).</param>
        /// <param name="clipPaths">Clip-paths flag forwarded to content processing.</param>
        /// <returns>The created page.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
        /// <exception cref="InvalidOperationException">A content stream could not be found.</exception>
        /// <exception cref="PdfDocumentFormatException">
        /// The <c>Contents</c> array holds an item that is not an indirect reference.
        /// </exception>
        public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool clipPaths)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetNameOrDefault(NameToken.Type);

            // A wrong type is only logged; page creation carries on regardless.
            if (type != null && !type.Equals(NameToken.Page))
            {
                log?.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
            CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            // The page's own Rotate entry overrides the value inherited from the page tree.
            var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);

            if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
            {
                rotation = new PageRotationDegrees(rotateToken.Int);
            }

            // Number of resource dictionaries loaded so far; each one must be unloaded again.
            var stackDepth = 0;

            try
            {
                while (pageTreeMembers.ParentResources.Count > 0)
                {
                    var resource = pageTreeMembers.ParentResources.Dequeue();

                    resourceStore.LoadResourceDictionary(resource);
                    stackDepth++;
                }

                if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
                {
                    resourceStore.LoadResourceDictionary(resources);
                    stackDepth++;
                }

                // Apply rotation: when the rotation swaps the axes, rebuild both boxes
                // with their horizontal and vertical coordinates exchanged.
                if (rotation.SwapsAxis)
                {
                    mediaBox = new MediaBox(new PdfRectangle(mediaBox.Bounds.Bottom,
                                                             mediaBox.Bounds.Left,
                                                             mediaBox.Bounds.Top,
                                                             mediaBox.Bounds.Right));
                    cropBox = new CropBox(new PdfRectangle(cropBox.Bounds.Bottom,
                                                           cropBox.Bounds.Left,
                                                           cropBox.Bounds.Top,
                                                           cropBox.Bounds.Right));
                }

                UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

                PageContent content;

                if (!dictionary.TryGet(NameToken.Contents, out var contents))
                {
                    // No Contents entry: the page is represented by content holding only empty collections.
                    content = new PageContent(EmptyArray<IGraphicsStateOperation>.Instance,
                                              EmptyArray<Letter>.Instance,
                                              EmptyArray<PdfPath>.Instance,
                                              EmptyArray<Union<XObjectContentRecord, InlineImage>>.Instance,
                                              EmptyArray<MarkedContentElement>.Instance,
                                              pdfScanner,
                                              filterProvider,
                                              resourceStore);
                }
                else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
                {
                    var bytes = new List<byte>();

                    for (var i = 0; i < array.Data.Count; i++)
                    {
                        var item = array.Data[i];

                        if (!(item is IndirectReferenceToken obj))
                        {
                            throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                        }

                        var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                        if (contentStream == null)
                        {
                            throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                        }

                        bytes.AddRange(contentStream.Decode(filterProvider, pdfScanner));

                        // Separate consecutive streams so tokens cannot merge across the boundary.
                        if (i < array.Data.Count - 1)
                        {
                            bytes.Add((byte)'\n');
                        }
                    }

                    content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
                }
                else
                {
                    var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

                    if (contentStream == null)
                    {
                        throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                    }

                    var bytes = contentStream.Decode(filterProvider, pdfScanner);

                    content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
                }

                return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
                                new AnnotationProvider(pdfScanner, dictionary),
                                pdfScanner);
            }
            finally
            {
                // Runs on success and on failure so a broken page cannot leave its
                // resource dictionaries loaded in the shared store.
                for (var i = 0; i < stackDepth; i++)
                {
                    resourceStore.UnloadResourceDictionary();
                }
            }
        }
Пример #12
0
        /// <summary>
        /// Parses raw content stream bytes into operations and processes them with a new
        /// <see cref="ContentStreamProcessor"/>.
        /// </summary>
        /// <param name="pageNumber">Page number passed to the parser and the processor.</param>
        /// <param name="contentBytes">Decoded content stream bytes.</param>
        /// <param name="cropBox">Crop box whose bounds are passed to the processor.</param>
        /// <param name="userSpaceUnit">User space unit passed to the processor.</param>
        /// <param name="rotation">Page rotation passed to the processor.</param>
        /// <param name="clipPaths">Clip-paths flag passed to the processor.</param>
        /// <param name="mediaBox">Media box whose width and height are passed to the processor as a vector.</param>
        /// <returns>The content produced by the processor.</returns>
        private PageContent GetContent(int pageNumber, IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit,
                                       PageRotationDegrees rotation, bool clipPaths, MediaBox mediaBox)
        {
            var input = new ByteArrayInputBytes(contentBytes);
            var parsedOperations = pageContentParser.Parse(pageNumber, input, log);

            var processor = new ContentStreamProcessor(
                cropBox.Bounds,
                resourceStore,
                userSpaceUnit,
                rotation,
                pdfScanner,
                pageContentParser,
                filterProvider,
                log,
                clipPaths,
                new PdfVector(mediaBox.Bounds.Width, mediaBox.Bounds.Height));

            var pageContent = processor.Process(pageNumber, parsedOperations);

            return pageContent;
        }
Пример #13
0
        /// <summary>
        /// Builds the page with the given number from its page dictionary.
        /// </summary>
        /// <param name="number">The page number, used for the created page and in error messages.</param>
        /// <param name="dictionary">The page dictionary; must not be null.</param>
        /// <param name="pageTreeMembers">Values inherited from the page tree, forwarded to the media/crop box lookups.</param>
        /// <param name="isLenientParsing">When true, a wrong <c>Type</c> entry is tolerated instead of throwing.</param>
        /// <returns>The created page; its content is left at the default value when there is no <c>Contents</c> entry.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The dictionary type is not <c>Page</c> (strict parsing only), or a content stream could not be found.
        /// </exception>
        /// <exception cref="PdfDocumentFormatException">
        /// The <c>Contents</c> array holds an item that is not an indirect reference.
        /// </exception>
        public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers,
                           bool isLenientParsing)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetNameOrDefault(NameToken.Type);

            if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
            {
                throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
            CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

            LoadResources(dictionary, isLenientParsing);

            // Stays at its default value when the page has no Contents entry.
            PageContent content = default(PageContent);

            if (dictionary.TryGet(NameToken.Contents, out var contents))
            {
                if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
                {
                    var bytes = new List<byte>();
                    var needsSeparator = false;

                    foreach (var item in array.Data)
                    {
                        if (!(item is IndirectReferenceToken obj))
                        {
                            throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                        }

                        var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                        if (contentStream == null)
                        {
                            throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                        }

                        // The streams form one logical content stream. Put whitespace between
                        // consecutive streams so the last token of one cannot fuse with the
                        // first token of the next when they are parsed together.
                        if (needsSeparator)
                        {
                            bytes.Add((byte)'\n');
                        }

                        bytes.AddRange(contentStream.Decode(filterProvider));
                        needsSeparator = true;
                    }

                    content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
                }
                else
                {
                    var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

                    if (contentStream == null)
                    {
                        throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                    }

                    var bytes = contentStream.Decode(filterProvider);

                    content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
                }
            }

            return new Page(number, mediaBox, cropBox, content);
        }
Пример #14
0
        /// <summary>
        /// Parses raw content stream bytes into operations and processes them with a new
        /// <see cref="ContentStreamProcessor"/>.
        /// </summary>
        /// <param name="contentBytes">Decoded content stream bytes.</param>
        /// <param name="cropBox">Crop box whose bounds are passed to the processor.</param>
        /// <param name="userSpaceUnit">User space unit passed to the processor.</param>
        /// <param name="isLenientParsing">Lenient-parsing flag passed to the processor.</param>
        /// <returns>The content produced by the processor.</returns>
        private PageContent GetContent(IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit, bool isLenientParsing)
        {
            var input = new ByteArrayInputBytes(contentBytes);
            var parsedOperations = pageContentParser.Parse(input);

            var processor = new ContentStreamProcessor(
                cropBox.Bounds,
                resourceStore,
                userSpaceUnit,
                isLenientParsing,
                pdfScanner,
                xObjectFactory);

            var pageContent = processor.Process(parsedOperations);

            return pageContent;
        }
Пример #15
0
        /// <summary>
        /// Parses raw content stream bytes into operations and processes them with a new
        /// <see cref="ContentStreamProcessor"/>.
        /// </summary>
        /// <param name="contentBytes">Decoded content stream bytes.</param>
        /// <param name="cropBox">Crop box whose bounds are passed to the processor.</param>
        /// <param name="userSpaceUnit">User space unit passed to the processor.</param>
        /// <param name="isLenientParsing">Lenient-parsing flag passed to the processor.</param>
        /// <returns>The content produced by the processor.</returns>
        private PageContent GetContent(IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit, bool isLenientParsing)
        {
            if (Debugger.IsAttached)
            {
                // Debugging aid only: the decoded text is never read by the code, it exists
                // so the raw content can be inspected while stepping through.
                var contentAsText = OtherEncodings.BytesAsLatin1String(contentBytes.ToArray());
            }

            var input = new ByteArrayInputBytes(contentBytes);
            var parsedOperations = pageContentParser.Parse(input);

            var processor = new ContentStreamProcessor(
                cropBox.Bounds,
                resourceStore,
                userSpaceUnit,
                isLenientParsing);

            var pageContent = processor.Process(parsedOperations);

            return pageContent;
        }