/// <summary>
/// Creates a processor whose initial graphics state carries a clipping path built from the crop box rectangle.
/// </summary>
/// <param name="cropBox">Rectangle used to build the initial clipping path.</param>
/// <param name="resourceStore">Resource store kept by the processor and handed to the color space context.</param>
/// <param name="userSpaceUnit">User space unit stored on the processor.</param>
/// <param name="rotation">Page rotation stored on the processor.</param>
/// <param name="pdfScanner">Token scanner; must not be null.</param>
/// <param name="pageContentParser">Page content parser; must not be null.</param>
/// <param name="filterProvider">Filter provider; must not be null.</param>
/// <param name="log">Logger stored on the processor; it is not null-checked here.</param>
/// <param name="clipPaths">Flag stored on the processor.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="pdfScanner"/>, <paramref name="pageContentParser"/> or
/// <paramref name="filterProvider"/> is null (checked in that order).
/// </exception>
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, IPdfTokenScanner pdfScanner, IPageContentParser pageContentParser, IFilterProvider filterProvider, ILog log, bool clipPaths)
{
    if (pdfScanner == null)
    {
        throw new ArgumentNullException(nameof(pdfScanner));
    }

    if (pageContentParser == null)
    {
        throw new ArgumentNullException(nameof(pageContentParser));
    }

    if (filterProvider == null)
    {
        throw new ArgumentNullException(nameof(filterProvider));
    }

    this.resourceStore = resourceStore;
    this.userSpaceUnit = userSpaceUnit;
    this.rotation = rotation;
    this.pdfScanner = pdfScanner;
    this.pageContentParser = pageContentParser;
    this.filterProvider = filterProvider;
    this.log = log;
    this.clipPaths = clipPaths;

    // Build a rectangular subpath from the crop box; it becomes the starting clipping path.
    var cropSubpath = new PdfSubpath();
    cropSubpath.Rectangle(cropBox.BottomLeft.X, cropBox.BottomLeft.Y, cropBox.Width, cropBox.Height);

    var cropClip = new PdfPath() { cropSubpath };
    cropClip.SetClipping(FillingRule.NonZeroWinding);

    var initialState = new CurrentGraphicsState() { CurrentClippingPath = cropClip };
    graphicsStack.Push(initialState);

    ColorSpaceContext = new ColorSpaceContext(GetCurrentState, resourceStore);
}
/// <summary>
/// Creates a processor and pushes a fresh graphics state onto the graphics stack.
/// </summary>
/// <param name="cropBox">Accepted for signature compatibility; this overload does not read it.</param>
/// <param name="resourceStore">Resource store kept by the processor.</param>
/// <param name="userSpaceUnit">User space unit stored on the processor.</param>
/// <param name="isLenientParsing">Lenient parsing flag stored on the processor.</param>
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing)
{
    this.isLenientParsing = isLenientParsing;
    this.userSpaceUnit = userSpaceUnit;
    this.resourceStore = resourceStore;

    // The stack always starts with one default state.
    var initialState = new CurrentGraphicsState();
    graphicsStack.Push(initialState);
}
/// <summary>
/// Reads the UserUnit entry of a page dictionary.
/// </summary>
/// <param name="dictionary">The page dictionary to inspect.</param>
/// <returns>
/// A <see cref="UserSpaceUnit"/> built from the entry's integer value when the entry exists and is numeric;
/// otherwise <see cref="UserSpaceUnit.Default"/>.
/// </returns>
private static UserSpaceUnit GetUserSpaceUnits(DictionaryToken dictionary)
{
    if (!dictionary.TryGet(NameToken.UserUnit, out var rawUserUnit))
    {
        return UserSpaceUnit.Default;
    }

    // A non-numeric entry is ignored and the default is used instead.
    return rawUserUnit is NumericToken numeric
        ? new UserSpaceUnit(numeric.Int)
        : UserSpaceUnit.Default;
}
/// <summary>
/// Reads the USER_UNIT entry of a page dictionary.
/// </summary>
/// <param name="dictionary">The page dictionary to inspect.</param>
/// <returns>
/// A <see cref="UserSpaceUnit"/> built from the entry's integer value when the entry exists and is a number;
/// otherwise <see cref="UserSpaceUnit.Default"/>.
/// </returns>
private static UserSpaceUnit GetUserSpaceUnits(PdfDictionary dictionary)
{
    if (!dictionary.TryGetValue(CosName.USER_UNIT, out var rawUserUnit))
    {
        return UserSpaceUnit.Default;
    }

    // A non-numeric entry is ignored and the default is used instead.
    return rawUserUnit is ICosNumber numeric
        ? new UserSpaceUnit(numeric.AsInt())
        : UserSpaceUnit.Default;
}
/// <summary>
/// Creates a processor, storing its collaborators and pushing a fresh graphics state onto the graphics stack.
/// </summary>
/// <param name="cropBox">Accepted for signature compatibility; this overload does not read it.</param>
/// <param name="resourceStore">Resource store kept by the processor.</param>
/// <param name="userSpaceUnit">User space unit stored on the processor.</param>
/// <param name="isLenientParsing">Lenient parsing flag stored on the processor.</param>
/// <param name="pdfScanner">Token scanner stored on the processor; it is not null-checked here.</param>
/// <param name="xObjectFactory">XObject factory stored on the processor; it is not null-checked here.</param>
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing, IPdfTokenScanner pdfScanner, XObjectFactory xObjectFactory)
{
    // Collaborators.
    this.xObjectFactory = xObjectFactory;
    this.pdfScanner = pdfScanner;
    this.resourceStore = resourceStore;

    // Page-level settings.
    this.isLenientParsing = isLenientParsing;
    this.userSpaceUnit = userSpaceUnit;

    // The stack always starts with one default state.
    var initialState = new CurrentGraphicsState();
    graphicsStack.Push(initialState);
}
/// <summary>
/// Builds a <see cref="Page"/> from its dictionary: checks the declared type, resolves the media and crop
/// boxes and the user space unit, loads the page resources and, when a contents object is present,
/// decodes, parses and processes its content stream.
/// </summary>
/// <param name="number">The page number, used in error messages and passed to the created page.</param>
/// <param name="dictionary">The page dictionary; must not be null.</param>
/// <param name="pageTreeMembers">Values passed on to the media box and crop box lookups.</param>
/// <param name="reader">Reader passed to resource loading and to the object parser.</param>
/// <param name="isLenientParsing">When true, a type other than 'Page' does not cause an exception.</param>
/// <returns>The created page; its content is the default value when the dictionary has no contents object.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown when the type is not 'Page' and parsing is not lenient, or when the contents object does not
/// parse to a raw stream.
/// </exception>
public Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetName(CosName.TYPE);

    if (type != null && !type.Equals(CosName.PAGE) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
    UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

    LoadResources(dictionary, reader, isLenientParsing);

    PageContent content = default(PageContent);

    if (dictionary.GetItemOrDefault(CosName.CONTENTS) is CosObject contentObject)
    {
        var contentStream = pdfObjectParser.Parse(contentObject.ToIndirectReference(), reader, false) as PdfRawStream;

        if (contentStream == null)
        {
            throw new InvalidOperationException("Failed to parse the content for the page: " + number);
        }

        // The decoded bytes go straight to the parser; no string copy of the whole stream is made.
        var contents = contentStream.Decode(filterProvider);
        var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));

        var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);
        content = context.Process(operations);
    }

    return new Page(number, mediaBox, cropBox, content);
}
/// <summary>
/// Creates a processor, storing its collaborators, pushing a fresh graphics state onto the graphics stack
/// and creating the color space context.
/// </summary>
/// <param name="cropBox">Accepted for signature compatibility; this overload does not read it.</param>
/// <param name="resourceStore">Resource store kept by the processor.</param>
/// <param name="userSpaceUnit">User space unit stored on the processor.</param>
/// <param name="rotation">Page rotation stored on the processor.</param>
/// <param name="isLenientParsing">Lenient parsing flag stored on the processor.</param>
/// <param name="pdfScanner">Token scanner stored on the processor; it is not null-checked here.</param>
/// <param name="xObjectFactory">XObject factory stored on the processor; it is not null-checked here.</param>
/// <param name="log">Logger stored on the processor; it is not null-checked here.</param>
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, bool isLenientParsing, IPdfTokenScanner pdfScanner, XObjectFactory xObjectFactory, ILog log)
{
    // Collaborators.
    this.log = log;
    this.xObjectFactory = xObjectFactory;
    this.pdfScanner = pdfScanner;
    this.resourceStore = resourceStore;

    // Page-level settings.
    this.isLenientParsing = isLenientParsing;
    this.rotation = rotation;
    this.userSpaceUnit = userSpaceUnit;

    // The stack must hold a state before the color space context is created with GetCurrentState.
    var initialState = new CurrentGraphicsState();
    graphicsStack.Push(initialState);

    ColorSpaceContext = new ColorSpaceContext(GetCurrentState);
}
/// <summary>
/// Creates a processor, validating the required collaborators, pushing a fresh graphics state onto the
/// graphics stack and creating the color space context.
/// </summary>
/// <param name="cropBox">Accepted for signature compatibility; this overload does not read it.</param>
/// <param name="resourceStore">Resource store kept by the processor and handed to the color space context.</param>
/// <param name="userSpaceUnit">User space unit stored on the processor.</param>
/// <param name="rotation">Page rotation stored on the processor.</param>
/// <param name="pdfScanner">Token scanner; must not be null.</param>
/// <param name="pageContentParser">Page content parser; must not be null.</param>
/// <param name="filterProvider">Filter provider; must not be null.</param>
/// <param name="log">Logger stored on the processor; it is not null-checked here.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="pdfScanner"/>, <paramref name="pageContentParser"/> or
/// <paramref name="filterProvider"/> is null (checked in that order).
/// </exception>
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, IPdfTokenScanner pdfScanner, IPageContentParser pageContentParser, IFilterProvider filterProvider, ILog log)
{
    if (pdfScanner == null)
    {
        throw new ArgumentNullException(nameof(pdfScanner));
    }

    if (pageContentParser == null)
    {
        throw new ArgumentNullException(nameof(pageContentParser));
    }

    if (filterProvider == null)
    {
        throw new ArgumentNullException(nameof(filterProvider));
    }

    this.resourceStore = resourceStore;
    this.userSpaceUnit = userSpaceUnit;
    this.rotation = rotation;
    this.pdfScanner = pdfScanner;
    this.pageContentParser = pageContentParser;
    this.filterProvider = filterProvider;
    this.log = log;

    // The stack must hold a state before the color space context is created with GetCurrentState.
    var initialState = new CurrentGraphicsState();
    graphicsStack.Push(initialState);

    ColorSpaceContext = new ColorSpaceContext(GetCurrentState, resourceStore);
}
/// <summary>
/// Builds a <see cref="Page"/> from its dictionary: checks the declared type, resolves rotation and the
/// media and crop boxes, loads the inherited and page-level resource dictionaries, decodes the content
/// stream (or array of content streams) and processes it.
/// </summary>
/// <param name="number">The page number, used in error messages and passed to the created page.</param>
/// <param name="dictionary">The page dictionary; must not be null.</param>
/// <param name="pageTreeMembers">
/// Supplies the inherited rotation and the queue of parent resource dictionaries. The queue is drained by
/// this call.
/// </param>
/// <param name="isLenientParsing">When true, a type other than 'Page' does not cause an exception.</param>
/// <returns>The created page; its content is the default value when the dictionary has no Contents entry.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown when the type is not 'Page' and parsing is not lenient, or when a content stream cannot be found.
/// </exception>
/// <exception cref="PdfDocumentFormatException">
/// Thrown when a Contents array holds an item which is not an indirect reference.
/// </exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    // A Rotate entry on the page itself overrides the value inherited through the page tree.
    var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
    if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
    {
        rotation = new PageRotationDegrees(rotateToken.Int);
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

    // Counts the resource dictionaries loaded so exactly that many are unloaded again.
    var stackDepth = 0;

    try
    {
        while (pageTreeMembers.ParentResources.Count > 0)
        {
            var resource = pageTreeMembers.ParentResources.Dequeue();

            resourceStore.LoadResourceDictionary(resource, isLenientParsing);
            stackDepth++;
        }

        if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
        {
            resourceStore.LoadResourceDictionary(resources, isLenientParsing);
            stackDepth++;
        }

        UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

        PageContent content = default(PageContent);

        if (!dictionary.TryGet(NameToken.Contents, out var contents))
        {
            // ignored for now, is it possible? check the spec...
        }
        else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
        {
            var bytes = new List<byte>();

            for (var i = 0; i < array.Data.Count; i++)
            {
                var item = array.Data[i];

                if (!(item is IndirectReferenceToken obj))
                {
                    throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                }

                var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                }

                bytes.AddRange(contentStream.Decode(filterProvider));

                // Separate consecutive streams so tokens at the stream boundary do not run together.
                if (i < array.Data.Count - 1)
                {
                    bytes.Add((byte)'\n');
                }
            }

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
        }
        else
        {
            var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException("Failed to parse the content for the page: " + number);
            }

            var bytes = contentStream.Decode(filterProvider);

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
        }

        return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
            new AnnotationProvider(pdfScanner, dictionary, isLenientParsing),
            pdfScanner);
    }
    finally
    {
        // Runs on failure as well as success so the resource store is left balanced when an
        // exception escapes while loading resources or processing content.
        for (var i = 0; i < stackDepth; i++)
        {
            resourceStore.UnloadResourceDictionary();
        }
    }
}
/// <summary>
/// Parses the decoded content bytes into operations and runs them through a new
/// <see cref="ContentStreamProcessor"/>.
/// </summary>
/// <param name="pageNumber">The page number passed to the parser and the processor.</param>
/// <param name="contentBytes">The decoded content stream bytes.</param>
/// <param name="cropBox">The crop box whose bounds are passed to the processor.</param>
/// <param name="userSpaceUnit">The user space unit passed to the processor.</param>
/// <param name="rotation">The page rotation passed to the processor.</param>
/// <param name="isLenientParsing">The lenient parsing flag passed to the processor.</param>
/// <returns>The result of processing the parsed operations.</returns>
private PageContent GetContent(int pageNumber, IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, bool isLenientParsing)
{
    var input = new ByteArrayInputBytes(contentBytes);
    var parsedOperations = pageContentParser.Parse(pageNumber, input, log);

    var processor = new ContentStreamProcessor(
        cropBox.Bounds,
        resourceStore,
        userSpaceUnit,
        rotation,
        isLenientParsing,
        pdfScanner,
        pageContentParser,
        filterProvider,
        log);

    return processor.Process(pageNumber, parsedOperations);
}
/// <summary>
/// Builds a <see cref="Page"/> from its dictionary: logs an unexpected type, resolves the media and crop
/// boxes and rotation, loads the inherited and page-level resource dictionaries, swaps the box axes when
/// the rotation requires it, decodes the content stream (or array of content streams) and processes it.
/// </summary>
/// <param name="number">The page number, used in messages and passed to the created page.</param>
/// <param name="dictionary">The page dictionary; must not be null.</param>
/// <param name="pageTreeMembers">
/// Supplies the inherited rotation and the queue of parent resource dictionaries. The queue is drained by
/// this call.
/// </param>
/// <param name="clipPaths">Flag passed through to content processing.</param>
/// <returns>The created page; its content is empty when the dictionary has no Contents entry.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">Thrown when a content stream cannot be found.</exception>
/// <exception cref="PdfDocumentFormatException">
/// Thrown when a Contents array holds an item which is not an indirect reference.
/// </exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool clipPaths)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    // An unexpected type is reported but does not stop page creation.
    if (type != null && !type.Equals(NameToken.Page))
    {
        log?.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

    // A Rotate entry on the page itself overrides the value inherited through the page tree.
    var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
    if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
    {
        rotation = new PageRotationDegrees(rotateToken.Int);
    }

    // Counts the resource dictionaries loaded so exactly that many are unloaded again.
    var stackDepth = 0;

    try
    {
        while (pageTreeMembers.ParentResources.Count > 0)
        {
            var resource = pageTreeMembers.ParentResources.Dequeue();

            resourceStore.LoadResourceDictionary(resource);
            stackDepth++;
        }

        if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
        {
            resourceStore.LoadResourceDictionary(resources);
            stackDepth++;
        }

        // Apply rotation.
        if (rotation.SwapsAxis)
        {
            mediaBox = new MediaBox(new PdfRectangle(mediaBox.Bounds.Bottom, mediaBox.Bounds.Left, mediaBox.Bounds.Top, mediaBox.Bounds.Right));
            cropBox = new CropBox(new PdfRectangle(cropBox.Bounds.Bottom, cropBox.Bounds.Left, cropBox.Bounds.Top, cropBox.Bounds.Right));
        }

        UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

        PageContent content;

        if (!dictionary.TryGet(NameToken.Contents, out var contents))
        {
            content = new PageContent(
                EmptyArray<IGraphicsStateOperation>.Instance,
                EmptyArray<Letter>.Instance,
                EmptyArray<PdfPath>.Instance,
                EmptyArray<Union<XObjectContentRecord, InlineImage>>.Instance,
                EmptyArray<MarkedContentElement>.Instance,
                pdfScanner,
                filterProvider,
                resourceStore);
            // ignored for now, is it possible? check the spec...
        }
        else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
        {
            var bytes = new List<byte>();

            for (var i = 0; i < array.Data.Count; i++)
            {
                var item = array.Data[i];

                if (!(item is IndirectReferenceToken obj))
                {
                    throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                }

                var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                }

                bytes.AddRange(contentStream.Decode(filterProvider, pdfScanner));

                // Separate consecutive streams so tokens at the stream boundary do not run together.
                if (i < array.Data.Count - 1)
                {
                    bytes.Add((byte)'\n');
                }
            }

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
        }
        else
        {
            var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException("Failed to parse the content for the page: " + number);
            }

            var bytes = contentStream.Decode(filterProvider, pdfScanner);

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
        }

        return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
            new AnnotationProvider(pdfScanner, dictionary),
            pdfScanner);
    }
    finally
    {
        // Runs on failure as well as success so the resource store is left balanced when an
        // exception escapes while loading resources or processing content.
        for (var i = 0; i < stackDepth; i++)
        {
            resourceStore.UnloadResourceDictionary();
        }
    }
}
/// <summary>
/// Parses the decoded content bytes into operations and runs them through a new
/// <see cref="ContentStreamProcessor"/>.
/// </summary>
/// <param name="pageNumber">The page number passed to the parser and the processor.</param>
/// <param name="contentBytes">The decoded content stream bytes.</param>
/// <param name="cropBox">The crop box whose bounds are passed to the processor.</param>
/// <param name="userSpaceUnit">The user space unit passed to the processor.</param>
/// <param name="rotation">The page rotation passed to the processor.</param>
/// <param name="clipPaths">The flag passed to the processor.</param>
/// <param name="mediaBox">The media box whose width and height are passed to the processor as a vector.</param>
/// <returns>The result of processing the parsed operations.</returns>
private PageContent GetContent(int pageNumber, IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, bool clipPaths, MediaBox mediaBox)
{
    var input = new ByteArrayInputBytes(contentBytes);
    var parsedOperations = pageContentParser.Parse(pageNumber, input, log);

    var pageSize = new PdfVector(mediaBox.Bounds.Width, mediaBox.Bounds.Height);

    var processor = new ContentStreamProcessor(
        cropBox.Bounds,
        resourceStore,
        userSpaceUnit,
        rotation,
        pdfScanner,
        pageContentParser,
        filterProvider,
        log,
        clipPaths,
        pageSize);

    return processor.Process(pageNumber, parsedOperations);
}
/// <summary>
/// Builds a <see cref="Page"/> from its dictionary: checks the declared type, resolves the media and crop
/// boxes and the user space unit, loads the page resources, decodes the content stream (or array of
/// content streams) and processes it.
/// </summary>
/// <param name="number">The page number, used in error messages and passed to the created page.</param>
/// <param name="dictionary">The page dictionary; must not be null.</param>
/// <param name="pageTreeMembers">Values passed on to the media box and crop box lookups.</param>
/// <param name="isLenientParsing">When true, a type other than 'Page' does not cause an exception.</param>
/// <returns>The created page; its content is the default value when the dictionary has no Contents entry.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown when the type is not 'Page' and parsing is not lenient, or when a content stream cannot be found.
/// </exception>
/// <exception cref="PdfDocumentFormatException">
/// Thrown when a Contents array holds an item which is not an indirect reference.
/// </exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
    UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

    LoadResources(dictionary, isLenientParsing);

    PageContent content = default(PageContent);

    if (!dictionary.TryGet(NameToken.Contents, out var contents))
    {
        // ignored for now, is it possible? check the spec...
    }
    else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
    {
        var bytes = new List<byte>();
        var isFirstStream = true;

        foreach (var item in array.Data)
        {
            if (!(item is IndirectReferenceToken obj))
            {
                throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
            }

            var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException($"Could not find the contents for object {obj}.");
            }

            // Streams in a Contents array may be split at any token boundary. Without whitespace
            // between them the last token of one stream fuses with the first token of the next.
            if (!isFirstStream)
            {
                bytes.Add((byte)'\n');
            }

            bytes.AddRange(contentStream.Decode(filterProvider));
            isFirstStream = false;
        }

        content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
    }
    else
    {
        var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

        if (contentStream == null)
        {
            throw new InvalidOperationException("Failed to parse the content for the page: " + number);
        }

        var bytes = contentStream.Decode(filterProvider);

        content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
    }

    return new Page(number, mediaBox, cropBox, content);
}
/// <summary>
/// Parses the decoded content bytes into operations and runs them through a new
/// <see cref="ContentStreamProcessor"/>.
/// </summary>
/// <param name="contentBytes">The decoded content stream bytes.</param>
/// <param name="cropBox">The crop box whose bounds are passed to the processor.</param>
/// <param name="userSpaceUnit">The user space unit passed to the processor.</param>
/// <param name="isLenientParsing">The lenient parsing flag passed to the processor.</param>
/// <returns>The result of processing the parsed operations.</returns>
private PageContent GetContent(IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit, bool isLenientParsing)
{
    var input = new ByteArrayInputBytes(contentBytes);
    var parsedOperations = pageContentParser.Parse(input);

    var processor = new ContentStreamProcessor(
        cropBox.Bounds,
        resourceStore,
        userSpaceUnit,
        isLenientParsing,
        pdfScanner,
        xObjectFactory);

    return processor.Process(parsedOperations);
}
/// <summary>
/// Parses the decoded content bytes into operations and runs them through a new
/// <see cref="ContentStreamProcessor"/>.
/// </summary>
/// <param name="contentBytes">The decoded content stream bytes.</param>
/// <param name="cropBox">The crop box whose bounds are passed to the processor.</param>
/// <param name="userSpaceUnit">The user space unit passed to the processor.</param>
/// <param name="isLenientParsing">The lenient parsing flag passed to the processor.</param>
/// <returns>The result of processing the parsed operations.</returns>
private PageContent GetContent(IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit, bool isLenientParsing)
{
    if (Debugger.IsAttached)
    {
        // Decoded only while a debugger is attached; the local is not read by any other code.
        var contentText = OtherEncodings.BytesAsLatin1String(contentBytes.ToArray());
    }

    var input = new ByteArrayInputBytes(contentBytes);
    var parsedOperations = pageContentParser.Parse(input);

    var processor = new ContentStreamProcessor(
        cropBox.Bounds,
        resourceStore,
        userSpaceUnit,
        isLenientParsing);

    return processor.Process(parsedOperations);
}