/// <summary>
/// Works out the media box to use for a page.
/// </summary>
/// <param name="number">The page number, only used in the exception message.</param>
/// <param name="dictionary">The page dictionary which is checked for a MediaBox entry.</param>
/// <param name="pageTreeMembers">Supplies the media box to fall back to when the dictionary does not yield an array.</param>
/// <param name="isLenientParsing">When true, a wrong-length array or a missing media box results in <see cref="MediaBox.A4"/>.</param>
/// <returns>The media box for the page.</returns>
/// <exception cref="InvalidOperationException">
/// No media box could be found on the page or the page tree members and lenient parsing is off.
/// </exception>
private MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
    if (dictionary.TryGet(NameToken.MediaBox, out var mediaboxObject)
        && DirectObjectFinder.TryGet(mediaboxObject, pdfScanner, out ArrayToken mediaboxArray))
    {
        // Anything other than a wrong-length array in lenient mode is converted as-is.
        if (mediaboxArray.Length == 4 || !isLenientParsing)
        {
            return new MediaBox(mediaboxArray.ToIntRectangle());
        }

        log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaboxArray}.");

        return MediaBox.A4;
    }

    var inherited = pageTreeMembers.MediaBox;

    if (inherited == null)
    {
        if (!isLenientParsing)
        {
            throw new InvalidOperationException("No mediabox was present for page: " + number);
        }

        return MediaBox.A4;
    }

    return inherited;
}
/// <summary>
/// Creates the <see cref="Page"/> described by a page dictionary.
/// </summary>
/// <param name="number">The page number; used in error messages and passed on to the created page.</param>
/// <param name="dictionary">The page dictionary. Must not be null.</param>
/// <param name="pageTreeMembers">
/// Values collected from the page tree: the rotation, the queue of parent resource dictionaries (which is drained by this call),
/// and the object handed to the media box and crop box lookups.
/// </param>
/// <param name="isLenientParsing">
/// When true a Type entry other than Page is tolerated. The flag is also forwarded to resource loading, content parsing and the annotation provider.
/// </param>
/// <returns>The created page. Its content is the default (null) value when the dictionary has no Contents entry.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The Type entry is not Page and lenient parsing is off, or a content stream could not be found.
/// </exception>
/// <exception cref="PdfDocumentFormatException">The Contents array holds an entry which is not an indirect reference.</exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    // The page's own Rotate entry takes precedence over the value from the page tree.
    var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
    if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
    {
        rotation = new PageRotationDegrees(rotateToken.Int);
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

    // Counts how many resource dictionaries were loaded so exactly that many are unloaded again.
    var stackDepth = 0;

    try
    {
        while (pageTreeMembers.ParentResources.Count > 0)
        {
            var resource = pageTreeMembers.ParentResources.Dequeue();

            resourceStore.LoadResourceDictionary(resource, isLenientParsing);
            stackDepth++;
        }

        if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
        {
            resourceStore.LoadResourceDictionary(resources, isLenientParsing);
            stackDepth++;
        }

        UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

        PageContent content = default(PageContent);

        if (!dictionary.TryGet(NameToken.Contents, out var contents))
        {
            // ignored for now, is it possible? check the spec...
        }
        else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
        {
            var bytes = new List<byte>();

            for (var i = 0; i < array.Data.Count; i++)
            {
                var item = array.Data[i];

                if (!(item is IndirectReferenceToken obj))
                {
                    throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                }

                var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                }

                bytes.AddRange(contentStream.Decode(filterProvider));

                // Separate consecutive streams so tokens at the boundary cannot run together.
                if (i < array.Data.Count - 1)
                {
                    bytes.Add((byte)'\n');
                }
            }

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
        }
        else
        {
            var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException("Failed to parse the content for the page: " + number);
            }

            var bytes = contentStream.Decode(filterProvider);

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
        }

        return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
            new AnnotationProvider(pdfScanner, dictionary, isLenientParsing),
            pdfScanner);
    }
    finally
    {
        // Unload in a finally block so a failure while reading the content does not leave
        // this page's resource dictionaries loaded in the shared resource store.
        for (var i = 0; i < stackDepth; i++)
        {
            resourceStore.UnloadResourceDictionary();
        }
    }
}
/// <summary>
/// Works out the crop box to use for a page.
/// </summary>
/// <remarks>
/// A CropBox array found through the page dictionary is used when it has four entries. An array of any other length is
/// reported to the log (if one is available) and the media box bounds are used instead. When the dictionary yields no array,
/// the crop box from <paramref name="pageTreeMembers"/> is used, falling back to the media box bounds.
/// </remarks>
/// <param name="dictionary">The page dictionary which is checked for a CropBox entry.</param>
/// <param name="pageTreeMembers">Supplies the crop box to use when the dictionary does not yield an array.</param>
/// <param name="mediaBox">The page's media box, whose bounds are the final fallback.</param>
/// <returns>The crop box for the page.</returns>
private CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox)
{
    if (dictionary.TryGet(NameToken.CropBox, out var cropBoxObject)
        && DirectObjectFinder.TryGet(cropBoxObject, pdfScanner, out ArrayToken cropBoxArray))
    {
        if (cropBoxArray.Length != 4)
        {
            // Null-conditional call: a missing logger must not turn this fallback into a NullReferenceException.
            log?.Error($"The CropBox was the wrong length in the dictionary: {dictionary}. Array was: {cropBoxArray}. Using MediaBox.");

            return new CropBox(mediaBox.Bounds);
        }

        return new CropBox(cropBoxArray.ToIntRectangle(pdfScanner));
    }

    return pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
}
/// <summary>
/// Creates the <see cref="Page"/> described by a page dictionary.
/// </summary>
/// <param name="number">The page number; used in log and error messages and passed on to the created page.</param>
/// <param name="dictionary">The page dictionary. Must not be null.</param>
/// <param name="pageTreeMembers">
/// Values collected from the page tree: the rotation, the queue of parent resource dictionaries (which is drained by this call),
/// and the object handed to the media box and crop box lookups.
/// </param>
/// <param name="clipPaths">Forwarded unchanged to the content parsing step.</param>
/// <returns>The created page. A page without a Contents entry receives an empty <see cref="PageContent"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">A content stream could not be found.</exception>
/// <exception cref="PdfDocumentFormatException">The Contents array holds an entry which is not an indirect reference.</exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool clipPaths)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    // A wrong Type entry is only reported, it does not stop the page from being created.
    if (type != null && !type.Equals(NameToken.Page))
    {
        log?.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

    // The page's own Rotate entry takes precedence over the value from the page tree.
    var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
    if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
    {
        rotation = new PageRotationDegrees(rotateToken.Int);
    }

    // Counts how many resource dictionaries were loaded so exactly that many are unloaded again.
    var stackDepth = 0;

    try
    {
        while (pageTreeMembers.ParentResources.Count > 0)
        {
            var resource = pageTreeMembers.ParentResources.Dequeue();

            resourceStore.LoadResourceDictionary(resource);
            stackDepth++;
        }

        if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
        {
            resourceStore.LoadResourceDictionary(resources);
            stackDepth++;
        }

        // Apply rotation.
        if (rotation.SwapsAxis)
        {
            mediaBox = new MediaBox(new PdfRectangle(mediaBox.Bounds.Bottom,
                mediaBox.Bounds.Left,
                mediaBox.Bounds.Top,
                mediaBox.Bounds.Right));
            cropBox = new CropBox(new PdfRectangle(cropBox.Bounds.Bottom,
                cropBox.Bounds.Left,
                cropBox.Bounds.Top,
                cropBox.Bounds.Right));
        }

        UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

        PageContent content;

        if (!dictionary.TryGet(NameToken.Contents, out var contents))
        {
            // No Contents entry: give the page an empty content object.
            // ignored for now, is it possible? check the spec...
            content = new PageContent(EmptyArray<IGraphicsStateOperation>.Instance,
                EmptyArray<Letter>.Instance,
                EmptyArray<PdfPath>.Instance,
                EmptyArray<Union<XObjectContentRecord, InlineImage>>.Instance,
                EmptyArray<MarkedContentElement>.Instance,
                pdfScanner,
                filterProvider,
                resourceStore);
        }
        else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
        {
            var bytes = new List<byte>();

            for (var i = 0; i < array.Data.Count; i++)
            {
                var item = array.Data[i];

                if (!(item is IndirectReferenceToken obj))
                {
                    throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                }

                var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                }

                bytes.AddRange(contentStream.Decode(filterProvider, pdfScanner));

                // Separate consecutive streams so tokens at the boundary cannot run together.
                if (i < array.Data.Count - 1)
                {
                    bytes.Add((byte)'\n');
                }
            }

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
        }
        else
        {
            var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException("Failed to parse the content for the page: " + number);
            }

            var bytes = contentStream.Decode(filterProvider, pdfScanner);

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
        }

        return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
            new AnnotationProvider(pdfScanner, dictionary),
            pdfScanner);
    }
    finally
    {
        // Unload in a finally block so a failure while reading the content does not leave
        // this page's resource dictionaries loaded in the shared resource store.
        for (var i = 0; i < stackDepth; i++)
        {
            resourceStore.UnloadResourceDictionary();
        }
    }
}
/// <summary>
/// Creates the <see cref="Page"/> described by a page dictionary.
/// </summary>
/// <param name="number">The page number; used in error messages and passed on to the created page.</param>
/// <param name="dictionary">The page dictionary. Must not be null.</param>
/// <param name="pageTreeMembers">Passed to the media box and crop box lookups.</param>
/// <param name="isLenientParsing">
/// When true a Type entry other than Page is tolerated. The flag is also forwarded to the media box lookup, resource loading and content parsing.
/// </param>
/// <returns>The created page. Its content is the default (null) value when the dictionary has no Contents entry.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The Type entry is not Page and lenient parsing is off, or a content stream could not be found.
/// </exception>
/// <exception cref="PdfDocumentFormatException">The Contents array holds an entry which is not an indirect reference.</exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

    UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

    LoadResources(dictionary, isLenientParsing);

    PageContent content = default(PageContent);

    if (!dictionary.TryGet(NameToken.Contents, out var contents))
    {
        // ignored for now, is it possible? check the spec...
    }
    else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
    {
        var bytes = new List<byte>();
        var isFirstStream = true;

        foreach (var item in array.Data)
        {
            if (!(item is IndirectReferenceToken obj))
            {
                throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
            }

            var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException($"Could not find the contents for object {obj}.");
            }

            // Put a newline between consecutive streams; without it the last token of one
            // stream and the first token of the next would be read as a single token.
            if (!isFirstStream)
            {
                bytes.Add((byte)'\n');
            }

            bytes.AddRange(contentStream.Decode(filterProvider));
            isFirstStream = false;
        }

        content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
    }
    else
    {
        var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

        if (contentStream == null)
        {
            throw new InvalidOperationException("Failed to parse the content for the page: " + number);
        }

        var bytes = contentStream.Decode(filterProvider);

        content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
    }

    return new Page(number, mediaBox, cropBox, content);
}
/// <summary>
/// Works out the crop box to use for a page.
/// </summary>
/// <remarks>
/// When the dictionary's CropBox entry is an <see cref="ArrayToken"/>, its first four numeric entries are read as integers
/// and used as the rectangle. Otherwise the crop box from <paramref name="pageTreeMembers"/> is used, falling back to the
/// bounds of <paramref name="mediaBox"/>.
/// </remarks>
/// <param name="dictionary">The page dictionary which is checked for a CropBox entry.</param>
/// <param name="pageTreeMembers">Supplies the crop box to use when the dictionary has no array entry.</param>
/// <param name="mediaBox">The page's media box, whose bounds are the final fallback.</param>
/// <returns>The crop box for the page.</returns>
private static CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox)
{
    if (!dictionary.TryGet(NameToken.CropBox, out var entry) || !(entry is ArrayToken corners))
    {
        return pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
    }

    // Entries are read in index order 0..3, matching the argument order of the rectangle.
    var rectangle = new PdfRectangle(
        corners.GetNumeric(0).Int,
        corners.GetNumeric(1).Int,
        corners.GetNumeric(2).Int,
        corners.GetNumeric(3).Int);

    return new CropBox(rectangle);
}
/// <summary>
/// Works out the crop box to use for a page.
/// </summary>
/// <remarks>
/// When the dictionary provides a <see cref="COSArray"/> for the crop box key, its first four entries are read as integers
/// and used as the rectangle. Otherwise the crop box from <paramref name="pageTreeMembers"/> is used, falling back to the
/// bounds of <paramref name="mediaBox"/>.
/// </remarks>
/// <param name="dictionary">The page dictionary which is checked for a crop box array.</param>
/// <param name="pageTreeMembers">Supplies the crop box to use when the dictionary has no array entry.</param>
/// <param name="mediaBox">The page's media box, whose bounds are the final fallback.</param>
/// <returns>The crop box for the page.</returns>
private static CropBox GetCropBox(PdfDictionary dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox)
{
    if (!dictionary.TryGetItemOfType(CosName.CROP_BOX, out COSArray corners))
    {
        return pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
    }

    // Entries are read in index order 0..3, matching the argument order of the rectangle.
    var rectangle = new PdfRectangle(
        corners.getInt(0),
        corners.getInt(1),
        corners.getInt(2),
        corners.getInt(3));

    return new CropBox(rectangle);
}