/// <summary>
/// Builds an <see cref="XObjectImage"/> from the stream dictionary and data of an image XObject record.
/// </summary>
/// <param name="xObject">The XObject record to read; must not be null and must have type <see cref="XObjectType.Image"/>.</param>
/// <param name="pdfScanner">Scanner passed to the dictionary lookups which resolve their values through it.</param>
/// <param name="filterProvider">Supplies the filters for the stream and is used to decode it lazily.</param>
/// <param name="resourceStore">Passed to the color space mapping and color space detail lookups.</param>
/// <returns>The image description, with lazily decoded bytes when every filter reports itself as supported.</returns>
/// <exception cref="ArgumentNullException"><paramref name="xObject"/> is null.</exception>
/// <exception cref="InvalidOperationException">The XObject is not an image.</exception>
/// <exception cref="PdfDocumentFormatException">
/// The image is neither an image mask nor JPX encoded and defines no bits per component.
/// </exception>
public static XObjectImage ReadImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner, IFilterProvider filterProvider, IResourceStore resourceStore)
{
    if (xObject == null)
    {
        throw new ArgumentNullException(nameof(xObject));
    }

    if (xObject.Type != XObjectType.Image)
    {
        throw new InvalidOperationException($"Cannot create an image from an XObject with type: {xObject.Type}.");
    }

    var imageDictionary = xObject.Stream.StreamDictionary;

    // The bounds are the unit square mapped through the transformation applied to the XObject.
    var unitSquare = new PdfRectangle(new PdfPoint(0, 0), new PdfPoint(1, 1));
    var bounds = xObject.AppliedTransformation.Transform(unitSquare);

    var width = imageDictionary.Get<NumericToken>(NameToken.Width, pdfScanner).Int;
    var height = imageDictionary.Get<NumericToken>(NameToken.Height, pdfScanner).Int;

    var isImageMask = imageDictionary.TryGet(NameToken.ImageMask, pdfScanner, out BooleanToken maskFlag)
        && maskFlag.Data;

    var isJpxDecode = imageDictionary.TryGet(NameToken.Filter, out var directFilter)
        && directFilter is NameToken directFilterName
        && directFilterName.Equals(NameToken.JpxDecode);

    // Image masks always use 1 bit per component; JPX images may omit the entry (left at 0);
    // every other image must declare it.
    int bitsPerComponent;
    if (isImageMask)
    {
        bitsPerComponent = 1;
    }
    else if (isJpxDecode)
    {
        bitsPerComponent = 0;
    }
    else
    {
        if (!imageDictionary.TryGet(NameToken.BitsPerComponent, pdfScanner, out NumericToken bitsToken))
        {
            throw new PdfDocumentFormatException($"No bits per component defined for image: {imageDictionary}.");
        }

        bitsPerComponent = bitsToken.Int;
    }

    var intent = xObject.DefaultRenderingIntent;
    if (imageDictionary.TryGet(NameToken.Intent, out NameToken intentName))
    {
        intent = intentName.Data.ToRenderingIntent();
    }

    var interpolate = imageDictionary.TryGet(NameToken.Interpolate, pdfScanner, out BooleanToken interpolateFlag)
        && interpolateFlag.Data;

    // When the filter entry is an indirect reference, build a copy of the dictionary holding the resolved
    // array or name; if it resolves to neither, there is no usable filter dictionary.
    DictionaryToken filterDictionary = xObject.Stream.StreamDictionary;
    if (xObject.Stream.StreamDictionary.TryGet(NameToken.Filter, out var rawFilter)
        && rawFilter is IndirectReferenceToken)
    {
        if (filterDictionary.TryGet(NameToken.Filter, pdfScanner, out ArrayToken resolvedArray))
        {
            filterDictionary = filterDictionary.With(NameToken.Filter, resolvedArray);
        }
        else if (filterDictionary.TryGet(NameToken.Filter, pdfScanner, out NameToken resolvedName))
        {
            filterDictionary = filterDictionary.With(NameToken.Filter, resolvedName);
        }
        else
        {
            filterDictionary = null;
        }
    }

    var supportsFilters = false;
    if (filterDictionary != null)
    {
        supportsFilters = true;
        foreach (var candidate in filterProvider.GetFilters(filterDictionary))
        {
            if (candidate.IsSupported)
            {
                continue;
            }

            supportsFilters = false;
            break;
        }
    }

    Lazy<IReadOnlyList<byte>> decodedBytes = null;
    if (supportsFilters)
    {
        decodedBytes = new Lazy<IReadOnlyList<byte>>(() => xObject.Stream.Decode(filterProvider));
    }

    var decode = EmptyArray<decimal>.Instance;
    if (imageDictionary.TryGet(NameToken.Decode, pdfScanner, out ArrayToken decodeToken))
    {
        decode = decodeToken.Data
            .OfType<NumericToken>()
            .Select(numeric => numeric.Data)
            .ToArray();
    }

    // Image masks carry no color space. Otherwise try a named color space, then the first entry of a
    // color space array, and finally the default of the XObject (unless the image is JPX encoded).
    ColorSpace? colorSpace = null;
    if (!isImageMask)
    {
        if (imageDictionary.TryGet(NameToken.ColorSpace, pdfScanner, out NameToken namedSpace)
            && TryMapColorSpace(namedSpace, resourceStore, out var mappedSpace))
        {
            colorSpace = mappedSpace;
        }
        else if (imageDictionary.TryGet(NameToken.ColorSpace, pdfScanner, out ArrayToken spaceArray)
            && spaceArray.Length > 0)
        {
            if (spaceArray.Data[0] is NameToken leadingName
                && TryMapColorSpace(leadingName, resourceStore, out mappedSpace))
            {
                colorSpace = mappedSpace;
            }
        }
        else if (!isJpxDecode)
        {
            colorSpace = xObject.DefaultColorSpace;
        }
    }

    var details = ColorSpaceDetailsParser.GetColorSpaceDetails(colorSpace, imageDictionary, pdfScanner, resourceStore, filterProvider);

    return new XObjectImage(
        bounds,
        width,
        height,
        bitsPerComponent,
        colorSpace,
        isJpxDecode,
        isImageMask,
        intent,
        interpolate,
        decode,
        imageDictionary,
        xObject.Stream.Data,
        decodedBytes,
        details);
}
/// <summary>
/// Reads the vertical writing metrics from a font dictionary: the default components from the DW2 entry
/// and the per-CID overrides from the W2 entry.
/// </summary>
/// <param name="dict">The dictionary to read the DW2 and W2 entries from.</param>
/// <returns>The default vector components together with the per-CID displacements and position vectors.</returns>
/// <remarks>
/// Truncated data (a DW2 array with fewer than two entries, or a W2 group whose trailing values are missing)
/// is ignored instead of raising an index-out-of-range error.
/// </remarks>
private static VerticalWritingMetrics ReadVerticalDisplacements(DictionaryToken dict)
{
    var verticalDisplacements = new Dictionary<int, double>();
    var positionVectors = new Dictionary<int, PdfVector>();

    // The default position vector and displacement vector are specified by the DW2 entry.
    VerticalVectorComponents dw2;
    if (dict.TryGet(NameToken.Dw2, out var dw2Token)
        && dw2Token is ArrayToken arrayVerticalComponents
        && arrayVerticalComponents.Data.Count >= 2)
    {
        var position = ((NumericToken)arrayVerticalComponents.Data[0]).Double;
        var displacement = ((NumericToken)arrayVerticalComponents.Data[1]).Double;
        dw2 = new VerticalVectorComponents(position, displacement);
    }
    else
    {
        dw2 = VerticalVectorComponents.Default;
    }

    // Vertical metrics for individual CIDs. Two group forms are handled:
    //   c [w1y v1x v1y  w1y v1x v1y ...]  -> consecutive CIDs starting at c, three numbers per CID
    //   cFirst cLast w1y v1x v1y          -> the same metrics for every CID in the inclusive range
    if (dict.TryGet(NameToken.W2, out var w2Token) && w2Token is ArrayToken w2)
    {
        var entries = w2.Data;
        var i = 0;

        // A group needs at least the starting CID and one following token.
        while (i + 1 < entries.Count)
        {
            var c = (NumericToken)entries[i];
            var next = entries[i + 1];

            if (next is ArrayToken array)
            {
                // Step one whole triple at a time; an incomplete trailing triple is skipped.
                for (var j = 0; j + 2 < array.Data.Count; j += 3)
                {
                    // j counts numbers, not glyphs: the CID advances by one per triple.
                    var cid = c.Int + (j / 3);

                    // ReSharper disable InconsistentNaming
                    var w1y = (NumericToken)array.Data[j];
                    var v1x = (NumericToken)array.Data[j + 1];
                    var v1y = (NumericToken)array.Data[j + 2];
                    // ReSharper restore InconsistentNaming

                    verticalDisplacements[cid] = w1y.Double;
                    positionVectors[cid] = new PdfVector(v1x.Double, v1y.Double);
                }

                i += 2;
            }
            else
            {
                // The range form needs five tokens in total; stop on a truncated group.
                if (i + 4 >= entries.Count)
                {
                    break;
                }

                var first = c.Int;
                var last = ((NumericToken)next).Int;

                // ReSharper disable InconsistentNaming
                var w1y = (NumericToken)entries[i + 2];
                var v1x = (NumericToken)entries[i + 3];
                var v1y = (NumericToken)entries[i + 4];
                // ReSharper restore InconsistentNaming

                for (var cid = first; cid <= last; cid++)
                {
                    verticalDisplacements[cid] = w1y.Double;
                    positionVectors[cid] = new PdfVector(v1x.Double, v1y.Double);
                }

                i += 5;
            }
        }
    }

    return new VerticalWritingMetrics(dw2, verticalDisplacements, positionVectors);
}
/// <summary>
/// Creates the AcroForm field described by a field dictionary, recursing into its kids.
/// </summary>
/// <param name="fieldDictionary">The field dictionary; it is replaced by the combined dictionary produced by <c>CreateInheritedDictionary</c>.</param>
/// <param name="catalog">Used to look up the page referenced by the field's P entry.</param>
/// <param name="parentDictionaries">The dictionaries of the ancestors of this field.</param>
/// <returns>The field matching the FT entry and field flags, or a non-terminal field when no FT entry was found.</returns>
/// <exception cref="PdfDocumentFormatException">
/// A kid is not an indirect reference to a dictionary, or the field type is not one of Btn, Tx, Ch or Sig.
/// </exception>
private AcroFieldBase GetAcroField(DictionaryToken fieldDictionary, Catalog catalog, IReadOnlyList<DictionaryToken> parentDictionaries)
{
    var (mergedDictionary, inheritsValue) = CreateInheritedDictionary(fieldDictionary, parentDictionaries);
    fieldDictionary = mergedDictionary;

    fieldDictionary.TryGet(NameToken.Ft, tokenScanner, out NameToken fieldType);
    fieldDictionary.TryGet(NameToken.Ff, tokenScanner, out NumericToken flagsToken);

    // Resolve every kid up front, remembering whether it carries a Parent reference.
    var kidEntries = new List<(bool hasParent, DictionaryToken dictionary)>();
    if (fieldDictionary.TryGetOptionalTokenDirect(NameToken.Kids, tokenScanner, out ArrayToken kidsArray))
    {
        foreach (var kidToken in kidsArray.Data)
        {
            if (!(kidToken is IndirectReferenceToken kidReference))
            {
                throw new PdfDocumentFormatException($"AcroForm kids should only contain indirect reference, instead got: {kidToken}.");
            }

            var kidObject = tokenScanner.Get(kidReference.Data);

            if (!(kidObject.Data is DictionaryToken kidDictionary))
            {
                throw new PdfDocumentFormatException($"Unexpected type of kid in AcroForm field. Expected dictionary but got: {kidObject.Data}.");
            }

            var kidHasParent = kidDictionary.TryGet(NameToken.Parent, out IndirectReferenceToken _);
            kidEntries.Add((kidHasParent, kidDictionary));
        }
    }

    fieldDictionary.TryGetOptionalStringDirect(NameToken.T, tokenScanner, out var partialFieldName);
    fieldDictionary.TryGetOptionalStringDirect(NameToken.Tu, tokenScanner, out var alternateFieldName);
    fieldDictionary.TryGetOptionalStringDirect(NameToken.Tm, tokenScanner, out var mappingName);
    fieldDictionary.TryGet(NameToken.Parent, out IndirectReferenceToken parentReference);

    var information = new AcroFieldCommonInformation(parentReference?.Data, partialFieldName, alternateFieldName, mappingName);

    int? pageNumber = null;
    if (fieldDictionary.TryGet(NameToken.P, tokenScanner, out IndirectReferenceToken pageReference))
    {
        pageNumber = catalog.GetPageByReference(pageReference.Data)?.PageNumber;
    }

    PdfRectangle? bounds = null;
    if (fieldDictionary.TryGet(NameToken.Rect, tokenScanner, out ArrayToken rectangleToken) && rectangleToken.Length == 4)
    {
        bounds = rectangleToken.ToRectangle();
    }

    var ancestry = new List<DictionaryToken>(parentDictionaries) { fieldDictionary };

    var children = new List<AcroFieldBase>(kidEntries.Count);
    foreach (var entry in kidEntries)
    {
        // Kids without a Parent reference are treated as widget annotation dictionaries and skipped.
        if (entry.hasParent)
        {
            children.Add(GetAcroField(entry.dictionary, catalog, ancestry));
        }
    }

    var fieldFlags = (uint)(flagsToken?.Long ?? 0);

    if (fieldType == null)
    {
        return new AcroNonTerminalField(fieldDictionary, "Non-Terminal Field", fieldFlags, information, AcroFieldType.Unknown, children);
    }

    if (fieldType == NameToken.Btn)
    {
        var buttonFlags = (AcroButtonFieldFlags)fieldFlags;
        var isRadio = buttonFlags.HasFlag(AcroButtonFieldFlags.Radio);

        // The radio flag takes precedence over the push button flag.
        if (!isRadio && buttonFlags.HasFlag(AcroButtonFieldFlags.PushButton))
        {
            return new AcroPushButtonField(fieldDictionary, fieldType, buttonFlags, information, pageNumber, bounds);
        }

        // A button with child fields is a group of radio buttons or checkboxes.
        if (children.Count > 0)
        {
            if (isRadio)
            {
                return new AcroRadioButtonsField(fieldDictionary, fieldType, buttonFlags, information, children);
            }

            return new AcroCheckboxesField(fieldDictionary, fieldType, buttonFlags, information, children);
        }

        var (isChecked, stateToken) = GetCheckedState(fieldDictionary, inheritsValue);

        if (isRadio)
        {
            return new AcroRadioButtonField(fieldDictionary, fieldType, buttonFlags, information, pageNumber, bounds, stateToken, isChecked);
        }

        return new AcroCheckboxField(fieldDictionary, fieldType, buttonFlags, information, stateToken, isChecked, pageNumber, bounds);
    }

    if (fieldType == NameToken.Tx)
    {
        return GetTextField(fieldDictionary, fieldType, fieldFlags, information, pageNumber, bounds);
    }

    if (fieldType == NameToken.Ch)
    {
        return GetChoiceField(fieldDictionary, fieldType, fieldFlags, information, pageNumber, bounds);
    }

    if (fieldType == NameToken.Sig)
    {
        return new AcroSignatureField(fieldDictionary, fieldType, fieldFlags, information, pageNumber, bounds);
    }

    throw new PdfDocumentFormatException($"Unexpected type for field in AcroForm: {fieldType}.");
}
/// <summary>
/// Creates a choice field (combo box or list box) from a field dictionary.
/// </summary>
/// <param name="fieldDictionary">The field dictionary to read the V, I, Opt and TI entries from.</param>
/// <param name="fieldType">The field type name passed through to the created field.</param>
/// <param name="fieldFlags">The raw field flags, interpreted as <see cref="AcroChoiceFieldFlags"/>.</param>
/// <param name="information">Common field information passed through to the created field.</param>
/// <param name="pageNumber">The page number passed through to the created field, if any.</param>
/// <param name="bounds">The bounds passed through to the created field, if any.</param>
/// <returns>An <see cref="AcroComboBoxField"/> when the combo flag is set, otherwise an <see cref="AcroListBoxField"/>.</returns>
/// <exception cref="PdfDocumentFormatException">
/// An Opt entry is neither a string nor a two element array of strings.
/// </exception>
private AcroFieldBase GetChoiceField(DictionaryToken fieldDictionary, NameToken fieldType, uint fieldFlags, AcroFieldCommonInformation information, int? pageNumber, PdfRectangle? bounds)
{
    // Resolves a token to its text, trying a literal string first and a hex string second.
    bool TryReadText(IToken candidate, out string text)
    {
        if (DirectObjectFinder.TryGet(candidate, tokenScanner, out StringToken literal))
        {
            text = literal.Data;
            return true;
        }

        if (DirectObjectFinder.TryGet(candidate, tokenScanner, out HexToken hex))
        {
            text = hex.Data;
            return true;
        }

        text = null;
        return false;
    }

    // V holds either a single selected value or an array of selected values.
    var selectedOptions = EmptyArray<string>.Instance;
    if (fieldDictionary.TryGet(NameToken.V, out var valueToken))
    {
        if (TryReadText(valueToken, out var singleValue))
        {
            selectedOptions = new[] { singleValue };
        }
        else if (DirectObjectFinder.TryGet(valueToken, tokenScanner, out ArrayToken valueArray))
        {
            selectedOptions = new string[valueArray.Length];
            for (var index = 0; index < valueArray.Length; index++)
            {
                // Entries which are not strings leave their slot as null.
                if (TryReadText(valueArray.Data[index], out var entryValue))
                {
                    selectedOptions[index] = entryValue;
                }
            }
        }
    }

    int[] selectedIndices = null;
    if (fieldDictionary.TryGetOptionalTokenDirect(NameToken.I, tokenScanner, out ArrayToken indicesArray))
    {
        selectedIndices = new int[indicesArray.Length];
        for (var index = 0; index < indicesArray.Data.Count; index++)
        {
            selectedIndices[index] = DirectObjectFinder.Get<NumericToken>(indicesArray.Data[index], tokenScanner).Int;
        }
    }

    var options = new List<AcroChoiceOption>();
    if (fieldDictionary.TryGetOptionalTokenDirect(NameToken.Opt, tokenScanner, out ArrayToken optionsArray))
    {
        for (var index = 0; index < optionsArray.Data.Count; index++)
        {
            var optionToken = optionsArray.Data[index];

            // Plain string: the option name only.
            if (TryReadText(optionToken, out var plainName))
            {
                var plainSelected = IsChoiceSelected(selectedOptions, selectedIndices, index, plainName);
                options.Add(new AcroChoiceOption(index, plainSelected, plainName));
                continue;
            }

            if (!DirectObjectFinder.TryGet(optionToken, tokenScanner, out ArrayToken optionPair))
            {
                throw new PdfDocumentFormatException($"An option array should contain either strings or 2 element arrays, instead got: {optionToken}.");
            }

            // Two element array: export value followed by option name.
            if (optionPair.Length != 2)
            {
                throw new PdfDocumentFormatException($"An option array containing array elements should contain 2 strings, instead got: {optionPair}.");
            }

            if (!TryReadText(optionPair.Data[0], out var exportValue))
            {
                throw new PdfDocumentFormatException($"An option array array element's first value should be the export value string, instead got: {optionPair.Data[0]}.");
            }

            if (!TryReadText(optionPair.Data[1], out var pairName))
            {
                throw new PdfDocumentFormatException($"An option array array element's second value should be the option name string, instead got: {optionPair.Data[1]}.");
            }

            var pairSelected = IsChoiceSelected(selectedOptions, selectedIndices, index, pairName);
            options.Add(new AcroChoiceOption(index, pairSelected, pairName, exportValue));
        }
    }

    var choiceFlags = (AcroChoiceFieldFlags)fieldFlags;

    if (choiceFlags.HasFlag(AcroChoiceFieldFlags.Combo))
    {
        return new AcroComboBoxField(fieldDictionary, fieldType, choiceFlags, information, options, selectedOptions, selectedIndices, pageNumber, bounds);
    }

    // The top index is only read for list boxes.
    int? topIndex = null;
    if (fieldDictionary.TryGetOptionalTokenDirect(NameToken.Ti, tokenScanner, out NumericToken topIndexToken))
    {
        topIndex = topIndexToken.Int;
    }

    return new AcroListBoxField(fieldDictionary, fieldType, choiceFlags, information, options, selectedOptions, selectedIndices, topIndex, pageNumber, bounds);
}
/// <summary>
/// Lazily enumerates the annotations listed in the Annots entry of the page dictionary.
/// </summary>
/// <returns>
/// One <see cref="Annotation"/> per dictionary in the annotations array; nothing when the page has no
/// Annots entry or the entry does not resolve to an array.
/// </returns>
/// <exception cref="PdfDocumentFormatException">
/// In non-lenient mode: an array entry is not a dictionary, or a dictionary declares a Type other than Annot.
/// </exception>
public IEnumerable<Annotation> GetAnnotations()
{
    if (!pageDictionary.TryGet(NameToken.Annots, out IToken annotationsToken)
        || !DirectObjectFinder.TryGet(annotationsToken, tokenScanner, out ArrayToken annotationsArray))
    {
        yield break;
    }

    foreach (var token in annotationsArray.Data)
    {
        if (!DirectObjectFinder.TryGet(token, tokenScanner, out DictionaryToken annotationDictionary))
        {
            // Lenient parsing skips entries which are not dictionaries.
            if (isLenientParsing)
            {
                continue;
            }

            throw new PdfDocumentFormatException($"The annotations dictionary contained an annotation which wasn't a dictionary: {token}.");
        }

        // The Type entry is only validated when present and when parsing strictly.
        if (!isLenientParsing && annotationDictionary.TryGet(NameToken.Type, out NameToken dictionaryType))
        {
            if (dictionaryType != NameToken.Annot)
            {
                throw new PdfDocumentFormatException($"The annotations dictionary contained a non-annotation type dictionary: {annotationDictionary}.");
            }
        }

        var type = annotationDictionary.Get<NameToken>(NameToken.Subtype, tokenScanner);
        var annotationType = type.ToAnnotationType();
        var rectangle = annotationDictionary.Get<ArrayToken>(NameToken.Rect, tokenScanner).ToRectangle();

        var contents = GetNamedString(NameToken.Contents, annotationDictionary);
        var name = GetNamedString(NameToken.Nm, annotationDictionary);
        var modifiedDate = GetNamedString(NameToken.M, annotationDictionary);

        var flags = (AnnotationFlags)0;
        if (annotationDictionary.TryGet(NameToken.F, out var flagsToken)
            && DirectObjectFinder.TryGet(flagsToken, tokenScanner, out NumericToken flagsNumericToken))
        {
            flags = (AnnotationFlags)flagsNumericToken.Int;
        }

        var border = AnnotationBorder.Default;
        if (annotationDictionary.TryGet(NameToken.Border, out var borderToken)
            && DirectObjectFinder.TryGet(borderToken, tokenScanner, out ArrayToken borderArray)
            && borderArray.Length >= 3)
        {
            var horizontal = borderArray.GetNumeric(0).Data;
            var vertical = borderArray.GetNumeric(1).Data;
            var width = borderArray.GetNumeric(2).Data;

            var dashes = default(IReadOnlyList<decimal>);

            // The optional dash array is the fourth element, i.e. index 3. Indexing with 4 on a
            // four element array is always out of range.
            if (borderArray.Length == 4 && borderArray.Data[3] is ArrayToken dashArray)
            {
                dashes = dashArray.Data.OfType<NumericToken>().Select(x => x.Data).ToList();
            }

            border = new AnnotationBorder(horizontal, vertical, width, dashes);
        }

        yield return new Annotation(annotationDictionary, annotationType, rectangle, contents, name, modifiedDate, flags, border);
    }
}
/// <summary>
/// Creates a <see cref="Page"/> from its page dictionary, loading the inherited and page level
/// resource dictionaries while the page content is parsed.
/// </summary>
/// <param name="number">The page number, used for the created page and in error messages.</param>
/// <param name="dictionary">The page dictionary; must not be null.</param>
/// <param name="pageTreeMembers">Values inherited from the page tree; its parent resources queue is drained by this call.</param>
/// <param name="isLenientParsing">When true an unexpected Type entry is tolerated; also forwarded to the helpers.</param>
/// <returns>The created page. Its content is the default value when the dictionary has no Contents entry.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The Type entry is not Page (strict parsing only), or a content stream could not be found.
/// </exception>
/// <exception cref="PdfDocumentFormatException">A Contents array holds something other than an indirect reference.</exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    // A Rotate entry on the page itself overrides the inherited rotation.
    var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
    if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
    {
        rotation = new PageRotationDegrees(rotateToken.Int);
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox, isLenientParsing);

    // Counts the resource dictionaries loaded so far so that exactly that many are unloaded again.
    var stackDepth = 0;

    try
    {
        while (pageTreeMembers.ParentResources.Count > 0)
        {
            var resource = pageTreeMembers.ParentResources.Dequeue();

            resourceStore.LoadResourceDictionary(resource, isLenientParsing);
            stackDepth++;
        }

        if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
        {
            resourceStore.LoadResourceDictionary(resources, isLenientParsing);
            stackDepth++;
        }

        UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

        PageContent content = default(PageContent);

        if (!dictionary.TryGet(NameToken.Contents, out var contents))
        {
            // No Contents entry: the page is created with the default content value.
        }
        else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
        {
            // Several content streams: decode each and join them with a newline between streams.
            var bytes = new List<byte>();

            for (var i = 0; i < array.Data.Count; i++)
            {
                var item = array.Data[i];

                if (!(item is IndirectReferenceToken obj))
                {
                    throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                }

                var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                }

                bytes.AddRange(contentStream.Decode(filterProvider));

                if (i < array.Data.Count - 1)
                {
                    bytes.Add((byte)'\n');
                }
            }

            content = GetContent(bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
        }
        else
        {
            var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException("Failed to parse the content for the page: " + number);
            }

            var bytes = contentStream.Decode(filterProvider);

            content = GetContent(bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
        }

        return new Page(number, dictionary, mediaBox, cropBox, rotation, content,
            new AnnotationProvider(pdfScanner, dictionary, isLenientParsing));
    }
    finally
    {
        // Unload in a finally block so a failure while parsing this page does not leave its
        // resource dictionaries loaded in the store.
        for (var i = 0; i < stackDepth; i++)
        {
            resourceStore.UnloadResourceDictionary();
        }
    }
}
/// <summary>
/// Reads an outline (bookmark) node, its descendants and, when requested, its following siblings.
/// </summary>
/// <param name="nodeDictionary">The dictionary of the outline node to read.</param>
/// <param name="level">The depth assigned to the created node; children are read at <paramref name="level"/> + 1.</param>
/// <param name="readSiblings">Whether to follow the chain of Next references from this node.</param>
/// <param name="seen">References of sibling nodes already visited; a reference which is already present ends the walk.</param>
/// <param name="namedDestinations">Lookup used to resolve destinations given by name.</param>
/// <param name="catalog">Passed to the destination and action resolution helpers.</param>
/// <param name="list">Receives the created node and, when siblings are read, the sibling nodes.</param>
/// <exception cref="PdfDocumentFormatException">
/// The title cannot be read or, in non-lenient mode, the named destination or action is invalid.
/// </exception>
private void ReadBookmarksRecursively(DictionaryToken nodeDictionary, int level, bool readSiblings, HashSet<IndirectReference> seen,
    IReadOnlyDictionary<string, ExplicitDestination> namedDestinations, Catalog catalog, List<BookmarkNode> list)
{
    // 12.3 Document-Level Navigation
    // 12.3.3 Document Outline - Title
    // (Required) The text that shall be displayed on the screen for this item.
    if (!nodeDictionary.TryGetOptionalStringDirect(NameToken.Title, pdfScanner, out var title))
    {
        throw new PdfDocumentFormatException($"Invalid title for outline (bookmark) node: {nodeDictionary}.");
    }

    // Descend into the first child; its own sibling walk collects the remaining children.
    var children = new List<BookmarkNode>();
    if (nodeDictionary.TryGet(NameToken.First, pdfScanner, out DictionaryToken firstChild))
    {
        ReadBookmarksRecursively(firstChild, level + 1, true, seen, namedDestinations, catalog, children);
    }

    BookmarkNode bookmark;
    if (nodeDictionary.TryGet(NameToken.Dest, pdfScanner, out ArrayToken destArray)
        && TryGetExplicitDestination(destArray, catalog, log, out var destination))
    {
        bookmark = new DocumentBookmarkNode(title, level, destination, children);
    }
    else if (nodeDictionary.TryGet(NameToken.Dest, pdfScanner, out IDataToken<string> destStringToken))
    {
        // 12.3.2.3 Named Destinations
        if (!namedDestinations.TryGetValue(destStringToken.Data, out destination))
        {
            if (!isLenientParsing)
            {
                throw new PdfDocumentFormatException($"Invalid destination name for bookmark node: {destStringToken.Data}.");
            }

            return;
        }

        bookmark = new DocumentBookmarkNode(title, level, destination, children);
    }
    else if (nodeDictionary.TryGet(NameToken.A, pdfScanner, out DictionaryToken actionDictionary)
        && TryGetAction(actionDictionary, catalog, pdfScanner, namedDestinations, log, out var actionResult))
    {
        if (actionResult.isExternal)
        {
            bookmark = new ExternalBookmarkNode(title, level, actionResult.externalFileName, children);
        }
        else if (actionResult.destination != null)
        {
            bookmark = new DocumentBookmarkNode(title, level, actionResult.destination, children);
        }
        else
        {
            if (!isLenientParsing)
            {
                throw new PdfDocumentFormatException($"Invalid action for bookmark node: {actionDictionary}.");
            }

            return;
        }
    }
    else
    {
        log.Error($"No /Dest(ination) or /A(ction) entry found for bookmark node: {nodeDictionary}.");
        return;
    }

    list.Add(bookmark);

    if (readSiblings)
    {
        // Walk all siblings if this was the first child. The walk ends at the first node without a
        // Next reference, at a reference which was already seen, or at a reference which cannot be resolved.
        var current = nodeDictionary;
        while (current.TryGet(NameToken.Next, out IndirectReferenceToken nextReference)
            && seen.Add(nextReference.Data))
        {
            current = DirectObjectFinder.Get<DictionaryToken>(nextReference, pdfScanner);
            if (current == null)
            {
                break;
            }

            ReadBookmarksRecursively(current, level, false, seen, namedDestinations, catalog, list);
        }
    }
}
/// <summary>
/// Converts a node of the document pages tree, and recursively all of its kids, into a <see cref="PageTreeNode"/>.
/// </summary>
/// <param name="reference">The indirect reference of this node.</param>
/// <param name="nodeDictionary">The dictionary of this node.</param>
/// <param name="parentReference">The reference of the node whose kids array contained this node.</param>
/// <param name="isRoot">Whether this node is the root; the parent check is skipped for the root.</param>
/// <param name="pdfTokenScanner">Scanner used for the dictionary lookups and to resolve kids.</param>
/// <param name="isLenientParsing">When true, missing or unexpected Type, Parent and Kids entries are tolerated.</param>
/// <param name="pageNumber">Running page counter; incremented once for every page node encountered.</param>
/// <returns>A page node carrying its page number, or a pages node carrying its child nodes.</returns>
/// <exception cref="PdfDocumentFormatException">
/// The node breaks one of the structural checks (strict parsing), or a kid is not an indirect reference
/// to a dictionary (always).
/// </exception>
private static PageTreeNode ProcessPagesNode(IndirectReference reference, DictionaryToken nodeDictionary, IndirectReference parentReference, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing, ref int pageNumber)
{
    bool isPage;

    if (nodeDictionary.TryGet(NameToken.Type, pdfTokenScanner, out NameToken type))
    {
        isPage = type.Equals(NameToken.Page);

        var isKnownType = isPage || type.Equals(NameToken.Pages);
        if (!isKnownType && !isLenientParsing)
        {
            throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}.");
        }
    }
    else
    {
        if (!isLenientParsing)
        {
            throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}.");
        }

        // Without a type, a node which has no kids array is treated as a page.
        isPage = !nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _);
    }

    // Strict parsing verifies that every non-root node points back at the node which listed it.
    if (!isRoot && !isLenientParsing)
    {
        if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken parentReferenceToken))
        {
            throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}.");
        }

        if (!parentReferenceToken.Data.Equals(parentReference))
        {
            throw new PdfDocumentFormatException($"Pages tree node parent reference {parentReferenceToken.Data} did not match actual parent {parentReference}.");
        }
    }

    if (isPage)
    {
        pageNumber++;

        return new PageTreeNode(nodeDictionary, reference, true, pageNumber, EmptyArray<PageTreeNode>.Instance);
    }

    if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids))
    {
        if (!isLenientParsing)
        {
            throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {nodeDictionary}.");
        }

        // Lenient parsing treats a missing kids array as an empty one.
        kids = new ArrayToken(EmptyArray<IToken>.Instance);
    }

    var childNodes = new List<PageTreeNode>();

    for (var index = 0; index < kids.Data.Count; index++)
    {
        var kid = kids.Data[index];

        if (!(kid is IndirectReferenceToken kidRef))
        {
            throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}.");
        }

        if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionary))
        {
            throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
        }

        childNodes.Add(ProcessPagesNode(kidRef.Data, kidDictionary, reference, false, pdfTokenScanner, isLenientParsing, ref pageNumber));
    }

    return new PageTreeNode(nodeDictionary, reference, false, null, childNodes);
}
/// <summary>
/// Creates a Type 1 font from its font dictionary, either as a Standard 14 font or as a simple font
/// built from the font descriptor and embedded font program.
/// </summary>
/// <param name="dictionary">The font dictionary.</param>
/// <returns>
/// A <see cref="Type1Standard14Font"/> when one of the Standard 14 paths below applies, otherwise a
/// <see cref="Type1FontSimple"/>.
/// </returns>
/// <exception cref="InvalidFontFormatException">
/// The dictionary lacks FirstChar or Widths and its BaseFont entry is missing or not a name.
/// </exception>
public IFont Generate(DictionaryToken dictionary)
{
    var hasFirstChar = dictionary.ContainsKey(NameToken.FirstChar);
    var usingStandard14Only = !hasFirstChar || !dictionary.ContainsKey(NameToken.Widths);

    if (usingStandard14Only)
    {
        // TODO: some fonts combine standard 14 font with other metrics.
        if (!dictionary.TryGet(NameToken.BaseFont, out var standardBaseFont)
            || !(standardBaseFont is NameToken standard14Name))
        {
            throw new InvalidFontFormatException($"The Type 1 font did not contain a first character entry but also did not reference a standard 14 font: {dictionary}");
        }

        var standardMetrics = Standard14.GetAdobeFontMetrics(standard14Name.Data);

        if (standardMetrics != null)
        {
            return new Type1Standard14Font(standardMetrics, encodingReader.Read(dictionary));
        }
    }

    int firstCharacter;
    int lastCharacter;
    double[] widths;

    if (usingStandard14Only)
    {
        firstCharacter = 0;
        lastCharacter = 0;
        widths = EmptyArray<double>.Instance;
    }
    else
    {
        firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);
        lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
        widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary);
    }

    // Without a font descriptor, a resolvable BaseFont name is used to build a Standard 14 font.
    var hasDescriptor = dictionary.TryGet(NameToken.FontDescriptor, out var _);
    if (!hasDescriptor
        && dictionary.TryGet(NameToken.BaseFont, out var fallbackBaseFont)
        && DirectObjectFinder.TryGet(fallbackBaseFont, pdfScanner, out NameToken baseFontName))
    {
        var fallbackMetrics = Standard14.GetAdobeFontMetrics(baseFontName.Data);
        var fallbackEncoding = encodingReader.Read(dictionary);

        return new Type1Standard14Font(fallbackMetrics, fallbackEncoding);
    }

    var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, dictionary);

    var font = ParseFontProgram(descriptor);

    var name = FontDictionaryAccessHelper.GetName(pdfScanner, dictionary, descriptor);

    CMap toUnicodeCMap = null;
    if (dictionary.TryGet(NameToken.ToUnicode, out var toUnicodeToken))
    {
        var toUnicodeStream = DirectObjectFinder.Get<StreamToken>(toUnicodeToken, pdfScanner);
        var cmapBytes = toUnicodeStream?.Decode(filterProvider);

        if (cmapBytes != null)
        {
            toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(cmapBytes));
        }
    }

    // The encoding from the font program: the first alternative's encoding wrapped as a built-in
    // encoding, or the encoding of the second alternative's first font.
    Encoding fromFont = null;
    if (font != null)
    {
        if (font.TryGetFirst(out var type1Program))
        {
            if (type1Program.Encoding != null)
            {
                fromFont = new BuiltInEncoding(type1Program.Encoding);
            }
        }
        else if (font.TryGetSecond(out var cffProgram))
        {
            fromFont = cffProgram.FirstFont?.Encoding;
        }
    }

    var encoding = encodingReader.Read(dictionary, descriptor, fromFont);

    if (encoding == null && font != null && font.TryGetFirst(out var replacementProgram))
    {
        encoding = new BuiltInEncoding(replacementProgram.Encoding);
    }

    return new Type1FontSimple(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap, font);
}
public static ColorSpaceDetails GetColorSpaceDetails(ColorSpace?colorSpace, DictionaryToken imageDictionary, IPdfTokenScanner scanner, IResourceStore resourceStore, ILookupFilterProvider filterProvider, bool cannotRecurse = false) { if (!colorSpace.HasValue) { return(UnsupportedColorSpaceDetails.Instance); } switch (colorSpace.Value) { case ColorSpace.DeviceGray: return(DeviceGrayColorSpaceDetails.Instance); case ColorSpace.DeviceRGB: return(DeviceRgbColorSpaceDetails.Instance); case ColorSpace.DeviceCMYK: return(DeviceCmykColorSpaceDetails.Instance); case ColorSpace.CalGray: return(UnsupportedColorSpaceDetails.Instance); case ColorSpace.CalRGB: return(UnsupportedColorSpaceDetails.Instance); case ColorSpace.Lab: return(UnsupportedColorSpaceDetails.Instance); case ColorSpace.ICCBased: return(UnsupportedColorSpaceDetails.Instance); case ColorSpace.Indexed: { if (cannotRecurse) { return(UnsupportedColorSpaceDetails.Instance); } if (!imageDictionary.TryGet(NameToken.ColorSpace, scanner, out ArrayToken colorSpaceArray) || colorSpaceArray.Length != 4) { // Error instead? 
return(UnsupportedColorSpaceDetails.Instance); } var first = colorSpaceArray[0] as NameToken; if (first == null || !ColorSpaceMapper.TryMap(first, resourceStore, out var innerColorSpace) || innerColorSpace != ColorSpace.Indexed) { return(UnsupportedColorSpaceDetails.Instance); } var second = colorSpaceArray[1]; ColorSpaceDetails baseDetails; if (DirectObjectFinder.TryGet(second, scanner, out NameToken baseColorSpaceNameToken) && ColorSpaceMapper.TryMap(baseColorSpaceNameToken, resourceStore, out var baseColorSpaceName)) { baseDetails = GetColorSpaceDetails( baseColorSpaceName, imageDictionary, scanner, resourceStore, filterProvider, true); } else if (DirectObjectFinder.TryGet(second, scanner, out ArrayToken baseColorSpaceArrayToken) && baseColorSpaceArrayToken.Length > 0 && baseColorSpaceArrayToken[0] is NameToken baseColorSpaceArrayNameToken && ColorSpaceMapper.TryMap(baseColorSpaceArrayNameToken, resourceStore, out var baseColorSpaceArrayColorSpace)) { var pseudoImageDictionary = new DictionaryToken( new Dictionary <NameToken, IToken> { { NameToken.ColorSpace, baseColorSpaceArrayToken } }); baseDetails = GetColorSpaceDetails( baseColorSpaceArrayColorSpace, pseudoImageDictionary, scanner, resourceStore, filterProvider, true); }
/// <summary>
/// Lazily enumerates the annotations listed in the Annots entry of the page dictionary, including
/// any quadrilaterals from their quad points entry.
/// </summary>
/// <returns>
/// One <see cref="Annotation"/> per dictionary in the annotations array; entries which do not resolve to a
/// dictionary are skipped, and nothing is returned when the page has no annotations array.
/// </returns>
public IEnumerable<Annotation> GetAnnotations()
{
    if (!pageDictionary.TryGet(NameToken.Annots, tokenScanner, out ArrayToken annotationsArray))
    {
        yield break;
    }

    foreach (var token in annotationsArray.Data)
    {
        if (!DirectObjectFinder.TryGet(token, tokenScanner, out DictionaryToken annotationDictionary))
        {
            continue;
        }

        var type = annotationDictionary.Get<NameToken>(NameToken.Subtype, tokenScanner);
        var annotationType = type.ToAnnotationType();
        var rectangle = annotationDictionary.Get<ArrayToken>(NameToken.Rect, tokenScanner).ToRectangle(tokenScanner);

        var contents = GetNamedString(NameToken.Contents, annotationDictionary);
        var name = GetNamedString(NameToken.Nm, annotationDictionary);
        var modifiedDate = GetNamedString(NameToken.M, annotationDictionary);

        var flags = (AnnotationFlags)0;
        if (annotationDictionary.TryGet(NameToken.F, out var flagsToken)
            && DirectObjectFinder.TryGet(flagsToken, tokenScanner, out NumericToken flagsNumericToken))
        {
            flags = (AnnotationFlags)flagsNumericToken.Int;
        }

        var border = AnnotationBorder.Default;
        if (annotationDictionary.TryGet(NameToken.Border, out var borderToken)
            && DirectObjectFinder.TryGet(borderToken, tokenScanner, out ArrayToken borderArray)
            && borderArray.Length >= 3)
        {
            var horizontal = borderArray.GetNumeric(0).Data;
            var vertical = borderArray.GetNumeric(1).Data;
            var width = borderArray.GetNumeric(2).Data;

            var dashes = default(IReadOnlyList<decimal>);

            // The optional dash array is the fourth element, i.e. index 3. Indexing with 4 on a
            // four element array is always out of range.
            if (borderArray.Length == 4 && borderArray.Data[3] is ArrayToken dashArray)
            {
                dashes = dashArray.Data.OfType<NumericToken>().Select(x => x.Data).ToList();
            }

            border = new AnnotationBorder(horizontal, vertical, width, dashes);
        }

        var quadPointRectangles = new List<QuadPointsQuadrilateral>();
        if (annotationDictionary.TryGet(NameToken.Quadpoints, tokenScanner, out ArrayToken quadPointsArray))
        {
            // Numeric entries are collected eight at a time (four x/y pairs per quadrilateral);
            // non-numeric entries are skipped and a trailing incomplete group is dropped.
            var values = new List<decimal>();
            for (var i = 0; i < quadPointsArray.Length; i++)
            {
                if (!(quadPointsArray[i] is NumericToken value))
                {
                    continue;
                }

                values.Add(value.Data);

                if (values.Count == 8)
                {
                    quadPointRectangles.Add(new QuadPointsQuadrilateral(new[]
                    {
                        new PdfPoint(values[0], values[1]),
                        new PdfPoint(values[2], values[3]),
                        new PdfPoint(values[4], values[5]),
                        new PdfPoint(values[6], values[7])
                    }));

                    values.Clear();
                }
            }
        }

        yield return new Annotation(annotationDictionary, annotationType, rectangle, contents, name, modifiedDate, flags, border, quadPointRectangles);
    }
}
/// <summary>
/// Registers the fonts, XObjects, extended graphics states and color spaces named by a resource dictionary.
/// </summary>
/// <param name="resourceDictionary">The resource dictionary to read /Font, /XObject, /ExtGState and /ColorSpace from.</param>
/// <param name="isLenientParsing">Forwarded to the font dictionary loader.</param>
/// <exception cref="InvalidOperationException">An /XObject entry is not an indirect reference.</exception>
/// <exception cref="PdfDocumentFormatException">
/// A /ColorSpace entry is neither a name nor an array, or is an array which is empty or does not start with a name.
/// </exception>
public void LoadResourceDictionary(DictionaryToken resourceDictionary, bool isLenientParsing)
{
    // Push is called before anything is registered.
    currentResourceState.Push();

    if (resourceDictionary.TryGet(NameToken.Font, out var fontBase))
    {
        LoadFontDictionary(DirectObjectFinder.Get<DictionaryToken>(fontBase, scanner), isLenientParsing);
    }

    if (resourceDictionary.TryGet(NameToken.Xobject, out var xobjectBase))
    {
        var xobjects = DirectObjectFinder.Get<DictionaryToken>(xobjectBase, scanner);

        foreach (var entry in xobjects.Data)
        {
            if (entry.Value is IndirectReferenceToken reference)
            {
                currentResourceState[NameToken.Create(entry.Key)] = reference.Data;
            }
            else
            {
                throw new InvalidOperationException($"Expected the XObject dictionary value for key /{entry.Key} to be an indirect reference, instead got: {entry.Value}.");
            }
        }
    }

    if (resourceDictionary.TryGet(NameToken.ExtGState, scanner, out DictionaryToken extGStates))
    {
        foreach (var entry in extGStates.Data)
        {
            var stateName = NameToken.Create(entry.Key);
            extendedGraphicsStates[stateName] = DirectObjectFinder.Get<DictionaryToken>(entry.Value, scanner);
        }
    }

    if (!resourceDictionary.TryGet(NameToken.ColorSpace, scanner, out DictionaryToken colorSpaces))
    {
        return;
    }

    foreach (var entry in colorSpaces.Data)
    {
        var colorSpaceKey = NameToken.Create(entry.Key);

        // A color space given directly as a name.
        if (DirectObjectFinder.TryGet(entry.Value, scanner, out NameToken directName))
        {
            colorSpaceNames[colorSpaceKey] = directName;
            continue;
        }

        if (!DirectObjectFinder.TryGet(entry.Value, scanner, out ArrayToken definition))
        {
            throw new PdfDocumentFormatException($"Invalid ColorSpace token encountered in page resource dictionary: {entry.Value}.");
        }

        // A color space given as an array: only its leading name is recorded.
        if (definition.Length == 0)
        {
            throw new PdfDocumentFormatException($"Empty ColorSpace array encountered in page resource dictionary: {resourceDictionary}.");
        }

        if (!(definition.Data[0] is NameToken leadingName))
        {
            throw new PdfDocumentFormatException($"Invalid ColorSpace array encountered in page resource dictionary: {definition}.");
        }

        colorSpaceNames[colorSpaceKey] = leadingName;
    }
}
/// <summary>
/// Builds a TrueType simple font from its font dictionary.
/// </summary>
/// <remarks>
/// When the dictionary has no /FirstChar entry the font is matched against the Standard 14 fonts by
/// its /BaseFont name and a <see cref="TrueTypeStandard14FallbackSimpleFont"/> is returned instead.
/// </remarks>
/// <param name="dictionary">The font dictionary.</param>
/// <param name="isLenientParsing">Forwarded to the encoding, width, descriptor, name and CMap readers.</param>
/// <returns>The generated font.</returns>
/// <exception cref="InvalidFontFormatException">
/// The dictionary has no /FirstChar and either has no /BaseFont or its /BaseFont is not a Standard 14 font.
/// </exception>
public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
{
    if (!dictionary.TryGetOptionalTokenDirect(NameToken.FirstChar, pdfScanner, out NumericToken firstCharacterToken))
    {
        if (!dictionary.TryGetOptionalTokenDirect(NameToken.BaseFont, pdfScanner, out NameToken baseFont))
        {
            throw new InvalidFontFormatException($"The provided TrueType font dictionary did not contain a /FirstChar or a /BaseFont entry: {dictionary}.");
        }

        // The AFM metrics are usable here even though the font is not Type 1.
        var afmMetrics = Standard14.GetAdobeFontMetrics(baseFont.Data);
        if (afmMetrics == null)
        {
            throw new InvalidFontFormatException($"The provided TrueType font dictionary did not have a /FirstChar and did not match a Standard 14 font: {dictionary}.");
        }

        var systemFont = systemFontFinder.GetTrueTypeFont(baseFont.Data);

        // Fall back to the encoding implied by the AFM metrics when the dictionary defines none.
        var fallbackEncoding = encodingReader.Read(dictionary, isLenientParsing)
                               ?? new AdobeFontMetricsEncoding(afmMetrics);

        return new TrueTypeStandard14FallbackSimpleFont(baseFont, afmMetrics, fallbackEncoding, systemFont);
    }

    var firstCharacter = firstCharacterToken.Int;
    var widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary, isLenientParsing);
    var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, fontDescriptorFactory, dictionary, isLenientParsing);

    // TODO: use the parsed font fully.
    var font = ParseTrueTypeFont(descriptor);
    var name = FontDictionaryAccessHelper.GetName(pdfScanner, dictionary, descriptor, isLenientParsing);

    CMap toUnicodeCMap = null;
    if (dictionary.TryGet(NameToken.ToUnicode, out var toUnicodeObj))
    {
        var cmapBytes = DirectObjectFinder.Get<StreamToken>(toUnicodeObj, pdfScanner).Decode(filterProvider);
        if (cmapBytes != null)
        {
            toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(cmapBytes), isLenientParsing);
        }
    }

    Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor);

    if (encoding == null
        && font?.TableRegister?.CMapTable != null
        && font.TableRegister.PostScriptTable?.GlyphNames != null)
    {
        // No encoding in the dictionary: synthesize one from the font's own cmap and post tables.
        var characterMap = font.TableRegister.CMapTable;
        var glyphNames = font.TableRegister.PostScriptTable.GlyphNames;

        var codeToName = new Dictionary<int, string>();
        for (var code = 0; code < 256; code++)
        {
            if (!characterMap.TryGetGlyphIndex(code, out var glyphIndex))
            {
                continue;
            }

            // Use the glyph index itself as the name when the post table has no name for it.
            codeToName[code] = glyphIndex >= 0 && glyphIndex < glyphNames.Length
                ? glyphNames[glyphIndex]
                : glyphIndex.ToString();
        }

        encoding = new BuiltInEncoding(codeToName);
    }

    return new TrueTypeSimpleFont(name, descriptor, toUnicodeCMap, encoding, font, firstCharacter, widths);
}
/// <summary>
/// Reads the entries of a PDF encryption dictionary into an <see cref="EncryptionDictionary"/>.
/// </summary>
/// <remarks>
/// Missing optional entries fall back to: <see cref="EncryptionAlgorithmCode.Unrecognized"/> for /V,
/// <c>null</c> for /Length, <c>0</c> for /R, <c>null</c> for /O and /U, the default permissions value
/// for /P and <c>true</c> for /EncryptMetadata. The owner/user encryption bytes are only read when the
/// revision is 5 or higher.
/// </remarks>
/// <param name="encryptionDictionary">The encryption dictionary token.</param>
/// <param name="tokenScanner">Scanner passed to the dictionary lookups.</param>
/// <returns>The parsed encryption dictionary.</returns>
/// <exception cref="ArgumentNullException"><paramref name="encryptionDictionary"/> is null.</exception>
public static EncryptionDictionary Read(DictionaryToken encryptionDictionary, IPdfTokenScanner tokenScanner)
{
    if (encryptionDictionary == null)
    {
        throw new ArgumentNullException(nameof(encryptionDictionary));
    }

    var filter = encryptionDictionary.Get<NameToken>(NameToken.Filter, tokenScanner);

    var algorithm = EncryptionAlgorithmCode.Unrecognized;
    if (encryptionDictionary.TryGetOptionalTokenDirect(NameToken.V, tokenScanner, out NumericToken algorithmToken))
    {
        algorithm = (EncryptionAlgorithmCode)algorithmToken.Int;
    }

    int? keyLength = null;
    if (encryptionDictionary.TryGetOptionalTokenDirect(NameToken.Length, tokenScanner, out NumericToken keyLengthToken))
    {
        keyLength = keyLengthToken.Int;
    }

    var revision = 0;
    if (encryptionDictionary.TryGetOptionalTokenDirect(NameToken.R, tokenScanner, out NumericToken revisionToken))
    {
        revision = revisionToken.Int;
    }

    // /O may be stored as either a literal string or a hex string; any other token type is ignored.
    byte[] ownerBytes = null;
    if (encryptionDictionary.TryGet(NameToken.O, out IToken ownerToken))
    {
        switch (ownerToken)
        {
            case StringToken ownerString:
                ownerBytes = ownerString.GetBytes();
                break;
            case HexToken ownerHex:
                ownerBytes = ownerHex.Bytes.ToArray();
                break;
        }
    }

    // /U is handled the same way as /O.
    byte[] userBytes = null;
    if (encryptionDictionary.TryGet(NameToken.U, out IToken userToken))
    {
        switch (userToken)
        {
            case StringToken userString:
                userBytes = userString.GetBytes();
                break;
            case HexToken userHex:
                userBytes = userHex.Bytes.ToArray();
                break;
        }
    }

    var permissions = default(UserAccessPermissions);
    if (encryptionDictionary.TryGetOptionalTokenDirect(NameToken.P, tokenScanner, out NumericToken permissionsToken))
    {
        // This can be bigger than an integer, so read it as a long.
        permissions = (UserAccessPermissions)permissionsToken.Long;
    }

    byte[] ownerEncryptionBytes = null;
    byte[] userEncryptionBytes = null;
    if (revision >= 5)
    {
        ownerEncryptionBytes = GetEncryptionBytesOrDefault(encryptionDictionary, tokenScanner, false);
        userEncryptionBytes = GetEncryptionBytesOrDefault(encryptionDictionary, tokenScanner, true);
    }

    encryptionDictionary.TryGetOptionalTokenDirect(NameToken.EncryptMetaData, tokenScanner, out BooleanToken encryptMetadataToken);
    var encryptMetadata = encryptMetadataToken?.Data ?? true;

    return new EncryptionDictionary(
        filter.Data,
        algorithm,
        keyLength,
        revision,
        ownerBytes,
        userBytes,
        ownerEncryptionBytes,
        userEncryptionBytes,
        permissions,
        encryptionDictionary,
        encryptMetadata);
}
/// <summary>
/// Reads the vertical writing metrics (/DW2 and /W2) of a CID font dictionary.
/// </summary>
/// <remarks>
/// /W2 entries come in two forms, both handled here:
/// <c>c [w1y v1x v1y w1y v1x v1y ...]</c> assigns one triple to each consecutive CID starting at c, and
/// <c>cFirst cLast w1y v1x v1y</c> assigns the same triple to every CID in the inclusive range.
/// Malformed arrays (non-numeric entries, truncated triples) are not tolerated: the casts and
/// indexers below will throw.
/// </remarks>
/// <param name="dict">The CID font dictionary.</param>
/// <returns>The default vertical components plus the per-CID displacements and position vectors.</returns>
private static VerticalWritingMetrics ReadVerticalDisplacements(DictionaryToken dict)
{
    var verticalDisplacements = new Dictionary<int, decimal>();
    var positionVectors = new Dictionary<int, PdfVector>();

    VerticalVectorComponents dw2;
    if (dict.TryGet(NameToken.Dw2, out var dw2Token) && dw2Token is ArrayToken arrayVerticalComponents)
    {
        var position = ((NumericToken)arrayVerticalComponents.Data[0]).Data;
        var displacement = ((NumericToken)arrayVerticalComponents.Data[1]).Data;
        dw2 = new VerticalVectorComponents(position, displacement);
    }
    else
    {
        // Values used when /DW2 is absent or is not a direct array.
        dw2 = new VerticalVectorComponents(880, -1000);
    }

    // vertical metrics for individual CIDs.
    if (dict.TryGet(NameToken.W2, out var w2Token) && w2Token is ArrayToken w2)
    {
        for (var i = 0; i < w2.Data.Count; i++)
        {
            var c = (NumericToken)w2.Data[i];
            var next = w2.Data[++i];

            if (next is ArrayToken array)
            {
                // Each group of three numbers describes one CID, so the CID offset is the
                // index of the triple (j / 3), not the index of the number within the array.
                for (var j = 0; j < array.Data.Count; j += 3)
                {
                    var cid = c.Int + (j / 3);
                    var w1y = (NumericToken)array.Data[j];
                    var v1x = (NumericToken)array.Data[j + 1];
                    var v1y = (NumericToken)array.Data[j + 2];

                    verticalDisplacements[cid] = w1y.Data;
                    positionVectors[cid] = new PdfVector(v1x.Data, v1y.Data);
                }
            }
            else
            {
                var first = c.Int;
                var last = ((NumericToken)next).Int;
                var w1y = (NumericToken)w2.Data[++i];
                var v1x = (NumericToken)w2.Data[++i];
                var v1y = (NumericToken)w2.Data[++i];

                for (var cid = first; cid <= last; cid++)
                {
                    verticalDisplacements[cid] = w1y.Data;
                    positionVectors[cid] = new PdfVector(v1x.Data, v1y.Data);
                }
            }
        }
    }

    return new VerticalWritingMetrics(dw2, verticalDisplacements, positionVectors);
}