/// <summary>
/// Reads the text stored under <paramref name="keyName"/> in <paramref name="dictionary"/>,
/// looking the value up through the scanner when the entry is an indirect reference.
/// </summary>
/// <param name="dictionary">The dictionary to read the entry from.</param>
/// <param name="keyName">The name of the entry to read.</param>
/// <returns>
/// The data of the string or hex token, or <see cref="string.Empty"/> when the entry is missing
/// or is a direct token of any other type.
/// </returns>
/// <exception cref="PdfDocumentFormatException">
/// The entry is an indirect reference for which neither a string nor a hex token could be obtained.
/// </exception>
private string SafeKeyAccess(DictionaryToken dictionary, NameToken keyName)
{
    if (!dictionary.TryGet(keyName, out var entry))
    {
        return string.Empty;
    }

    switch (entry)
    {
        case StringToken directString:
            return directString.Data;

        case HexToken directHex:
            return directHex.Data;

        case IndirectReferenceToken reference:
            if (DirectObjectFinder.TryGet(reference, pdfScanner, out StringToken resolvedString))
            {
                return resolvedString.Data;
            }

            if (DirectObjectFinder.TryGet(reference, pdfScanner, out HexToken resolvedHex))
            {
                return resolvedHex.Data;
            }

            throw new PdfDocumentFormatException($"Could not get key for name: {keyName} in {dictionary}.");

        default:
            return string.Empty;
    }
}
// This method is basically a copy of UglyToad.PdfPig.Parser.PdfDocumentFactory.ParseTrailer().
/// <summary>
/// Resolves the root (catalog) dictionary referenced by the trailer and, when the trailer carries an
/// encryption token, reads the encryption dictionary as well.
/// </summary>
/// <param name="crossReferenceTable">The cross reference table whose trailer is inspected.</param>
/// <param name="pdfTokenScanner">The scanner used to resolve the referenced objects.</param>
/// <param name="encryptionDictionary">
/// The encryption dictionary read from the trailer's encryption token, or <see langword="null"/> when the trailer has none.
/// </param>
/// <returns>The root dictionary, with a Type of Catalog added when it declared no Type entry.</returns>
/// <exception cref="PdfDocumentFormatException">The encryption token could not be obtained as a dictionary.</exception>
private static DictionaryToken ParseCatalog(CrossReferenceTable crossReferenceTable, IPdfTokenScanner pdfTokenScanner, out EncryptionDictionary encryptionDictionary)
{
    encryptionDictionary = null;

    var encryptionToken = crossReferenceTable.Trailer.EncryptionToken;
    if (encryptionToken != null)
    {
        if (!DirectObjectFinder.TryGet(encryptionToken, pdfTokenScanner, out DictionaryToken encryptionDictionaryToken))
        {
            throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {encryptionToken}.");
        }

        encryptionDictionary = EncryptionDictionaryFactory.Read(encryptionDictionaryToken, pdfTokenScanner);
    }

    var catalogDictionary = DirectObjectFinder.Get<DictionaryToken>(crossReferenceTable.Trailer.Root, pdfTokenScanner);

    if (catalogDictionary.ContainsKey(NameToken.Type))
    {
        return catalogDictionary;
    }

    // No Type entry was declared: add the Catalog type.
    return catalogDictionary.With(NameToken.Type, NameToken.Catalog);
}
/// <summary>
/// Determines the crop box for a page.
/// </summary>
/// <param name="dictionary">The dictionary which may contain a CropBox entry.</param>
/// <param name="pageTreeMembers">Consulted for a crop box when the dictionary does not provide an array.</param>
/// <param name="mediaBox">The media box whose bounds are used as the last resort.</param>
/// <returns>
/// The crop box built from the dictionary's 4 element array; otherwise the one from
/// <paramref name="pageTreeMembers"/>; otherwise one built from the media box bounds.
/// </returns>
private CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox)
{
    if (!dictionary.TryGet(NameToken.CropBox, out var cropBoxToken)
        || !DirectObjectFinder.TryGet(cropBoxToken, pdfScanner, out ArrayToken cropBoxArray))
    {
        return pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
    }

    if (cropBoxArray.Length == 4)
    {
        return new CropBox(cropBoxArray.ToIntRectangle(pdfScanner));
    }

    // An array of the wrong length is reported and replaced by the media box bounds.
    log.Error($"The CropBox was the wrong length in the dictionary: {dictionary}. Array was: {cropBoxArray}. Using MediaBox.");

    return new CropBox(mediaBox.Bounds);
}
/// <summary>
/// Determines the media box for a page, falling back to US Letter when none can be found.
/// </summary>
/// <param name="number">The page number, used only in the log message.</param>
/// <param name="dictionary">The dictionary which may contain a MediaBox entry.</param>
/// <param name="pageTreeMembers">Supplies a media box when the dictionary does not provide an array.</param>
/// <returns>
/// The media box built from the dictionary's 4 element array; otherwise the one from
/// <paramref name="pageTreeMembers"/>; otherwise <c>MediaBox.Letter</c>.
/// </returns>
private MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers)
{
    if (dictionary.TryGet(NameToken.MediaBox, out var mediaboxObject)
        && DirectObjectFinder.TryGet(mediaboxObject, pdfScanner, out ArrayToken mediaboxArray))
    {
        if (mediaboxArray.Length != 4)
        {
            log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaboxArray}. Defaulting to US Letter.");

            return MediaBox.Letter;
        }

        return new MediaBox(mediaboxArray.ToIntRectangle(pdfScanner));
    }

    MediaBox inherited = pageTreeMembers.MediaBox;
    if (inherited == null)
    {
        // The previous message read "was the wrong missing", a copy/paste slip from the length error above.
        log.Error($"The MediaBox was missing for page {number}. Using US Letter.");

        // PDFBox defaults to US Letter.
        return MediaBox.Letter;
    }

    return inherited;
}
/// <summary>
/// Builds an <see cref="AcroTextField"/> from a field dictionary.
/// </summary>
/// <param name="fieldDictionary">The dictionary describing the field.</param>
/// <param name="fieldType">The field type, passed through to the created field.</param>
/// <param name="fieldFlags">The raw flags, reinterpreted as <see cref="AcroTextFieldFlags"/>.</param>
/// <param name="information">The common field information, passed through to the created field.</param>
/// <returns>The text field with its V value (if any) and MaxLen (if any).</returns>
private AcroFieldBase GetTextField(DictionaryToken fieldDictionary, NameToken fieldType, uint fieldFlags, AcroFieldCommonInformation information)
{
    // The value may be a string, a hex string or a stream; anything else leaves it null.
    string textValue = null;
    if (fieldDictionary.TryGet(NameToken.V, out var rawValue))
    {
        if (DirectObjectFinder.TryGet(rawValue, tokenScanner, out StringToken asString))
        {
            textValue = asString.Data;
        }
        else if (DirectObjectFinder.TryGet(rawValue, tokenScanner, out HexToken asHex))
        {
            textValue = asHex.Data;
        }
        else if (DirectObjectFinder.TryGet(rawValue, tokenScanner, out StreamToken asStream))
        {
            textValue = OtherEncodings.BytesAsLatin1String(asStream.Decode(filterProvider).ToArray());
        }
    }

    int? maxLength = null;
    if (fieldDictionary.TryGetOptionalTokenDirect(NameToken.MaxLen, tokenScanner, out NumericToken maxLenToken))
    {
        maxLength = maxLenToken.Int;
    }

    return new AcroTextField(fieldDictionary, fieldType, (AcroTextFieldFlags)fieldFlags, information, textValue, maxLength);
}
/// <summary>
/// Resolves the root dictionary referenced by the trailer and, when the trailer carries an
/// encryption token, reads the encryption dictionary as well.
/// </summary>
/// <param name="crossReferenceTable">The cross reference table whose trailer is inspected.</param>
/// <param name="isLenientParsing">When set, a root dictionary without a Type entry is given the Catalog type.</param>
/// <param name="pdfTokenScanner">The scanner used to resolve the referenced objects.</param>
/// <param name="encryptionDictionary">
/// The encryption dictionary read from the trailer's encryption token, or <see langword="null"/> when the trailer has none.
/// </param>
/// <returns>The root dictionary.</returns>
/// <exception cref="PdfDocumentFormatException">The encryption token could not be obtained as a dictionary.</exception>
private static DictionaryToken ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner, out EncryptionDictionary encryptionDictionary)
{
    encryptionDictionary = null;

    var encryptionToken = crossReferenceTable.Trailer.EncryptionToken;
    if (encryptionToken != null)
    {
        if (!DirectObjectFinder.TryGet(encryptionToken, pdfTokenScanner, out DictionaryToken encryptionDictionaryToken))
        {
            throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {encryptionToken}.");
        }

        encryptionDictionary = EncryptionDictionaryFactory.Read(encryptionDictionaryToken, pdfTokenScanner);
    }

    var rootDictionary = DirectObjectFinder.Get<DictionaryToken>(crossReferenceTable.Trailer.Root, pdfTokenScanner);

    var isMissingType = !rootDictionary.ContainsKey(NameToken.Type);
    if (isMissingType && isLenientParsing)
    {
        return rootDictionary.With(NameToken.Type, NameToken.Catalog);
    }

    return rootDictionary;
}
/// <summary>
/// Builds a <see cref="Type0Font"/> from its font dictionary.
/// </summary>
/// <param name="dictionary">The Type 0 font dictionary.</param>
/// <param name="isLenientParsing">Passed through to descendant font and CMap parsing.</param>
/// <returns>The composite font built from the descendant CID font and the CMaps found.</returns>
/// <exception cref="InvalidFontFormatException">No descendant font was declared in the dictionary.</exception>
/// <exception cref="PdfDocumentFormatException">The ToUnicode entry was neither a stream nor a name.</exception>
public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
{
    var baseFont = dictionary.GetNameOrDefault(NameToken.BaseFont);

    var cMap = ReadEncoding(dictionary, out var isCMapPredefined);

    if (!TryGetFirstDescendant(dictionary, out var descendantObject))
    {
        throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
    }

    // The descendant is either referenced indirectly or embedded as a dictionary.
    DictionaryToken descendantFontDictionary;
    if (descendantObject is IndirectReferenceToken obj)
    {
        descendantFontDictionary = DirectObjectFinder.Get<DictionaryToken>(obj, scanner);
    }
    else
    {
        descendantFontDictionary = (DictionaryToken)descendantObject;
    }

    ICidFont cidFont = ParseDescendant(descendantFontDictionary, isLenientParsing);

    var (ucs2CMap, isChineseJapaneseOrKorean) = GetUcs2CMap(dictionary, isCMapPredefined, cidFont);

    CMap toUnicodeCMap = null;

    // Single lookup through TryGet instead of ContainsKey followed by the Data indexer.
    if (dictionary.TryGet(NameToken.ToUnicode, out var toUnicodeValue))
    {
        if (DirectObjectFinder.TryGet<StreamToken>(toUnicodeValue, scanner, out var toUnicodeStream))
        {
            var decodedUnicodeCMap = toUnicodeStream?.Decode(filterProvider);

            if (decodedUnicodeCMap != null)
            {
                toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
            }
        }
        else if (DirectObjectFinder.TryGet<NameToken>(toUnicodeValue, scanner, out var toUnicodeName))
        {
            toUnicodeCMap = CMapCache.Get(toUnicodeName.Data);
        }
        else
        {
            throw new PdfDocumentFormatException($"Invalid type of toUnicode CMap encountered. Got: {toUnicodeValue}.");
        }
    }

    return new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap, ucs2CMap, isChineseJapaneseOrKorean);
}
/// <summary>
/// Reads the embedded font program referenced by a font descriptor.
/// </summary>
/// <param name="descriptor">The descriptor whose font file should be read; may be <see langword="null"/>.</param>
/// <returns>
/// The parsed font program, or <see langword="null"/> when there is no descriptor, no font file or no stream for it.
/// </returns>
/// <exception cref="NotSupportedException">
/// The file type is neither TrueType nor FromSubtype, or the subtype is Type1C or OpenType.
/// </exception>
/// <exception cref="PdfDocumentFormatException">
/// The font file stream has no Subtype entry or an unexpected one.
/// </exception>
private ICidFontProgram ReadDescriptorFile(FontDescriptor descriptor)
{
    if (descriptor?.FontFile == null)
    {
        return null;
    }

    var fontFileStream = DirectObjectFinder.Get<StreamToken>(descriptor.FontFile.ObjectKey, pdfScanner);
    if (fontFileStream == null)
    {
        return null;
    }

    var fontFile = fontFileStream.Decode(filterProvider);

    switch (descriptor.FontFile.FileType)
    {
        case DescriptorFontFile.FontFileType.TrueType:
            return trueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(fontFile)));

        case DescriptorFontFile.FontFileType.FromSubtype:
            {
                if (!DirectObjectFinder.TryGet(descriptor.FontFile.ObjectKey, pdfScanner, out StreamToken str))
                {
                    throw new NotSupportedException("Cannot read CID font from subtype.");
                }

                if (!str.StreamDictionary.TryGet(NameToken.Subtype, out NameToken subtypeName))
                {
                    throw new PdfDocumentFormatException($"The font file stream did not contain a subtype entry: {str.StreamDictionary}.");
                }

                if (subtypeName == NameToken.CidFontType0C)
                {
                    return compactFontFormatParser.Parse(new CompactFontFormatData(str.Decode(filterProvider)));
                }

                // Type1C and OpenType are recognised but not parsed; any other subtype is a format error.
                var isRecognisedSubtype = subtypeName == NameToken.Type1C || subtypeName == NameToken.OpenType;
                if (!isRecognisedSubtype)
                {
                    throw new PdfDocumentFormatException($"Unexpected subtype for CID font: {subtypeName}.");
                }

                throw new NotSupportedException("Cannot read CID font from subtype.");
            }

        default:
            throw new NotSupportedException("Currently only TrueType fonts are supported.");
    }
}
/// <summary>
/// Tries to read an optional entry from a dictionary as a token of type <typeparamref name="T"/>,
/// using <see cref="DirectObjectFinder"/> to obtain the typed value.
/// </summary>
/// <typeparam name="T">The token type expected for the entry.</typeparam>
/// <param name="token">The dictionary to read from.</param>
/// <param name="name">The name of the entry.</param>
/// <param name="scanner">The scanner handed to <see cref="DirectObjectFinder"/>.</param>
/// <param name="result">The typed token on success, otherwise the default value of <typeparamref name="T"/>.</param>
/// <returns><see langword="true"/> when the entry exists and could be obtained as <typeparamref name="T"/>.</returns>
public static bool TryGetOptionalTokenDirect<T>(this DictionaryToken token, NameToken name, IPdfTokenScanner scanner, out T result) where T : IToken
{
    result = default(T);

    if (!token.TryGet(name, out var entry))
    {
        return false;
    }

    if (!DirectObjectFinder.TryGet(entry, scanner, out T resolved))
    {
        return false;
    }

    result = resolved;

    return true;
}
/// <summary>
/// Resolves a font reference to its dictionary and builds the font through the font factory.
/// </summary>
/// <param name="fontReferenceToken">The indirect reference expected to point at a font dictionary.</param>
/// <param name="isLenientParsing">Passed through to the font factory.</param>
/// <returns>The font returned by the font factory.</returns>
/// <exception cref="PdfDocumentFormatException">The reference could not be obtained as a dictionary.</exception>
public IFont GetFontDirectly(IndirectReferenceToken fontReferenceToken, bool isLenientParsing)
{
    if (DirectObjectFinder.TryGet(fontReferenceToken, scanner, out DictionaryToken fontDictionaryToken))
    {
        return fontFactory.Get(fontDictionaryToken, isLenientParsing);
    }

    throw new PdfDocumentFormatException($"The requested font reference token {fontReferenceToken} wasn't a font.");
}
/// <summary>
/// Checks that <see cref="DirectObjectFinder"/> follows a reference which itself points at another reference.
/// </summary>
public void TryGetCanFollowMultipleReferenceLinks()
{
    var first = new IndirectReference(7, 0);
    var second = new IndirectReference(9, 0);

    // Object 7 only holds a reference to object 9, which holds the actual number.
    scanner.Objects[first] = new ObjectToken(10, first, new IndirectReferenceToken(second));
    scanner.Objects[second] = new ObjectToken(12, second, new NumericToken(69));

    var found = DirectObjectFinder.TryGet(new IndirectReferenceToken(first), scanner, out NumericToken result);

    Assert.True(found);
    Assert.Equal(69, result.Int);
}
/// <summary>
/// Gets the filters declared by the Filter (or F) entry of a dictionary.
/// </summary>
/// <param name="dictionary">The dictionary to inspect.</param>
/// <param name="scanner">The scanner used when the entry is an indirect reference.</param>
/// <returns>The filters for the declared names, or an empty list when no filter entry is present.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is <see langword="null"/>.</exception>
/// <exception cref="PdfDocumentFormatException">The entry is neither a name nor an array, directly or by reference.</exception>
public IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary, IPdfTokenScanner scanner)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var token = dictionary.GetObjectOrDefault(NameToken.Filter, NameToken.F);
    if (token == null)
    {
        return EmptyArray<IFilter>.Instance;
    }

    if (token is ArrayToken filters)
    {
        // Every element is cast to a name.
        var names = filters.Data.Select(x => (NameToken)x).ToArray();

        return GetNamedFilters(names);
    }

    if (token is NameToken name)
    {
        return GetNamedFilters(new[] { name });
    }

    if (token is IndirectReferenceToken irt)
    {
        if (DirectObjectFinder.TryGet<NameToken>(irt, scanner, out var indirectName))
        {
            return GetNamedFilters(new[] { indirectName });
        }

        if (DirectObjectFinder.TryGet<ArrayToken>(irt, scanner, out var indirectArray))
        {
            return GetNamedFilters(indirectArray.Data.Select(x => (NameToken)x).ToList());
        }
    }

    throw new PdfDocumentFormatException($"The filter for the stream was not a valid object. Expected name or array, instead got: {token}.");
}
/// <summary>
/// Try and get the entry with a given name and type or look-up the object if it's an indirect reference.
/// </summary>
/// <typeparam name="T">The token type expected for the entry.</typeparam>
/// <param name="dictionary">The dictionary to read from.</param>
/// <param name="name">The name of the entry.</param>
/// <param name="tokenScanner">The scanner used when the entry is an indirect reference.</param>
/// <param name="token">The typed token on success.</param>
/// <returns><see langword="true"/> when a token of type <typeparamref name="T"/> was obtained.</returns>
internal static bool TryGet<T>(this DictionaryToken dictionary, NameToken name, IPdfTokenScanner tokenScanner, out T token) where T : IToken
{
    token = default(T);

    // Entry is present and already of the requested type.
    if (dictionary.TryGet(name, out var raw) && raw is T typed)
    {
        token = typed;
        return true;
    }

    // Otherwise only an indirect reference can still produce a result.
    if (raw is IndirectReferenceToken reference)
    {
        return DirectObjectFinder.TryGet(reference, tokenScanner, out token);
    }

    return false;
}
/// <summary>
/// Get any embedded files contained in this PDF document.
/// Since PDF 1.3 any external file referenced by the document may have its contents embedded within the referring PDF file,
/// allowing its contents to be stored or transmitted along with the PDF file.
/// </summary>
/// <param name="embeddedFiles">The set of embedded files in this document.</param>
/// <returns><see langword="true"/> if this document contains more than zero embedded files, otherwise <see langword="false"/>.</returns>
public bool TryGetEmbeddedFiles(out IReadOnlyList<EmbeddedFile> embeddedFiles)
{
    GuardDisposed();

    embeddedFiles = null;

    if (!catalog.CatalogDictionary.TryGet(NameToken.Names, pdfScanner, out DictionaryToken namesDictionary))
    {
        return false;
    }

    if (!namesDictionary.TryGet(NameToken.EmbeddedFiles, pdfScanner, out DictionaryToken embeddedFileNamesDictionary))
    {
        return false;
    }

    var embeddedFileNames = NameTreeParser.FlattenNameTreeToDictionary(embeddedFileNamesDictionary, pdfScanner, isLenientParsing, x => x);
    if (embeddedFileNames.Count == 0)
    {
        return false;
    }

    var result = new List<EmbeddedFile>();

    foreach (var entry in embeddedFileNames)
    {
        // Entries lacking a descriptor dictionary, an EF dictionary or an F stream are skipped.
        if (DirectObjectFinder.TryGet(entry.Value, pdfScanner, out DictionaryToken fileDescriptor)
            && fileDescriptor.TryGet(NameToken.Ef, pdfScanner, out DictionaryToken efDictionary)
            && efDictionary.TryGet(NameToken.F, pdfScanner, out StreamToken fileStream))
        {
            var fileSpecification = fileDescriptor.TryGet(NameToken.F, pdfScanner, out IDataToken<string> fileSpecificationToken)
                ? fileSpecificationToken.Data
                : string.Empty;

            var fileBytes = fileStream.Decode(filterProvider);

            result.Add(new EmbeddedFile(entry.Key, fileSpecification, fileBytes, fileStream));
        }
    }

    embeddedFiles = result;

    return result.Count > 0;
}
/// <summary>
/// Reads the W array of a dictionary into a map from code to width.
/// </summary>
/// <remarks>
/// Two entry shapes are handled: a start code followed by an array of widths for consecutive codes,
/// and a first code, last code and a single width applied to the whole range.
/// </remarks>
/// <param name="dict">The dictionary which may contain the W entry.</param>
/// <returns>The widths by code; empty when there is no W array.</returns>
private IReadOnlyDictionary<int, double> ReadWidths(DictionaryToken dict)
{
    var widths = new Dictionary<int, double>();

    if (!dict.TryGet(NameToken.W, pdfScanner, out ArrayToken widthArray))
    {
        return widths;
    }

    var entries = widthArray.Data;
    var total = entries.Count;
    var position = 0;

    while (position < total)
    {
        var firstCode = DirectObjectFinder.Get<NumericToken>(entries[position], pdfScanner);
        var second = entries[position + 1];
        position += 2;

        if (DirectObjectFinder.TryGet(second, pdfScanner, out ArrayToken individualWidths))
        {
            // Shape 1: start code followed by one width per consecutive code.
            var start = firstCode.Int;
            var count = individualWidths.Data.Count;

            for (var offset = 0; offset < count; offset++)
            {
                var current = DirectObjectFinder.Get<NumericToken>(individualWidths.Data[offset], pdfScanner);
                widths[start + offset] = current.Double;
            }

            continue;
        }

        // Shape 2: first code, last code, one width for the whole range.
        var lastCode = DirectObjectFinder.Get<NumericToken>(second, pdfScanner);
        var sharedWidth = DirectObjectFinder.Get<NumericToken>(entries[position], pdfScanner);
        position++;

        var rangeStart = firstCode.Int;
        var rangeEnd = lastCode.Int;
        var rangeValue = sharedWidth.Double;

        for (var code = rangeStart; code <= rangeEnd; code++)
        {
            widths[code] = rangeValue;
        }
    }

    return widths;
}
/// <summary>
/// Reads the W array of a dictionary into a map from code to width.
/// </summary>
/// <remarks>
/// Two entry shapes are handled: a start code followed by an array of widths for consecutive codes,
/// and a first code, last code and a single width applied to the whole range.
/// Apart from the check for the nested array, elements are cast directly to <see cref="NumericToken"/>.
/// </remarks>
/// <param name="dict">The dictionary which may contain the W entry.</param>
/// <returns>The widths by code; empty when W is missing or is not a direct array.</returns>
private IReadOnlyDictionary<int, decimal> ReadWidths(DictionaryToken dict)
{
    var widths = new Dictionary<int, decimal>();

    if (!dict.TryGet(NameToken.W, out var widthsItem) || !(widthsItem is ArrayToken widthArray))
    {
        return widths;
    }

    var entries = widthArray.Data;
    var total = entries.Count;
    var position = 0;

    while (position < total)
    {
        var firstCode = (NumericToken)entries[position];
        var second = entries[position + 1];
        position += 2;

        if (DirectObjectFinder.TryGet(second, pdfScanner, out ArrayToken individualWidths))
        {
            // Shape 1: start code followed by one width per consecutive code.
            var start = firstCode.Int;
            var count = individualWidths.Data.Count;

            for (var offset = 0; offset < count; offset++)
            {
                var current = (NumericToken)individualWidths.Data[offset];
                widths[start + offset] = current.Data;
            }

            continue;
        }

        // Shape 2: first code, last code, one width for the whole range.
        var lastCode = (NumericToken)second;
        var sharedWidth = (NumericToken)entries[position];
        position++;

        var rangeStart = firstCode.Int;
        var rangeEnd = lastCode.Int;
        var rangeValue = sharedWidth.Data;

        for (var code = rangeStart; code <= rangeEnd; code++)
        {
            widths[code] = rangeValue;
        }
    }

    return widths;
}
/// <summary>
/// Tries to read an explicit destination from a value which is either a destination array
/// or a dictionary holding that array under its D entry.
/// </summary>
/// <param name="value">The token to interpret.</param>
/// <param name="catalog">Passed through to the destination reader.</param>
/// <param name="pdfScanner">The scanner used to resolve the value.</param>
/// <param name="log">Passed through to the destination reader.</param>
/// <param name="destination">The destination when one could be read.</param>
/// <returns><see langword="true"/> when a destination was read from either form.</returns>
private static bool TryReadExplicitDestination(IToken value, Catalog catalog, IPdfTokenScanner pdfScanner, ILog log, out ExplicitDestination destination)
{
    destination = null;

    // Form 1: the value is the destination array itself.
    if (DirectObjectFinder.TryGet(value, pdfScanner, out ArrayToken directArray))
    {
        if (TryGetExplicitDestination(directArray, catalog, log, out destination))
        {
            return true;
        }
    }

    // Form 2: the value is a dictionary with the array under D.
    if (DirectObjectFinder.TryGet(value, pdfScanner, out DictionaryToken wrapper))
    {
        if (wrapper.TryGet(NameToken.D, pdfScanner, out ArrayToken nestedArray)
            && TryGetExplicitDestination(nestedArray, catalog, log, out destination))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Reads the CIDToGIDMap entry of a dictionary.
/// </summary>
/// <param name="dictionary">The dictionary which may contain the entry.</param>
/// <returns>
/// A map built from the decoded stream bytes, or a default-constructed map when the entry is absent or is a name.
/// </returns>
/// <exception cref="PdfDocumentFormatException">The entry is present but is neither a name nor a stream.</exception>
private CharacterIdentifierToGlyphIndexMap GetCharacterIdentifierToGlyphIndexMap(DictionaryToken dictionary)
{
    // A missing entry and a name entry both produce the default map.
    if (!dictionary.TryGet(NameToken.CidToGidMap, out var entry)
        || DirectObjectFinder.TryGet(entry, pdfScanner, out NameToken _))
    {
        return new CharacterIdentifierToGlyphIndexMap();
    }

    if (DirectObjectFinder.TryGet(entry, pdfScanner, out StreamToken stream))
    {
        return new CharacterIdentifierToGlyphIndexMap(stream.Decode(filterProvider, pdfScanner));
    }

    throw new PdfDocumentFormatException($"No stream or name token found for /CIDToGIDMap in dictionary: {dictionary}.");
}
/// <summary>
/// Reads a text entry from a dictionary, accepting string and hex tokens either directly or via the scanner.
/// </summary>
/// <param name="name">The name of the entry to read.</param>
/// <param name="dictionary">The dictionary to read from.</param>
/// <returns>The text of the entry, or <see langword="null"/> when it is missing or not a string or hex token.</returns>
private string GetNamedString(NameToken name, DictionaryToken dictionary)
{
    if (!dictionary.TryGet(name, out var contentToken))
    {
        return null;
    }

    if (contentToken is StringToken contentString)
    {
        return contentString.Data;
    }

    if (contentToken is HexToken contentHex)
    {
        return contentHex.Data;
    }

    if (DirectObjectFinder.TryGet(contentToken, tokenScanner, out StringToken indirectContentString))
    {
        return indirectContentString.Data;
    }

    // Hex strings are accepted when direct, so accept them when they have to be resolved too;
    // previously such a value was silently dropped.
    if (DirectObjectFinder.TryGet(contentToken, tokenScanner, out HexToken indirectContentHex))
    {
        return indirectContentHex.Data;
    }

    return null;
}
/// <summary>
/// Reads the encryption dictionary referenced by the trailer, if there is one.
/// </summary>
/// <param name="crossReferenceTable">The cross reference table whose trailer is inspected.</param>
/// <param name="pdfTokenScanner">The scanner used to resolve the encryption token.</param>
/// <returns>
/// The encryption dictionary, or <see langword="null"/> when the trailer has no encryption token
/// or the token is obtained as a null token.
/// </returns>
/// <exception cref="PdfDocumentFormatException">The token is neither a dictionary nor a null token.</exception>
private static EncryptionDictionary GetEncryptionDictionary(CrossReferenceTable crossReferenceTable, IPdfTokenScanner pdfTokenScanner)
{
    var encryptionToken = crossReferenceTable.Trailer.EncryptionToken;
    if (encryptionToken == null)
    {
        return null;
    }

    if (DirectObjectFinder.TryGet(encryptionToken, pdfTokenScanner, out DictionaryToken encryptionDictionaryToken))
    {
        return EncryptionDictionaryFactory.Read(encryptionDictionaryToken, pdfTokenScanner);
    }

    // A null token is treated the same as having no encryption entry at all.
    if (DirectObjectFinder.TryGet(encryptionToken, pdfTokenScanner, out NullToken _))
    {
        return null;
    }

    throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {encryptionToken}.");
}
/// <summary>
/// Walks a name tree node and its kids recursively, collecting every explicit destination found.
/// </summary>
/// <param name="nameTreeNodeDictionary">The node to read; its Names and Kids entries are both considered.</param>
/// <param name="catalog">Passed through to the destination reader.</param>
/// <param name="pdfScanner">The scanner used to resolve entries.</param>
/// <param name="isLenientParsing">When set, kids which are not dictionaries are skipped instead of causing an exception.</param>
/// <param name="log">Passed through to the destination reader.</param>
/// <param name="explicitDestinations">The dictionary receiving the destinations, keyed by name; later entries overwrite earlier ones.</param>
/// <exception cref="PdfDocumentFormatException">In strict parsing, a kid could not be obtained as a dictionary.</exception>
private static void ExtractNameTree(DictionaryToken nameTreeNodeDictionary, Catalog catalog, IPdfTokenScanner pdfScanner, bool isLenientParsing, ILog log, Dictionary<string, ExplicitDestination> explicitDestinations)
{
    if (nameTreeNodeDictionary.TryGet(NameToken.Names, pdfScanner, out ArrayToken nodeNames))
    {
        // Names holds key/value pairs. Stop before a dangling key at the end of an odd-length array
        // so that nodeNames[i + 1] is always in range.
        for (var i = 0; i + 1 < nodeNames.Length; i += 2)
        {
            if (!(nodeNames[i] is IDataToken<string> key))
            {
                continue;
            }

            var value = nodeNames[i + 1];

            if (TryReadExplicitDestination(value, catalog, pdfScanner, log, out var destination))
            {
                explicitDestinations[key.Data] = destination;
            }
        }
    }

    if (nameTreeNodeDictionary.TryGet(NameToken.Kids, pdfScanner, out ArrayToken kids))
    {
        foreach (var kid in kids.Data)
        {
            if (DirectObjectFinder.TryGet(kid, pdfScanner, out DictionaryToken kidDictionary))
            {
                ExtractNameTree(kidDictionary, catalog, pdfScanner, isLenientParsing, log, explicitDestinations);
            }
            else if (!isLenientParsing)
            {
                throw new PdfDocumentFormatException($"Invalid kids entry in PDF name tree: {kid} in {kids}.");
            }
        }
    }
}
/// <summary>
/// Reads the encoding declared by the Encoding entry of a font dictionary.
/// </summary>
/// <param name="fontDictionary">The font dictionary to inspect.</param>
/// <param name="descriptor">Optional font descriptor, used when looking up a named encoding.</param>
/// <param name="fontEncoding">Optional encoding, passed to the encoding dictionary reader.</param>
/// <returns>
/// <see langword="null"/> when there is no Encoding entry; a named encoding when the entry is a recognised name;
/// for an unrecognised name with a BaseFont entry, the ZapfDingbats, Symbol or WinAnsi encoding depending on the base font;
/// otherwise the encoding read from the entry as an encoding dictionary.
/// </returns>
public Encoding Read(DictionaryToken fontDictionary, FontDescriptor descriptor = null, Encoding fontEncoding = null)
{
    if (!fontDictionary.TryGet(NameToken.Encoding, out var baseEncodingObject))
    {
        return null;
    }

    if (DirectObjectFinder.TryGet(baseEncodingObject, pdfScanner, out NameToken name))
    {
        if (TryGetNamedEncoding(descriptor, name, out var namedEncoding))
        {
            return namedEncoding;
        }

        if (fontDictionary.TryGet(NameToken.BaseFont, pdfScanner, out NameToken baseFontName))
        {
            var fontName = baseFontName.Data;

            if (string.Equals(fontName, "ZapfDingbats", StringComparison.OrdinalIgnoreCase))
            {
                return ZapfDingbatsEncoding.Instance;
            }

            if (string.Equals(fontName, "Symbol", StringComparison.OrdinalIgnoreCase))
            {
                return SymbolEncoding.Instance;
            }

            return WinAnsiEncoding.Instance;
        }
    }

    // Reached when the entry is not a name, or is an unrecognised name without a BaseFont entry.
    var encodingDictionary = DirectObjectFinder.Get<DictionaryToken>(baseEncodingObject, pdfScanner);

    return ReadEncodingDictionary(encodingDictionary, fontEncoding);
}
/// <summary>
/// Determines the media box for a page.
/// </summary>
/// <param name="number">The page number, used only in the exception message.</param>
/// <param name="dictionary">The dictionary which may contain a MediaBox entry.</param>
/// <param name="pageTreeMembers">Supplies a media box when the dictionary does not provide an array.</param>
/// <param name="isLenientParsing">When set, a wrong-length array or a missing media box falls back to A4.</param>
/// <returns>The media box from the dictionary, from <paramref name="pageTreeMembers"/>, or <c>MediaBox.A4</c> in lenient mode.</returns>
/// <exception cref="InvalidOperationException">No media box is available and parsing is not lenient.</exception>
private MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
    if (dictionary.TryGet(NameToken.MediaBox, out var mediaboxObject)
        && DirectObjectFinder.TryGet(mediaboxObject, pdfScanner, out ArrayToken mediaboxArray))
    {
        if (mediaboxArray.Length != 4 && isLenientParsing)
        {
            log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaboxArray}.");

            return MediaBox.A4;
        }

        // In strict mode a wrong-length array is still handed to the rectangle conversion.
        return new MediaBox(mediaboxArray.ToIntRectangle());
    }

    MediaBox inherited = pageTreeMembers.MediaBox;
    if (inherited != null)
    {
        return inherited;
    }

    if (!isLenientParsing)
    {
        throw new InvalidOperationException("No mediabox was present for page: " + number);
    }

    return MediaBox.A4;
}
/// <summary>
/// Reads the interactive form (AcroForm) declared by the catalog, if there is one.
/// </summary>
/// <param name="catalog">The catalog whose dictionary may contain an AcroForm entry.</param>
/// <returns>The form, or <see langword="null"/> when the catalog has no AcroForm dictionary.</returns>
/// <exception cref="PdfDocumentFormatException">
/// The Fields entry is not an array, or the array contains something other than an indirect reference.
/// </exception>
public AcroForm GetAcroForm(Catalog catalog)
{
    if (!catalog.CatalogDictionary.TryGet(NameToken.AcroForm, out var acroRawToken)
        || !DirectObjectFinder.TryGet(acroRawToken, tokenScanner, out DictionaryToken acroDictionary))
    {
        return null;
    }

    var signatureFlags = (SignatureFlags)0;
    if (acroDictionary.TryGetOptionalTokenDirect(NameToken.SigFlags, tokenScanner, out NumericToken signatureToken))
    {
        signatureFlags = (SignatureFlags)signatureToken.Int;
    }

    var needAppearances = false;
    if (acroDictionary.TryGetOptionalTokenDirect(NameToken.NeedAppearances, tokenScanner, out BooleanToken appearancesToken))
    {
        needAppearances = appearancesToken.Data;
    }

    // The following entries are read but are not passed to the AcroForm constructor below.
    acroDictionary.TryGetOptionalTokenDirect(NameToken.Co, tokenScanner, out ArrayToken calculationOrder);

    acroDictionary.TryGetOptionalTokenDirect(NameToken.Dr, tokenScanner, out DictionaryToken formResources);

    string da = null;
    if (acroDictionary.TryGetOptionalTokenDirect(NameToken.Da, tokenScanner, out StringToken daToken))
    {
        da = daToken.Data;
    }
    else if (acroDictionary.TryGetOptionalTokenDirect(NameToken.Da, tokenScanner, out HexToken daHexToken))
    {
        da = daHexToken.Data;
    }

    int? q = null;
    if (acroDictionary.TryGetOptionalTokenDirect(NameToken.Q, tokenScanner, out NumericToken qToken))
    {
        q = qToken.Int;
    }

    var fieldsToken = acroDictionary.Data[NameToken.Fields.Data];
    if (!DirectObjectFinder.TryGet(fieldsToken, tokenScanner, out ArrayToken fieldsArray))
    {
        throw new PdfDocumentFormatException($"Could not retrieve the fields array for an AcroForm: {acroDictionary}.");
    }

    var fields = new Dictionary<IndirectReference, AcroFieldBase>(fieldsArray.Length);

    foreach (var fieldToken in fieldsArray.Data)
    {
        if (fieldToken is IndirectReferenceToken fieldReferenceToken)
        {
            var fieldDictionary = DirectObjectFinder.Get<DictionaryToken>(fieldToken, tokenScanner);

            fields[fieldReferenceToken.Data] = GetAcroField(fieldDictionary, catalog, new List<DictionaryToken>(0));
        }
        else
        {
            throw new PdfDocumentFormatException($"The fields array should only contain indirect references, instead got: {fieldToken}.");
        }
    }

    return new AcroForm(acroDictionary, signatureFlags, needAppearances, fields);
}
/// <summary>
/// Builds a choice field (combo box or list box) from a field dictionary.
/// </summary>
/// <param name="fieldDictionary">The dictionary describing the field.</param>
/// <param name="fieldType">The field type, passed through to the created field.</param>
/// <param name="fieldFlags">The raw flags, reinterpreted as <see cref="AcroChoiceFieldFlags"/>.</param>
/// <param name="information">The common field information, passed through to the created field.</param>
/// <param name="pageNumber">Passed through to the created field.</param>
/// <param name="bounds">Passed through to the created field.</param>
/// <returns>
/// An <see cref="AcroComboBoxField"/> when the Combo flag is set, otherwise an <see cref="AcroListBoxField"/>.
/// </returns>
/// <exception cref="PdfDocumentFormatException">
/// An entry of the Opt array is neither a string nor a 2 element array of strings.
/// </exception>
private AcroFieldBase GetChoiceField(DictionaryToken fieldDictionary, NameToken fieldType, uint fieldFlags, AcroFieldCommonInformation information, int? pageNumber, PdfRectangle? bounds)
{
    // V: either a single string/hex value or an array of them.
    var selectedOptions = EmptyArray<string>.Instance;
    if (fieldDictionary.TryGet(NameToken.V, out var rawValue))
    {
        if (DirectObjectFinder.TryGet(rawValue, tokenScanner, out StringToken singleString))
        {
            selectedOptions = new[] { singleString.Data };
        }
        else if (DirectObjectFinder.TryGet(rawValue, tokenScanner, out HexToken singleHex))
        {
            selectedOptions = new[] { singleHex.Data };
        }
        else if (DirectObjectFinder.TryGet(rawValue, tokenScanner, out ArrayToken multipleValues))
        {
            selectedOptions = new string[multipleValues.Length];

            for (var index = 0; index < multipleValues.Length; index++)
            {
                var element = multipleValues.Data[index];

                // An element which is neither a string nor a hex string leaves a null slot.
                if (DirectObjectFinder.TryGet(element, tokenScanner, out StringToken elementString))
                {
                    selectedOptions[index] = elementString.Data;
                }
                else if (DirectObjectFinder.TryGet(element, tokenScanner, out HexToken elementHex))
                {
                    selectedOptions[index] = elementHex.Data;
                }
            }
        }
    }

    // I: the selected indices, when present.
    int[] selectedIndices = null;
    if (fieldDictionary.TryGetOptionalTokenDirect(NameToken.I, tokenScanner, out ArrayToken indicesArray))
    {
        selectedIndices = new int[indicesArray.Length];

        for (var index = 0; index < indicesArray.Data.Count; index++)
        {
            selectedIndices[index] = DirectObjectFinder.Get<NumericToken>(indicesArray.Data[index], tokenScanner).Int;
        }
    }

    // Opt: each entry is a string or a 2 element array of export value and name.
    var options = new List<AcroChoiceOption>();
    if (fieldDictionary.TryGetOptionalTokenDirect(NameToken.Opt, tokenScanner, out ArrayToken optionsArrayToken))
    {
        for (var i = 0; i < optionsArrayToken.Data.Count; i++)
        {
            var optionToken = optionsArrayToken.Data[i];

            if (DirectObjectFinder.TryGet(optionToken, tokenScanner, out StringToken optionStringToken))
            {
                var optionSelected = IsChoiceSelected(selectedOptions, selectedIndices, i, optionStringToken.Data);
                options.Add(new AcroChoiceOption(i, optionSelected, optionStringToken.Data));
                continue;
            }

            if (DirectObjectFinder.TryGet(optionToken, tokenScanner, out HexToken optionHexToken))
            {
                var optionSelected = IsChoiceSelected(selectedOptions, selectedIndices, i, optionHexToken.Data);
                options.Add(new AcroChoiceOption(i, optionSelected, optionHexToken.Data));
                continue;
            }

            if (!DirectObjectFinder.TryGet(optionToken, tokenScanner, out ArrayToken optionArrayToken))
            {
                throw new PdfDocumentFormatException($"An option array should contain either strings or 2 element arrays, instead got: {optionToken}.");
            }

            if (optionArrayToken.Length != 2)
            {
                throw new PdfDocumentFormatException($"An option array containing array elements should contain 2 strings, instead got: {optionArrayToken}.");
            }

            string exportValue;
            if (DirectObjectFinder.TryGet(optionArrayToken.Data[0], tokenScanner, out StringToken exportValueStringToken))
            {
                exportValue = exportValueStringToken.Data;
            }
            else if (DirectObjectFinder.TryGet(optionArrayToken.Data[0], tokenScanner, out HexToken exportValueHexToken))
            {
                exportValue = exportValueHexToken.Data;
            }
            else
            {
                throw new PdfDocumentFormatException($"An option array array element's first value should be the export value string, instead got: {optionArrayToken.Data[0]}.");
            }

            string name;
            if (DirectObjectFinder.TryGet(optionArrayToken.Data[1], tokenScanner, out StringToken nameStringToken))
            {
                name = nameStringToken.Data;
            }
            else if (DirectObjectFinder.TryGet(optionArrayToken.Data[1], tokenScanner, out HexToken nameHexToken))
            {
                name = nameHexToken.Data;
            }
            else
            {
                throw new PdfDocumentFormatException($"An option array array element's second value should be the option name string, instead got: {optionArrayToken.Data[1]}.");
            }

            var isSelected = IsChoiceSelected(selectedOptions, selectedIndices, i, name);
            options.Add(new AcroChoiceOption(i, isSelected, name, exportValue));
        }
    }

    var choiceFlags = (AcroChoiceFieldFlags)fieldFlags;

    if (choiceFlags.HasFlag(AcroChoiceFieldFlags.Combo))
    {
        return new AcroComboBoxField(fieldDictionary, fieldType, choiceFlags, information, options, selectedOptions, selectedIndices, pageNumber, bounds);
    }

    // TI is only read for list boxes.
    int? topIndex = null;
    if (fieldDictionary.TryGetOptionalTokenDirect(NameToken.Ti, tokenScanner, out NumericToken topIndexToken))
    {
        topIndex = topIndexToken.Int;
    }

    return new AcroListBoxField(fieldDictionary, fieldType, choiceFlags, information, options, selectedOptions, selectedIndices, topIndex, pageNumber, bounds);
}
/// <summary>
/// Lazily reads the annotations listed in the Annots entry of the page dictionary.
/// </summary>
/// <returns>
/// One <see cref="Annotation"/> per annotation dictionary in the array; nothing when the page has no Annots array.
/// </returns>
/// <exception cref="PdfDocumentFormatException">
/// Thrown while enumerating in strict parsing when an entry is not a dictionary or declares a Type other than Annot.
/// </exception>
public IEnumerable<Annotation> GetAnnotations()
{
    if (!pageDictionary.TryGet(NameToken.Annots, out IToken annotationsToken)
        || !DirectObjectFinder.TryGet(annotationsToken, tokenScanner, out ArrayToken annotationsArray))
    {
        yield break;
    }

    foreach (var token in annotationsArray.Data)
    {
        if (!DirectObjectFinder.TryGet(token, tokenScanner, out DictionaryToken annotationDictionary))
        {
            if (isLenientParsing)
            {
                continue;
            }

            throw new PdfDocumentFormatException($"The annotations dictionary contained an annotation which wasn't a dictionary: {token}.");
        }

        // The Type entry is only validated in strict mode, and only when it is present.
        if (!isLenientParsing && annotationDictionary.TryGet(NameToken.Type, out NameToken dictionaryType))
        {
            if (dictionaryType != NameToken.Annot)
            {
                throw new PdfDocumentFormatException($"The annotations dictionary contained a non-annotation type dictionary: {annotationDictionary}.");
            }
        }

        var type = annotationDictionary.Get<NameToken>(NameToken.Subtype, tokenScanner);

        var annotationType = type.ToAnnotationType();
        var rectangle = annotationDictionary.Get<ArrayToken>(NameToken.Rect, tokenScanner).ToRectangle(tokenScanner);

        var contents = GetNamedString(NameToken.Contents, annotationDictionary);
        var name = GetNamedString(NameToken.Nm, annotationDictionary);
        var modifiedDate = GetNamedString(NameToken.M, annotationDictionary);

        var flags = (AnnotationFlags)0;
        if (annotationDictionary.TryGet(NameToken.F, out var flagsToken)
            && DirectObjectFinder.TryGet(flagsToken, tokenScanner, out NumericToken flagsNumericToken))
        {
            flags = (AnnotationFlags)flagsNumericToken.Int;
        }

        var border = AnnotationBorder.Default;
        if (annotationDictionary.TryGet(NameToken.Border, out var borderToken)
            && DirectObjectFinder.TryGet(borderToken, tokenScanner, out ArrayToken borderArray)
            && borderArray.Length >= 3)
        {
            var horizontal = borderArray.GetNumeric(0).Data;
            var vertical = borderArray.GetNumeric(1).Data;
            var width = borderArray.GetNumeric(2).Data;

            var dashes = default(IReadOnlyList<decimal>);

            // The optional dash array is the fourth element, i.e. index 3. Indexing with 4 on a
            // 4 element array was always out of range.
            if (borderArray.Length == 4 && borderArray.Data[3] is ArrayToken dashArray)
            {
                dashes = dashArray.Data.OfType<NumericToken>().Select(x => x.Data).ToList();
            }

            border = new AnnotationBorder(horizontal, vertical, width, dashes);
        }

        var quadPointRectangles = new List<QuadPointsQuadrilateral>();
        if (annotationDictionary.TryGet(NameToken.Quadpoints, tokenScanner, out ArrayToken quadPointsArray))
        {
            // Numbers are consumed in groups of 8 (four x/y pairs); non-numeric entries are skipped
            // and a trailing incomplete group is dropped.
            var values = new List<decimal>();
            for (var i = 0; i < quadPointsArray.Length; i++)
            {
                if (!(quadPointsArray[i] is NumericToken value))
                {
                    continue;
                }

                values.Add(value.Data);

                if (values.Count == 8)
                {
                    quadPointRectangles.Add(new QuadPointsQuadrilateral(new[]
                    {
                        new PdfPoint(values[0], values[1]),
                        new PdfPoint(values[2], values[3]),
                        new PdfPoint(values[4], values[5]),
                        new PdfPoint(values[6], values[7])
                    }));

                    values.Clear();
                }
            }
        }

        yield return new Annotation(annotationDictionary, annotationType, rectangle, contents, name, modifiedDate, flags, border, quadPointRectangles);
    }
}
/// <summary>
/// Creates the font described by a Type 1 font dictionary.
/// </summary>
/// <param name="dictionary">The font dictionary to read.</param>
/// <param name="isLenientParsing">Forwarded to the helpers which read the dictionary entries.</param>
/// <returns>
/// A <see cref="Type1Standard14Font"/> when the dictionary lacks FirstChar/Widths or has no
/// FontDescriptor but names a base font; otherwise a <see cref="Type1FontSimple"/>.
/// </returns>
/// <exception cref="InvalidFontFormatException">
/// Thrown when FirstChar or Widths is missing and BaseFont is not a directly stored name.
/// </exception>
public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
{
    var usingStandard14Only = !(dictionary.ContainsKey(NameToken.FirstChar) && dictionary.ContainsKey(NameToken.Widths));

    if (usingStandard14Only)
    {
        // TODO: some fonts combine standard 14 font with other metrics.
        if (dictionary.TryGet(NameToken.BaseFont, out var standardToken) && standardToken is NameToken standardName)
        {
            return new Type1Standard14Font(Standard14.GetAdobeFontMetrics(standardName.Data));
        }

        throw new InvalidFontFormatException($"The Type 1 font did not contain a first character entry but also did not reference a standard 14 font: {dictionary}");
    }

    var firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);
    var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
    var widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary, isLenientParsing);

    // Without a descriptor, fall back to the metrics of the named base font (plus any encoding override).
    if (!dictionary.TryGet(NameToken.FontDescriptor, out var _)
        && dictionary.TryGet(NameToken.BaseFont, out var baseToken)
        && DirectObjectFinder.TryGet(baseToken, pdfScanner, out NameToken baseName))
    {
        var baseMetrics = Standard14.GetAdobeFontMetrics(baseName.Data);
        var overrideEncoding = encodingReader.Read(dictionary, isLenientParsing);

        return new Type1Standard14Font(baseMetrics, overrideEncoding);
    }

    var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, fontDescriptorFactory, dictionary, isLenientParsing);
    var font = ParseFontProgram(descriptor, isLenientParsing);
    var name = FontDictionaryAccessHelper.GetName(pdfScanner, dictionary, descriptor, isLenientParsing);

    CMap toUnicodeCMap = null;
    if (dictionary.TryGet(NameToken.ToUnicode, out var toUnicodeObj))
    {
        var cMapBytes = DirectObjectFinder.Get<StreamToken>(toUnicodeObj, pdfScanner)?.Decode(filterProvider);

        if (cMapBytes != null)
        {
            toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(cMapBytes), isLenientParsing);
        }
    }

    // Pull the encoding carried by the parsed font program (if any) so the reader can use it.
    Encoding fromFont = null;
    font?.Match(
        first =>
        {
            if (first.Encoding != null)
            {
                fromFont = new BuiltInEncoding(first.Encoding);
            }
            else
            {
                fromFont = default(Encoding);
            }
        },
        second =>
        {
            if (second.Fonts != null && second.Fonts.Count > 0)
            {
                fromFont = second.Fonts.First().Value.Encoding;
            }
        });

    Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor, fromFont);

    if (encoding == null)
    {
        // Last resort: wrap the encoding of the first font program alternative; the other is ignored.
        font?.Match(
            first => { encoding = new BuiltInEncoding(first.Encoding); },
            _ => { });
    }

    return new Type1FontSimple(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap, font);
}
/// <summary>
/// Reads the interactive form (AcroForm) referenced from the document catalog.
/// </summary>
/// <param name="catalog">The catalog whose dictionary is searched for the AcroForm entry.</param>
/// <returns>
/// The parsed <see cref="AcroForm"/>, or <c>null</c> when the catalog has no AcroForm entry, when
/// the entry is unresolvable and the fallback scan finds no candidate fields, or when the form
/// dictionary has no Fields array.
/// </returns>
/// <exception cref="PdfDocumentFormatException">
/// Thrown when the Fields array contains something other than an indirect reference.
/// </exception>
public AcroForm GetAcroForm(Catalog catalog)
{
    if (!catalog.CatalogDictionary.TryGet(NameToken.AcroForm, out var acroRawToken))
    {
        return null;
    }

    if (!DirectObjectFinder.TryGet(acroRawToken, tokenScanner, out DictionaryToken acroDictionary))
    {
        // Invalid reference, try constructing the form from a Brute Force scan:
        // every known object which is a dictionary with both Kids and T is treated as a field.
        var candidateFields = new List<IndirectReferenceToken>();

        foreach (var objectKey in crossReferenceTable.ObjectOffsets.Keys)
        {
            var candidate = new IndirectReferenceToken(objectKey);

            if (DirectObjectFinder.TryGet(candidate, tokenScanner, out DictionaryToken candidateDictionary)
                && candidateDictionary.TryGet(NameToken.Kids, tokenScanner, out ArrayToken _)
                && candidateDictionary.TryGet(NameToken.T, tokenScanner, out StringToken _))
            {
                candidateFields.Add(candidate);
            }
        }

        if (candidateFields.Count == 0)
        {
            return null;
        }

        acroDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
        {
            { NameToken.Fields, new ArrayToken(candidateFields) }
        });
    }

    var signatureFlags = acroDictionary.TryGetOptionalTokenDirect(NameToken.SigFlags, tokenScanner, out NumericToken signatureToken)
        ? (SignatureFlags)signatureToken.Int
        : (SignatureFlags)0;

    var needAppearances = acroDictionary.TryGetOptionalTokenDirect(NameToken.NeedAppearances, tokenScanner, out BooleanToken appearancesToken)
        && appearancesToken.Data;

    // NOTE: CO, DR, DA and Q are looked up here but are not passed to the AcroForm constructor below.
    acroDictionary.TryGetOptionalTokenDirect(NameToken.Co, tokenScanner, out ArrayToken _);
    acroDictionary.TryGetOptionalTokenDirect(NameToken.Dr, tokenScanner, out DictionaryToken _);

    string defaultAppearance = null;
    if (acroDictionary.TryGetOptionalTokenDirect(NameToken.Da, tokenScanner, out StringToken appearanceString))
    {
        defaultAppearance = appearanceString.Data;
    }
    else if (acroDictionary.TryGetOptionalTokenDirect(NameToken.Da, tokenScanner, out HexToken appearanceHex))
    {
        defaultAppearance = appearanceHex.Data;
    }

    int? quadding = null;
    if (acroDictionary.TryGetOptionalTokenDirect(NameToken.Q, tokenScanner, out NumericToken quaddingToken))
    {
        quadding = quaddingToken.Int;
    }

    if (!acroDictionary.TryGet(NameToken.Fields, tokenScanner, out ArrayToken fieldsArray))
    {
        return null;
    }

    var fields = new Dictionary<IndirectReference, AcroFieldBase>(fieldsArray.Length);

    foreach (var fieldToken in fieldsArray.Data)
    {
        if (fieldToken is IndirectReferenceToken fieldReferenceToken)
        {
            var fieldDictionary = DirectObjectFinder.Get<DictionaryToken>(fieldToken, tokenScanner);

            // Keyed by the reference; a repeated reference overwrites the earlier entry.
            fields[fieldReferenceToken.Data] = GetAcroField(fieldDictionary, catalog, new List<DictionaryToken>(0));
        }
        else
        {
            throw new PdfDocumentFormatException($"The fields array should only contain indirect references, instead got: {fieldToken}.");
        }
    }

    return new AcroForm(acroDictionary, signatureFlags, needAppearances, fields);
}
/// <summary>
/// Builds a <see cref="Page"/> from its page dictionary.
/// </summary>
/// <param name="number">The page number, used for content parsing and in error messages.</param>
/// <param name="dictionary">The page dictionary. Must not be <c>null</c>.</param>
/// <param name="pageTreeMembers">
/// Values inherited from the page tree (rotation, boxes and parent resources). The
/// <c>ParentResources</c> queue is drained by this call.
/// </param>
/// <param name="isLenientParsing">
/// When <c>true</c>, a Type entry other than Page is tolerated; also forwarded to resource loading
/// and content parsing.
/// </param>
/// <returns>The constructed page.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="dictionary"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown when the Type is not Page in strict mode, or when a content stream cannot be found.
/// </exception>
/// <exception cref="PdfDocumentFormatException">
/// Thrown when a Contents array holds something other than an indirect reference.
/// </exception>
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
    {
        throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    // The page's own Rotate entry takes precedence over the value inherited from the page tree.
    var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
    if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
    {
        rotation = new PageRotationDegrees(rotateToken.Int);
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

    // Number of resource dictionaries loaded for this page; each must be unloaded again.
    var stackDepth = 0;

    try
    {
        while (pageTreeMembers.ParentResources.Count > 0)
        {
            var resource = pageTreeMembers.ParentResources.Dequeue();

            resourceStore.LoadResourceDictionary(resource, isLenientParsing);
            stackDepth++;
        }

        if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
        {
            resourceStore.LoadResourceDictionary(resources, isLenientParsing);
            stackDepth++;
        }

        UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

        PageContent content = default(PageContent);

        if (!dictionary.TryGet(NameToken.Contents, out var contents))
        {
            // No Contents entry: the page is created with default(PageContent).
            // ignored for now, is it possible? check the spec...
        }
        else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
        {
            // Contents is an array of streams: decode each and join them with a newline separator.
            var bytes = new List<byte>();

            for (var i = 0; i < array.Data.Count; i++)
            {
                var item = array.Data[i];

                if (!(item is IndirectReferenceToken obj))
                {
                    throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                }

                var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                }

                bytes.AddRange(contentStream.Decode(filterProvider));

                if (i < array.Data.Count - 1)
                {
                    bytes.Add((byte)'\n');
                }
            }

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
        }
        else
        {
            var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException("Failed to parse the content for the page: " + number);
            }

            var bytes = contentStream.Decode(filterProvider);

            content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
        }

        var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content,
            new AnnotationProvider(pdfScanner, dictionary, isLenientParsing),
            pdfScanner);

        return page;
    }
    finally
    {
        // Always undo the loads made above, even when parsing throws, so that a failed page
        // does not leave its resource dictionaries loaded in the shared resource store.
        for (var i = 0; i < stackDepth; i++)
        {
            resourceStore.UnloadResourceDictionary();
        }
    }
}
public static ColorSpaceDetails GetColorSpaceDetails(ColorSpace?colorSpace, DictionaryToken imageDictionary, IPdfTokenScanner scanner, IResourceStore resourceStore, ILookupFilterProvider filterProvider, bool cannotRecurse = false) { if (!colorSpace.HasValue) { return(UnsupportedColorSpaceDetails.Instance); } switch (colorSpace.Value) { case ColorSpace.DeviceGray: return(DeviceGrayColorSpaceDetails.Instance); case ColorSpace.DeviceRGB: return(DeviceRgbColorSpaceDetails.Instance); case ColorSpace.DeviceCMYK: return(DeviceCmykColorSpaceDetails.Instance); case ColorSpace.CalGray: return(UnsupportedColorSpaceDetails.Instance); case ColorSpace.CalRGB: return(UnsupportedColorSpaceDetails.Instance); case ColorSpace.Lab: return(UnsupportedColorSpaceDetails.Instance); case ColorSpace.ICCBased: return(UnsupportedColorSpaceDetails.Instance); case ColorSpace.Indexed: { if (cannotRecurse) { return(UnsupportedColorSpaceDetails.Instance); } if (!imageDictionary.TryGet(NameToken.ColorSpace, scanner, out ArrayToken colorSpaceArray) || colorSpaceArray.Length != 4) { // Error instead? 
return(UnsupportedColorSpaceDetails.Instance); } var first = colorSpaceArray[0] as NameToken; if (first == null || !ColorSpaceMapper.TryMap(first, resourceStore, out var innerColorSpace) || innerColorSpace != ColorSpace.Indexed) { return(UnsupportedColorSpaceDetails.Instance); } var second = colorSpaceArray[1]; ColorSpaceDetails baseDetails; if (DirectObjectFinder.TryGet(second, scanner, out NameToken baseColorSpaceNameToken) && ColorSpaceMapper.TryMap(baseColorSpaceNameToken, resourceStore, out var baseColorSpaceName)) { baseDetails = GetColorSpaceDetails( baseColorSpaceName, imageDictionary, scanner, resourceStore, filterProvider, true); } else if (DirectObjectFinder.TryGet(second, scanner, out ArrayToken baseColorSpaceArrayToken) && baseColorSpaceArrayToken.Length > 0 && baseColorSpaceArrayToken[0] is NameToken baseColorSpaceArrayNameToken && ColorSpaceMapper.TryMap(baseColorSpaceArrayNameToken, resourceStore, out var baseColorSpaceArrayColorSpace)) { var pseudoImageDictionary = new DictionaryToken( new Dictionary <NameToken, IToken> { { NameToken.ColorSpace, baseColorSpaceArrayToken } }); baseDetails = GetColorSpaceDetails( baseColorSpaceArrayColorSpace, pseudoImageDictionary, scanner, resourceStore, filterProvider, true); }