Пример #1
0
        /// <summary>
        /// Builds a <see cref="PdfRectangle"/> from a four element array using the integer value of each element.
        /// </summary>
        /// <param name="array">The array holding the four values, passed to the rectangle in array order.</param>
        /// <param name="tokenScanner">Scanner handed to <see cref="DirectObjectFinder"/> when reading each element as a <see cref="NumericToken"/>.</param>
        /// <returns>The rectangle constructed from the four integer values.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="array"/> is <see langword="null"/>.</exception>
        /// <exception cref="PdfDocumentFormatException">The array does not contain exactly 4 values.</exception>
        public static PdfRectangle ToIntRectangle(this ArrayToken array, IPdfTokenScanner tokenScanner)
        {
            if (array == null)
            {
                throw new ArgumentNullException(nameof(array));
            }

            if (array.Data.Count != 4)
            {
                throw new PdfDocumentFormatException($"Cannot convert array to rectangle, expected 4 values instead got: {array}.");
            }

            // Elements are read in index order so any failure surfaces for the lowest offending index first.
            var first = DirectObjectFinder.Get<NumericToken>(array[0], tokenScanner).Int;
            var second = DirectObjectFinder.Get<NumericToken>(array[1], tokenScanner).Int;
            var third = DirectObjectFinder.Get<NumericToken>(array[2], tokenScanner).Int;
            var fourth = DirectObjectFinder.Get<NumericToken>(array[3], tokenScanner).Int;

            return new PdfRectangle(first, second, third, fourth);
        }
Пример #2
0
            /// <summary>
            /// Copies a token while resolving indirect references: the content an indirect reference points to is
            /// copied to the new document's stream and the reference is replaced with one to the newly written object.
            /// </summary>
            /// <param name="tokenToCopy">Token to inspect for references.</param>
            /// <param name="tokenScanner">Scanner used to get the referenced content from the original document.</param>
            /// <returns>A copy of the token with all of its content copied to the new document's stream.</returns>
            private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
            {
                switch (tokenToCopy)
                {
                    case DictionaryToken sourceDictionary:
                    {
                        // Rebuild the dictionary entry by entry so nested references are rewritten too.
                        var copiedEntries = new Dictionary<NameToken, IToken>();
                        foreach (var entry in sourceDictionary.Data)
                        {
                            copiedEntries.Add(NameToken.Create(entry.Key), CopyToken(entry.Value, tokenScanner));
                        }

                        return new DictionaryToken(copiedEntries);
                    }

                    case ArrayToken sourceArray:
                    {
                        var copiedItems = new List<IToken>(sourceArray.Length);
                        foreach (var item in sourceArray.Data)
                        {
                            copiedItems.Add(CopyToken(item, tokenScanner));
                        }

                        return new ArrayToken(copiedItems);
                    }

                    case IndirectReferenceToken sourceReference:
                    {
                        var referenced = DirectObjectFinder.Get<IToken>(sourceReference.Data, tokenScanner);

                        Debug.Assert(!(referenced is IndirectReferenceToken));

                        // Write the copied content first, then point at the object number it was written under.
                        var written = context.WriteObject(memory, CopyToken(referenced, tokenScanner));
                        return new IndirectReferenceToken(written.Number);
                    }

                    case StreamToken sourceStream:
                    {
                        var copiedDictionary = CopyToken(sourceStream.StreamDictionary, tokenScanner) as DictionaryToken;
                        Debug.Assert(copiedDictionary != null);
                        return new StreamToken(copiedDictionary, new List<byte>(sourceStream.Data));
                    }

                    default:
                        // Non complex token (BooleanToken, NumericToken, NameToken, etc.): returned as-is.
                        return tokenToCopy;
                }
            }
Пример #3
0
        /// <summary>
        /// Get any embedded files contained in this PDF document.
        /// Since PDF 1.3 any external file referenced by the document may have its contents embedded within the referring PDF file,
        /// allowing its contents to be stored or transmitted along with the PDF file.
        /// </summary>
        /// <param name="embeddedFiles">
        /// The set of embedded files in this document. This is <see langword="null"/> when the names dictionary, the embedded
        /// files entry or any named entries are missing, and an empty list when entries exist but none could be read as a file.
        /// </param>
        /// <returns><see langword="true"/> if this document contains more than zero embedded files, otherwise <see langword="false"/>.</returns>
        public bool TryGetEmbeddedFiles(out IReadOnlyList<EmbeddedFile> embeddedFiles)
        {
            GuardDisposed();

            embeddedFiles = null;

            if (!catalog.CatalogDictionary.TryGet(NameToken.Names, pdfScanner, out DictionaryToken names))
            {
                return false;
            }

            if (!names.TryGet(NameToken.EmbeddedFiles, pdfScanner, out DictionaryToken embeddedFilesTree))
            {
                return false;
            }

            var descriptorsByName = NameTreeParser.FlattenNameTreeToDictionary(embeddedFilesTree, pdfScanner, x => x);

            if (descriptorsByName.Count == 0)
            {
                return false;
            }

            var files = new List<EmbeddedFile>();

            foreach (var named in descriptorsByName)
            {
                // Only entries with a descriptor dictionary -> EF dictionary -> F stream chain produce a file.
                if (DirectObjectFinder.TryGet(named.Value, pdfScanner, out DictionaryToken descriptor)
                    && descriptor.TryGet(NameToken.Ef, pdfScanner, out DictionaryToken embedded)
                    && embedded.TryGet(NameToken.F, pdfScanner, out StreamToken stream))
                {
                    var specification = descriptor.TryGet(NameToken.F, pdfScanner, out IDataToken<string> specificationToken)
                        ? specificationToken.Data
                        : string.Empty;

                    files.Add(new EmbeddedFile(named.Key, specification, stream.Decode(filterProvider), stream));
                }
            }

            embeddedFiles = files;

            return files.Count > 0;
        }
Пример #4
0
        /// <summary>
        /// Reads the CIDToGIDMap entry of the dictionary into a <see cref="CharacterIdentifierToGlyphIndexMap"/>.
        /// </summary>
        /// <param name="dictionary">The dictionary which may contain the CIDToGIDMap entry.</param>
        /// <returns>
        /// A default map when the entry is missing or is a name (stored directly or behind an indirect reference),
        /// otherwise a map built from the decoded bytes of the entry's stream.
        /// </returns>
        private CharacterIdentifierToGlyphIndexMap GetCharacterIdentifierToGlyphIndexMap(DictionaryToken dictionary)
        {
            if (!dictionary.TryGet(NameToken.CidToGidMap, out var entry))
            {
                return new CharacterIdentifierToGlyphIndexMap();
            }

            // The name may sit behind an indirect reference; without resolving it the stream lookup below
            // would be attempted on a name object.
            if (entry is NameToken || DirectObjectFinder.TryGet(entry, pdfScanner, out NameToken resolvedName))
            {
                return new CharacterIdentifierToGlyphIndexMap();
            }

            var stream = DirectObjectFinder.Get<StreamToken>(entry, pdfScanner);

            var bytes = stream.Decode(filterProvider);

            return new CharacterIdentifierToGlyphIndexMap(bytes);
        }
Пример #5
0
        public void GetFollowsSingleIndirectReferenceFromArray()
        {
            const string expected = "Goopy";

            // One stored object holds the string, the other holds an array whose only element refers to it.
            var stringReference = new IndirectReference(69, 0);
            var arrayReference = new IndirectReference(10, 0);

            scanner.Objects[stringReference] = new ObjectToken(69, stringReference, new StringToken(expected));

            scanner.Objects[arrayReference] = new ObjectToken(10, arrayReference, new ArrayToken(new[]
            {
                new IndirectReferenceToken(stringReference)
            }));

            // Asking for a string via the array's reference is expected to follow through to the string object.
            var result = DirectObjectFinder.Get<StringToken>(arrayReference, scanner);

            Assert.Equal(expected, result.Data);
        }
Пример #6
0
        /// <summary>
        /// Reads the encoding described by the Encoding entry of a font dictionary.
        /// </summary>
        /// <param name="fontDictionary">The font dictionary to read the Encoding entry from.</param>
        /// <param name="isLenientParsing">Not used by this method.</param>
        /// <param name="descriptor">Optional font descriptor passed on when the encoding is given by name.</param>
        /// <returns>
        /// <see langword="null"/> when there is no Encoding entry, the named encoding when the entry is a name,
        /// otherwise the encoding read from the entry's dictionary.
        /// </returns>
        public Encoding Read(DictionaryToken fontDictionary, bool isLenientParsing, FontDescriptor descriptor = null)
        {
            if (fontDictionary.TryGet(NameToken.Encoding, out var encodingEntry))
            {
                if (encodingEntry is NameToken encodingName)
                {
                    return GetNamedEncoding(descriptor, encodingName);
                }

                // Anything other than a direct name is treated as a dictionary (resolved via the scanner).
                return ReadEncodingDictionary(DirectObjectFinder.Get<DictionaryToken>(encodingEntry, pdfScanner));
            }

            return null;
        }
Пример #7
0
        /// <summary>
        /// Convert the file trailer dictionary into a <see cref="DocumentInformation"/> instance.
        /// </summary>
        /// <param name="pdfTokenScanner">Scanner used to resolve the trailer's Info value.</param>
        /// <param name="trailer">The trailer whose Info value is read.</param>
        /// <returns>
        /// <see cref="DocumentInformation.Default"/> when the trailer has no Info value or the Info object is a
        /// Metadata/XML stream, otherwise the information read from the Info dictionary.
        /// </returns>
        /// <exception cref="PdfDocumentFormatException">
        /// The Info object is a stream which is not of type Metadata and subtype XML, or is neither a dictionary nor a stream.
        /// </exception>
        public static DocumentInformation Create(IPdfTokenScanner pdfTokenScanner, TrailerDictionary trailer)
        {
            if (!trailer.Info.HasValue)
            {
                return DocumentInformation.Default;
            }

            var token = DirectObjectFinder.Get<IToken>(trailer.Info.Value, pdfTokenScanner);

            switch (token)
            {
                case DictionaryToken info:
                    return new DocumentInformation(
                        info,
                        GetEntryOrDefault(info, NameToken.Title),
                        GetEntryOrDefault(info, NameToken.Author),
                        GetEntryOrDefault(info, NameToken.Subject),
                        GetEntryOrDefault(info, NameToken.Keywords),
                        GetEntryOrDefault(info, NameToken.Creator),
                        GetEntryOrDefault(info, NameToken.Producer),
                        GetEntryOrDefault(info, NameToken.CreationDate),
                        GetEntryOrDefault(info, NameToken.ModDate));

                case StreamToken stream:
                {
                    var streamDictionary = stream.StreamDictionary;

                    var hasType = streamDictionary.TryGet(NameToken.Type, out NameToken typeName);
                    if (!hasType || typeName != "Metadata")
                    {
                        throw new PdfDocumentFormatException($"Unknown document metadata type was found");
                    }

                    var hasSubtype = streamDictionary.TryGet(NameToken.Subtype, out NameToken subtypeName);
                    if (!hasSubtype || subtypeName != "XML")
                    {
                        throw new PdfDocumentFormatException($"Unknown document metadata subtype was found");
                    }

                    // XMP streams are not fully supported, so deserializing the stream is left to the user.
                    return DocumentInformation.Default;
                }

                default:
                    throw new PdfDocumentFormatException($"Unknown document information token was found {token.GetType().Name}");
            }
        }
Пример #8
0
        /// <summary>
        /// Creates a <see cref="Catalog"/> from the catalog dictionary, locating the pages dictionary and processing its page tree.
        /// </summary>
        /// <param name="rootReference">Reference used for the pages node when the Pages entry is not an indirect reference.</param>
        /// <param name="dictionary">The catalog dictionary.</param>
        /// <param name="scanner">Scanner used to resolve the Pages entry and passed on to page tree processing.</param>
        /// <param name="isLenientParsing">Passed on to page tree processing.</param>
        /// <returns>The catalog with its pages dictionary and page tree.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is <see langword="null"/>.</exception>
        /// <exception cref="PdfDocumentFormatException">The Type entry is present but is not Catalog, or the Pages entry is missing.</exception>
        public static Catalog Create(IndirectReference rootReference, DictionaryToken dictionary,
                                     IPdfTokenScanner scanner,
                                     bool isLenientParsing)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var hasType = dictionary.TryGet(NameToken.Type, out var typeToken);
            if (hasType && !ReferenceEquals(typeToken, NameToken.Catalog))
            {
                throw new PdfDocumentFormatException($"The type of the catalog dictionary was not Catalog: {dictionary}.");
            }

            if (!dictionary.TryGet(NameToken.Pages, out var pagesEntry))
            {
                throw new PdfDocumentFormatException($"No pages entry was found in the catalog dictionary: {dictionary}.");
            }

            // Only an indirect Pages entry supplies its own reference; otherwise the root reference stands in.
            var pagesReference = rootReference;
            DictionaryToken pages;

            switch (pagesEntry)
            {
                case IndirectReferenceToken indirectPages:
                    pagesReference = indirectPages.Data;
                    pages = DirectObjectFinder.Get<DictionaryToken>(indirectPages, scanner);
                    break;
                case DictionaryToken directPages:
                    pages = directPages;
                    break;
                default:
                    pages = DirectObjectFinder.Get<DictionaryToken>(pagesEntry, scanner);
                    break;
            }

            var pageNumber = 0;

            var pageTree = ProcessPagesNode(pagesReference, pages, new IndirectReference(1, 0), true,
                                            scanner, isLenientParsing, ref pageNumber);

            return new Catalog(dictionary, pages, pageTree);
        }
Пример #9
0
        /// <summary>
        /// Reads the W (widths) array of the dictionary into a lookup from code to width.
        /// </summary>
        /// <remarks>
        /// The array is read as a sequence of entries in one of two forms: a start code followed by an array of widths
        /// for consecutive codes, or a first code, a last code and a single width applied to the whole inclusive range.
        /// Elements are resolved through <see cref="DirectObjectFinder"/> so indirect references are followed.
        /// An entry cut short by the end of the array is ignored.
        /// </remarks>
        /// <param name="dict">The dictionary which may contain the W entry.</param>
        /// <returns>The widths keyed by code; empty when there is no W array.</returns>
        private IReadOnlyDictionary<int, decimal> ReadWidths(DictionaryToken dict)
        {
            var widths = new Dictionary<int, decimal>();

            if (!dict.TryGet(NameToken.W, out var widthsItem)
                || !DirectObjectFinder.TryGet(widthsItem, pdfScanner, out ArrayToken widthArray))
            {
                return widths;
            }

            var size = widthArray.Data.Count;
            var counter = 0;

            while (counter < size)
            {
                var firstCode = DirectObjectFinder.Get<NumericToken>(widthArray.Data[counter++], pdfScanner);

                if (counter >= size)
                {
                    // Trailing code with nothing after it: the array ended prematurely.
                    break;
                }

                var next = widthArray.Data[counter++];
                if (DirectObjectFinder.TryGet(next, pdfScanner, out ArrayToken array))
                {
                    var startRange = firstCode.Int;
                    var arraySize = array.Data.Count;

                    for (var i = 0; i < arraySize; i++)
                    {
                        var width = DirectObjectFinder.Get<NumericToken>(array.Data[i], pdfScanner);
                        widths[startRange + i] = width.Data;
                    }
                }
                else
                {
                    var secondCode = DirectObjectFinder.Get<NumericToken>(next, pdfScanner);

                    if (counter >= size)
                    {
                        // Range without its width: the array ended prematurely.
                        break;
                    }

                    var rangeWidth = DirectObjectFinder.Get<NumericToken>(widthArray.Data[counter++], pdfScanner);
                    var startRange = firstCode.Int;
                    var endRange = secondCode.Int;
                    var width = rangeWidth.Data;
                    for (var i = startRange; i <= endRange; i++)
                    {
                        widths[i] = width;
                    }
                }
            }

            return widths;
        }
Пример #10
0
        /// <summary>
        /// Reads the W (widths) array of the dictionary into a lookup from code to width.
        /// </summary>
        /// <remarks>
        /// The array is read as a sequence of entries in one of two forms: a start code followed by an array of widths
        /// for consecutive codes, or a first code, a last code and a single width applied to the whole inclusive range.
        /// An entry cut short by the end of the array is ignored rather than indexing past the end.
        /// </remarks>
        /// <param name="dict">The dictionary which may contain the W entry.</param>
        /// <returns>The widths keyed by code; empty when there is no W array.</returns>
        private IReadOnlyDictionary<int, double> ReadWidths(DictionaryToken dict)
        {
            var widths = new Dictionary<int, double>();

            if (!dict.TryGet(NameToken.W, pdfScanner, out ArrayToken widthArray))
            {
                return widths;
            }

            var size = widthArray.Data.Count;
            var counter = 0;

            while (counter < size)
            {
                var firstCode = DirectObjectFinder.Get<NumericToken>(widthArray.Data[counter++], pdfScanner);

                if (counter >= size)
                {
                    // Trailing code with nothing after it: the array ended prematurely.
                    break;
                }

                var next = widthArray.Data[counter++];
                if (DirectObjectFinder.TryGet(next, pdfScanner, out ArrayToken array))
                {
                    var startRange = firstCode.Int;
                    var arraySize = array.Data.Count;

                    for (var i = 0; i < arraySize; i++)
                    {
                        var width = DirectObjectFinder.Get<NumericToken>(array.Data[i], pdfScanner);
                        widths[startRange + i] = width.Double;
                    }
                }
                else
                {
                    var secondCode = DirectObjectFinder.Get<NumericToken>(next, pdfScanner);

                    if (counter >= size)
                    {
                        // Range without its width: the array ended prematurely.
                        break;
                    }

                    var rangeWidth = DirectObjectFinder.Get<NumericToken>(widthArray.Data[counter++], pdfScanner);
                    var startRange = firstCode.Int;
                    var endRange = secondCode.Int;
                    var width = rangeWidth.Double;
                    for (var i = startRange; i <= endRange; i++)
                    {
                        widths[i] = width;
                    }
                }
            }

            return widths;
        }
Пример #11
0
        /// <summary>
        /// Creates the <see cref="DocumentInformation"/> from the Info value of the trailer.
        /// </summary>
        /// <param name="pdfTokenScanner">Scanner used to resolve the Info value to a dictionary.</param>
        /// <param name="trailer">The trailer whose Info value is read.</param>
        /// <returns><see cref="DocumentInformation.Default"/> when the trailer has no Info value, otherwise the information read from the Info dictionary.</returns>
        public DocumentInformation Create(IPdfTokenScanner pdfTokenScanner, TrailerDictionary trailer)
        {
            if (!trailer.Info.HasValue)
            {
                return DocumentInformation.Default;
            }

            var info = DirectObjectFinder.Get<DictionaryToken>(trailer.Info.Value, pdfTokenScanner);

            return new DocumentInformation(
                info,
                GetEntryOrDefault(info, NameToken.Title),
                GetEntryOrDefault(info, NameToken.Author),
                GetEntryOrDefault(info, NameToken.Subject),
                GetEntryOrDefault(info, NameToken.Keywords),
                GetEntryOrDefault(info, NameToken.Creator),
                GetEntryOrDefault(info, NameToken.Producer));
        }
Пример #12
0
        /// <summary>
        /// Creates the <see cref="DocumentInformation"/> from the Info entry of the supplied dictionary.
        /// </summary>
        /// <param name="pdfTokenScanner">Scanner used to resolve the Info entry to a dictionary.</param>
        /// <param name="rootDictionary">The dictionary whose Info entry is read.</param>
        /// <returns><see cref="DocumentInformation.Default"/> when there is no Info entry, otherwise the information read from the Info dictionary.</returns>
        public DocumentInformation Create(IPdfTokenScanner pdfTokenScanner, DictionaryToken rootDictionary)
        {
            if (rootDictionary.TryGet(NameToken.Info, out var infoEntry))
            {
                var info = DirectObjectFinder.Get<DictionaryToken>(infoEntry, pdfTokenScanner);

                return new DocumentInformation(
                    GetEntryOrDefault(info, NameToken.Title),
                    GetEntryOrDefault(info, NameToken.Author),
                    GetEntryOrDefault(info, NameToken.Subject),
                    GetEntryOrDefault(info, NameToken.Keywords),
                    GetEntryOrDefault(info, NameToken.Creator),
                    GetEntryOrDefault(info, NameToken.Producer));
            }

            return DocumentInformation.Default;
        }
Пример #13
0
        /// <summary>
        /// Tries to read an explicit destination from a value which is either an array, or a dictionary whose D entry is an array.
        /// </summary>
        /// <param name="value">The token to read, resolved through the scanner.</param>
        /// <param name="catalog">Passed on when interpreting the array.</param>
        /// <param name="pdfScanner">Scanner used to resolve <paramref name="value"/> and the D entry.</param>
        /// <param name="log">Passed on when interpreting the array.</param>
        /// <param name="destination">The destination when one was read.</param>
        /// <returns><see langword="true"/> if a destination was read from either form, otherwise <see langword="false"/>.</returns>
        private static bool TryReadExplicitDestination(IToken value, Catalog catalog, IPdfTokenScanner pdfScanner,
                                                       ILog log, out ExplicitDestination destination)
        {
            destination = null;

            // First form: the value itself is the array.
            var isArray = DirectObjectFinder.TryGet(value, pdfScanner, out ArrayToken destinationArray);
            if (isArray && TryGetExplicitDestination(destinationArray, catalog, log, out destination))
            {
                return true;
            }

            // Second form: the value is a dictionary carrying the array under D.
            var isDictionary = DirectObjectFinder.TryGet(value, pdfScanner, out DictionaryToken wrapper);
            if (!isDictionary || !wrapper.TryGet(NameToken.D, pdfScanner, out destinationArray))
            {
                return false;
            }

            return TryGetExplicitDestination(destinationArray, catalog, log, out destination);
        }
Пример #14
0
        /// <summary>
        /// Reads the CIDToGIDMap entry of the dictionary into a <see cref="CharacterIdentifierToGlyphIndexMap"/>.
        /// </summary>
        /// <param name="dictionary">The dictionary which may contain the CIDToGIDMap entry.</param>
        /// <returns>A default map when the entry is missing or resolves to a name, otherwise a map built from the decoded stream bytes.</returns>
        /// <exception cref="PdfDocumentFormatException">The entry resolves to neither a name nor a stream.</exception>
        private CharacterIdentifierToGlyphIndexMap GetCharacterIdentifierToGlyphIndexMap(DictionaryToken dictionary)
        {
            // A missing entry and a name entry both produce the default map.
            if (!dictionary.TryGet(NameToken.CidToGidMap, out var entry)
                || DirectObjectFinder.TryGet(entry, pdfScanner, out NameToken _))
            {
                return new CharacterIdentifierToGlyphIndexMap();
            }

            if (DirectObjectFinder.TryGet(entry, pdfScanner, out StreamToken stream))
            {
                return new CharacterIdentifierToGlyphIndexMap(stream.Decode(filterProvider, pdfScanner));
            }

            throw new PdfDocumentFormatException($"No stream or name token found for /CIDToGIDMap in dictionary: {dictionary}.");
        }
Пример #15
0
        /// <summary>
        /// Creates a <see cref="Catalog"/> from the catalog dictionary and its resolved pages dictionary.
        /// </summary>
        /// <param name="scanner">Scanner used to resolve the Pages entry to a dictionary.</param>
        /// <param name="dictionary">The catalog dictionary.</param>
        /// <returns>The catalog holding the dictionary and its pages dictionary.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is <see langword="null"/>.</exception>
        /// <exception cref="PdfDocumentFormatException">The Type entry is present but is not Catalog, or the Pages entry is missing.</exception>
        public Catalog Create(IPdfTokenScanner scanner, DictionaryToken dictionary)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var hasType = dictionary.TryGet(NameToken.Type, out var typeToken);
            if (hasType && !ReferenceEquals(typeToken, NameToken.Catalog))
            {
                throw new PdfDocumentFormatException($"The type of the catalog dictionary was not Catalog: {dictionary}.");
            }

            if (dictionary.TryGet(NameToken.Pages, out var pagesEntry))
            {
                return new Catalog(dictionary, DirectObjectFinder.Get<DictionaryToken>(pagesEntry, scanner));
            }

            throw new PdfDocumentFormatException($"No pages entry was found in the catalog dictionary: {dictionary}.");
        }
Пример #16
0
        /// <summary>
        /// Resolves the root dictionary referenced by the Root entry of the cross reference table's dictionary.
        /// </summary>
        /// <param name="crossReferenceTable">The table whose dictionary is inspected.</param>
        /// <param name="isLenientParsing">When set, a root dictionary without a Type entry is given the Catalog type.</param>
        /// <param name="pdfTokenScanner">Scanner used to resolve the Root entry.</param>
        /// <returns>The root dictionary.</returns>
        /// <exception cref="NotSupportedException">The dictionary contains an Encrypt entry.</exception>
        /// <exception cref="PdfDocumentFormatException">The dictionary has no Root entry.</exception>
        private static DictionaryToken ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner)
        {
            var trailer = crossReferenceTable.Dictionary;

            if (trailer.ContainsKey(NameToken.Encrypt))
            {
                throw new NotSupportedException("Cannot currently parse a document using encryption: " + trailer);
            }

            if (!trailer.TryGet(NameToken.Root, out var rootEntry))
            {
                throw new PdfDocumentFormatException($"Missing root object specification in trailer: {trailer}.");
            }

            var root = DirectObjectFinder.Get<DictionaryToken>(rootEntry, pdfTokenScanner);

            // Lenient mode fills in the type rather than leaving the root untyped.
            if (isLenientParsing && !root.ContainsKey(NameToken.Type))
            {
                return root.With(NameToken.Type, NameToken.Catalog);
            }

            return root;
        }
Пример #17
0
        /// <summary>
        /// Builds an <see cref="AcroTextField"/> from a field dictionary, reading its value (V) and maximum length (MaxLen).
        /// </summary>
        /// <param name="fieldDictionary">The dictionary describing the field.</param>
        /// <param name="fieldType">The field type, passed through to the created field.</param>
        /// <param name="fieldFlags">Raw flags, interpreted as <see cref="AcroTextFieldFlags"/>.</param>
        /// <param name="information">Common field information, passed through to the created field.</param>
        /// <param name="pageNumber">Page number, passed through to the created field.</param>
        /// <param name="bounds">Bounds, passed through to the created field.</param>
        /// <returns>The text field; its value is <see langword="null"/> when V is absent or is not a string, hex string or stream.</returns>
        private AcroFieldBase GetTextField(DictionaryToken fieldDictionary, NameToken fieldType, uint fieldFlags,
                                           AcroFieldCommonInformation information,
                                           int? pageNumber,
                                           PdfRectangle? bounds)
        {
            string textValue = null;

            if (fieldDictionary.TryGet(NameToken.V, out var valueToken))
            {
                // The value is tried as a string, then a hex string, then a stream decoded as Latin-1 text.
                if (DirectObjectFinder.TryGet(valueToken, tokenScanner, out StringToken asString))
                {
                    textValue = asString.Data;
                }
                else if (DirectObjectFinder.TryGet(valueToken, tokenScanner, out HexToken asHex))
                {
                    textValue = asHex.Data;
                }
                else if (DirectObjectFinder.TryGet(valueToken, tokenScanner, out StreamToken asStream))
                {
                    var decoded = asStream.Decode(filterProvider).ToArray();
                    textValue = OtherEncodings.BytesAsLatin1String(decoded);
                }
            }

            int? maxLength = null;

            if (fieldDictionary.TryGetOptionalTokenDirect(NameToken.MaxLen, tokenScanner, out NumericToken maxLenToken))
            {
                maxLength = maxLenToken.Int;
            }

            return new AcroTextField(fieldDictionary, fieldType, (AcroTextFieldFlags)fieldFlags, information,
                                     textValue,
                                     maxLength,
                                     pageNumber,
                                     bounds);
        }
Пример #18
0
        /// <summary>
        /// Reads the text stored under the given name in the dictionary.
        /// </summary>
        /// <remarks>
        /// String and hex string values are accepted both when stored directly and when stored behind an
        /// indirect reference.
        /// </remarks>
        /// <param name="name">The key to look up.</param>
        /// <param name="dictionary">The dictionary to read from.</param>
        /// <returns>The text, or <see langword="null"/> when the entry is missing or is not a string or hex string.</returns>
        private string GetNamedString(NameToken name, DictionaryToken dictionary)
        {
            if (!dictionary.TryGet(name, out var contentToken))
            {
                return null;
            }

            if (contentToken is StringToken contentString)
            {
                return contentString.Data;
            }

            if (contentToken is HexToken contentHex)
            {
                return contentHex.Data;
            }

            if (DirectObjectFinder.TryGet(contentToken, tokenScanner, out StringToken indirectContentString))
            {
                return indirectContentString.Data;
            }

            // A hex string behind an indirect reference carries text just like a direct one.
            if (DirectObjectFinder.TryGet(contentToken, tokenScanner, out HexToken indirectContentHex))
            {
                return indirectContentHex.Data;
            }

            return null;
        }
Пример #19
0
        /// <summary>
        /// Gets the TrueType font program for a descriptor, from its embedded font file or, when there is none,
        /// from the system font finder by font name.
        /// </summary>
        /// <param name="descriptor">The font descriptor.</param>
        /// <returns>The font program, or <see langword="null"/> when the lookup or the parsing failed (the failure is logged).</returns>
        /// <exception cref="InvalidFontFormatException">The descriptor has a font file which is not of the TrueType file type.</exception>
        private TrueTypeFontProgram ParseTrueTypeFont(FontDescriptor descriptor)
        {
            var embeddedFile = descriptor.FontFile;

            if (embeddedFile == null)
            {
                // TODO: check if this font is present on the host OS. See: FileSystemFontProvider.java
                try
                {
                    return systemFontFinder.GetTrueTypeFont(descriptor.FontName.Data);
                }
                catch (Exception ex)
                {
                    log.Error($"Failed finding system font by name: {descriptor.FontName}.", ex);
                    return null;
                }
            }

            if (embeddedFile.FileType != DescriptorFontFile.FontFileType.TrueType)
            {
                throw new InvalidFontFormatException(
                          $"Expected a TrueType font in the TrueType font descriptor, instead it was {descriptor.FontFile.FileType}.");
            }

            try
            {
                var fontStream = DirectObjectFinder.Get<StreamToken>(embeddedFile.ObjectKey, pdfScanner);
                var fontBytes = new ByteArrayInputBytes(fontStream.Decode(filterProvider));

                return trueTypeFontParser.Parse(new TrueTypeDataBytes(fontBytes));
            }
            catch (Exception ex)
            {
                // Parsing is best effort: the error is logged and the caller receives no font.
                log.Error("Could not parse the TrueType font.", ex);

                return null;
            }
        }
Пример #20
0
        /// <summary>
        /// Reads the CID system info (registry, ordering and supplement) from the CIDSystemInfo entry of the dictionary.
        /// </summary>
        /// <param name="dictionary">The CID font dictionary.</param>
        /// <returns>The system info built from the Registry, Ordering and Supplement entries.</returns>
        /// <exception cref="InvalidFontFormatException">The dictionary has no CIDSystemInfo entry.</exception>
        private CharacterIdentifierSystemInfo GetSystemInfo(DictionaryToken dictionary)
        {
            if (!dictionary.TryGet(NameToken.CidSystemInfo, out var cidEntry))
            {
                throw new InvalidFontFormatException($"No CID System Info was found in the CID Font dictionary: {dictionary}");
            }

            // A directly stored dictionary is used as-is; anything else is resolved through the scanner.
            if (!(cidEntry is DictionaryToken cidDictionary))
            {
                cidDictionary = DirectObjectFinder.Get<DictionaryToken>(cidEntry, pdfScanner);
            }

            return new CharacterIdentifierSystemInfo(
                SafeKeyAccess(cidDictionary, NameToken.Registry),
                SafeKeyAccess(cidDictionary, NameToken.Ordering),
                cidDictionary.GetIntOrDefault(NameToken.Supplement));
        }
Пример #21
0
        /// <summary>
        /// Tries to resolve the FontDescriptor entry of the dictionary to a dictionary.
        /// </summary>
        /// <param name="dictionary">The dictionary which may contain the FontDescriptor entry.</param>
        /// <param name="descriptorDictionary">The resolved descriptor dictionary; <see langword="null"/> when the entry is missing or resolving it threw.</param>
        /// <returns><see langword="false"/> when the entry is missing or resolving it threw, otherwise <see langword="true"/>.</returns>
        private bool TryGetFontDescriptor(DictionaryToken dictionary, out DictionaryToken descriptorDictionary)
        {
            descriptorDictionary = null;

            if (dictionary.TryGet(NameToken.FontDescriptor, out var descriptorEntry))
            {
                try
                {
                    descriptorDictionary = DirectObjectFinder.Get<DictionaryToken>(descriptorEntry, pdfScanner);

                    return true;
                }
                catch
                {
                    // Any failure to resolve the entry is reported as "no descriptor" rather than propagated.
                    return false;
                }
            }

            return false;
        }
Пример #22
0
        /// <summary>
        /// Reads the encryption dictionary referred to by the trailer's encryption token, if any.
        /// </summary>
        /// <param name="crossReferenceTable">The table whose trailer is inspected.</param>
        /// <param name="pdfTokenScanner">Scanner used to resolve the encryption token.</param>
        /// <returns>The encryption dictionary, or <see langword="null"/> when there is no encryption token or it resolves to a null token.</returns>
        /// <exception cref="PdfDocumentFormatException">The encryption token resolves to neither a dictionary nor a null token.</exception>
        private static EncryptionDictionary GetEncryptionDictionary(CrossReferenceTable crossReferenceTable, IPdfTokenScanner pdfTokenScanner)
        {
            var encryptionToken = crossReferenceTable.Trailer.EncryptionToken;

            if (encryptionToken == null)
            {
                return null;
            }

            if (DirectObjectFinder.TryGet(encryptionToken, pdfTokenScanner, out DictionaryToken encryptionDictionaryToken))
            {
                return EncryptionDictionaryFactory.Read(encryptionDictionaryToken, pdfTokenScanner);
            }

            // A token resolving to null is treated the same as having no encryption token.
            if (DirectObjectFinder.TryGet(encryptionToken, pdfTokenScanner, out NullToken _))
            {
                return null;
            }

            throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {encryptionToken}.");
        }
Пример #23
0
        /// <summary>
        /// Reads the FontName entry of the dictionary.
        /// </summary>
        /// <param name="dictionary">The dictionary which should contain the FontName entry.</param>
        /// <param name="scanner">Scanner used when the entry is an indirect reference.</param>
        /// <param name="isLenientParsing">When set, a missing or unusable entry yields an empty name instead of an exception.</param>
        /// <returns>The font name, or a name created from the empty string in lenient mode when none could be read.</returns>
        /// <exception cref="InvalidOperationException">No name could be read and lenient parsing is off.</exception>
        private static NameToken GetFontName(DictionaryToken dictionary, IPdfTokenScanner scanner, bool isLenientParsing)
        {
            var found = dictionary.TryGet(NameToken.FontName, out var name);

            if (found && name is NameToken directName)
            {
                return directName;
            }

            if (name is IndirectReferenceToken nameReference)
            {
                return DirectObjectFinder.Get<NameToken>(nameReference, scanner);
            }

            if (!isLenientParsing)
            {
                throw new InvalidOperationException("Could not parse the font descriptor, could not retrieve the font name. " + dictionary);
            }

            return NameToken.Create(string.Empty);
        }
Пример #24
0
        /// <summary>
        /// Extract bookmarks, if any.
        /// </summary>
        /// <param name="catalog">The catalog whose Outlines entry is read.</param>
        /// <returns>The bookmarks, or <see langword="null"/> when the catalog has no Outlines dictionary or that dictionary has no First entry.</returns>
        /// <exception cref="PdfDocumentFormatException">Lenient parsing is off and the Outlines dictionary declares a type other than Outlines.</exception>
        public Bookmarks GetBookmarks(Catalog catalog)
        {
            if (!catalog.CatalogDictionary.TryGet(NameToken.Outlines, pdfScanner, out DictionaryToken outlines))
            {
                return null;
            }

            if (!isLenientParsing)
            {
                if (outlines.TryGet(NameToken.Type, pdfScanner, out NameToken typeName) && typeName != NameToken.Outlines)
                {
                    throw new PdfDocumentFormatException($"Outlines (bookmarks) dictionary did not have correct type specified: {typeName}.");
                }
            }

            if (!outlines.TryGet(NameToken.First, pdfScanner, out DictionaryToken current))
            {
                return null;
            }

            var namedDestinations = ReadNamedDestinations(catalog, pdfScanner, isLenientParsing, log);

            var roots = new List<BookmarkNode>();
            var seen = new HashSet<IndirectReference>();

            while (current != null)
            {
                ReadBookmarksRecursively(current, 0, false, seen, namedDestinations, catalog, roots);

                // Stop when there is no Next reference or it points at a reference already recorded in 'seen'.
                var hasNext = current.TryGet(NameToken.Next, out IndirectReferenceToken nextReference);
                if (!hasNext || !seen.Add(nextReference.Data))
                {
                    break;
                }

                current = DirectObjectFinder.Get<DictionaryToken>(nextReference, pdfScanner);
            }

            return new Bookmarks(roots);
        }
Пример #25
0
        /// <summary>
        /// Collects the explicit destinations of a name tree node and, recursively, of its kids.
        /// </summary>
        /// <remarks>
        /// The Names array is read as consecutive key/value pairs. Pairs whose key is not a string token, or whose
        /// value cannot be read as an explicit destination, are skipped. A trailing key without a value is ignored.
        /// </remarks>
        /// <param name="nameTreeNodeDictionary">The name tree node to read.</param>
        /// <param name="catalog">Passed on when reading destinations.</param>
        /// <param name="pdfScanner">Scanner used to resolve the Names and Kids entries and each kid.</param>
        /// <param name="isLenientParsing">When not set, a kid which is not a dictionary raises an exception; otherwise it is skipped.</param>
        /// <param name="log">Passed on when reading destinations.</param>
        /// <param name="explicitDestinations">Receives the destinations keyed by name; an existing key is overwritten.</param>
        /// <exception cref="PdfDocumentFormatException">A kid is not a dictionary and lenient parsing is off.</exception>
        private static void ExtractNameTree(DictionaryToken nameTreeNodeDictionary, Catalog catalog, IPdfTokenScanner pdfScanner,
                                            bool isLenientParsing,
                                            ILog log,
                                            Dictionary<string, ExplicitDestination> explicitDestinations)
        {
            if (nameTreeNodeDictionary.TryGet(NameToken.Names, pdfScanner, out ArrayToken nodeNames))
            {
                // Bound on i + 1 so an odd-length array cannot index past its end when reading the value.
                for (var i = 0; i + 1 < nodeNames.Length; i += 2)
                {
                    if (!(nodeNames[i] is IDataToken<string> key))
                    {
                        continue;
                    }

                    var value = nodeNames[i + 1];

                    if (TryReadExplicitDestination(value, catalog, pdfScanner, log, out var destination))
                    {
                        explicitDestinations[key.Data] = destination;
                    }
                }
            }

            if (nameTreeNodeDictionary.TryGet(NameToken.Kids, pdfScanner, out ArrayToken kids))
            {
                foreach (var kid in kids.Data)
                {
                    if (DirectObjectFinder.TryGet(kid, pdfScanner, out DictionaryToken kidDictionary))
                    {
                        ExtractNameTree(kidDictionary, catalog, pdfScanner, isLenientParsing, log, explicitDestinations);
                    }
                    else if (!isLenientParsing)
                    {
                        throw new PdfDocumentFormatException($"Invalid kids entry in PDF name tree: {kid} in {kids}.");
                    }
                }
            }
        }
Пример #26
0
        /// <summary>
        /// Reads the CIDToGIDMap entry of the dictionary into a <see cref="CharacterIdentifierToGlyphIndexMap"/>.
        /// </summary>
        /// <param name="dictionary">The dictionary which may contain the CIDToGIDMap entry.</param>
        /// <param name="isLenientParsing">When set, a name other than Identity is tolerated and treated like Identity.</param>
        /// <returns>
        /// A default map when the entry is missing or is a name (stored directly or behind an indirect reference),
        /// otherwise a map built from the decoded bytes of the entry's stream.
        /// </returns>
        /// <exception cref="InvalidOperationException">The entry is a name other than Identity and lenient parsing is off.</exception>
        private CharacterIdentifierToGlyphIndexMap GetCharacterIdentifierToGlyphIndexMap(DictionaryToken dictionary, bool isLenientParsing)
        {
            if (!dictionary.TryGet(NameToken.CidToGidMap, out var entry))
            {
                return new CharacterIdentifierToGlyphIndexMap();
            }

            // The name may sit behind an indirect reference; resolve it so it receives the same validation
            // as a direct name instead of being handed to the stream lookup below.
            if (!(entry is NameToken name))
            {
                DirectObjectFinder.TryGet(entry, pdfScanner, out name);
            }

            if (name != null)
            {
                if (!name.Equals(NameToken.Identity) && !isLenientParsing)
                {
                    throw new InvalidOperationException($"The CIDToGIDMap in a Type 0 font should have the value /Identity, instead got: {name}.");
                }

                return new CharacterIdentifierToGlyphIndexMap();
            }

            var stream = DirectObjectFinder.Get<StreamToken>(entry, pdfScanner);

            var bytes = stream.Decode(filterProvider);

            return new CharacterIdentifierToGlyphIndexMap(bytes);
        }
Пример #27
0
        /// <summary>
        /// Reads the encoding described by the <c>Encoding</c> entry of a font dictionary.
        /// </summary>
        /// <param name="fontDictionary">The font dictionary to read the <c>Encoding</c> and <c>BaseFont</c> entries from.</param>
        /// <param name="isLenientParsing">Not used by this method.</param>
        /// <param name="descriptor">Passed to <c>TryGetNamedEncoding</c> when the entry is a name.</param>
        /// <param name="fontEncoding">Passed to <c>ReadEncodingDictionary</c> when the entry is a dictionary.</param>
        /// <returns>
        /// <c>null</c> when there is no <c>Encoding</c> entry; otherwise the named encoding, a fallback chosen from
        /// the base font name, or the result of reading the encoding dictionary.
        /// </returns>
        public Encoding Read(DictionaryToken fontDictionary, bool isLenientParsing, FontDescriptor descriptor = null,
                             Encoding fontEncoding = null)
        {
            var hasEncoding = fontDictionary.TryGet(NameToken.Encoding, out var encodingToken);

            if (!hasEncoding)
            {
                return null;
            }

            if (encodingToken is NameToken encodingName)
            {
                if (TryGetNamedEncoding(descriptor, encodingName, out var knownEncoding))
                {
                    return knownEncoding;
                }

                // The name was not recognised: choose a fallback from the base font name when one is present.
                if (fontDictionary.TryGet(NameToken.BaseFont, pdfScanner, out NameToken baseFont))
                {
                    var fontName = baseFont.Data;
                    Encoding fallback;

                    if (string.Equals(fontName, "ZapfDingbats", StringComparison.OrdinalIgnoreCase))
                    {
                        fallback = ZapfDingbatsEncoding.Instance;
                    }
                    else if (string.Equals(fontName, "Symbol", StringComparison.OrdinalIgnoreCase))
                    {
                        fallback = SymbolEncoding.Instance;
                    }
                    else
                    {
                        fallback = WinAnsiEncoding.Instance;
                    }

                    return fallback;
                }
            }

            // Reached for non-name entries, and for unrecognised names when no base font name is available.
            var encodingDictionary = DirectObjectFinder.Get <DictionaryToken>(encodingToken, pdfScanner);

            return ReadEncodingDictionary(encodingDictionary, fontEncoding);
        }
Пример #28
0
        /// <summary>
        /// Determines the media box for a page, preferring the page's own <c>MediaBox</c> entry over the
        /// value carried by <paramref name="pageTreeMembers"/>.
        /// </summary>
        /// <param name="number">The page number, used only in the error message.</param>
        /// <param name="dictionary">The dictionary to read the <c>MediaBox</c> entry from.</param>
        /// <param name="pageTreeMembers">Supplies the media box used when the dictionary has no usable entry.</param>
        /// <param name="isLenientParsing">When <c>true</c>, <c>MediaBox.A4</c> is substituted for a missing or wrongly sized box.</param>
        /// <returns>The media box for the page.</returns>
        /// <exception cref="InvalidOperationException">No media box is available and parsing is strict.</exception>
        private MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
        {
            if (dictionary.TryGet(NameToken.MediaBox, out var boxToken) &&
                DirectObjectFinder.TryGet(boxToken, pdfScanner, out ArrayToken boxArray))
            {
                var hasWrongLength = boxArray.Length != 4;

                if (hasWrongLength && isLenientParsing)
                {
                    log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {boxArray}.");

                    return MediaBox.A4;
                }

                // In strict mode a wrongly sized array is handed to the conversion unchanged.
                return new MediaBox(boxArray.ToIntRectangle());
            }

            MediaBox inherited = pageTreeMembers.MediaBox;

            if (inherited == null)
            {
                if (!isLenientParsing)
                {
                    throw new InvalidOperationException("No mediabox was present for page: " + number);
                }

                return MediaBox.A4;
            }

            return inherited;
        }
Пример #29
0
        /// <summary>
        /// Reads the text stored under <paramref name="keyName"/>, returning an empty string when the
        /// entry is missing or is not a string or hex string.
        /// </summary>
        /// <param name="dictionary">The dictionary to read from.</param>
        /// <param name="keyName">The key of the entry to read.</param>
        /// <returns>The <c>Data</c> of the string or hex token found, otherwise <see cref="string.Empty"/>.</returns>
        private string SafeKeyAccess(DictionaryToken dictionary, NameToken keyName)
        {
            if (!dictionary.TryGet(keyName, out var token))
            {
                return(string.Empty);
            }

            if (token is StringToken str)
            {
                return(str.Data);
            }

            if (token is HexToken hex)
            {
                return(hex.Data);
            }

            if (token is IndirectReferenceToken obj)
            {
                // A referenced object may be either kind of string, exactly like a direct entry. Using TryGet
                // for both keeps this method "safe": an unexpected target yields the empty string below
                // instead of relying on Get<StringToken> to cope with a non-string object.
                if (DirectObjectFinder.TryGet(obj, pdfScanner, out StringToken referencedString))
                {
                    return(referencedString.Data);
                }

                if (DirectObjectFinder.TryGet(obj, pdfScanner, out HexToken referencedHex))
                {
                    return(referencedHex.Data);
                }
            }

            return(string.Empty);
        }
Пример #30
0
        /// <summary>
        /// Resolves the root dictionary referenced by the trailer and, when the trailer has an encryption
        /// token, reads the encryption dictionary.
        /// </summary>
        /// <param name="crossReferenceTable">The cross reference table whose trailer is inspected.</param>
        /// <param name="isLenientParsing">When <c>true</c>, a root dictionary without a <c>Type</c> entry is given the <c>Catalog</c> type.</param>
        /// <param name="pdfTokenScanner">Scanner used to resolve the trailer's tokens.</param>
        /// <param name="encryptionDictionary">The encryption dictionary read from the trailer, or <c>null</c> when the trailer has no encryption token.</param>
        /// <returns>The trailer's root reference together with the resolved root dictionary.</returns>
        /// <exception cref="PdfDocumentFormatException">The encryption token does not resolve to a dictionary.</exception>
        private static (IndirectReference, DictionaryToken) ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner,
                                                                         out EncryptionDictionary encryptionDictionary)
        {
            var trailer         = crossReferenceTable.Trailer;
            var encryptionToken = trailer.EncryptionToken;

            if (encryptionToken == null)
            {
                encryptionDictionary = null;
            }
            else if (DirectObjectFinder.TryGet(encryptionToken, pdfTokenScanner, out DictionaryToken encryptionDictionaryToken))
            {
                encryptionDictionary = EncryptionDictionaryFactory.Read(encryptionDictionaryToken, pdfTokenScanner);
            }
            else
            {
                throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {encryptionToken}.");
            }

            var rootDictionary = DirectObjectFinder.Get <DictionaryToken>(trailer.Root, pdfTokenScanner);

            // Lenient parsing fills in the Type entry when the root dictionary does not declare one.
            var isMissingType = !rootDictionary.ContainsKey(NameToken.Type);

            if (isMissingType && isLenientParsing)
            {
                rootDictionary = rootDictionary.With(NameToken.Type, NameToken.Catalog);
            }

            return (trailer.Root, rootDictionary);
        }