Exemplo n.º 1
0
        /// <summary>
        /// Parses every indirect object referenced from the trailer dictionary, then parses and returns the root object.
        /// </summary>
        /// <exception cref="InvalidOperationException">The trailer dictionary has no root entry.</exception>
        private static CosBase ParseTrailer(IRandomAccessRead reader, CrossReferenceTable crossReferenceTable,
                                            DynamicParser dynamicParser, BruteForceSearcher bruteForceSearcher, CosObjectPool pool, bool isLenientParsing)
        {
            foreach (var entry in crossReferenceTable.Dictionary.Values)
            {
                if (!(entry is CosObject referenced))
                {
                    continue;
                }

                // Parse now so the object is already in the pool when it is needed later.
                dynamicParser.Parse(reader, referenced, pool, crossReferenceTable, bruteForceSearcher,
                                    isLenientParsing, false);
            }

            var rootReference = (CosObject)crossReferenceTable.Dictionary.GetItemOrDefault(CosName.ROOT);

            if (rootReference == null)
            {
                throw new InvalidOperationException("Missing root object specification in trailer.");
            }

            return dynamicParser.Parse(reader, rootReference, pool, crossReferenceTable, bruteForceSearcher,
                                       isLenientParsing, false);
        }
Exemplo n.º 2
0
 /// <summary>
 /// Creates a document from already-parsed parts and wires up the page, structure and form accessors.
 /// </summary>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="version"/>, <paramref name="cachingProviders"/>, <paramref name="pdfScanner"/>,
 /// <paramref name="filterProvider"/> or <paramref name="information"/> is null.
 /// </exception>
 internal PdfDocument(ILog log,
                      IInputBytes inputBytes,
                      HeaderVersion version,
                      CrossReferenceTable crossReferenceTable,
                      bool isLenientParsing,
                      ParsingCachingProviders cachingProviders,
                      IPageFactory pageFactory,
                      Catalog catalog,
                      DocumentInformation information,
                      EncryptionDictionary encryptionDictionary,
                      IPdfTokenScanner pdfScanner,
                      IFilterProvider filterProvider,
                      AcroFormFactory acroFormFactory)
 {
     // Mandatory arguments are validated up front, in the same order they are stored below.
     if (version is null)
     {
         throw new ArgumentNullException(nameof(version));
     }

     if (cachingProviders is null)
     {
         throw new ArgumentNullException(nameof(cachingProviders));
     }

     if (pdfScanner is null)
     {
         throw new ArgumentNullException(nameof(pdfScanner));
     }

     if (filterProvider is null)
     {
         throw new ArgumentNullException(nameof(filterProvider));
     }

     if (information is null)
     {
         throw new ArgumentNullException(nameof(information));
     }

     this.log = log;
     this.inputBytes = inputBytes;
     this.version = version;
     this.isLenientParsing = isLenientParsing;
     this.cachingProviders = cachingProviders;
     this.encryptionDictionary = encryptionDictionary;
     this.pdfScanner = pdfScanner;
     this.filterProvider = filterProvider;
     Information = information;

     pages = new Pages(log, catalog, pageFactory, isLenientParsing, pdfScanner);
     Structure = new Structure(catalog, crossReferenceTable, pdfScanner);

     // The form is only read from the catalog the first time it is requested.
     documentForm = new Lazy<AcroForm>(() => acroFormFactory.GetAcroForm(catalog));
 }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the encryption dictionary (when the trailer has an encryption token) and the catalog dictionary
        /// referenced by the trailer root. A missing Type entry on the catalog is filled in with Catalog.
        /// </summary>
        /// <remarks>
        /// This method is basically a copy of UglyToad.PdfPig.Parser.PdfDocumentFactory.ParseTrailer().
        /// </remarks>
        /// <exception cref="PdfDocumentFormatException">The encryption token does not resolve to a dictionary.</exception>
        private static DictionaryToken ParseCatalog(CrossReferenceTable crossReferenceTable,
                                                    IPdfTokenScanner pdfTokenScanner,
                                                    out EncryptionDictionary encryptionDictionary)
        {
            if (crossReferenceTable.Trailer.EncryptionToken == null)
            {
                encryptionDictionary = null;
            }
            else if (DirectObjectFinder.TryGet(crossReferenceTable.Trailer.EncryptionToken, pdfTokenScanner,
                                               out DictionaryToken encryptionToken))
            {
                encryptionDictionary = EncryptionDictionaryFactory.Read(encryptionToken, pdfTokenScanner);
            }
            else
            {
                throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {crossReferenceTable.Trailer.EncryptionToken}.");
            }

            var catalog = DirectObjectFinder.Get<DictionaryToken>(crossReferenceTable.Trailer.Root, pdfTokenScanner);

            return catalog.ContainsKey(NameToken.Type)
                ? catalog
                : catalog.With(NameToken.Type, NameToken.Catalog);
        }
Exemplo n.º 4
0
 /// <summary>
 /// Creates the structure view over the catalog, cross reference table and token scanner.
 /// </summary>
 /// <exception cref="ArgumentNullException">Any argument is null.</exception>
 internal Structure(Catalog catalog, CrossReferenceTable crossReferenceTable,
                    IPdfTokenScanner scanner)
 {
     if (catalog is null)
     {
         throw new ArgumentNullException(nameof(catalog));
     }

     if (crossReferenceTable is null)
     {
         throw new ArgumentNullException(nameof(crossReferenceTable));
     }

     if (scanner is null)
     {
         throw new ArgumentNullException(nameof(scanner));
     }

     Catalog = catalog;
     CrossReferenceTable = crossReferenceTable;
     TokenScanner = scanner;
 }
Exemplo n.º 5
0
        /// <summary>
        /// Reads the optional encryption dictionary and the root (catalog) dictionary referenced by the trailer.
        /// In lenient mode a root dictionary without a Type entry is given Type = Catalog.
        /// </summary>
        /// <exception cref="PdfDocumentFormatException">The encryption token does not resolve to a dictionary.</exception>
        private static DictionaryToken ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner,
                                                    out EncryptionDictionary encryptionDictionary)
        {
            encryptionDictionary = null;

            if (crossReferenceTable.Trailer.EncryptionToken != null)
            {
                var resolved = DirectObjectFinder.TryGet(crossReferenceTable.Trailer.EncryptionToken, pdfTokenScanner,
                                                         out DictionaryToken encryptionDictionaryToken);

                if (!resolved)
                {
                    throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {crossReferenceTable.Trailer.EncryptionToken}.");
                }

                encryptionDictionary = EncryptionDictionaryFactory.Read(encryptionDictionaryToken, pdfTokenScanner);
            }

            var catalog = DirectObjectFinder.Get<DictionaryToken>(crossReferenceTable.Trailer.Root, pdfTokenScanner);

            // Nothing to patch when the type is present, and strict mode never patches.
            if (catalog.ContainsKey(NameToken.Type) || !isLenientParsing)
            {
                return catalog;
            }

            return catalog.With(NameToken.Type, NameToken.Catalog);
        }
Exemplo n.º 6
0
 /// <summary>
 /// Creates a document from already-parsed parts and wires up the page, structure, advanced-access and form accessors.
 /// </summary>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="version"/>, <paramref name="cachingProviders"/>, <paramref name="pdfScanner"/>,
 /// <paramref name="filterProvider"/>, <paramref name="bookmarksProvider"/> or <paramref name="information"/> is null.
 /// </exception>
 internal PdfDocument(ILog log,
                      IInputBytes inputBytes,
                      HeaderVersion version,
                      CrossReferenceTable crossReferenceTable,
                      ParsingCachingProviders cachingProviders,
                      IPageFactory pageFactory,
                      Catalog catalog,
                      DocumentInformation information,
                      EncryptionDictionary encryptionDictionary,
                      IPdfTokenScanner pdfScanner,
                      ILookupFilterProvider filterProvider,
                      AcroFormFactory acroFormFactory,
                      BookmarksProvider bookmarksProvider,
                      bool clipPaths)
 {
     // Mandatory arguments are validated up front, in the same order they are stored below.
     if (version is null)
     {
         throw new ArgumentNullException(nameof(version));
     }

     if (cachingProviders is null)
     {
         throw new ArgumentNullException(nameof(cachingProviders));
     }

     if (pdfScanner is null)
     {
         throw new ArgumentNullException(nameof(pdfScanner));
     }

     if (filterProvider is null)
     {
         throw new ArgumentNullException(nameof(filterProvider));
     }

     if (bookmarksProvider is null)
     {
         throw new ArgumentNullException(nameof(bookmarksProvider));
     }

     if (information is null)
     {
         throw new ArgumentNullException(nameof(information));
     }

     this.log = log;
     this.inputBytes = inputBytes;
     this.version = version;
     this.cachingProviders = cachingProviders;
     this.encryptionDictionary = encryptionDictionary;
     this.pdfScanner = pdfScanner;
     this.filterProvider = filterProvider;
     this.bookmarksProvider = bookmarksProvider;
     this.clipPaths = clipPaths;
     Information = information;

     pages = new Pages(catalog, pageFactory, pdfScanner);
     Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
     Advanced = new AdvancedPdfDocumentAccess(pdfScanner, filterProvider, catalog);

     // The form is only read from the catalog the first time it is requested.
     documentForm = new Lazy<AcroForm>(() => acroFormFactory.GetAcroForm(catalog));
 }
Exemplo n.º 7
0
        /// <summary>
        /// Returns the object for the given number and generation, parsing it from the file or from a
        /// compressed object stream when the pool does not already hold it.
        /// </summary>
        /// <returns>The parsed object, or <see cref="CosNull.Null"/> when no location is known for it.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="pool"/> is null, or <paramref name="crossReferenceTable"/> is null and the object is not pooled yet.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="requireExistingObject"/> is set and the object has no positive offset in the cross reference table.
        /// </exception>
        public CosBase Parse(IRandomAccessRead reader, long objectNumber, int objectGeneration,
                             CosObjectPool pool, CrossReferenceTable crossReferenceTable,
                             BruteForceSearcher bruteForceSearcher,
                             bool isLenient,
                             bool requireExistingObject)
        {
            if (pool == null)
            {
                throw new ArgumentNullException(nameof(pool));
            }

            var objectKey = new CosObjectKey(objectNumber, objectGeneration);
            var pooled = pool.GetOrCreateDefault(objectKey);

            // Already parsed on an earlier call: reuse it.
            if (pooled.GetObject() != null)
            {
                return pooled.GetObject();
            }

            if (crossReferenceTable == null)
            {
                throw new ArgumentNullException(nameof(crossReferenceTable));
            }

            // Positive values are treated as file offsets; values <= 0 take the compressed-stream path below.
            var location = TryGet(objectKey, crossReferenceTable.ObjectOffsets);

            if (requireExistingObject && (location == null || location <= 0))
            {
                throw new InvalidOperationException("Object must be defined and not compressed: " + objectKey);
            }

            if (isLenient && location == null)
            {
                // Lenient mode: fall back to the brute-force scan and remember what it found.
                location = TryGet(objectKey, bruteForceSearcher.GetObjectLocations());

                if (location != null)
                {
                    crossReferenceTable.UpdateOffset(objectKey, location.Value);
                }
            }

            if (location == null)
            {
                return CosNull.Null;
            }

            if (location > 0)
            {
                return ParseObjectFromFile(location.Value, reader, objectKey, pool, isLenient);
            }

            return ParseCompressedStreamObject(reader, -location.Value, objectNumber, pool, crossReferenceTable, bruteForceSearcher, isLenient);
        }
Exemplo n.º 8
0
 /// <summary>
 /// Bundles the reader, cross reference table, caches and container used while parsing.
 /// The log is always a <see cref="NoOpLog"/>.
 /// </summary>
 /// <exception cref="ArgumentNullException">Any reference-type argument is null.</exception>
 public ParsingArguments(IRandomAccessRead reader, CrossReferenceTable crossReferenceTable, ParsingCachingProviders cachingProviders, IContainer container, bool isLenientParsing)
 {
     if (reader is null)
     {
         throw new ArgumentNullException(nameof(reader));
     }

     if (crossReferenceTable is null)
     {
         throw new ArgumentNullException(nameof(crossReferenceTable));
     }

     if (cachingProviders is null)
     {
         throw new ArgumentNullException(nameof(cachingProviders));
     }

     if (container is null)
     {
         throw new ArgumentNullException(nameof(container));
     }

     Reader = reader;
     CrossReferenceTable = crossReferenceTable;
     CachingProviders = cachingProviders;
     Container = container;
     IsLenientParsing = isLenientParsing;
     Log = new NoOpLog();
 }
Exemplo n.º 9
0
        /// <summary>
        /// Writes the header, body, trailer, cross reference table and end-of-file marker to the writer
        /// and returns the finished buffer.
        /// </summary>
        /// <returns>The bytes produced by <c>FinishBuffer()</c>.</returns>
        public override byte[] Print()
        {
            _writer.Write(Header);
            _writer.Write(Body.Print());

            // NOTE(review): the PDF file layout places the cross reference table before the trailer;
            // here the trailer is written first. Left unchanged because offsets computed elsewhere
            // may depend on this order - confirm against Trailer.Print()/CrossReferenceTable.Print().
            _writer.Write(Trailer.Print());
            _writer.Write(CrossReferenceTable.Print());

            // The PDF end-of-file marker is "%%EOF" (two percent signs); a single "%" is just a comment.
            _writer.Write("%%EOF");

            return FinishBuffer();
        }
Exemplo n.º 10
0
        /// <summary>
        /// Parses the object identified by <paramref name="obj"/> by forwarding its object and generation
        /// numbers to the number-based overload.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
        public CosBase Parse(IRandomAccessRead reader, CosObject obj, CosObjectPool pool,
                             CrossReferenceTable crossReferenceTable, BruteForceSearcher bruteForceSearcher, bool isLenient, bool requireExistingObject)
        {
            if (obj == null)
            {
                throw new ArgumentNullException(nameof(obj));
            }

            var number = obj.GetObjectNumber();
            var generation = obj.GetGenerationNumber();

            return Parse(reader, number, generation, pool,
                         crossReferenceTable, bruteForceSearcher, isLenient, requireExistingObject);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Merge the set of PDF documents.
        /// </summary>
        /// <param name="files">The raw bytes of each document, in the order they are appended.</param>
        /// <param name="pagesBundle">
        /// Optional per-file page selection: entry i is passed along with file i. Files without an entry are
        /// appended with a null selection.
        /// </param>
        /// <returns>The bytes produced by the document merger.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="files"/> is null.</exception>
        /// <exception cref="PdfDocumentEncryptedException">A document has an encryption dictionary.</exception>
        public static byte[] Merge(IReadOnlyList<byte[]> files, IReadOnlyList<IReadOnlyList<int>> pagesBundle = null)
        {
            if (files == null)
            {
                throw new ArgumentNullException(nameof(files));
            }

            const bool isLenientParsing = false;

            var merger = new DocumentMerger();
            var fileCount = files.Count;

            for (var index = 0; index < fileCount; index++)
            {
                var bytes = files[index];

                IReadOnlyList<int> selectedPages = pagesBundle != null && index < pagesBundle.Count
                    ? pagesBundle[index]
                    : null;

                var input = new ByteArrayInputBytes(bytes);
                var tokenScanner = new CoreTokenScanner(input);

                var header = FileHeaderParser.Parse(tokenScanner, isLenientParsing, Log);

                var streamParser = new Parser.Parts.CrossReference.CrossReferenceStreamParser(FilterProvider);
                var xrefParser = new CrossReferenceParser(Log, new XrefOffsetValidator(Log), streamParser);

                // Assigned further down; the location provider reads it through the closure.
                CrossReferenceTable xrefTable = null;

                // ReSharper disable once AccessToModifiedClosure
                var locations = new ObjectLocationProvider(() => xrefTable, input);

                var objectScanner = new PdfTokenScanner(input, locations, FilterProvider, NoOpEncryptionHandler.Instance);

                var xrefOffset = FileTrailerParser.GetFirstCrossReferenceOffset(input, tokenScanner, isLenientParsing);
                xrefTable = xrefParser.Parse(input, isLenientParsing, xrefOffset, header.OffsetInFile, objectScanner, tokenScanner);

                var catalogToken = ParseCatalog(xrefTable, objectScanner, out var encryption);
                if (encryption != null)
                {
                    throw new PdfDocumentEncryptedException("Unable to merge document with password");
                }

                var catalog = CatalogFactory.Create(xrefTable.Trailer.Root, catalogToken, objectScanner, isLenientParsing);

                merger.AppendDocument(catalog, header.Version, objectScanner, selectedPages);
            }

            return merger.Build();
        }
Exemplo n.º 12
0
 /// <summary>
 /// Creates the object parser. A null <paramref name="log"/> is replaced by a <see cref="NoOpLog"/>;
 /// every other dependency is required.
 /// </summary>
 /// <exception cref="ArgumentNullException">Any dependency other than <paramref name="log"/> is null.</exception>
 public PdfObjectParser(ILog log, CosBaseParser baseParser, CosStreamParser streamParser, CrossReferenceTable crossReferenceTable,
                        BruteForceSearcher bruteForceSearcher,
                        CosObjectPool objectPool,
                        ObjectStreamParser objectStreamParser)
 {
     this.log = log ?? new NoOpLog();

     if (baseParser is null)
     {
         throw new ArgumentNullException(nameof(baseParser));
     }

     if (streamParser is null)
     {
         throw new ArgumentNullException(nameof(streamParser));
     }

     if (crossReferenceTable is null)
     {
         throw new ArgumentNullException(nameof(crossReferenceTable));
     }

     if (bruteForceSearcher is null)
     {
         throw new ArgumentNullException(nameof(bruteForceSearcher));
     }

     if (objectPool is null)
     {
         throw new ArgumentNullException(nameof(objectPool));
     }

     if (objectStreamParser is null)
     {
         throw new ArgumentNullException(nameof(objectStreamParser));
     }

     this.baseParser = baseParser;
     this.streamParser = streamParser;
     this.crossReferenceTable = crossReferenceTable;
     this.bruteForceSearcher = bruteForceSearcher;
     this.objectPool = objectPool;
     this.objectStreamParser = objectStreamParser;
 }
Exemplo n.º 13
0
        /// <summary>
        /// Reads the encryption dictionary and the root (catalog) dictionary referenced by the trailer.
        /// In lenient mode a root dictionary without a Type entry is given Type = Catalog.
        /// </summary>
        /// <returns>The trailer's root reference together with the (possibly patched) root dictionary.</returns>
        private static (IndirectReference, DictionaryToken) ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner,
                                                                         out EncryptionDictionary encryptionDictionary)
        {
            encryptionDictionary = GetEncryptionDictionary(crossReferenceTable, pdfTokenScanner);

            var catalog = DirectObjectFinder.Get<DictionaryToken>(crossReferenceTable.Trailer.Root, pdfTokenScanner);

            var needsTypeEntry = !catalog.ContainsKey(NameToken.Type) && isLenientParsing;

            if (needsTypeEntry)
            {
                catalog = catalog.With(NameToken.Type, NameToken.Catalog);
            }

            return (crossReferenceTable.Trailer.Root, catalog);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Reads the root (catalog) dictionary referenced by the trailer. Encrypted documents are rejected.
        /// In lenient mode a root dictionary without a Type entry is given Type = Catalog.
        /// </summary>
        /// <exception cref="NotSupportedException">The trailer has an encryption token.</exception>
        private static DictionaryToken ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner)
        {
            var isEncrypted = crossReferenceTable.Trailer.EncryptionToken != null;

            if (isEncrypted)
            {
                throw new NotSupportedException("Cannot currently parse a document using encryption: " + crossReferenceTable.Trailer.EncryptionToken);
            }

            var catalog = DirectObjectFinder.Get<DictionaryToken>(crossReferenceTable.Trailer.Root, pdfTokenScanner);

            // Nothing to patch when the type is present, and strict mode never patches.
            if (catalog.ContainsKey(NameToken.Type) || !isLenientParsing)
            {
                return catalog;
            }

            return catalog.With(NameToken.Type, NameToken.Catalog);
        }
Exemplo n.º 15
0
 /// <summary>
 /// Creates a document from already-parsed parts and builds its page accessor.
 /// </summary>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="reader"/>, <paramref name="version"/>, <paramref name="crossReferenceTable"/>,
 /// <paramref name="cachingProviders"/>, <paramref name="information"/> or <paramref name="catalog"/> is null.
 /// </exception>
 internal PdfDocument(ILog log, IRandomAccessRead reader, HeaderVersion version, CrossReferenceTable crossReferenceTable,
                      bool isLenientParsing,
                      ParsingCachingProviders cachingProviders,
                      IPageFactory pageFactory,
                      IPdfObjectParser pdfObjectParser,
                      Catalog catalog,
                      DocumentInformation information)
 {
     // Mandatory arguments are validated up front, in the same order they are stored below.
     if (reader is null)
     {
         throw new ArgumentNullException(nameof(reader));
     }

     if (version is null)
     {
         throw new ArgumentNullException(nameof(version));
     }

     if (crossReferenceTable is null)
     {
         throw new ArgumentNullException(nameof(crossReferenceTable));
     }

     if (cachingProviders is null)
     {
         throw new ArgumentNullException(nameof(cachingProviders));
     }

     if (information is null)
     {
         throw new ArgumentNullException(nameof(information));
     }

     if (catalog is null)
     {
         throw new ArgumentNullException(nameof(catalog));
     }

     this.log = log;
     this.reader = reader;
     this.version = version;
     this.crossReferenceTable = crossReferenceTable;
     this.isLenientParsing = isLenientParsing;
     this.cachingProviders = cachingProviders;
     Information = information;
     Catalog = catalog;

     Pages = new Pages(log, Catalog, pdfObjectParser, pageFactory, reader, isLenientParsing);
 }
Exemplo n.º 16
0
 /// <summary>
 /// Creates a document from already-parsed parts and wires up the page and structure accessors.
 /// </summary>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="version"/>, <paramref name="cachingProviders"/>, <paramref name="pdfScanner"/>
 /// or <paramref name="information"/> is null.
 /// </exception>
 internal PdfDocument(ILog log,
                      IInputBytes inputBytes,
                      HeaderVersion version,
                      CrossReferenceTable crossReferenceTable,
                      bool isLenientParsing,
                      ParsingCachingProviders cachingProviders,
                      IPageFactory pageFactory,
                      Catalog catalog,
                      DocumentInformation information, IPdfTokenScanner pdfScanner)
 {
     // Mandatory arguments are validated up front, in the same order they are stored below.
     if (version is null)
     {
         throw new ArgumentNullException(nameof(version));
     }

     if (cachingProviders is null)
     {
         throw new ArgumentNullException(nameof(cachingProviders));
     }

     if (pdfScanner is null)
     {
         throw new ArgumentNullException(nameof(pdfScanner));
     }

     if (information is null)
     {
         throw new ArgumentNullException(nameof(information));
     }

     this.log = log;
     this.inputBytes = inputBytes;
     this.version = version;
     this.isLenientParsing = isLenientParsing;
     this.cachingProviders = cachingProviders;
     this.pdfScanner = pdfScanner;
     Information = information;

     pages = new Pages(log, catalog, pageFactory, isLenientParsing, pdfScanner);
     Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
 }
Exemplo n.º 17
0
        /// <summary>
        /// Reads the root (catalog) dictionary named by the trailer dictionary's Root entry.
        /// Encrypted documents are rejected. In lenient mode a root dictionary without a Type entry
        /// is given Type = Catalog.
        /// </summary>
        /// <exception cref="NotSupportedException">The trailer dictionary has an Encrypt entry.</exception>
        /// <exception cref="PdfDocumentFormatException">The trailer dictionary has no Root entry.</exception>
        private static DictionaryToken ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner)
        {
            var isEncrypted = crossReferenceTable.Dictionary.ContainsKey(NameToken.Encrypt);

            if (isEncrypted)
            {
                throw new NotSupportedException("Cannot currently parse a document using encryption: " + crossReferenceTable.Dictionary);
            }

            var hasRoot = crossReferenceTable.Dictionary.TryGet(NameToken.Root, out var rootToken);

            if (!hasRoot)
            {
                throw new PdfDocumentFormatException($"Missing root object specification in trailer: {crossReferenceTable.Dictionary}.");
            }

            var catalog = DirectObjectFinder.Get<DictionaryToken>(rootToken, pdfTokenScanner);

            // Nothing to patch when the type is present, and strict mode never patches.
            if (catalog.ContainsKey(NameToken.Type) || !isLenientParsing)
            {
                return catalog;
            }

            return catalog.With(NameToken.Type, NameToken.Catalog);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Reads the encryption dictionary referenced by the trailer.
        /// </summary>
        /// <returns>
        /// The encryption dictionary, or null when the trailer has no encryption token or the token
        /// resolves to a null token.
        /// </returns>
        /// <exception cref="PdfDocumentFormatException">
        /// The encryption token resolves to neither a dictionary nor a null token.
        /// </exception>
        private static EncryptionDictionary GetEncryptionDictionary(CrossReferenceTable crossReferenceTable, IPdfTokenScanner pdfTokenScanner)
        {
            if (crossReferenceTable.Trailer.EncryptionToken == null)
            {
                return null;
            }

            if (DirectObjectFinder.TryGet(crossReferenceTable.Trailer.EncryptionToken, pdfTokenScanner, out DictionaryToken dictionaryToken))
            {
                return EncryptionDictionaryFactory.Read(dictionaryToken, pdfTokenScanner);
            }

            // Not a dictionary: an explicit null token means "no encryption"; anything else is malformed.
            if (DirectObjectFinder.TryGet(crossReferenceTable.Trailer.EncryptionToken, pdfTokenScanner, out NullToken _))
            {
                return null;
            }

            throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {crossReferenceTable.Trailer.EncryptionToken}.");
        }
Exemplo n.º 19
0
        /// <summary>
        /// Check that the offsets in the cross reference are correct.
        /// </summary>
        /// <param name="bytes">The document bytes the offsets point into.</param>
        /// <param name="crossReferenceTable">The table whose object offsets are validated.</param>
        /// <param name="log">Passed through to the offset validation.</param>
        /// <param name="actualOffsets">
        /// The table's own offsets when they validate or when the brute-force search finds nothing;
        /// otherwise the table's negative (object stream) entries overlaid with the brute-force locations.
        /// </param>
        /// <returns><c>true</c> when the table's offsets validated; otherwise <c>false</c>.</returns>
        public static bool ValidateCrossReferenceOffsets(IInputBytes bytes, CrossReferenceTable crossReferenceTable, ILog log,
                                                         out IReadOnlyDictionary<IndirectReference, long> actualOffsets)
        {
            actualOffsets = crossReferenceTable.ObjectOffsets;

            if (ValidateXrefOffsets(bytes, crossReferenceTable.ObjectOffsets, log))
            {
                return true;
            }

            var bruteForceOffsets = BruteForceSearcher.GetObjectLocations(bytes);

            if (bruteForceOffsets.Count == 0)
            {
                // Nothing better to offer: the caller keeps the table's own offsets.
                return false;
            }

            var builderOffsets = new Dictionary<IndirectReference, long>();

            // Keep the entries that refer to object streams (negative values).
            foreach (var entry in crossReferenceTable.ObjectOffsets)
            {
                if (entry.Value < 0)
                {
                    // Trust stream offsets for now.
                    // TODO: more validation of streams.
                    builderOffsets[entry.Key] = entry.Value;
                }
            }

            // Overlay the brute-force locations once, after the stream entries. This used to run inside
            // the loop above, repeating the copy per xref entry and skipping it for an empty table.
            foreach (var item in bruteForceOffsets)
            {
                builderOffsets[item.Key] = item.Value;
            }

            actualOffsets = builderOffsets;

            return false;
        }
Exemplo n.º 20
0
        /// <summary>
        /// Check that the offsets in the cross reference are correct.
        /// </summary>
        /// <remarks>
        /// Does nothing in non-lenient mode. In lenient mode the table's offsets are copied, validated and,
        /// when validation fails, overlaid with the brute-force object locations.
        /// NOTE(review): the overlaid offsets are only stored in a local copy and are never written back to
        /// <paramref name="xrefTrailerResolver"/>, so the repair currently has no visible effect - confirm intent.
        /// </remarks>
        public void CheckCrossReferenceOffsets(IInputBytes bytes, CrossReferenceTable xrefTrailerResolver, bool isLenientParsing)
        {
            // repair mode isn't available in non-lenient mode
            if (!isLenientParsing)
            {
                return;
            }

            Dictionary<IndirectReference, long> xrefOffset = xrefTrailerResolver.ObjectOffsets.ToDictionary(x => x.Key, x => x.Value);

            if (ValidateXrefOffsets(bytes, xrefOffset))
            {
                return;
            }

            IReadOnlyDictionary<IndirectReference, long> bfCOSObjectKeyOffsets = getBFCosObjectOffsets();

            if (bfCOSObjectKeyOffsets.Count == 0)
            {
                return;
            }

            // find all object streams (negative offsets hold the negated stream object number);
            // a set de-duplicates without the quadratic List.Contains scan.
            var objStreams = new HashSet<IndirectReference>();
            foreach (var entry in xrefOffset)
            {
                long offset = entry.Value;
                if (offset < 0)
                {
                    objStreams.Add(new IndirectReference(-offset, 0));
                }
            }

            // remove all found object streams
            foreach (IndirectReference key in objStreams)
            {
                if (bfCOSObjectKeyOffsets.ContainsKey(key))
                {
                    // TODO: not ported yet.
                    // remove all parsed objects which are part of an object stream
                    //ISet<long> objects = xrefTrailerResolver
                    //    .getContainedObjectNumbers((int)(key.Number));
                    //foreach (long objNr in objects)
                    //{
                    //    CosObjectKey streamObjectKey = new CosObjectKey(objNr, 0);

                    //    if (bfCOSObjectKeyOffsets.TryGetValue(streamObjectKey, out long streamObjectOffset) && streamObjectOffset > 0)
                    //    {
                    //        bfCOSObjectKeyOffsets.Remove(streamObjectKey);
                    //    }
                    //}
                }
                else
                {
                    // TODO: not ported yet.
                    // remove all objects which are part of an object stream which wasn't found
                    //ISet<long> objects = xrefTrailerResolver
                    //    .getContainedObjectNumbers((int)(key.Number));
                    //foreach (long objNr in objects)
                    //{
                    //    xrefOffset.Remove(new CosObjectKey(objNr, 0));
                    //}
                }
            }

            foreach (var item in bfCOSObjectKeyOffsets)
            {
                xrefOffset[item.Key] = item.Value;
            }
        }
Exemplo n.º 21
0
        /// <summary>
        /// Builds a <see cref="PdfDocument"/> from the input bytes: parses the header version, locates and
        /// validates the cross reference offset, parses the cross reference table and trailer, installs the
        /// encryption handler when the document has an encryption dictionary, and wires up the font, page,
        /// catalog and form factories.
        /// </summary>
        /// <param name="inputBytes">The document bytes.</param>
        /// <param name="scanner">Token scanner used for the header and cross reference parsing.</param>
        /// <param name="container">Supplies the log, filter provider and file trailer parser.</param>
        /// <param name="isLenientParsing">Passed through to every parser that takes a leniency flag.</param>
        /// <param name="password">Password for encrypted documents; null is treated as an empty string.</param>
        private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, IContainer container, bool isLenientParsing, string password)
        {
            var log            = container.Get <ILog>();
            var filterProvider = container.Get <IFilterProvider>();
            var catalogFactory = new CatalogFactory();
            var cMapCache      = new CMapCache(new CMapParser());

            // Assigned after the cross reference parser runs; read lazily through the closure below.
            CrossReferenceTable crossReferenceTable = null;

            var bruteForceSearcher = new BruteForceSearcher(inputBytes);
            var xrefValidator      = new XrefOffsetValidator(log);
            var objectChecker      = new XrefCosOffsetChecker(log, bruteForceSearcher);

            // We're ok with this since our intent is to lazily load the cross reference table.
            // ReSharper disable once AccessToModifiedClosure
            var locationProvider = new ObjectLocationProvider(() => crossReferenceTable, bruteForceSearcher);
            // The scanner starts with the no-op encryption handler; the real one is installed once the trailer is read.
            var pdfScanner       = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance);

            var crossReferenceStreamParser = new CrossReferenceStreamParser(filterProvider);
            var crossReferenceParser       = new CrossReferenceParser(log, xrefValidator, objectChecker, crossReferenceStreamParser, new CrossReferenceTableParser());

            var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);

            var crossReferenceOffset = container.Get <FileTrailerParser>().GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);

            // TODO: make this use the scanner.
            var validator = new CrossReferenceOffsetValidator(xrefValidator);

            crossReferenceOffset = validator.Validate(crossReferenceOffset, scanner, inputBytes, isLenientParsing);

            crossReferenceTable = crossReferenceParser.Parse(inputBytes, isLenientParsing, crossReferenceOffset, pdfScanner, scanner);

            var trueTypeFontParser           = new TrueTypeFontParser();
            var fontDescriptorFactory        = new FontDescriptorFactory();
            var compactFontFormatIndexReader = new CompactFontFormatIndexReader();
            var compactFontFormatParser      = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
                                                                                                                     new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader);

            var rootDictionary = ParseTrailer(crossReferenceTable, isLenientParsing, pdfScanner, out var encryptionDictionary);

            // Only documents with an encryption dictionary get a real handler; a null password becomes "".
            var encryptionHandler = encryptionDictionary != null ? (IEncryptionHandler) new EncryptionHandler(encryptionDictionary, crossReferenceTable.Trailer, password ?? string.Empty)
                : NoOpEncryptionHandler.Instance;

            // Swap the handler in before the scanner is handed to the factories created below.
            pdfScanner.UpdateEncryptionHandler(encryptionHandler);

            var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, compactFontFormatParser, filterProvider);
            var encodingReader = new EncodingReader(pdfScanner);

            // NOTE(review): SystemFontFinder gets its own TrueTypeFontParser instead of the shared trueTypeFontParser - confirm this is intended.
            var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
                                                                        cMapCache,
                                                                        filterProvider, pdfScanner),
                                              new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
                                              new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader,
                                                                   new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser),
                                              new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));

            var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);

            var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
                                              new PageContentParser(new ReflectionGraphicsStateOperationFactory()),
                                              new XObjectFactory(), log);
            var informationFactory = new DocumentInformationFactory();

            var information = informationFactory.Create(pdfScanner, crossReferenceTable.Trailer);

            var catalog = catalogFactory.Create(pdfScanner, rootDictionary);

            var caching = new ParsingCachingProviders(bruteForceSearcher, resourceContainer);

            var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider);

            return(new PdfDocument(log, inputBytes, version, crossReferenceTable, isLenientParsing, caching, pageFactory, catalog, information,
                                   encryptionDictionary,
                                   pdfScanner,
                                   filterProvider,
                                   acroFormFactory));
        }
Exemplo n.º 22
0
        /// <summary>
        /// Builds a <see cref="PdfDocument"/> from the raw input: reads the file header, locates and parses the
        /// cross reference table, parses the trailer (including any encryption dictionary) and then wires up the
        /// font, resource, page, form and bookmark services the document needs.
        /// </summary>
        /// <param name="inputBytes">The bytes of the file; shared by the object location provider, the token scanner and the parsers.</param>
        /// <param name="scanner">Seekable scanner used for the header, the trailer search and cross reference parsing.</param>
        /// <param name="log">Logger handed to the validators, parsers and factories created here.</param>
        /// <param name="isLenientParsing">Forwarded unchanged to the header, trailer, cross reference and catalog parsing steps.</param>
        /// <param name="passwords">Passed to the <see cref="EncryptionHandler"/>; only used when the trailer yields an encryption dictionary.</param>
        /// <param name="clipPaths">Forwarded unchanged to the <see cref="PdfDocument"/> constructor.</param>
        /// <returns>The fully wired <see cref="PdfDocument"/>.</returns>
        private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing,
                                                IReadOnlyList <string> passwords, bool clipPaths)
        {
            var filterProvider = DefaultFilterProvider.Instance;

            // Declared up-front (and still null) so the location provider's lambda below can capture it;
            // it is assigned once the cross reference table has been parsed.
            CrossReferenceTable crossReferenceTable = null;

            var xrefValidator = new XrefOffsetValidator(log);

            // We're ok with this since our intent is to lazily load the cross reference table.
            // ReSharper disable once AccessToModifiedClosure
            var locationProvider = new ObjectLocationProvider(() => crossReferenceTable, inputBytes);
            // The scanner starts with the no-op encryption handler; the real handler is only known after
            // the trailer has been parsed and is swapped in via UpdateEncryptionHandler further down.
            var pdfScanner       = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance);

            var crossReferenceStreamParser = new CrossReferenceStreamParser(filterProvider);
            var crossReferenceParser       = new CrossReferenceParser(log, xrefValidator, crossReferenceStreamParser);

            var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);

            // The offset reported by the trailer parser is shifted by the header's offset in the file.
            // NOTE(review): presumably this compensates for junk bytes preceding the header - confirm.
            var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner,
                                                                                      isLenientParsing) + version.OffsetInFile;

            // TODO: make this use the scanner.
            var validator = new CrossReferenceOffsetValidator(xrefValidator);

            // The validator returns the offset to use from here on, which replaces the computed one.
            crossReferenceOffset = validator.Validate(crossReferenceOffset, scanner, inputBytes, isLenientParsing);

            // From this point the lambda captured by the location provider returns a non-null table.
            crossReferenceTable = crossReferenceParser.Parse(inputBytes, isLenientParsing,
                                                             crossReferenceOffset,
                                                             version.OffsetInFile,
                                                             pdfScanner,
                                                             scanner);

            // Yields the root (catalog) reference and dictionary, plus the encryption dictionary as an out value.
            var(rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing,
                                                              pdfScanner,
                                                              out var encryptionDictionary);

            // A null encryption dictionary means the no-op handler is kept.
            var encryptionHandler = encryptionDictionary != null ?
                                    (IEncryptionHandler) new EncryptionHandler(encryptionDictionary, crossReferenceTable.Trailer, passwords)
                : NoOpEncryptionHandler.Instance;

            // Install the handler on the scanner before any of the factories below read through it.
            pdfScanner.UpdateEncryptionHandler(encryptionHandler);

            var cidFontFactory = new CidFontFactory(pdfScanner, filterProvider);
            var encodingReader = new EncodingReader(pdfScanner);

            // A single Type 1 handler instance is shared: it is registered with the font factory directly
            // and is also passed to the TrueType handler.
            var type1Handler = new Type1FontHandler(pdfScanner, filterProvider, encodingReader);

            var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
                                                                        filterProvider, pdfScanner),
                                              new TrueTypeFontHandler(log, pdfScanner, filterProvider, encodingReader, SystemFontFinder.Instance,
                                                                      type1Handler),
                                              type1Handler,
                                              new Type3FontHandler(pdfScanner, filterProvider, encodingReader));

            var resourceContainer = new ResourceStore(pdfScanner, fontFactory);

            // Document information is created from the trailer dictionary of the cross reference table.
            var information = DocumentInformationFactory.Create(pdfScanner, crossReferenceTable.Trailer);

            var catalog = CatalogFactory.Create(rootReference, rootDictionary, pdfScanner, isLenientParsing);

            var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
                                              new PageContentParser(new ReflectionGraphicsStateOperationFactory()),
                                              log);

            var caching = new ParsingCachingProviders(resourceContainer);

            var acroFormFactory   = new AcroFormFactory(pdfScanner, filterProvider, crossReferenceTable);
            var bookmarksProvider = new BookmarksProvider(log, pdfScanner);

            return(new PdfDocument(log, inputBytes, version, crossReferenceTable, caching, pageFactory, catalog, information,
                                   encryptionDictionary,
                                   pdfScanner,
                                   filterProvider,
                                   acroFormFactory,
                                   bookmarksProvider,
                                   clipPaths));
        }
Exemplo n.º 23
0
 /// <summary>
 /// Create a new <see cref="AcroFormFactory"/>, storing the supplied collaborators.
 /// </summary>
 /// <param name="tokenScanner">The token scanner to store; must not be null.</param>
 /// <param name="filterProvider">The filter provider to store; must not be null.</param>
 /// <param name="crossReferenceTable">The cross reference table to store; must not be null.</param>
 /// <exception cref="ArgumentNullException">Thrown for the first argument (in declaration order) that is null.</exception>
 public AcroFormFactory(IPdfTokenScanner tokenScanner, IFilterProvider filterProvider, CrossReferenceTable crossReferenceTable)
 {
     // Each argument is validated immediately before it is stored, in declaration order.
     if (tokenScanner is null)
     {
         throw new ArgumentNullException(nameof(tokenScanner));
     }

     this.tokenScanner = tokenScanner;

     if (filterProvider is null)
     {
         throw new ArgumentNullException(nameof(filterProvider));
     }

     this.filterProvider = filterProvider;

     if (crossReferenceTable is null)
     {
         throw new ArgumentNullException(nameof(crossReferenceTable));
     }

     this.crossReferenceTable = crossReferenceTable;
 }
        /// <summary>
        /// Check that the offsets in the cross reference are correct.
        /// </summary>
        /// <remarks>
        /// When the offsets recorded in the table fail validation the file is searched by brute force and,
        /// if that search finds any objects, the offsets it found are returned in place of the table's.
        /// </remarks>
        /// <param name="bytes">The bytes of the file being validated and, if needed, searched.</param>
        /// <param name="crossReferenceTable">The table whose <c>ObjectOffsets</c> are checked.</param>
        /// <param name="log">Logger forwarded to the offset validation.</param>
        /// <param name="actualOffsets">
        /// The offsets callers should use: the table's own offsets when they validate or when the brute-force
        /// search finds nothing, otherwise a copy of the brute-force search results.
        /// </param>
        /// <returns><c>true</c> if the table's offsets validated; otherwise <c>false</c>.</returns>
        public static bool ValidateCrossReferenceOffsets(IInputBytes bytes, CrossReferenceTable crossReferenceTable, ILog log,
                                                         out IReadOnlyDictionary <IndirectReference, long> actualOffsets)
        {
            actualOffsets = crossReferenceTable.ObjectOffsets;

            if (ValidateXrefOffsets(bytes, crossReferenceTable.ObjectOffsets, log))
            {
                return(true);
            }

            var bruteForceOffsets = BruteForceSearcher.GetObjectLocations(bytes);

            if (bruteForceOffsets.Count > 0)
            {
                // Copy the brute-force results exactly once. This used to happen inside a loop over every
                // cross reference entry, which repeated the identical copy per entry and produced an empty
                // result whenever the table itself had no entries.
                var builderOffsets = new Dictionary <IndirectReference, long>();

                foreach (var item in bruteForceOffsets)
                {
                    builderOffsets[item.Key] = item.Value;
                }

                // TODO: table entries with a negative offset (which appear to denote objects stored inside
                // an object stream) are not reconciled with the brute-force results; only the objects the
                // search located directly are returned.
                actualOffsets = builderOffsets;
            }

            return(false);
        }
Exemplo n.º 25
0
        /// <summary>
        /// Parses the object stream with number <paramref name="streamObjectNumber"/> and returns the object
        /// numbered <paramref name="requestedNumber"/> from the objects it contains.
        /// </summary>
        /// <remarks>
        /// Every parsed object whose cross reference offset equals the negated stream object number is also
        /// stored into the matching entry of <paramref name="objectPool"/>.
        /// </remarks>
        /// <returns>
        /// The requested object, or <see cref="CosNull.Null"/> when the stream could not be loaded as a
        /// <see cref="PdfRawStream"/> or does not contain the requested object.
        /// </returns>
        private CosBase ParseCompressedStreamObject(
            IRandomAccessRead reader,
            long streamObjectNumber,
            long requestedNumber,
            CosObjectPool objectPool,
            CrossReferenceTable crossReferenceTable,
            BruteForceSearcher bruteForceSearcher,
            bool isLenientParsing)
        {
            var container = Parse(reader, streamObjectNumber, 0, objectPool, crossReferenceTable, bruteForceSearcher, isLenientParsing, true);

            if (!(container is PdfRawStream rawStream))
            {
                log.Warn($"Could not find a stream for the object number, defaults to returning CosNull: {streamObjectNumber}");

                return CosNull.Null;
            }

            var contained = objectStreamParser.Parse(rawStream, objectPool);

            // The cross reference records members of an object stream under the negated stream number.
            var expectedOffset = -streamObjectNumber;

            // First pass: register every object the cross reference attributes to this stream.
            foreach (var candidate in contained)
            {
                var key            = new CosObjectKey(candidate);
                var recordedOffset = TryGet(key, crossReferenceTable.ObjectOffsets);

                if (recordedOffset == null || recordedOffset != expectedOffset)
                {
                    continue;
                }

                objectPool.Get(key).SetObject(candidate.GetObject());
            }

            // Second pass: hand back the first object carrying the requested number.
            foreach (var candidate in contained)
            {
                if (candidate.GetObjectNumber() == requestedNumber)
                {
                    if (candidate != null)
                    {
                        return candidate;
                    }

                    break;
                }
            }

            log.Error($"Could not find the object {requestedNumber} in the stream for object {streamObjectNumber}. Returning CosNull.");

            return CosNull.Null;
        }