示例#1
0
        /// <summary>
        /// Checks whether <paramref name="startXRefOffset"/> points at a cross reference table or stream
        /// and, in lenient mode, tries to repair it when it does not.
        /// </summary>
        /// <param name="startXRefOffset">The offset to check.</param>
        /// <param name="scanner">The scanner which is moved to the offset to inspect the token found there.</param>
        /// <param name="inputBytes">The bytes handed to the repair search.</param>
        /// <param name="isLenientParsing">When <c>false</c> the offset is returned unchecked.</param>
        /// <returns>
        /// The original offset when it is accepted, the result of the repair search otherwise,
        /// or <c>-1</c> when the offset is not positive and no "xref" operator was found at it.
        /// </returns>
        public long CheckXRefOffset(long startXRefOffset, ISeekableTokenScanner scanner, IInputBytes inputBytes, bool isLenientParsing)
        {
            // No repair is attempted when parsing strictly.
            if (!isLenientParsing)
            {
                return startXRefOffset;
            }

            scanner.Seek(startXRefOffset);
            scanner.MoveNext();

            var pointsAtTable = ReferenceEquals(scanner.CurrentToken, OperatorToken.Xref);

            if (pointsAtTable)
            {
                return startXRefOffset;
            }

            // Nothing sensible to search around: report that no valid offset was found.
            if (startXRefOffset <= 0)
            {
                return -1;
            }

            return CheckXRefStreamOffset(startXRefOffset, scanner, true)
                ? startXRefOffset
                : CalculateXRefFixedOffset(startXRefOffset, scanner, inputBytes);
        }
        /// <summary>
        /// Reads the trailer dictionary which follows the "trailer" operator.
        /// </summary>
        /// <param name="scanner">The scanner, expected to be positioned on the "trailer" operator.</param>
        /// <param name="isLenientParsing">
        /// When <c>true</c> and the current token is not "trailer", the scanner is advanced until a "trailer" operator is found.
        /// </param>
        /// <returns>The dictionary read after the "trailer" operator.</returns>
        /// <exception cref="PdfDocumentFormatException">No trailer dictionary could be read.</exception>
        private static DictionaryToken ParseTrailer(ISeekableTokenScanner scanner, bool isLenientParsing)
        {
            if (scanner.CurrentToken is OperatorToken current && current.Data == "trailer")
            {
                if (scanner.TryReadToken(out DictionaryToken immediate))
                {
                    return immediate;
                }

                throw new PdfDocumentFormatException($"Expected to find a dictionary in the trailer but instead found: {scanner.CurrentToken}.");
            }

            if (isLenientParsing)
            {
                while (scanner.MoveNext())
                {
                    if (!(scanner.CurrentToken is OperatorToken candidate) || candidate.Data != "trailer")
                    {
                        continue;
                    }

                    // Only the first "trailer" operator encountered is considered.
                    if (scanner.TryReadToken(out DictionaryToken recovered))
                    {
                        return recovered;
                    }

                    break;
                }
            }

            throw new PdfDocumentFormatException("No trailer dictionary was present.");
        }
示例#3
0
        /// <summary>
        /// Finds the version header comment by scanning tokens from the scanner's current position,
        /// falling back to a brute force search when no comment is found.
        /// </summary>
        /// <param name="scanner">The scanner to read tokens from.</param>
        /// <param name="inputBytes">The bytes handed to the brute force search.</param>
        /// <param name="isLenientParsing">Forwarded to the header version handling.</param>
        /// <param name="log">Forwarded to the header version handling.</param>
        /// <returns>The header version for the document.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="scanner"/> is <c>null</c>.</exception>
        /// <exception cref="PdfDocumentFormatException">Neither the token scan nor the brute force search found a version.</exception>
        public static HeaderVersion Parse([NotNull] ISeekableTokenScanner scanner, IInputBytes inputBytes, bool isLenientParsing, ILog log)
        {
            if (scanner == null)
            {
                throw new ArgumentNullException(nameof(scanner));
            }

            const int junkTokensTolerance = 30;

            var startPosition = scanner.CurrentPosition;

            // Inspect at most junkTokensTolerance tokens looking for the first comment.
            for (var attempts = 0; attempts < junkTokensTolerance && scanner.MoveNext(); attempts++)
            {
                if (scanner.CurrentToken is CommentToken comment)
                {
                    return GetHeaderVersionAndResetScanner(comment, scanner, isLenientParsing, log);
                }
            }

            // Either the tolerance was exhausted or the scanner ran out of tokens.
            if (!TryBruteForceVersionLocation(startPosition, inputBytes, out var version))
            {
                throw new PdfDocumentFormatException("Could not find the version header comment at the start of the document.");
            }

            scanner.Seek(startPosition);

            return version;
        }
示例#4
0
        /// <summary>
        /// Reacts to the "dict" and "currentfile" operators; every other operator is ignored.
        /// </summary>
        /// <param name="token">The operator which was just read.</param>
        /// <param name="bytes">The bytes passed to the encrypted content skipping.</param>
        /// <param name="scanner">The scanner used to read the tokens following the operator.</param>
        /// <param name="set">The previously read tokens; the first entry is cast to a number for "dict".</param>
        /// <param name="dictionaries">Receives any dictionary read for a "dict" operator.</param>
        private void HandleOperator(OperatorToken token, IInputBytes bytes, ISeekableTokenScanner scanner, PreviousTokenSet set, List <DictionaryToken> dictionaries)
        {
            var operatorName = token.Data;

            if (operatorName == "dict")
            {
                var entryCount = ((NumericToken)set[0]).Int;

                dictionaries.Add(ReadDictionary(entryCount, scanner));

                return;
            }

            if (operatorName != "currentfile")
            {
                return;
            }

            if (!scanner.MoveNext() || scanner.CurrentToken != OperatorToken.Eexec)
            {
                return;
            }

            // For now we will not read this stuff.
            SkipEncryptedContent(bytes);
        }
示例#5
0
        /// <summary>
        /// Reads the version header comment from the start of the document.
        /// </summary>
        /// <param name="scanner">The scanner to read tokens from; it is moved to position 0 when a version is parsed.</param>
        /// <param name="isLenientParsing">
        /// When <c>true</c> up to 2 non-comment tokens are skipped before the header and a badly formatted
        /// header falls back to version 1.4 with a warning instead of throwing.
        /// </param>
        /// <returns>The parsed header version, or the 1.4 default in lenient mode.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="scanner"/> is <c>null</c>.</exception>
        /// <exception cref="PdfDocumentFormatException">
        /// No token could be read, no comment was found within the tolerance, or (in strict mode) the comment was in the wrong format.
        /// </exception>
        public HeaderVersion Parse([NotNull] ISeekableTokenScanner scanner, bool isLenientParsing)
        {
            if (scanner == null)
            {
                throw new ArgumentNullException(nameof(scanner));
            }

            // Read the first token
            if (!scanner.MoveNext())
            {
                throw new PdfDocumentFormatException($"Could not read the first token in the document at position {scanner.CurrentPosition}.");
            }

            var comment = scanner.CurrentToken as CommentToken;

            var junkSkip = isLenientParsing ? 2 : 0;
            var attempts = 0;

            while (comment == null)
            {
                // Give up once the tolerance is used up or the scanner has no more tokens.
                if (attempts == junkSkip || !scanner.MoveNext())
                {
                    throw new PdfDocumentFormatException("Could not find the version header comment at the start of the document.");
                }

                comment = scanner.CurrentToken as CommentToken;

                attempts++;
            }

            var match = VersionRegex.Match(comment.Data);

            // The version always uses '.' as its decimal separator, so it must be parsed with the invariant
            // culture; the culture-sensitive overload fails or misreads "1.4" on machines using ',' instead.
            if (!match.Success
                || !decimal.TryParse(match.Groups["version"].Value,
                                     System.Globalization.NumberStyles.Number,
                                     System.Globalization.CultureInfo.InvariantCulture,
                                     out decimal version))
            {
                if (isLenientParsing)
                {
                    log.Warn($"Did not find a version header of the correct format, defaulting to 1.4 since lenient. Header was: {comment.Data}.");

                    return(new HeaderVersion(1.4m, "PDF-1.4"));
                }

                throw new PdfDocumentFormatException($"The comment which should have provided the version was in the wrong format: {comment.Data}.");
            }

            scanner.Seek(0);

            var result = new HeaderVersion(version, comment.Data);

            return(result);
        }
示例#6
0
        private static DictionaryToken ReadDictionary(int keys, ISeekableTokenScanner scanner)
        {
            IToken previousToken = null;

            var dictionary = new Dictionary <NameToken, IToken>();

            // Skip the operators "dup" etc to reach "begin".
            while (scanner.MoveNext() && (!(scanner.CurrentToken is OperatorToken operatorToken) || operatorToken.Data != "begin"))
            {
                // Skipping.
            }

            for (int i = 0; i < keys; i++)
            {
                if (!scanner.TryReadToken(out NameToken key))
                {
                    return(new DictionaryToken(dictionary));
                }

                if (key.Data.Equals(NameToken.Encoding))
                {
                    var encoding = ReadEncoding(scanner);
                    dictionary[key] = (IToken)encoding.encoding ?? encoding.name;
                    continue;
                }

                while (scanner.MoveNext())
                {
                    if (scanner.CurrentToken == OperatorToken.Def)
                    {
                        dictionary[key] = previousToken;

                        break;
                    }

                    if (scanner.CurrentToken == OperatorToken.Dict)
                    {
                        if (!(previousToken is NumericToken numeric))
                        {
                            return(new DictionaryToken(dictionary));
                        }

                        var inner = ReadDictionary(numeric.Int, scanner);

                        previousToken = inner;
                    }
                    else if (scanner.CurrentToken == OperatorToken.Readonly)
                    {
                        // skip
                    }
                    else if (scanner.CurrentToken is OperatorToken op && op.Data == "end")
                    {
                        // skip
                    }
示例#7
0
        /// <summary>
        /// Runs the offset validator over <paramref name="crossReferenceOffset"/> and uses its result when one is found.
        /// </summary>
        /// <param name="crossReferenceOffset">The offset to validate.</param>
        /// <param name="scanner">Forwarded to the offset validator.</param>
        /// <param name="bytes">Forwarded to the offset validator.</param>
        /// <param name="isLenientParsing">Forwarded to the offset validator.</param>
        /// <returns>The validator's offset when it is greater than -1, otherwise the offset passed in.</returns>
        public long Validate(long crossReferenceOffset, ISeekableTokenScanner scanner, IInputBytes bytes, bool isLenientParsing)
        {
            long repaired = offsetValidator.CheckXRefOffset(crossReferenceOffset, scanner, bytes, isLenientParsing);

            return repaired > -1 ? repaired : crossReferenceOffset;
        }
示例#8
0
        /// <summary>
        /// Reads the version header comment ("PDF-1.x" or "FDF-1.x") from the start of the document.
        /// </summary>
        /// <param name="scanner">The scanner to read tokens from; it is moved to position 0 when a version is parsed.</param>
        /// <param name="isLenientParsing">
        /// When <c>true</c> up to 2 non-comment tokens are skipped before the header; also forwarded to the missing version handling.
        /// </param>
        /// <param name="log">Forwarded to the missing version handling.</param>
        /// <returns>The parsed header version, or whatever the missing version handling returns.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="scanner"/> is <c>null</c>.</exception>
        /// <exception cref="PdfDocumentFormatException">No token could be read or no comment was found within the tolerance.</exception>
        public static HeaderVersion Parse([NotNull] ISeekableTokenScanner scanner, bool isLenientParsing, ILog log)
        {
            if (scanner == null)
            {
                throw new ArgumentNullException(nameof(scanner));
            }

            // Read the first token
            if (!scanner.MoveNext())
            {
                throw new PdfDocumentFormatException($"Could not read the first token in the document at position {scanner.CurrentPosition}.");
            }

            var comment = scanner.CurrentToken as CommentToken;

            var junkSkip = isLenientParsing ? 2 : 0;
            var attempts = 0;

            while (comment == null)
            {
                // Give up once the tolerance is used up or the scanner has no more tokens.
                if (attempts == junkSkip || !scanner.MoveNext())
                {
                    throw new PdfDocumentFormatException("Could not find the version header comment at the start of the document.");
                }

                comment = scanner.CurrentToken as CommentToken;

                attempts++;
            }

            if (comment.Data.IndexOf("PDF-1.", StringComparison.OrdinalIgnoreCase) != 0 && comment.Data.IndexOf("FDF-1.", StringComparison.OrdinalIgnoreCase) != 0)
            {
                return(HandleMissingVersion(comment, isLenientParsing, log));
            }

            // Skips the "PDF-" / "FDF-" prefix so only the number remains.
            const int toDecimalStartLength = 4;

            // The version always uses '.' as its decimal separator, so it must be parsed with the invariant
            // culture; the culture-sensitive overload fails or misreads "1.4" on machines using ',' instead.
            if (!decimal.TryParse(comment.Data.Substring(toDecimalStartLength),
                                  System.Globalization.NumberStyles.Number,
                                  System.Globalization.CultureInfo.InvariantCulture,
                                  out var version))
            {
                return(HandleMissingVersion(comment, isLenientParsing, log));
            }

            scanner.Seek(0);

            var result = new HeaderVersion(version, comment.Data);

            return(result);
        }
示例#9
0
        /// <summary>
        /// Checks whether an object whose dictionary has the type "XRef" starts at <paramref name="startXRefOffset"/>.
        /// </summary>
        /// <param name="startXRefOffset">The offset to check.</param>
        /// <param name="scanner">The scanner which is moved to the offset and used to read the object.</param>
        /// <param name="isLenient">When <c>false</c> no check is performed and the offset is accepted.</param>
        /// <returns>
        /// <c>true</c> when the check is skipped (non-lenient or offset 0) or a cross reference stream object was found;
        /// otherwise <c>false</c>.
        /// </returns>
        private bool CheckXRefStreamOffset(long startXRefOffset, ISeekableTokenScanner scanner, bool isLenient)
        {
            // repair mode isn't available in non-lenient mode
            if (!isLenient || startXRefOffset == 0)
            {
                return(true);
            }

            scanner.Seek(startXRefOffset);

            if (!scanner.TryReadToken(out NumericToken objectNumber))
            {
                log.Error($"When looking for the cross reference stream object we sought a number but found: {scanner.CurrentToken}.");

                return(false);
            }

            try
            {
                // A missing generation number is only logged; the check carries on regardless.
                if (!scanner.TryReadToken(out NumericToken generation))
                {
                    log.Debug($"When checking offset at {startXRefOffset} did not find the generation number. Got: {objectNumber} {generation}.");
                }

                scanner.MoveNext();

                var obj = scanner.CurrentToken;

                if (!ReferenceEquals(obj, OperatorToken.StartObject))
                {
                    scanner.Seek(startXRefOffset);
                    return(false);
                }

                // check the dictionary to avoid false positives
                if (!scanner.TryReadToken(out DictionaryToken dictionary))
                {
                    // Without a dictionary this cannot be a cross reference stream. Returning here avoids
                    // dereferencing the null dictionary below, which previously surfaced as a swallowed
                    // NullReferenceException and a misleading error log entry.
                    scanner.Seek(startXRefOffset);
                    return(false);
                }

                if (dictionary.TryGet(NameToken.Type, out var type) && NameToken.Xref.Equals(type))
                {
                    return(true);
                }
            }
            catch (Exception ex)
            {
                log.Error("Couldn't read the xref stream object.", ex);
            }

            return(false);
        }
示例#10
0
        /// <summary>
        /// Locates the "startxref" operator near the end of the file and returns the number which follows it.
        /// </summary>
        /// <param name="bytes">The input bytes, searched for the start xref position.</param>
        /// <param name="scanner">The scanner used to read the tokens at that position.</param>
        /// <param name="isLenientParsing">Not used by this method.</param>
        /// <returns>The numeric value following "startxref".</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="scanner"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">The located position does not hold a "startxref" operator.</exception>
        /// <exception cref="PdfDocumentFormatException">
        /// Something other than a comment precedes the number, or no number follows "startxref".
        /// </exception>
        public long GetFirstCrossReferenceOffset(IInputBytes bytes, ISeekableTokenScanner scanner, bool isLenientParsing)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            if (scanner == null)
            {
                throw new ArgumentNullException(nameof(scanner));
            }

            var fileLength = bytes.Length;

            // Search only the tail of the file, or all of it when it is shorter than the search range.
            var offsetFromEnd = fileLength < EndOfFileSearchRange ? (int)fileLength : EndOfFileSearchRange;

            bytes.Seek(fileLength - offsetFromEnd);

            var startXrefPosition = GetStartXrefPosition(bytes, offsetFromEnd);

            scanner.Seek(startXrefPosition);

            var foundStartXref = scanner.TryReadToken(out OperatorToken startXrefToken) && startXrefToken.Data == "startxref";

            if (!foundStartXref)
            {
                throw new InvalidOperationException($"The start xref position we found was not correct. Found {startXrefPosition} but it was occupied by token {scanner.CurrentToken}.");
            }

            while (scanner.MoveNext())
            {
                switch (scanner.CurrentToken)
                {
                    case NumericToken offset:
                        return offset.Long;

                    case CommentToken _:
                        // Comments between the operator and the number are skipped.
                        continue;

                    default:
                        throw new PdfDocumentFormatException($"Found an unexpected token following 'startxref': {scanner.CurrentToken}.");
                }
            }

            throw new PdfDocumentFormatException($"Could not find the numeric value following 'startxref'. Searching from position {startXrefPosition}.");
        }
示例#11
0
        /// <summary>
        /// Reads a dictionary when the operator is "dict"; every other operator is ignored.
        /// </summary>
        /// <param name="token">The operator which was just read.</param>
        /// <param name="scanner">The scanner used to read the dictionary content.</param>
        /// <param name="set">The previously read tokens; the first entry is cast to a number.</param>
        /// <param name="dictionaries">Receives the dictionary which was read.</param>
        private static void HandleOperator(OperatorToken token, ISeekableTokenScanner scanner, PreviousTokenSet set, List <DictionaryToken> dictionaries)
        {
            if (token.Data != "dict")
            {
                return;
            }

            var entryCount = ((NumericToken)set[0]).Int;

            dictionaries.Add(ReadDictionary(entryCount, scanner));
        }
示例#12
0
        /// <summary>
        /// Runs the brute force searches for cross reference tables and streams and picks the found offset
        /// nearest to <paramref name="xrefOffset"/>, removing it from the list it came from.
        /// </summary>
        /// <param name="xrefOffset">The offset to find the nearest candidate for.</param>
        /// <param name="scanner">Not used by this method.</param>
        /// <param name="reader">The bytes handed to both brute force searches.</param>
        /// <returns>The chosen offset, or <c>-1</c> when neither search produced a candidate.</returns>
        private long BruteForceSearchForXref(long xrefOffset, ISeekableTokenScanner scanner, IInputBytes reader)
        {
            BruteForceSearchForTables(reader);

            BfSearchForXRefStreams(reader);

            // TODO to be optimized, this won't work in every case
            long nearestTable = bfSearchXRefTablesOffsets != null
                ? SearchNearestValue(bfSearchXRefTablesOffsets, xrefOffset)
                : -1L;

            // TODO to be optimized, this won't work in every case
            long nearestStream = bfSearchXRefStreamsOffsets != null
                ? SearchNearestValue(bfSearchXRefStreamsOffsets, xrefOffset)
                : -1L;

            var hasTable  = nearestTable > -1;
            var hasStream = nearestStream > -1;

            if (!hasTable && !hasStream)
            {
                return -1;
            }

            // With both candidates the stream wins only when it is strictly nearer than the table.
            var useStream = hasStream
                            && (!hasTable || Math.Abs(xrefOffset - nearestTable) > Math.Abs(xrefOffset - nearestStream));

            if (useStream)
            {
                bfSearchXRefStreamsOffsets.Remove(nearestStream);

                return nearestStream;
            }

            bfSearchXRefTablesOffsets.Remove(nearestTable);

            return nearestTable;
        }
示例#13
0
        public CrossReferenceTable ParseNew(long crossReferenceLocation, ISeekableTokenScanner scanner,
                                            bool isLenientParsing)
        {
            var previousLocation = crossReferenceLocation;

            var visitedCrossReferences = new HashSet <long>();

            while (previousLocation >= 0)
            {
                scanner.Seek(crossReferenceLocation);

                scanner.MoveNext();

                if (scanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref")
                {
                    var table = crossReferenceTableParser.Parse(scanner, crossReferenceLocation, isLenientParsing);

                    previousLocation = table.Dictionary.GetLongOrDefault(CosName.PREV, -1);
                }
示例#14
0
        /// <summary>
        /// Uses the brute force search to find a replacement for a cross reference offset.
        /// </summary>
        /// <param name="objectOffset">The offset which needs fixing.</param>
        /// <param name="scanner">Forwarded to the brute force search.</param>
        /// <param name="inputBytes">Forwarded to the brute force search.</param>
        /// <returns>The replacement offset, or <c>0</c> when the input is negative or nothing was found.</returns>
        private long CalculateXRefFixedOffset(long objectOffset, ISeekableTokenScanner scanner, IInputBytes inputBytes)
        {
            if (objectOffset < 0)
            {
                log.Error($"Invalid object offset {objectOffset} when searching for a xref table/stream");

                return 0;
            }

            // start a brute force search for all xref tables and try to find the offset we are looking for
            var located = BruteForceSearchForXref(objectOffset, scanner, inputBytes);

            if (located <= -1)
            {
                log.Error($"Can\'t find the object xref table/stream at offset {objectOffset}");

                return 0;
            }

            log.Debug($"Fixed reference for xref table/stream {objectOffset} -> {located}");

            return located;
        }
示例#15
0
        /// <summary>
        /// Reads the version header comment ("PDF-1.x" or "FDF-1.x") and works out where it sits in the file.
        /// </summary>
        /// <param name="scanner">The scanner to read tokens from; it is moved to position 0 when a version is parsed.</param>
        /// <param name="isLenientParsing">Forwarded to the missing version handling.</param>
        /// <param name="log">Forwarded to the missing version handling.</param>
        /// <returns>The parsed header version, or whatever the missing version handling returns.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="scanner"/> is <c>null</c>.</exception>
        /// <exception cref="PdfDocumentFormatException">No token could be read or no comment was found within the tolerance.</exception>
        public static HeaderVersion Parse([NotNull] ISeekableTokenScanner scanner, bool isLenientParsing, ILog log)
        {
            if (scanner == null)
            {
                throw new ArgumentNullException(nameof(scanner));
            }

            // Read the first token
            if (!scanner.MoveNext())
            {
                throw new PdfDocumentFormatException($"Could not read the first token in the document at position {scanner.CurrentPosition}.");
            }

            const int junkTokensTolerance  = 25;
            const int toDecimalStartLength = 4;

            var comment = scanner.CurrentToken as CommentToken;

            // Step over junk tokens until a comment turns up, the tolerance is spent or the tokens run out.
            for (var skipped = 0; comment == null; skipped++)
            {
                if (skipped == junkTokensTolerance || !scanner.MoveNext())
                {
                    throw new PdfDocumentFormatException("Could not find the version header comment at the start of the document.");
                }

                comment = scanner.CurrentToken as CommentToken;
            }

            var header = comment.Data;

            var hasKnownPrefix = header.StartsWith("PDF-1.", StringComparison.OrdinalIgnoreCase)
                                 || header.StartsWith("FDF-1.", StringComparison.OrdinalIgnoreCase);

            if (!hasKnownPrefix)
            {
                return HandleMissingVersion(comment, isLenientParsing, log);
            }

            var parsed = decimal.TryParse(header.Substring(toDecimalStartLength),
                                          NumberStyles.Number,
                                          CultureInfo.InvariantCulture,
                                          out var version);

            if (!parsed)
            {
                return HandleMissingVersion(comment, isLenientParsing, log);
            }

            // The rewind distance is one less when the scanner is already at the end of the input.
            var rewind        = scanner.CurrentPosition == scanner.Length ? 1 : 2;
            var commentOffset = scanner.CurrentPosition - comment.Data.Length - rewind;

            scanner.Seek(0);

            return new HeaderVersion(version, comment.Data, commentOffset);
        }
示例#16
0
        /// <summary>
        /// Turns a header comment ("PDF-1.x" or "FDF-1.x") into a header version and moves the scanner to position 0.
        /// </summary>
        /// <param name="comment">The comment expected to contain the version.</param>
        /// <param name="scanner">The scanner whose position is used to work out the comment offset.</param>
        /// <param name="isLenientParsing">Forwarded to the missing version handling.</param>
        /// <param name="log">Forwarded to the missing version handling.</param>
        /// <returns>The parsed header version, or whatever the missing version handling returns.</returns>
        private static HeaderVersion GetHeaderVersionAndResetScanner(CommentToken comment, ISeekableTokenScanner scanner, bool isLenientParsing, ILog log)
        {
            const int toDecimalStartLength = 4;

            var header = comment.Data;

            var hasKnownPrefix = header.StartsWith("PDF-1.", StringComparison.OrdinalIgnoreCase)
                                 || header.StartsWith("FDF-1.", StringComparison.OrdinalIgnoreCase);

            if (!hasKnownPrefix)
            {
                return HandleMissingVersion(comment, isLenientParsing, log);
            }

            var parsed = decimal.TryParse(header.Substring(toDecimalStartLength),
                                          NumberStyles.Number,
                                          CultureInfo.InvariantCulture,
                                          out var version);

            if (!parsed)
            {
                return HandleMissingVersion(comment, isLenientParsing, log);
            }

            // The rewind distance is one less when the scanner is already at the end of the input.
            var rewind        = scanner.CurrentPosition == scanner.Length ? 1 : 2;
            var commentOffset = scanner.CurrentPosition - comment.Data.Length - rewind;

            scanner.Seek(0);

            return new HeaderVersion(version, comment.Data, commentOffset);
        }
示例#17
0
        /// <summary>
        /// Wires up the parsers and factories needed to read a document and returns the resulting <see cref="PdfDocument"/>.
        /// The header version is read first, then the first cross reference offset is located and validated,
        /// the cross reference table is parsed, the trailer is read and finally the font, page and form factories are created.
        /// </summary>
        /// <param name="inputBytes">The document bytes, shared by the searchers, scanners and parsers.</param>
        /// <param name="scanner">The token scanner used for the header, the trailer offset and the cross reference parsing.</param>
        /// <param name="container">Supplies the log, the filter provider and the file trailer parser.</param>
        /// <param name="isLenientParsing">Forwarded to every parsing step and to the returned document.</param>
        /// <param name="password">The password for the encryption handler; <c>null</c> is passed on as an empty string.</param>
        /// <returns>The opened document.</returns>
        private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, IContainer container, bool isLenientParsing, string password)
        {
            var log            = container.Get <ILog>();
            var filterProvider = container.Get <IFilterProvider>();
            var catalogFactory = new CatalogFactory();
            var cMapCache      = new CMapCache(new CMapParser());

            // Assigned further down; the location provider's lambda reads it whenever it is invoked.
            CrossReferenceTable crossReferenceTable = null;

            var bruteForceSearcher = new BruteForceSearcher(inputBytes);
            var xrefValidator      = new XrefOffsetValidator(log);
            var objectChecker      = new XrefCosOffsetChecker(log, bruteForceSearcher);

            // We're ok with this since our intent is to lazily load the cross reference table.
            // ReSharper disable once AccessToModifiedClosure
            var locationProvider = new ObjectLocationProvider(() => crossReferenceTable, bruteForceSearcher);
            // Created with the no-op encryption handler; the real handler is swapped in once the trailer has been read.
            var pdfScanner       = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance);

            var crossReferenceStreamParser = new CrossReferenceStreamParser(filterProvider);
            var crossReferenceParser       = new CrossReferenceParser(log, xrefValidator, objectChecker, crossReferenceStreamParser, new CrossReferenceTableParser());

            var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);

            var crossReferenceOffset = container.Get <FileTrailerParser>().GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);

            // TODO: make this use the scanner.
            var validator = new CrossReferenceOffsetValidator(xrefValidator);

            crossReferenceOffset = validator.Validate(crossReferenceOffset, scanner, inputBytes, isLenientParsing);

            crossReferenceTable = crossReferenceParser.Parse(inputBytes, isLenientParsing, crossReferenceOffset, pdfScanner, scanner);

            var trueTypeFontParser           = new TrueTypeFontParser();
            var fontDescriptorFactory        = new FontDescriptorFactory();
            var compactFontFormatIndexReader = new CompactFontFormatIndexReader();
            var compactFontFormatParser      = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
                                                                                                                     new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader);

            var rootDictionary = ParseTrailer(crossReferenceTable, isLenientParsing, pdfScanner, out var encryptionDictionary);

            // Without an encryption dictionary the no-op handler stays in place.
            var encryptionHandler = encryptionDictionary != null ? (IEncryptionHandler) new EncryptionHandler(encryptionDictionary, crossReferenceTable.Trailer, password ?? string.Empty)
                : NoOpEncryptionHandler.Instance;

            pdfScanner.UpdateEncryptionHandler(encryptionHandler);

            var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, compactFontFormatParser, filterProvider);
            var encodingReader = new EncodingReader(pdfScanner);

            // One handler per font kind: Type 0, TrueType, Type 1 and Type 3.
            var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
                                                                        cMapCache,
                                                                        filterProvider, pdfScanner),
                                              new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
                                              new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader,
                                                                   new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser),
                                              new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));

            var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);

            var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
                                              new PageContentParser(new ReflectionGraphicsStateOperationFactory()),
                                              new XObjectFactory(), log);
            var informationFactory = new DocumentInformationFactory();

            var information = informationFactory.Create(pdfScanner, crossReferenceTable.Trailer);

            var catalog = catalogFactory.Create(pdfScanner, rootDictionary);

            var caching = new ParsingCachingProviders(bruteForceSearcher, resourceContainer);

            var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider);

            return(new PdfDocument(log, inputBytes, version, crossReferenceTable, isLenientParsing, caching, pageFactory, catalog, information,
                                   encryptionDictionary,
                                   pdfScanner,
                                   filterProvider,
                                   acroFormFactory));
        }
示例#18
0
        /// <summary>
        /// Wires up the parsers and factories needed to read a document and returns the resulting <see cref="PdfDocument"/>.
        /// The header version is read first, then the first cross reference offset is located and validated,
        /// the cross reference table is parsed, the trailer is read and finally the font, page, form and bookmark helpers are created.
        /// </summary>
        /// <param name="inputBytes">The document bytes, shared by the location provider, scanners and parsers.</param>
        /// <param name="scanner">The token scanner used for the header, the trailer offset and the cross reference parsing.</param>
        /// <param name="log">The log handed to the parsers, factories and the returned document.</param>
        /// <param name="isLenientParsing">Forwarded to the parsing steps and the catalog creation.</param>
        /// <param name="passwords">The candidate passwords handed to the encryption handler.</param>
        /// <param name="clipPaths">Passed through to the returned document.</param>
        /// <returns>The opened document.</returns>
        private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing,
                                                IReadOnlyList <string> passwords, bool clipPaths)
        {
            var filterProvider = DefaultFilterProvider.Instance;

            // Assigned further down; the location provider's lambda reads it whenever it is invoked.
            CrossReferenceTable crossReferenceTable = null;

            var xrefValidator = new XrefOffsetValidator(log);

            // We're ok with this since our intent is to lazily load the cross reference table.
            // ReSharper disable once AccessToModifiedClosure
            var locationProvider = new ObjectLocationProvider(() => crossReferenceTable, inputBytes);
            // Created with the no-op encryption handler; the real handler is swapped in once the trailer has been read.
            var pdfScanner       = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance);

            var crossReferenceStreamParser = new CrossReferenceStreamParser(filterProvider);
            var crossReferenceParser       = new CrossReferenceParser(log, xrefValidator, crossReferenceStreamParser);

            var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);

            // The offset read from the file is shifted by the header's own offset in the file.
            var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner,
                                                                                      isLenientParsing) + version.OffsetInFile;

            // TODO: make this use the scanner.
            var validator = new CrossReferenceOffsetValidator(xrefValidator);

            crossReferenceOffset = validator.Validate(crossReferenceOffset, scanner, inputBytes, isLenientParsing);

            crossReferenceTable = crossReferenceParser.Parse(inputBytes, isLenientParsing,
                                                             crossReferenceOffset,
                                                             version.OffsetInFile,
                                                             pdfScanner,
                                                             scanner);

            var(rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing,
                                                              pdfScanner,
                                                              out var encryptionDictionary);

            // Without an encryption dictionary the no-op handler stays in place.
            var encryptionHandler = encryptionDictionary != null ?
                                    (IEncryptionHandler) new EncryptionHandler(encryptionDictionary, crossReferenceTable.Trailer, passwords)
                : NoOpEncryptionHandler.Instance;

            pdfScanner.UpdateEncryptionHandler(encryptionHandler);

            var cidFontFactory = new CidFontFactory(pdfScanner, filterProvider);
            var encodingReader = new EncodingReader(pdfScanner);

            // The same Type 1 handler instance is given to the TrueType handler and registered on its own.
            var type1Handler = new Type1FontHandler(pdfScanner, filterProvider, encodingReader);

            var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
                                                                        filterProvider, pdfScanner),
                                              new TrueTypeFontHandler(log, pdfScanner, filterProvider, encodingReader, SystemFontFinder.Instance,
                                                                      type1Handler),
                                              type1Handler,
                                              new Type3FontHandler(pdfScanner, filterProvider, encodingReader));

            var resourceContainer = new ResourceStore(pdfScanner, fontFactory);

            var information = DocumentInformationFactory.Create(pdfScanner, crossReferenceTable.Trailer);

            var catalog = CatalogFactory.Create(rootReference, rootDictionary, pdfScanner, isLenientParsing);

            var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
                                              new PageContentParser(new ReflectionGraphicsStateOperationFactory()),
                                              log);

            var caching = new ParsingCachingProviders(resourceContainer);

            var acroFormFactory   = new AcroFormFactory(pdfScanner, filterProvider, crossReferenceTable);
            var bookmarksProvider = new BookmarksProvider(log, pdfScanner);

            return(new PdfDocument(log, inputBytes, version, crossReferenceTable, caching, pageFactory, catalog, information,
                                   encryptionDictionary,
                                   pdfScanner,
                                   filterProvider,
                                   acroFormFactory,
                                   bookmarksProvider,
                                   clipPaths));
        }
示例#19
0
        /// <summary>
        /// Builds a <see cref="PdfDocument"/> by reading the file header, locating and parsing the
        /// cross reference data, wiring up the object/font/page parsing services and resolving the
        /// document root (catalog) dictionary.
        /// </summary>
        /// <param name="reader">Random access reader passed to the reader-based parsers and to the resulting document.</param>
        /// <param name="inputBytes">Input bytes used to locate the first cross reference offset.</param>
        /// <param name="scanner">Token scanner used for header parsing, offset lookup and the scanner-based cross reference parse.</param>
        /// <param name="container">Service container from which the parsers and the logger are resolved.</param>
        /// <param name="isLenientParsing">Passed through to every parser; when true a root dictionary lacking a type entry is patched to be a catalog.</param>
        /// <returns>The assembled <see cref="PdfDocument"/>.</returns>
        /// <exception cref="InvalidOperationException">The root object returned by the trailer parse is not a <c>PdfDictionary</c>.</exception>
        private static PdfDocument OpenDocument(IRandomAccessRead reader, IInputBytes inputBytes, ISeekableTokenScanner scanner, IContainer container, bool isLenientParsing)
        {
            var log = container.Get <ILog>();

            var version = container.Get <FileHeaderParser>().Parse(scanner, isLenientParsing);

            var crossReferenceOffset = container.Get <FileTrailerParser>().GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);

            // Object pool shared by the offset validator, the cross reference parser, the object parser and the caches below.
            var pool = new CosObjectPool();

            // TODO: make this use the scanner.
            var validator = new CrossReferenceOffsetValidator(new XrefOffsetValidator(log, reader, container.Get <CosDictionaryParser>(),
                                                                                      container.Get <CosBaseParser>(), pool));

            // The validated offset replaces the one read from the file.
            crossReferenceOffset = validator.Validate(crossReferenceOffset, isLenientParsing);

            var crossReferenceTable = container.Get <CrossReferenceParser>()
                                      .Parse(reader, isLenientParsing, crossReferenceOffset, pool);

            // NOTE(review): the result of the scanner-based parse is discarded; presumably this exercises the
            // new implementation alongside the reader-based one during a migration - confirm before removing.
            container.Get <CrossReferenceParser>().ParseNew(crossReferenceOffset, scanner, isLenientParsing);

            var filterProvider     = container.Get <IFilterProvider>();
            var bruteForceSearcher = new BruteForceSearcher(reader);
            // NOTE(review): the logger is resolved from the container again here rather than reusing 'log'.
            var pdfObjectParser    = new PdfObjectParser(container.Get <ILog>(), container.Get <CosBaseParser>(),
                                                         container.Get <CosStreamParser>(), crossReferenceTable, bruteForceSearcher, pool, container.Get <ObjectStreamParser>());

            var trueTypeFontParser    = new TrueTypeFontParser();
            var fontDescriptorFactory = new FontDescriptorFactory();

            var cidFontFactory = new CidFontFactory(fontDescriptorFactory, trueTypeFontParser, pdfObjectParser, filterProvider);

            var cMapCache = new CMapCache(new CMapParser());

            // Font factory is given handlers for Type 0 (composite) and TrueType fonts.
            var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
                                                                        cMapCache,
                                                                        filterProvider,
                                                                        pdfObjectParser),
                                              new TrueTypeFontHandler(pdfObjectParser, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser));

            var dynamicParser     = container.Get <DynamicParser>();
            var resourceContainer = new ResourceContainer(pdfObjectParser, fontFactory);

            var pageFactory        = new PageFactory(resourceContainer, pdfObjectParser, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()));
            var informationFactory = new DocumentInformationFactory();
            var catalogFactory     = new CatalogFactory(pdfObjectParser);

            // Resolve the document root object from the trailer information.
            var root = ParseTrailer(reader, crossReferenceTable, dynamicParser, bruteForceSearcher, pool,
                                    isLenientParsing);

            // The root must be a dictionary; anything else is treated as a fatal error.
            if (!(root is PdfDictionary rootDictionary))
            {
                throw new InvalidOperationException("Expected root dictionary, but got this: " + root);
            }

            // in some pdfs the type value "Catalog" is missing in the root object
            if (isLenientParsing && !rootDictionary.ContainsKey(CosName.TYPE))
            {
                rootDictionary.Set(CosName.TYPE, CosName.CATALOG);
            }

            var information = informationFactory.Create(pdfObjectParser, crossReferenceTable.Dictionary, reader, isLenientParsing);

            var catalog = catalogFactory.Create(rootDictionary, reader, isLenientParsing);

            var caching = new ParsingCachingProviders(pool, bruteForceSearcher, resourceContainer);

            return(new PdfDocument(log, reader, version, crossReferenceTable, isLenientParsing, caching, pageFactory, pdfObjectParser, catalog, information));
        }
示例#20
0
        /// <summary>
        /// Interprets the tokens collected from one line of a cross reference table.
        /// A line is either a new subsection header (first object number and object count) or an
        /// entry (offset, generation number and a free / in-use marker).
        /// </summary>
        /// <param name="tokens">The numeric and marker tokens read from the line.</param>
        /// <param name="scanner">The scanner; its current position bounds the "offset inside own table" check.</param>
        /// <param name="builder">Receives in-use entries; its offset is the start of the table being read.</param>
        /// <param name="isLenientParsing">When true malformed entry lines are skipped instead of throwing.</param>
        /// <param name="objectCount">Number of entries already read for the current subsection.</param>
        /// <param name="definition">The current subsection; replaced when a new subsection header is read.</param>
        /// <returns>
        /// The updated entry count for the current subsection: 0 after a new subsection header,
        /// <paramref name="objectCount"/> + 1 after a valid entry, or <paramref name="objectCount"/>
        /// unchanged when a malformed line is skipped.
        /// </returns>
        /// <exception cref="PdfDocumentFormatException">
        /// The line is not a valid subsection header where one is required, the line is malformed and parsing
        /// is not lenient, or an in-use entry points inside its own cross reference table.
        /// </exception>
        private static int ProcessTokens(List <IToken> tokens, ISeekableTokenScanner scanner, CrossReferenceTablePartBuilder builder, bool isLenientParsing,
                                         int objectCount, ref TableSubsectionDefinition definition)
        {
            string GetErrorMessage()
            {
                var representation = "Invalid line format in xref table: [" + string.Join(", ", tokens.Select(x => x.ToString())) + "]";

                return(representation);
            }

            // The current subsection is complete, so this line must be the header of the next subsection.
            if (objectCount == definition.Count)
            {
                if (tokens.Count == 2 && tokens[0] is NumericToken newFirstObjectToken && tokens[1] is NumericToken newObjectCountToken)
                {
                    definition = new TableSubsectionDefinition(newFirstObjectToken.Long, newObjectCountToken.Int);

                    return(0);
                }

                // Build the message from the actual tokens: indexing tokens[1] directly would raise
                // ArgumentOutOfRangeException for a single-token line and hide the real format error.
                var unexpectedEntries = string.Join(", ", tokens);

                throw new PdfDocumentFormatException($"Found a line with {tokens.Count} unexpected entries in the cross reference table: {unexpectedEntries}.");
            }

            // An entry line needs at least an offset, a generation number and a marker.
            if (tokens.Count <= 2)
            {
                if (!isLenientParsing)
                {
                    throw new PdfDocumentFormatException(GetErrorMessage());
                }

                return(objectCount);
            }

            var lastToken = tokens[tokens.Count - 1];

            if (lastToken is OperatorToken operatorToken)
            {
                // Free entries are counted but not recorded in the builder.
                if (operatorToken.Data == FreeEntry)
                {
                    return(objectCount + 1);
                }

                if (operatorToken.Data != InUseEntry)
                {
                    if (!isLenientParsing)
                    {
                        throw new PdfDocumentFormatException(GetErrorMessage());
                    }

                    return(objectCount);
                }

                if (tokens[0] is NumericToken offset && tokens[1] is NumericToken generationNumber)
                {
                    // An object cannot live between the start of this table and the position read so far.
                    if (offset.Long >= builder.Offset && offset.Long <= scanner.CurrentPosition)
                    {
                        throw new PdfDocumentFormatException($"Object offset {offset} is within its own cross-reference table for object {definition.FirstNumber + objectCount}");
                    }

                    builder.Add(definition.FirstNumber + objectCount, generationNumber.Int, offset.Long);

                    return(objectCount + 1);
                }
            }
            else
            {
                if (!isLenientParsing)
                {
                    throw new PdfDocumentFormatException(GetErrorMessage());
                }
            }

            // Malformed line that was tolerated: leave the count unchanged.
            return(objectCount);
        }
示例#21
0
        /// <summary>
        /// Reads a single cross reference table section located at <paramref name="offset"/>, followed by
        /// its trailer dictionary.
        /// </summary>
        /// <param name="scanner">The scanner to read from; it is moved to <paramref name="offset"/> if not already there.</param>
        /// <param name="offset">The position in the file where the cross reference table is expected to start.</param>
        /// <param name="isLenientParsing">Passed on to the line and trailer parsing to control how malformed data is handled.</param>
        /// <returns>The table part built from the entries and trailer dictionary that were read.</returns>
        /// <exception cref="PdfDocumentFormatException">
        /// The token at <paramref name="offset"/> is an operator other than "xref", the first object number is
        /// not followed by a numeric object count, or a table line is rejected while processing.
        /// </exception>
        public CrossReferenceTablePart Parse(ISeekableTokenScanner scanner, long offset, bool isLenientParsing)
        {
            var builder = new CrossReferenceTablePartBuilder
            {
                Offset   = offset,
                XRefType = CrossReferenceType.Table
            };

            if (scanner.CurrentPosition != offset)
            {
                scanner.Seek(offset);
            }

            scanner.MoveNext();

            if (scanner.CurrentToken is OperatorToken operatorToken)
            {
                if (operatorToken.Data == "xref")
                {
                    scanner.MoveNext();
                }
                else
                {
                    throw new PdfDocumentFormatException($"Unexpected operator in xref position: {operatorToken}.");
                }
            }

            if (scanner.CurrentToken is NumericToken firstObjectNumber)
            {
                if (!scanner.TryReadToken(out NumericToken objectCount))
                {
                    throw new PdfDocumentFormatException($"Unexpected token following xref and {firstObjectNumber}. We found: {scanner.CurrentToken}.");
                }

                var definition = new TableSubsectionDefinition(firstObjectNumber.Long, objectCount.Int);

                // Line breaks are significant inside the table, so surface them as tokens while reading it.
                var tokenizer = new EndOfLineTokenizer();

                scanner.RegisterCustomTokenizer((byte)'\r', tokenizer);
                scanner.RegisterCustomTokenizer((byte)'\n', tokenizer);

                try
                {
                    var readingLine = false;
                    var tokens      = new List <IToken>();
                    var count       = 0;
                    while (scanner.MoveNext())
                    {
                        if (scanner.CurrentToken is EndOfLineToken)
                        {
                            // Ignore blank lines and the second half of a CR LF pair.
                            if (!readingLine)
                            {
                                continue;
                            }

                            readingLine = false;

                            count = ProcessTokens(tokens, scanner, builder, isLenientParsing, count, ref definition);

                            tokens.Clear();

                            continue;
                        }

                        if (scanner.CurrentToken is CommentToken)
                        {
                            continue;
                        }

                        var isLineOperator = scanner.CurrentToken is OperatorToken op && (op.Data == FreeEntry || op.Data == InUseEntry);

                        // Anything other than a number or an entry marker ends the table body.
                        if (!(scanner.CurrentToken is NumericToken) && !isLineOperator)
                        {
                            break;
                        }

                        readingLine = true;
                        tokens.Add(scanner.CurrentToken);
                    }

                    // Flush a final line that was not terminated by an end-of-line token.
                    if (tokens.Count > 0)
                    {
                        ProcessTokens(tokens, scanner, builder, isLenientParsing, count, ref definition);
                    }
                }
                finally
                {
                    // Always restore the scanner, even when a line is rejected, so the end-of-line
                    // tokenizer does not leak into whatever the caller parses next with this scanner.
                    scanner.DeregisterCustomTokenizer(tokenizer);
                }
            }

            builder.Dictionary = ParseTrailer(scanner, isLenientParsing);

            return(builder.Build());
        }
示例#22
0
        public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation,
                                         long offsetCorrection,
                                         IPdfTokenScanner pdfScanner,
                                         ISeekableTokenScanner tokenScanner)
        {
            long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceLocation, tokenScanner, bytes, isLenientParsing);

            if (fixedOffset > -1)
            {
                crossReferenceLocation = fixedOffset;

                log.Debug($"Found the first cross reference table or stream at {fixedOffset}.");
            }

            var table = new CrossReferenceTableBuilder();

            var  prevSet = new HashSet <long>();
            long previousCrossReferenceLocation = crossReferenceLocation;

            var missedAttempts = 0;

            // Parse all cross reference tables and streams.
            while (previousCrossReferenceLocation > 0 && missedAttempts < 100)
            {
                log.Debug($"Reading cross reference table or stream at {previousCrossReferenceLocation}.");

                if (previousCrossReferenceLocation >= bytes.Length)
                {
                    break;
                }

                // seek to xref table
                tokenScanner.Seek(previousCrossReferenceLocation);

                tokenScanner.MoveNext();

                if (tokenScanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref")
                {
                    missedAttempts = 0;
                    log.Debug("Element was cross reference table.");

                    CrossReferenceTablePart tablePart = CrossReferenceTableParser.Parse(tokenScanner,
                                                                                        previousCrossReferenceLocation, isLenientParsing);

                    var nextOffset = tablePart.GetPreviousOffset();

                    if (nextOffset >= 0)
                    {
                        nextOffset += offsetCorrection;
                    }

                    previousCrossReferenceLocation = nextOffset;

                    DictionaryToken tableDictionary = tablePart.Dictionary;

                    CrossReferenceTablePart streamPart = null;

                    // check for a XRef stream, it may contain some object ids of compressed objects
                    if (tableDictionary.ContainsKey(NameToken.XrefStm))
                    {
                        log.Debug("Cross reference table contained referenced to stream. Reading the stream.");

                        int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int;

                        // check the xref stream reference
                        fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing);
                        if (fixedOffset > -1 && fixedOffset != streamOffset)
                        {
                            log.Warn($"/XRefStm offset {streamOffset} is incorrect, corrected to {fixedOffset}");

                            streamOffset = (int)fixedOffset;

                            // Update the cross reference table to be a stream instead.
                            tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset));
                            tablePart       = new CrossReferenceTablePart(tablePart.ObjectOffsets, streamOffset,
                                                                          tablePart.Previous, tableDictionary, tablePart.Type);
                        }

                        // Read the stream from the table.
                        if (streamOffset > 0)
                        {
                            try
                            {
                                TryParseCrossReferenceStream(streamOffset, pdfScanner, out streamPart);
                            }
                            catch (InvalidOperationException ex)
                            {
                                if (isLenientParsing)
                                {
                                    log.Error("Failed to parse /XRefStm at offset " + streamOffset, ex);
                                }
                                else
                                {
                                    throw;
                                }
                            }
                        }
                        else
                        {
                            if (isLenientParsing)
                            {
                                log.Error("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                            }
                            else
                            {
                                throw new PdfDocumentFormatException("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                            }
                        }
                    }

                    table.Add(tablePart);

                    if (streamPart != null)
                    {
                        table.Add(streamPart);
                    }
                }