Пример #1
0
 public CrossReferenceParser(ILog log, XrefOffsetValidator offsetValidator,
                             XrefCosOffsetChecker xrefCosChecker,
                             CrossReferenceStreamParser crossReferenceStreamParser,
                             CrossReferenceTableParser crossReferenceTableParser)
 {
     this.log                        = log;
     this.offsetValidator            = offsetValidator;
     this.crossReferenceStreamParser = crossReferenceStreamParser;
     this.crossReferenceTableParser  = crossReferenceTableParser;
     this.xrefCosChecker             = xrefCosChecker;
 }
Пример #2
0
        public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation,
                                         long offsetCorrection,
                                         IPdfTokenScanner pdfScanner,
                                         ISeekableTokenScanner tokenScanner)
        {
            long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceLocation, tokenScanner, bytes, isLenientParsing);

            if (fixedOffset > -1)
            {
                crossReferenceLocation = fixedOffset;

                log.Debug($"Found the first cross reference table or stream at {fixedOffset}.");
            }

            var table = new CrossReferenceTableBuilder();

            var  prevSet = new HashSet <long>();
            long previousCrossReferenceLocation = crossReferenceLocation;

            var missedAttempts = 0;

            // Parse all cross reference tables and streams.
            while (previousCrossReferenceLocation > 0 && missedAttempts < 100)
            {
                log.Debug($"Reading cross reference table or stream at {previousCrossReferenceLocation}.");

                if (previousCrossReferenceLocation >= bytes.Length)
                {
                    break;
                }

                // seek to xref table
                tokenScanner.Seek(previousCrossReferenceLocation);

                tokenScanner.MoveNext();

                if (tokenScanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref")
                {
                    missedAttempts = 0;
                    log.Debug("Element was cross reference table.");

                    CrossReferenceTablePart tablePart = CrossReferenceTableParser.Parse(tokenScanner,
                                                                                        previousCrossReferenceLocation, isLenientParsing);

                    var nextOffset = tablePart.GetPreviousOffset();

                    if (nextOffset >= 0)
                    {
                        nextOffset += offsetCorrection;
                    }

                    previousCrossReferenceLocation = nextOffset;

                    DictionaryToken tableDictionary = tablePart.Dictionary;

                    CrossReferenceTablePart streamPart = null;

                    // check for a XRef stream, it may contain some object ids of compressed objects
                    if (tableDictionary.ContainsKey(NameToken.XrefStm))
                    {
                        log.Debug("Cross reference table contained referenced to stream. Reading the stream.");

                        int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int;

                        // check the xref stream reference
                        fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing);
                        if (fixedOffset > -1 && fixedOffset != streamOffset)
                        {
                            log.Warn($"/XRefStm offset {streamOffset} is incorrect, corrected to {fixedOffset}");

                            streamOffset = (int)fixedOffset;

                            // Update the cross reference table to be a stream instead.
                            tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset));
                            tablePart       = new CrossReferenceTablePart(tablePart.ObjectOffsets, streamOffset,
                                                                          tablePart.Previous, tableDictionary, tablePart.Type);
                        }

                        // Read the stream from the table.
                        if (streamOffset > 0)
                        {
                            try
                            {
                                TryParseCrossReferenceStream(streamOffset, pdfScanner, out streamPart);
                            }
                            catch (InvalidOperationException ex)
                            {
                                if (isLenientParsing)
                                {
                                    log.Error("Failed to parse /XRefStm at offset " + streamOffset, ex);
                                }
                                else
                                {
                                    throw;
                                }
                            }
                        }
                        else
                        {
                            if (isLenientParsing)
                            {
                                log.Error("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                            }
                            else
                            {
                                throw new PdfDocumentFormatException("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                            }
                        }
                    }

                    table.Add(tablePart);

                    if (streamPart != null)
                    {
                        table.Add(streamPart);
                    }
                }