/// <summary>
/// Creates a <see cref="CrossReferenceParser"/> and stores each supplied collaborator
/// in the corresponding instance field for later use.
/// </summary>
/// <param name="log">Logger stored on the instance; used for diagnostic output while parsing.</param>
/// <param name="offsetValidator">Validator stored on the instance; used to check cross reference offsets.</param>
/// <param name="xrefCosChecker">Checker stored on the instance.</param>
/// <param name="crossReferenceStreamParser">Cross reference stream parser stored on the instance.</param>
/// <param name="crossReferenceTableParser">Cross reference table parser stored on the instance.</param>
public CrossReferenceParser(
    ILog log,
    XrefOffsetValidator offsetValidator,
    XrefCosOffsetChecker xrefCosChecker,
    CrossReferenceStreamParser crossReferenceStreamParser,
    CrossReferenceTableParser crossReferenceTableParser)
{
    // Plain field capture only: no validation or other work happens here.
    // Assignments follow the parameter order so the two lists are easy to compare.
    this.log = log;
    this.offsetValidator = offsetValidator;
    this.xrefCosChecker = xrefCosChecker;
    this.crossReferenceStreamParser = crossReferenceStreamParser;
    this.crossReferenceTableParser = crossReferenceTableParser;
}
public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation, long offsetCorrection, IPdfTokenScanner pdfScanner, ISeekableTokenScanner tokenScanner) { long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceLocation, tokenScanner, bytes, isLenientParsing); if (fixedOffset > -1) { crossReferenceLocation = fixedOffset; log.Debug($"Found the first cross reference table or stream at {fixedOffset}."); } var table = new CrossReferenceTableBuilder(); var prevSet = new HashSet <long>(); long previousCrossReferenceLocation = crossReferenceLocation; var missedAttempts = 0; // Parse all cross reference tables and streams. while (previousCrossReferenceLocation > 0 && missedAttempts < 100) { log.Debug($"Reading cross reference table or stream at {previousCrossReferenceLocation}."); if (previousCrossReferenceLocation >= bytes.Length) { break; } // seek to xref table tokenScanner.Seek(previousCrossReferenceLocation); tokenScanner.MoveNext(); if (tokenScanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref") { missedAttempts = 0; log.Debug("Element was cross reference table."); CrossReferenceTablePart tablePart = CrossReferenceTableParser.Parse(tokenScanner, previousCrossReferenceLocation, isLenientParsing); var nextOffset = tablePart.GetPreviousOffset(); if (nextOffset >= 0) { nextOffset += offsetCorrection; } previousCrossReferenceLocation = nextOffset; DictionaryToken tableDictionary = tablePart.Dictionary; CrossReferenceTablePart streamPart = null; // check for a XRef stream, it may contain some object ids of compressed objects if (tableDictionary.ContainsKey(NameToken.XrefStm)) { log.Debug("Cross reference table contained referenced to stream. 
Reading the stream."); int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int; // check the xref stream reference fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing); if (fixedOffset > -1 && fixedOffset != streamOffset) { log.Warn($"/XRefStm offset {streamOffset} is incorrect, corrected to {fixedOffset}"); streamOffset = (int)fixedOffset; // Update the cross reference table to be a stream instead. tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset)); tablePart = new CrossReferenceTablePart(tablePart.ObjectOffsets, streamOffset, tablePart.Previous, tableDictionary, tablePart.Type); } // Read the stream from the table. if (streamOffset > 0) { try { TryParseCrossReferenceStream(streamOffset, pdfScanner, out streamPart); } catch (InvalidOperationException ex) { if (isLenientParsing) { log.Error("Failed to parse /XRefStm at offset " + streamOffset, ex); } else { throw; } } } else { if (isLenientParsing) { log.Error("Skipped XRef stream due to a corrupt offset:" + streamOffset); } else { throw new PdfDocumentFormatException("Skipped XRef stream due to a corrupt offset:" + streamOffset); } } } table.Add(tablePart); if (streamPart != null) { table.Add(streamPart); } }