/// <summary>
/// Searches backwards from the end of the input for the last "%%EOF" marker.
/// </summary>
/// <remarks>
/// The reader is returned to the offset it had on entry before this method returns.
/// </remarks>
/// <returns>
/// The value of <c>bytes.CurrentOffset</c> at the position where the marker matched,
/// or <see cref="long.MaxValue"/> when no marker was found.
/// </returns>
private long GetLastEndOfFileMarker()
{
    const string searchTerm = "%%EOF";

    var originalOffset = bytes.CurrentOffset;
    var candidateOffset = bytes.Length - searchTerm.Length;

    // An input shorter than the marker cannot contain it; bail out instead of seeking to a negative offset.
    if (candidateOffset < 0)
    {
        return long.MaxValue;
    }

    bytes.Seek(candidateOffset);

    // Offset 0 is never tested: the loop stops as soon as the reader reports offset 0.
    while (bytes.CurrentOffset > 0)
    {
        if (ReadHelper.IsString(bytes, searchTerm))
        {
            // NOTE(review): presumably IsString leaves the reader where the match starts - confirm.
            var position = bytes.CurrentOffset;
            bytes.Seek(originalOffset);
            return position;
        }

        // Pre-decrement so every offset is tested exactly once while walking towards the start.
        bytes.Seek(--candidateOffset);
    }

    bytes.Seek(originalOffset);
    return long.MaxValue;
}
/// <summary>
/// Reads a generation number from the input and validates its range.
/// </summary>
/// <param name="bytes">The input to read the number from.</param>
/// <returns>The generation number that was read.</returns>
/// <exception cref="FormatException">
/// The value is negative or greater than <c>GenerationNumberThreshold</c>.
/// </exception>
public static int ReadGenerationNumber(IInputBytes bytes)
{
    int result = ReadHelper.ReadInt(bytes);

    if (result < 0 || result > GenerationNumberThreshold)
    {
        // The check rejects negative values as well, so the message names both causes.
        throw new FormatException($"Generation Number '{result}' has more than 5 digits or is negative");
    }

    return result;
}
/// <summary>
/// Reads an object number from the input, rejecting values outside the accepted range.
/// </summary>
/// <param name="bytes">The input to read the number from.</param>
/// <returns>The object number that was read.</returns>
/// <exception cref="FormatException">
/// The value is negative, or is not below <c>ObjectNumberThreshold</c>.
/// </exception>
public static long ReadObjectNumber(IInputBytes bytes)
{
    long result = ReadHelper.ReadLong(bytes);

    var withinRange = result >= 0 && result < ObjectNumberThreshold;
    if (withinRange)
    {
        return result;
    }

    throw new FormatException($"Object Number \'{result}\' has more than 10 digits or is negative");
}
/// <summary>
/// Tests whether a buffer begins with a whitespace byte followed by "obj" in any letter casing.
/// </summary>
/// <param name="data">The buffer to inspect; indices 0 through 3 are read.</param>
/// <returns><c>true</c> when the first four bytes form the marker, otherwise <c>false</c>.</returns>
private static bool IsStartObjMarker(byte[] data)
{
    // Compares one byte against the lower and upper case form of the expected letter.
    bool IsLetterAt(int index, char lower, char upper)
    {
        var value = data[index];
        return value == lower || value == upper;
    }

    // Evaluated left to right with short-circuiting, so later bytes are only read when earlier ones match.
    return ReadHelper.IsWhitespace(data[0])
        && IsLetterAt(1, 'o', 'O')
        && IsLetterAt(2, 'b', 'B')
        && IsLetterAt(3, 'j', 'J');
}
/// <summary>
/// Brute-force scans the input for "N G obj" headers and records an offset for each object found.
/// </summary>
/// <remarks>
/// The result is cached in <c>objectLocations</c>; subsequent calls return the cached dictionary.
/// The reader is returned to the offset it had on entry once the scan completes.
/// Candidates whose object or generation number is missing or does not fit the target
/// integer type are skipped rather than causing an exception.
/// </remarks>
/// <returns>A map from each discovered object reference to the offset recorded for it.</returns>
public IReadOnlyDictionary<IndirectReference, long> GetObjectLocations()
{
    if (objectLocations != null)
    {
        return objectLocations;
    }

    var lastEndOfFile = GetLastEndOfFileMarker();
    var results = new Dictionary<IndirectReference, long>();
    var originPosition = bytes.CurrentOffset;

    long currentOffset = MinimumSearchOffset;
    var inObject = false;

    do
    {
        if (inObject)
        {
            // Inside an object body: advance one byte at a time, or jump over "endobj" once it is seen.
            var step = 1;

            if (bytes.CurrentByte == 'e')
            {
                var next = bytes.Peek();
                if (next.HasValue && next == 'n' && ReadHelper.IsString(bytes, "endobj"))
                {
                    inObject = false;
                    step = "endobj".Length;
                }
            }

            for (var i = 0; i < step; i++)
            {
                bytes.MoveNext();
                currentOffset++;
            }

            continue;
        }

        bytes.Seek(currentOffset);

        if (!ReadHelper.IsString(bytes, " obj"))
        {
            currentOffset++;
            continue;
        }

        // Current byte is ' '[obj]; walk backwards collecting the generation number digits.
        var offset = currentOffset - 1;
        bytes.Seek(offset);

        var generationBytes = new StringBuilder();
        while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
        {
            generationBytes.Insert(0, (char)bytes.CurrentByte);
            offset--;
            bytes.Seek(offset);
        }

        // We should now be at the space between object and generation number.
        if (!ReadHelper.IsSpace(bytes.CurrentByte))
        {
            // Not an object header. The offset must advance, otherwise the same " obj" is matched forever.
            currentOffset++;
            continue;
        }

        bytes.Seek(--offset);

        var objectNumberBytes = new StringBuilder();
        while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
        {
            objectNumberBytes.Insert(0, (char)bytes.CurrentByte);
            offset--;
            bytes.Seek(offset);
        }

        if (!ReadHelper.IsWhitespace(bytes.CurrentByte))
        {
            // Same reasoning as above: always move past a rejected candidate.
            currentOffset++;
            continue;
        }

        // TryParse also rejects an empty digit run and values that overflow the target type.
        if (!long.TryParse(objectNumberBytes.ToString(), NumberStyles.None, CultureInfo.InvariantCulture, out var obj)
            || !int.TryParse(generationBytes.ToString(), NumberStyles.None, CultureInfo.InvariantCulture, out var generation))
        {
            currentOffset++;
            continue;
        }

        // The reader sits on the whitespace preceding the object number.
        // NOTE(review): the +1 presumably points at the first digit of the object number - confirm.
        results[new IndirectReference(obj, generation)] = bytes.CurrentOffset + 1;

        inObject = true;
        currentOffset++;
        bytes.Seek(currentOffset);
    } while (currentOffset < lastEndOfFile && !bytes.IsAtEnd());

    // reestablish origin position
    bytes.Seek(originPosition);

    objectLocations = results;
    return objectLocations;
}
/// <summary>
/// Brute-force scans the input for "N G obj" headers and records an offset for each object found.
/// </summary>
/// <remarks>
/// The reader is returned to the offset it had on entry once the scan completes.
/// Candidates whose object or generation number is missing or does not fit the target
/// integer type are skipped rather than causing an exception.
/// </remarks>
/// <param name="bytes">The input to scan.</param>
/// <returns>A map from each discovered object reference to the offset recorded for it.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is <c>null</c>.</exception>
/// <exception cref="PdfDocumentFormatException">The loop-protection counter exceeded its limit.</exception>
public static IReadOnlyDictionary<IndirectReference, long> GetObjectLocations(IInputBytes bytes)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes));
    }

    var loopProtection = 0;
    var lastEndOfFile = GetLastEndOfFileMarker(bytes);
    var results = new Dictionary<IndirectReference, long>();
    var generationBytes = new StringBuilder();
    var objectNumberBytes = new StringBuilder();
    var originPosition = bytes.CurrentOffset;
    var currentOffset = (long)MinimumSearchOffset;
    var currentlyInObject = false;
    var objBuffer = new byte[4];

    do
    {
        if (loopProtection > 10_000_000)
        {
            throw new PdfDocumentFormatException("Failed to brute-force search the file due to an infinite loop.");
        }

        loopProtection++;

        if (currentlyInObject)
        {
            // Inside an object body: advance one byte at a time, or jump over "endobj" once it is seen.
            if (bytes.CurrentByte == 'e')
            {
                var next = bytes.Peek();

                if (next.HasValue && next == 'n')
                {
                    if (ReadHelper.IsString(bytes, "endobj"))
                    {
                        currentlyInObject = false;
                        loopProtection = 0;

                        for (var i = 0; i < "endobj".Length; i++)
                        {
                            bytes.MoveNext();
                            currentOffset++;
                        }
                    }
                    else
                    {
                        bytes.MoveNext();
                        currentOffset++;
                    }
                }
                else
                {
                    bytes.MoveNext();
                    currentOffset++;
                }
            }
            else
            {
                bytes.MoveNext();
                currentOffset++;
                loopProtection = 0;
            }

            continue;
        }

        bytes.Seek(currentOffset);
        bytes.Read(objBuffer);

        if (!IsStartObjMarker(objBuffer))
        {
            currentOffset++;
            continue;
        }

        // The builders are shared across iterations. Reset them for every candidate so digits
        // collected for a rejected candidate can never be prepended to the next one.
        generationBytes.Clear();
        objectNumberBytes.Clear();

        // Current byte is ' '[obj]
        var offset = currentOffset + 1;
        bytes.Seek(offset);

        // Rewind over the whitespace separating the generation number from "obj".
        while (ReadHelper.IsWhitespace(bytes.CurrentByte) && offset >= MinimumSearchOffset)
        {
            bytes.Seek(--offset);
        }

        while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
        {
            generationBytes.Insert(0, (char)bytes.CurrentByte);
            offset--;
            bytes.Seek(offset);
        }

        // We should now be at the space between object and generation number.
        if (!ReadHelper.IsWhitespace(bytes.CurrentByte))
        {
            currentOffset++;
            continue;
        }

        // Bounded like the other rewind loops so a run of leading whitespace cannot push the seek below the search floor.
        while (ReadHelper.IsWhitespace(bytes.CurrentByte) && offset >= MinimumSearchOffset)
        {
            bytes.Seek(--offset);
        }

        while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
        {
            objectNumberBytes.Insert(0, (char)bytes.CurrentByte);
            offset--;
            bytes.Seek(offset);
        }

        // TryParse rejects an empty digit run as well as values that overflow the target type.
        if (!long.TryParse(objectNumberBytes.ToString(), NumberStyles.None, CultureInfo.InvariantCulture, out var obj)
            || !int.TryParse(generationBytes.ToString(), NumberStyles.None, CultureInfo.InvariantCulture, out var generation))
        {
            currentOffset++;
            continue;
        }

        results[new IndirectReference(obj, generation)] = bytes.CurrentOffset;

        currentlyInObject = true;
        currentOffset++;
        bytes.Seek(currentOffset);
        loopProtection = 0;
    } while (currentOffset < lastEndOfFile && !bytes.IsAtEnd());

    // reestablish origin position
    bytes.Seek(originPosition);

    return results;
}