/// <summary>
/// Checks whether the given cross-reference offset can be used and, when lenient parsing
/// is enabled, tries to replace an unusable offset with a corrected one.
/// </summary>
/// <param name="startXRefOffset">The cross-reference offset to check.</param>
/// <param name="isLenientParsing">
/// When <c>false</c> the offset is returned unchanged and the source is not touched.
/// </param>
/// <returns>
/// <paramref name="startXRefOffset"/> when it is accepted, the value produced by
/// <c>CalculateXRefFixedOffset</c> when it is positive but rejected, or <c>-1</c> when it is
/// not positive and no "xref" keyword was found at it.
/// </returns>
public long CheckXRefOffset(long startXRefOffset, bool isLenientParsing)
{
    // Repairing the offset is only attempted in lenient mode.
    if (!isLenientParsing)
    {
        return startXRefOffset;
    }

    source.Seek(startXRefOffset);
    ReadHelper.SkipSpaces(source);

    // The cheap single-character peek runs first; the full keyword comparison only
    // happens when the next character is an 'x'.
    var startsWithXrefKeyword = source.Peek() == 'x' && ReadHelper.IsString(source, "xref");
    if (startsWithXrefKeyword)
    {
        return startXRefOffset;
    }

    if (startXRefOffset <= 0)
    {
        // Nothing to validate or repair against: no valid offset can be found.
        return -1;
    }

    return CheckXRefStreamOffset(source, startXRefOffset, true, pool)
        ? startXRefOffset
        : CalculateXRefFixedOffset(startXRefOffset);
}
/// <summary>
/// Scans <paramref name="bytes"/> from <c>MinimumSearchOffset</c> onwards for "xref" keywords
/// and stores the offset of every match that follows a whitespace byte in
/// <c>bfSearchXRefTablesOffsets</c>.
/// </summary>
/// <remarks>
/// Does nothing when <c>bfSearchXRefTablesOffsets</c> has already been created. The position
/// of <paramref name="bytes"/> on entry is restored before returning.
/// </remarks>
/// <param name="bytes">The input to scan.</param>
private void BruteForceSearchForTables(IInputBytes bytes)
{
    if (bfSearchXRefTablesOffsets != null)
    {
        return;
    }

    // A pdf may contain more than one xref entry, so every match is collected.
    bfSearchXRefTablesOffsets = new List<long>();

    var resumeAt = bytes.CurrentOffset;
    bytes.Seek(MinimumSearchOffset);

    while (bytes.MoveNext() && !bytes.IsAtEnd())
    {
        if (!ReadHelper.IsString(bytes, "xref"))
        {
            continue;
        }

        var keywordOffset = bytes.CurrentOffset;

        // Step back one position: a whitespace there ensures that we don't read
        // "startxref" instead of "xref".
        bytes.Seek(keywordOffset - 1);
        var followsWhitespace = ReadHelper.IsWhitespace(bytes.CurrentByte);
        if (followsWhitespace)
        {
            bfSearchXRefTablesOffsets.Add(keywordOffset);
        }

        // Continue the scan after the four characters of the keyword.
        bytes.Seek(keywordOffset + 4);
    }

    bytes.Seek(resumeAt);
}
/// <summary>
/// Scans <c>source</c> from <c>MinimumSearchOffset</c> to the end for "xref" keywords and stores
/// the position of every match that follows a whitespace in <c>bfSearchXRefTablesOffsets</c>.
/// </summary>
/// <remarks>
/// Does nothing when <c>bfSearchXRefTablesOffsets</c> has already been created. The position
/// of <c>source</c> on entry is restored before returning.
/// </remarks>
private void BfSearchForXRefTables()
{
    if (bfSearchXRefTablesOffsets != null)
    {
        return;
    }

    // A pdf may contain more than one xref entry, so every match is collected.
    bfSearchXRefTablesOffsets = new List<long>();

    long resumeAt = source.GetPosition();
    source.Seek(MinimumSearchOffset);

    // One byte is consumed after every probe, whether or not it matched.
    for (; !source.IsEof(); source.Read())
    {
        if (!ReadHelper.IsString(source, "xref"))
        {
            continue;
        }

        long keywordOffset = source.GetPosition();

        // Step back one position: a whitespace there ensures that we don't read
        // "startxref" instead of "xref".
        source.Seek(keywordOffset - 1);
        if (ReadHelper.IsWhitespace(source))
        {
            bfSearchXRefTablesOffsets.Add(keywordOffset);
        }

        // Continue the scan after the four characters of the keyword.
        source.Seek(keywordOffset + 4);
    }

    source.Seek(resumeAt);
}
/// <summary>
/// Checks whether the text produced by <c>ObjectHelper.createObjectString</c> for
/// <paramref name="objectKey"/> is present in <paramref name="source"/> at <paramref name="offset"/>.
/// </summary>
/// <param name="source">The source to inspect; its position on entry is restored before returning.</param>
/// <param name="objectKey">Supplies the object number and generation to look for.</param>
/// <param name="offset">The position at which the object is expected.</param>
/// <returns>
/// <c>true</c> when the expected text is found at <paramref name="offset"/>; <c>false</c> when it is
/// not, when <paramref name="offset"/> is below <c>MINIMUM_SEARCH_OFFSET</c>, or when an
/// <see cref="InvalidOperationException"/> is raised while checking.
/// </returns>
private bool checkObjectKeys(IRandomAccessRead source, CosObjectKey objectKey, long offset)
{
    // There can't be any object at the very beginning of a pdf.
    if (offset < MINIMUM_SEARCH_OFFSET)
    {
        return false;
    }

    long number = objectKey.Number;
    long generation = objectKey.Generation;
    long returnTo = source.GetPosition();
    string expectedHeader = ObjectHelper.createObjectString(number, generation);

    try
    {
        source.Seek(offset);

        var headerMatches = ReadHelper.IsString(source, OtherEncodings.StringAsLatin1Bytes(expectedHeader));
        if (headerMatches)
        {
            // Everything is ok: the expected object is at the given offset.
            source.Seek(returnTo);
            return true;
        }
    }
    catch (InvalidOperationException)
    {
        // Deliberately ignored: a failure here simply means there is no valid object
        // number at the offset.
    }
    finally
    {
        // Runs on every path so the caller's position is always restored.
        source.Seek(returnTo);
    }

    // No valid object number found.
    return false;
}
/// <summary>
/// Scans <paramref name="source"/> from <c>MINIMUM_SEARCH_OFFSET</c> for "%%EOF" markers and
/// stores the chosen marker offset in <c>lastEndOfFileMarker</c>.
/// </summary>
/// <remarks>
/// A marker is recorded when reading an object number and generation number after it throws,
/// or when its offset is not below <c>source.Length</c> (which also stops the scan). When no
/// marker is recorded, <c>lastEndOfFileMarker</c> is set to <see cref="long.MaxValue"/>.
/// Does nothing when <c>lastEndOfFileMarker</c> is already set. The position of
/// <paramref name="source"/> on entry is restored before returning.
/// </remarks>
/// <param name="source">The input to scan.</param>
private void BruteForceSearchForEndOfFileMarker(IInputBytes source)
{
    if (lastEndOfFileMarker != null)
    {
        return;
    }

    long resumeAt = source.CurrentOffset;
    source.Seek(MINIMUM_SEARCH_OFFSET);

    // One step forward after every probe, unless the scan is stopped early.
    for (; !source.IsAtEnd(); source.MoveNext())
    {
        // search for EOF marker
        if (!ReadHelper.IsString(source, "%%EOF"))
        {
            continue;
        }

        long markerOffset = source.CurrentOffset;

        if (markerOffset >= source.Length)
        {
            lastEndOfFileMarker = markerOffset;
            break;
        }

        try
        {
            source.Seek(markerOffset + 5);

            // Check if the following data is some valid pdf content, which most likely
            // indicates that the pdf is linearized, updated or just cut off somewhere
            // in the middle.
            ReadHelper.SkipSpaces(source);
            ObjectHelper.ReadObjectNumber(source);
            ObjectHelper.ReadGenerationNumber(source);
        }
        catch (Exception)
        {
            // Save the EOF marker as the following data is most likely some garbage.
            lastEndOfFileMarker = markerOffset;
        }
    }

    source.Seek(resumeAt);

    // No EOF marker found.
    if (lastEndOfFileMarker == null)
    {
        lastEndOfFileMarker = long.MaxValue;
    }
}
/// <summary>
/// Checks whether the text produced by <c>ObjectHelper.CreateObjectString</c> for
/// <paramref name="objectKey"/> is present in <paramref name="bytes"/> at <paramref name="offset"/>.
/// </summary>
/// <param name="bytes">The input to inspect; its offset on entry is restored before returning.</param>
/// <param name="objectKey">Supplies the object number and generation to look for.</param>
/// <param name="offset">The position at which the object is expected.</param>
/// <returns>
/// <c>true</c> when the expected text is found; <c>false</c> when it is not, when
/// <paramref name="offset"/> is below <c>MinimumSearchOffset</c> or not below <c>bytes.Length</c>,
/// or when any exception is raised while checking.
/// </returns>
private static bool CheckObjectKeys(IInputBytes bytes, IndirectReference objectKey, long offset)
{
    // There can't be any object at the very beginning of a pdf.
    if (offset < MinimumSearchOffset)
    {
        return false;
    }

    var number = objectKey.ObjectNumber;
    long generation = objectKey.Generation;
    var returnTo = bytes.CurrentOffset;
    var expectedHeader = ObjectHelper.CreateObjectString(number, generation);

    try
    {
        var beyondEnd = offset >= bytes.Length;
        if (beyondEnd)
        {
            bytes.Seek(returnTo);
            return false;
        }

        bytes.Seek(offset);

        // A single whitespace at the current byte is stepped over before comparing.
        if (ReadHelper.IsWhitespace(bytes.CurrentByte))
        {
            bytes.MoveNext();
        }

        if (ReadHelper.IsString(bytes, expectedHeader))
        {
            // Everything is ok: the expected object is at the given offset.
            bytes.Seek(returnTo);
            return true;
        }
    }
    catch (Exception)
    {
        // Deliberately ignored: a failure here simply means there is no valid object
        // number at the offset.
    }
    finally
    {
        // Runs on every path so the caller's offset is always restored.
        bytes.Seek(returnTo);
    }

    // No valid object number found.
    return false;
}
/// <summary>
/// Scans <paramref name="source"/> from <c>MINIMUM_SEARCH_OFFSET</c> to the end for "%%EOF"
/// markers and stores the chosen marker position in <c>lastEOFMarker</c>.
/// </summary>
/// <remarks>
/// A marker is recorded when reading an object number and generation number after it raises
/// an <see cref="InvalidOperationException"/>; a later such marker overwrites an earlier one.
/// When no marker is recorded, <c>lastEOFMarker</c> is set to <see cref="long.MaxValue"/>.
/// Does nothing when <c>lastEOFMarker</c> is already set. The position of
/// <paramref name="source"/> on entry is restored before returning.
/// </remarks>
/// <param name="source">The source to scan.</param>
private void bfSearchForLastEOFMarker(IRandomAccessRead source)
{
    if (lastEOFMarker != null)
    {
        return;
    }

    long resumeAt = source.GetPosition();
    source.Seek(MINIMUM_SEARCH_OFFSET);

    // One byte is consumed after every probe, whether or not it matched.
    for (; !source.IsEof(); source.Read())
    {
        // search for EOF marker
        if (!ReadHelper.IsString(source, "%%EOF"))
        {
            continue;
        }

        long markerOffset = source.GetPosition();
        source.Seek(markerOffset + 5);

        try
        {
            // Check if the following data is some valid pdf content, which most likely
            // indicates that the pdf is linearized, updated or just cut off somewhere
            // in the middle.
            ReadHelper.SkipSpaces(source);
            ObjectHelper.ReadObjectNumber(source);
            ObjectHelper.ReadGenerationNumber(source);
        }
        catch (InvalidOperationException)
        {
            // Save the EOF marker as the following data is most likely some garbage.
            lastEOFMarker = markerOffset;
        }
    }

    source.Seek(resumeAt);

    // No EOF marker found.
    if (lastEOFMarker == null)
    {
        lastEOFMarker = long.MaxValue;
    }
}
/// <summary>
/// Scans <paramref name="bytes"/> from <c>MinimumSearchOffset</c> for "/XRef" entries and, for
/// each one, searches backwards for the start of the enclosing object. Every start offset found
/// is stored in <c>bfSearchXRefStreamsOffsets</c>.
/// </summary>
/// <remarks>
/// Does nothing when <c>bfSearchXRefStreamsOffsets</c> has already been created. The offset of
/// <paramref name="bytes"/> on entry is restored before returning.
/// </remarks>
/// <param name="bytes">The input to scan.</param>
private void BfSearchForXRefStreams(IInputBytes bytes)
{
    if (bfSearchXRefStreamsOffsets != null)
    {
        return;
    }

    // Cross-reference streams are identified by the /XRef name in their dictionary.
    // The lowercase "xref" keyword belongs to classic xref tables (and "startxref"),
    // which is not what this search is after.
    const string xrefStreamMarker = "/XRef";
    const string objString = " obj";

    // a pdf may contain more than one /XRef entry
    bfSearchXRefStreamsOffsets = new List<long>();

    var startOffset = bytes.CurrentOffset;
    bytes.Seek(MinimumSearchOffset);

    // search for XRef streams
    while (bytes.MoveNext() && !bytes.IsAtEnd())
    {
        if (!ReadHelper.IsString(bytes, xrefStreamMarker))
        {
            continue;
        }

        // search backwards for the beginning of the stream
        long newOffset = -1;
        long xrefOffset = bytes.CurrentOffset;
        bool objFound = false;

        // Walk backwards in windows of 10 positions (at most 39 windows), probing each
        // position in the window for the " obj" keyword.
        for (var i = 1; i < 40 && !objFound; i++)
        {
            long currentOffset = xrefOffset - (i * 10);
            if (currentOffset <= 0)
            {
                continue;
            }

            bytes.Seek(currentOffset);

            for (int j = 0; j < 10; j++)
            {
                if (ReadHelper.IsString(bytes, objString))
                {
                    long tempOffset = currentOffset - 1;
                    bytes.Seek(tempOffset);
                    var generationNumber = bytes.Peek();

                    // is the next char a digit?
                    if (generationNumber.HasValue && ReadHelper.IsDigit(generationNumber.Value))
                    {
                        tempOffset--;
                        bytes.Seek(tempOffset);

                        // is the digit preceded by a space?
                        if (ReadHelper.IsSpace(bytes.CurrentByte))
                        {
                            // Step backwards over the digits of the object number.
                            int length = 0;
                            bytes.Seek(--tempOffset);
                            while (tempOffset > MinimumSearchOffset && ReadHelper.IsDigit(bytes.CurrentByte))
                            {
                                bytes.Seek(--tempOffset);
                                length++;
                            }

                            if (length > 0)
                            {
                                // Move forward again onto the first digit of the object number.
                                bytes.MoveNext();
                                newOffset = bytes.CurrentOffset;
                            }
                        }
                    }

                    // The backward search stops at the first " obj" found, usable or not.
                    objFound = true;
                    break;
                }

                currentOffset++;
                bytes.MoveNext();
            }
        }

        if (newOffset > -1)
        {
            bfSearchXRefStreamsOffsets.Add(newOffset);
        }

        // Continue the scan after the marker that was just handled.
        bytes.Seek(xrefOffset + xrefStreamMarker.Length);
    }

    bytes.Seek(startOffset);
}
/// <summary>
/// Scans <c>source</c> from <c>MinimumSearchOffset</c> to the end for "/XRef" entries and, for
/// each one, searches backwards for the start of the enclosing object. Every start position
/// found is stored in <c>bfSearchXRefStreamsOffsets</c>.
/// </summary>
/// <remarks>
/// Does nothing when <c>bfSearchXRefStreamsOffsets</c> has already been created. The position
/// of <c>source</c> on entry is restored before returning.
/// </remarks>
private void BfSearchForXRefStreams()
{
    if (bfSearchXRefStreamsOffsets != null)
    {
        return;
    }

    // Cross-reference streams are identified by the /XRef name in their dictionary.
    // The lowercase "xref" keyword belongs to classic xref tables (and "startxref"),
    // which is not what this search is after.
    const string xrefStreamMarker = "/XRef";
    const string objString = " obj";

    // a pdf may contain more than one /XRef entry
    bfSearchXRefStreamsOffsets = new List<long>();

    long originOffset = source.GetPosition();
    source.Seek(MinimumSearchOffset);

    // search for XRef streams
    while (!source.IsEof())
    {
        if (ReadHelper.IsString(source, xrefStreamMarker))
        {
            // search backwards for the beginning of the stream
            long newOffset = -1;
            long xrefOffset = source.GetPosition();
            bool objFound = false;

            // Walk backwards in windows of 10 positions (at most 39 windows), probing each
            // position in the window for the " obj" keyword.
            for (int i = 1; i < 40 && !objFound; i++)
            {
                long currentOffset = xrefOffset - (i * 10);
                if (currentOffset <= 0)
                {
                    continue;
                }

                source.Seek(currentOffset);

                for (int j = 0; j < 10; j++)
                {
                    if (ReadHelper.IsString(source, objString))
                    {
                        long tempOffset = currentOffset - 1;
                        source.Seek(tempOffset);
                        int genId = source.Peek();

                        // is the next char a digit?
                        if (ReadHelper.IsDigit(genId))
                        {
                            tempOffset--;
                            source.Seek(tempOffset);

                            // is the digit preceded by a space?
                            if (ReadHelper.IsSpace(source))
                            {
                                // Step backwards over the digits of the object number.
                                int length = 0;
                                source.Seek(--tempOffset);
                                while (tempOffset > MinimumSearchOffset && ReadHelper.IsDigit(source))
                                {
                                    source.Seek(--tempOffset);
                                    length++;
                                }

                                if (length > 0)
                                {
                                    // Move forward again onto the first digit of the object number.
                                    source.Read();
                                    newOffset = source.GetPosition();
                                }
                            }
                        }

                        // The backward search stops at the first " obj" found, usable or not.
                        objFound = true;
                        break;
                    }

                    currentOffset++;
                    source.Read();
                }
            }

            if (newOffset > -1)
            {
                bfSearchXRefStreamsOffsets.Add(newOffset);
            }

            // Continue the scan after the marker that was just handled.
            source.Seek(xrefOffset + xrefStreamMarker.Length);
        }

        source.Read();
    }

    source.Seek(originOffset);
}
/// <summary>
/// Scans <paramref name="source"/> for object headers of the form
/// "&lt;object number&gt; &lt;generation digit&gt; obj" and records the position of each one in
/// <c>bfSearchCOSObjectKeyOffsets</c>, keyed by object number and generation.
/// </summary>
/// <remarks>
/// The scan starts at <c>MINIMUM_SEARCH_OFFSET</c> and stops at <c>lastEOFMarker</c> (determined
/// first via <c>bfSearchForLastEOFMarker</c>) or at the end of the source. The dictionary is
/// recreated on every call. The position of <paramref name="source"/> on entry is restored
/// before returning.
/// </remarks>
/// <param name="source">The source to scan.</param>
private void bfSearchForObjects(IRandomAccessRead source)
{
    // The leading space is part of the marker: the scan position then sits on the
    // separator, so the byte just before it is the generation digit. Probing for "obj"
    // alone would put the separator itself at that position and the digit check below
    // could never succeed for a well-formed "N G obj" header.
    const string objString = " obj";
    const string endobjString = "endobj";

    bfSearchForLastEOFMarker(source);
    bfSearchCOSObjectKeyOffsets = new Dictionary<CosObjectKey, long>();

    long originOffset = source.GetPosition();
    long currentOffset = MINIMUM_SEARCH_OFFSET;

    // Details of the most recently seen object; it is only stored once a following
    // object is found, or after the loop when the end of the file looks intact.
    long lastObjectId = long.MinValue;
    int lastGenID = int.MinValue;
    long lastObjOffset = long.MinValue;
    bool endobjFound = false;

    do
    {
        source.Seek(currentOffset);

        if (ReadHelper.IsString(source, objString))
        {
            long tempOffset = currentOffset - 1;
            source.Seek(tempOffset);
            int genID = source.Peek();

            // is the next char a digit?
            if (ReadHelper.IsDigit(genID))
            {
                // Convert the ASCII digit to its numeric value.
                genID -= 48;
                tempOffset--;
                source.Seek(tempOffset);

                if (ReadHelper.IsSpace(source))
                {
                    // Step backwards over the spaces between object number and generation.
                    while (tempOffset > MINIMUM_SEARCH_OFFSET && ReadHelper.IsSpace(source))
                    {
                        source.Seek(--tempOffset);
                    }

                    // Step backwards over the digits of the object number.
                    bool objectIDFound = false;
                    while (tempOffset > MINIMUM_SEARCH_OFFSET && ReadHelper.IsDigit(source))
                    {
                        source.Seek(--tempOffset);
                        objectIDFound = true;
                    }

                    if (objectIDFound)
                    {
                        // Move forward again onto the first digit before reading the number.
                        source.Read();
                        long objectId = ObjectHelper.ReadObjectNumber(source);

                        if (lastObjOffset > 0)
                        {
                            // add the former object ID only if there was a subsequent object ID
                            bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
                        }

                        lastObjectId = objectId;
                        lastGenID = genID;
                        lastObjOffset = tempOffset + 1;
                        currentOffset += objString.Length - 1;
                        endobjFound = false;
                    }
                }
            }
        }
        else if (ReadHelper.IsString(source, endobjString))
        {
            endobjFound = true;
            currentOffset += endobjString.Length - 1;
        }

        currentOffset++;
    }
    while (currentOffset < lastEOFMarker && !source.IsEof());

    if ((lastEOFMarker < long.MaxValue || endobjFound) && lastObjOffset > 0)
    {
        // if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
        // the last object id has to be added here so that it can't get lost as there isn't any
        // subsequent object id
        bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
    }

    // reestablish origin position
    source.Seek(originOffset);
}