/// <summary>
/// Brute-force scan of <c>source</c> that collects candidate start offsets of objects
/// containing the keyword "xref" into <c>bfSearchXRefStreamsOffsets</c>.
/// </summary>
/// <remarks>
/// The scan is performed only when <c>bfSearchXRefStreamsOffsets</c> is still null; otherwise the
/// method does nothing. The position <c>source</c> had on entry is restored before returning.
/// For every keyword hit the bytes in front of it are searched (39 windows of 10 bytes) for an
/// " obj" marker preceded by a single digit, a space and at least one further digit.
/// NOTE(review): this relies on <c>ReadHelper.IsString</c>/<c>IsDigit</c>/<c>IsSpace</c> leaving the
/// read position untouched - confirm against ReadHelper.
/// </remarks>
private void BfSearchForXRefStreams()
{
    if (bfSearchXRefStreamsOffsets != null)
    {
        // The offsets were already collected by an earlier call.
        return;
    }

    // A PDF may contain more than one /XRef entry, so every hit is collected.
    bfSearchXRefStreamsOffsets = new List<long>();

    long savedPosition = source.GetPosition();
    source.Seek(MinimumSearchOffset);

    string objectMarker = " obj";

    while (!source.IsEof())
    {
        if (ReadHelper.IsString(source, "xref"))
        {
            long keywordOffset = source.GetPosition();
            long objectStart = -1;
            bool markerFound = false;

            // Step backwards from the keyword, one 10 byte window at a time, until the
            // " obj" marker of the enclosing object is seen or 39 windows were inspected.
            int window = 1;
            while (window < 40 && !markerFound)
            {
                long cursor = keywordOffset - (window * 10);
                window++;

                if (cursor <= 0)
                {
                    // This window would start at or before the beginning of the data.
                    continue;
                }

                source.Seek(cursor);
                for (int probe = 0; probe < 10; probe++)
                {
                    if (!ReadHelper.IsString(source, objectMarker))
                    {
                        // No marker here: advance one byte and try again.
                        cursor++;
                        source.Read();
                        continue;
                    }

                    // The byte right in front of the marker has to be a digit
                    // (a single digit generation number is expected).
                    long scanOffset = cursor - 1;
                    source.Seek(scanOffset);
                    int generationChar = source.Peek();
                    if (ReadHelper.IsDigit(generationChar))
                    {
                        scanOffset--;
                        source.Seek(scanOffset);

                        // The digit has to be preceded by a space ...
                        if (ReadHelper.IsSpace(source))
                        {
                            source.Seek(--scanOffset);

                            // ... which in turn follows the digits of the object number.
                            int digitCount = 0;
                            while (scanOffset > MinimumSearchOffset && ReadHelper.IsDigit(source))
                            {
                                digitCount++;
                                source.Seek(--scanOffset);
                            }

                            if (digitCount > 0)
                            {
                                // Step over the non-digit byte the scan stopped on; the
                                // resulting position is where the object number begins.
                                source.Read();
                                objectStart = source.GetPosition();
                            }
                        }
                    }

                    // The marker was located (valid header or not), so stop searching.
                    markerFound = true;
                    break;
                }
            }

            if (objectStart >= 0)
            {
                bfSearchXRefStreamsOffsets.Add(objectStart);
            }

            // NOTE(review): the keyword searched for is 4 characters long but scanning resumes
            // 5 bytes after its offset (the length of "/XRef") - confirm which one is intended.
            source.Seek(keywordOffset + 5);
        }

        source.Read();
    }

    source.Seek(savedPosition);
}
/// <summary>
/// Brute-force scan of <paramref name="bytes"/> that collects candidate start offsets of objects
/// containing the keyword "xref" into <c>bfSearchXRefStreamsOffsets</c>.
/// </summary>
/// <param name="bytes">
/// The input to scan. It is repositioned during the scan and moved back to the offset it had on
/// entry before the method returns.
/// </param>
/// <remarks>
/// Nothing happens when <c>bfSearchXRefStreamsOffsets</c> is already non-null.
/// For every keyword hit the bytes in front of it are searched (39 windows of 10 bytes) for an
/// " obj" marker preceded by a single digit, a space and at least one further digit.
/// NOTE(review): this relies on <c>ReadHelper.IsString</c> leaving the read position untouched and
/// on the exact meaning of <c>Peek()</c> / <c>CurrentByte</c> directly after <c>Seek</c> - confirm
/// against ReadHelper and IInputBytes.
/// </remarks>
private void BfSearchForXRefStreams(IInputBytes bytes)
{
    if (bfSearchXRefStreamsOffsets != null)
    {
        return;
    }

    // More than one /XRef entry may be present in a PDF, hence a list.
    bfSearchXRefStreamsOffsets = new List<long>();

    var offsetOnEntry = bytes.CurrentOffset;
    bytes.Seek(MinimumSearchOffset);

    string objectMarker = " obj";

    while (bytes.MoveNext() && !bytes.IsAtEnd())
    {
        if (ReadHelper.IsString(bytes, "xref"))
        {
            long keywordOffset = bytes.CurrentOffset;
            long objectStart = -1;
            bool markerFound = false;

            // Look backwards from the keyword in windows of 10 bytes for the " obj" marker.
            for (int back = 1; back < 40 && !markerFound; back++)
            {
                long cursor = keywordOffset - (back * 10);
                if (cursor <= 0)
                {
                    // This window would start at or before the beginning of the data.
                    continue;
                }

                bytes.Seek(cursor);

                // cursor is advanced by the loop itself only when no marker was found at the
                // current position (a break skips the iterator section).
                for (int probe = 0; probe < 10; probe++, cursor++)
                {
                    if (!ReadHelper.IsString(bytes, objectMarker))
                    {
                        bytes.MoveNext();
                        continue;
                    }

                    // Expect a single digit (generation number) directly before the marker.
                    long scanOffset = cursor - 1;
                    bytes.Seek(scanOffset);
                    var generation = bytes.Peek();
                    bool generationIsDigit = generation.HasValue && ReadHelper.IsDigit(generation.Value);

                    if (generationIsDigit)
                    {
                        scanOffset--;
                        bytes.Seek(scanOffset);

                        // The digit has to be preceded by a space ...
                        if (ReadHelper.IsSpace(bytes.CurrentByte))
                        {
                            bytes.Seek(--scanOffset);

                            // ... which follows the digits of the object number.
                            int digitCount = 0;
                            while (scanOffset > MinimumSearchOffset && ReadHelper.IsDigit(bytes.CurrentByte))
                            {
                                digitCount++;
                                bytes.Seek(--scanOffset);
                            }

                            if (digitCount > 0)
                            {
                                // Advance past the byte the backwards scan stopped on and take
                                // the resulting offset as the start of the object.
                                bytes.MoveNext();
                                objectStart = bytes.CurrentOffset;
                            }
                        }
                    }

                    // The marker was located (valid header or not), so stop searching.
                    markerFound = true;
                    break;
                }
            }

            if (objectStart >= 0)
            {
                bfSearchXRefStreamsOffsets.Add(objectStart);
            }

            // NOTE(review): the keyword searched for is 4 characters long but scanning resumes
            // 5 bytes after its offset (the length of "/XRef") - confirm which one is intended.
            bytes.Seek(keywordOffset + 5);
        }
    }

    bytes.Seek(offsetOnEntry);
}
/// <summary>
/// Brute-force scan of <paramref name="source"/> for indirect object headers of the form
/// "<c>objectNumber generation obj</c>", storing the offset of every object found in
/// <c>bfSearchCOSObjectKeyOffsets</c> (which is replaced by a new dictionary on each call).
/// </summary>
/// <param name="source">
/// The reader to scan. Its position on entry is restored before the method returns.
/// </param>
/// <remarks>
/// <c>bfSearchForLastEOFMarker</c> is called first; the scan then runs from
/// <c>MINIMUM_SEARCH_OFFSET</c> until <c>lastEOFMarker</c> or the end of the source is reached.
/// Only single digit generation numbers are recognised. An object is stored once a later object
/// header is seen; the final object is stored after the loop under the conditions explained there.
/// NOTE(review): this relies on <c>ReadHelper.IsString</c>/<c>IsDigit</c>/<c>IsSpace</c> leaving the
/// read position untouched - confirm against ReadHelper.
/// </remarks>
private void bfSearchForObjects(IRandomAccessRead source)
{
    // The marker includes the leading space: all offset arithmetic below assumes that
    // currentOffset points at that space, so the byte before it is the generation digit.
    const string objMarker = " obj";
    const string endObjMarker = "endobj";

    bfSearchForLastEOFMarker(source);
    bfSearchCOSObjectKeyOffsets = new Dictionary<CosObjectKey, long>();

    long originOffset = source.GetPosition();
    long currentOffset = MINIMUM_SEARCH_OFFSET;
    long lastObjectId = long.MinValue;
    int lastGenID = int.MinValue;
    long lastObjOffset = long.MinValue;
    bool endobjFound = false;

    do
    {
        source.Seek(currentOffset);
        if (ReadHelper.IsString(source, objMarker))
        {
            long tempOffset = currentOffset - 1;
            source.Seek(tempOffset);
            int genID = source.Peek();

            // Is the byte in front of the marker a digit (the generation number)?
            if (ReadHelper.IsDigit(genID))
            {
                // Convert the ASCII digit to its numeric value.
                genID -= '0';
                tempOffset--;
                source.Seek(tempOffset);
                if (ReadHelper.IsSpace(source))
                {
                    // Skip the whitespace separating object number and generation number.
                    while (tempOffset > MINIMUM_SEARCH_OFFSET && ReadHelper.IsSpace(source))
                    {
                        source.Seek(--tempOffset);
                    }

                    // Walk backwards over the digits of the object number.
                    bool objectIDFound = false;
                    while (tempOffset > MINIMUM_SEARCH_OFFSET && ReadHelper.IsDigit(source))
                    {
                        source.Seek(--tempOffset);
                        objectIDFound = true;
                    }

                    if (objectIDFound)
                    {
                        // Step over the byte the backwards scan stopped on, then parse the number.
                        source.Read();
                        long objectId = ObjectHelper.ReadObjectNumber(source);
                        if (lastObjOffset > 0)
                        {
                            // add the former object ID only if there was a subsequent object ID
                            bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
                        }

                        lastObjectId = objectId;
                        lastGenID = genID;
                        lastObjOffset = tempOffset + 1;

                        // Continue behind the marker (the loop adds the final +1).
                        currentOffset += objMarker.Length - 1;
                        endobjFound = false;
                    }
                }
            }
        }
        else if (ReadHelper.IsString(source, endObjMarker))
        {
            endobjFound = true;
            currentOffset += endObjMarker.Length - 1;
        }

        currentOffset++;
    } while (currentOffset < lastEOFMarker && !source.IsEof());

    if ((lastEOFMarker < long.MaxValue || endobjFound) && lastObjOffset > 0)
    {
        // if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
        // the last object id has to be added here so that it can't get lost as there isn't any subsequent object id
        bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
    }

    // reestablish origin position
    source.Seek(originOffset);
}