/// <summary>
/// Parses the next value from <paramref name="reader"/>. When the value is a number that is
/// directly followed by another number, the pair is treated as "objectNumber generation R"
/// and the object registered under that key is fetched from <paramref name="pool"/>.
/// </summary>
/// <param name="reader">The source to read from, positioned at the start of the value.</param>
/// <param name="baseParser">The parser used to read the value and, for a reference, the generation number.</param>
/// <param name="pool">The pool handed to the parser and queried for the referenced object.</param>
/// <returns>The parsed value, or the result of <c>pool.Get</c> for a reference.</returns>
/// <exception cref="InvalidOperationException">
/// The object number or the generation number of a reference is not a <see cref="CosInt"/>.
/// </exception>
private static CosBase ParseValue(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
{
    var numOffset = reader.GetPosition();
    var value = baseParser.Parse(reader, pool);

    ReadHelper.SkipSpaces(reader);

    // proceed if the given object is a number and the following is a number as well
    if (!(value is ICosNumber) || !ReadHelper.IsDigit(reader))
    {
        return value;
    }

    // read the remaining information of the object number
    var genOffset = reader.GetPosition();
    var generationNumber = baseParser.Parse(reader, pool);

    ReadHelper.SkipSpaces(reader);
    ReadHelper.ReadExpectedChar(reader, 'R');

    if (!(value is CosInt))
    {
        throw new InvalidOperationException("expected number, actual=" + value + " at offset " + numOffset);
    }

    if (!(generationNumber is CosInt))
    {
        // Report the offending generation token (previously the object number was printed here by mistake).
        throw new InvalidOperationException("expected number, actual=" + generationNumber + " at offset " + genOffset);
    }

    var key = new CosObjectKey(((CosInt)value).AsLong(), ((CosInt)generationNumber).AsInt());

    // dereference the object
    return pool.Get(key);
}
/// <summary>
/// Checks whether the string built by <c>ObjectHelper.createObjectString</c> for
/// <paramref name="objectKey"/> is present in <paramref name="source"/> at <paramref name="offset"/>.
/// The read position of <paramref name="source"/> is moved back to where it was on entry.
/// </summary>
/// <param name="source">The source to inspect.</param>
/// <param name="objectKey">The key whose number and generation are expected at the offset.</param>
/// <param name="offset">The position at which the object string is expected.</param>
/// <returns><c>true</c> if the expected string was found at the offset; otherwise <c>false</c>.</returns>
private bool checkObjectKeys(IRandomAccessRead source, CosObjectKey objectKey, long offset)
{
    // there can't be any object at the very beginning of a pdf
    if (offset < MINIMUM_SEARCH_OFFSET)
    {
        return false;
    }

    long number = objectKey.Number;
    long generation = objectKey.Generation;
    long startPosition = source.GetPosition();
    string expectedHeader = ObjectHelper.createObjectString(number, generation);

    try
    {
        source.Seek(offset);

        var expectedBytes = OtherEncodings.StringAsLatin1Bytes(expectedHeader);
        if (ReadHelper.IsString(source, expectedBytes))
        {
            // everything is ok, the expected object header is at the given offset
            source.Seek(startPosition);
            return true;
        }
    }
    catch (InvalidOperationException)
    {
        // Deliberately ignored: a failure here is treated as "no valid object number at this offset".
    }
    finally
    {
        source.Seek(startPosition);
    }

    // no valid object number found
    return false;
}
/// <summary>
/// Scans <paramref name="source"/> from <c>MINIMUM_SEARCH_OFFSET</c> for "%%EOF" markers and stores
/// in <c>lastEOFMarker</c> the position of the last marker that is not followed by a readable
/// object number and generation number. If no such marker is found, <c>long.MaxValue</c> is stored.
/// Does nothing when <c>lastEOFMarker</c> is already set. The read position is restored afterwards.
/// </summary>
/// <param name="source">The source to scan.</param>
private void bfSearchForLastEOFMarker(IRandomAccessRead source)
{
    if (lastEOFMarker != null)
    {
        // a previous search already produced a result
        return;
    }

    long startPosition = source.GetPosition();
    source.Seek(MINIMUM_SEARCH_OFFSET);

    while (!source.IsEof())
    {
        // search for EOF marker
        if (ReadHelper.IsString(source, "%%EOF"))
        {
            long candidate = source.GetPosition();
            source.Seek(candidate + 5);

            try
            {
                // If an object number and generation can be read after the marker, the data
                // that follows is most likely valid pdf content (linearized, updated or
                // truncated file), so this marker is not taken as the last one.
                ReadHelper.SkipSpaces(source);
                ObjectHelper.ReadObjectNumber(source);
                ObjectHelper.ReadGenerationNumber(source);
            }
            catch (InvalidOperationException)
            {
                // save the EOF marker as the following data is most likely some garbage
                lastEOFMarker = candidate;
            }
        }

        source.Read();
    }

    source.Seek(startPosition);

    // no EOF marker found
    if (lastEOFMarker == null)
    {
        lastEOFMarker = long.MaxValue;
    }
}
/// <summary>
/// Tests whether the bytes at the current position of <paramref name="reader"/> equal
/// <paramref name="str"/>. Reading stops at the first mismatch, and the reader is moved back
/// to its starting position before the result is returned.
/// </summary>
/// <param name="reader">The reader to compare against.</param>
/// <param name="str">The expected byte sequence.</param>
/// <returns><c>true</c> if every expected byte was read in order; otherwise <c>false</c>.</returns>
public static bool IsString(IRandomAccessRead reader, IEnumerable<byte> str)
{
    long startPosition = reader.GetPosition();
    bool matches = true;

    using (var expected = str.GetEnumerator())
    {
        // Stop consuming input as soon as one byte differs.
        while (matches && expected.MoveNext())
        {
            matches = reader.Read() == expected.Current;
        }
    }

    reader.Seek(startPosition);

    return matches;
}
/// <summary>
/// Skips leading spaces, reads the next number token from <paramref name="reader"/> and
/// converts it to a <see cref="long"/>.
/// </summary>
/// <param name="reader">The reader to consume the number from.</param>
/// <returns>The parsed value.</returns>
/// <exception cref="InvalidOperationException">
/// The token is not a valid integer or does not fit into a <see cref="long"/>. The token is
/// pushed back onto the reader before the exception is thrown.
/// </exception>
public static long ReadLong(IRandomAccessRead reader)
{
    SkipSpaces(reader);

    StringBuilder longBuffer = ReadStringNumber(reader);
    string text = longBuffer.ToString();

    try
    {
        // The token is file data, not user-facing text, so parse it culture-independently.
        return long.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
    }
    catch (Exception e) when (e is FormatException || e is OverflowException)
    {
        // long.Parse reports an out-of-range digit run as OverflowException rather than
        // FormatException; both mean "not a usable long" and must surface as the
        // InvalidOperationException that callers handle.
        var bytesToReverse = OtherEncodings.StringAsLatin1Bytes(text);
        reader.Unread(bytesToReverse);

        throw new InvalidOperationException($"Error: Expected a long type at offset {reader.GetPosition()}, instead got '{longBuffer}'", e);
    }
}
/// <summary>
/// Brute-force search for cross-reference streams. Scans <c>source</c> from
/// <c>MinimumSearchOffset</c> for the "/XRef" name and, for each hit, searches backwards in
/// 10-byte windows (at most 39 of them) for the " obj" keyword of the object header. When a
/// header with a digit, a space and at least one preceding digit is found, the position where
/// that leading digit run begins is added to <c>bfSearchXRefStreamsOffsets</c>.
/// The search runs once; later calls return immediately. The read position is restored afterwards.
/// </summary>
private void BfSearchForXRefStreams()
{
    if (bfSearchXRefStreamsOffsets != null)
    {
        // the search has already been performed
        return;
    }

    // Cross-reference streams are identified by the name /XRef. Searching for the lower-case
    // "xref" keyword instead would hit xref tables and "startxref", not streams.
    const string xrefStreamMarker = "/XRef";
    const string objString = " obj";

    // a pdf may contain more than one /XRef entry
    bfSearchXRefStreamsOffsets = new List<long>();
    long originOffset = source.GetPosition();
    source.Seek(MinimumSearchOffset);

    // search for XRef streams
    while (!source.IsEof())
    {
        if (ReadHelper.IsString(source, xrefStreamMarker))
        {
            // search backwards for the beginning of the stream
            long newOffset = -1;
            long xrefOffset = source.GetPosition();
            bool objFound = false;

            for (int i = 1; i < 40 && !objFound; i++)
            {
                long currentOffset = xrefOffset - (i * 10);
                if (currentOffset <= 0)
                {
                    continue;
                }

                source.Seek(currentOffset);
                for (int j = 0; j < 10; j++)
                {
                    if (ReadHelper.IsString(source, objString))
                    {
                        // the byte before " obj" is expected to be the generation digit
                        long tempOffset = currentOffset - 1;
                        source.Seek(tempOffset);
                        int genId = source.Peek();

                        // is the next char a digit?
                        if (ReadHelper.IsDigit(genId))
                        {
                            tempOffset--;
                            source.Seek(tempOffset);
                            if (ReadHelper.IsSpace(source))
                            {
                                // walk backwards over the digits of the object number
                                int length = 0;
                                source.Seek(--tempOffset);
                                while (tempOffset > MinimumSearchOffset && ReadHelper.IsDigit(source))
                                {
                                    source.Seek(--tempOffset);
                                    length++;
                                }

                                if (length > 0)
                                {
                                    // step forward over the non-digit byte the loop stopped on
                                    source.Read();
                                    newOffset = source.GetPosition();
                                }
                            }
                        }

                        objFound = true;
                        break;
                    }

                    currentOffset++;
                    source.Read();
                }
            }

            if (newOffset > -1)
            {
                bfSearchXRefStreamsOffsets.Add(newOffset);
            }

            // continue scanning behind the marker that was just handled
            source.Seek(xrefOffset + xrefStreamMarker.Length);
        }

        source.Read();
    }

    source.Seek(originOffset);
}
/// <summary>
/// Brute-force search for object headers of the form "objectNumber generation obj". Scans
/// <paramref name="source"/> from <c>MINIMUM_SEARCH_OFFSET</c> up to <c>lastEOFMarker</c> and
/// fills <c>bfSearchCOSObjectKeyOffsets</c> with the offset found for each object key.
/// An object is recorded once a subsequent object header is found; the final object is recorded
/// only if an EOF marker was located or an "endobj" was seen after it.
/// The read position is restored afterwards.
/// </summary>
/// <param name="source">The source to scan.</param>
private void bfSearchForObjects(IRandomAccessRead source)
{
    // The leading space is part of the marker: the byte directly before the match is then the
    // generation digit, which is what the backwards scan below relies on.
    const string objMarker = " obj";
    const string endObjMarker = "endobj";

    bfSearchForLastEOFMarker(source);
    bfSearchCOSObjectKeyOffsets = new Dictionary<CosObjectKey, long>();

    long originOffset = source.GetPosition();
    long currentOffset = MINIMUM_SEARCH_OFFSET;
    long lastObjectId = long.MinValue;
    int lastGenID = int.MinValue;
    long lastObjOffset = long.MinValue;
    bool endobjFound = false;

    do
    {
        source.Seek(currentOffset);
        if (ReadHelper.IsString(source, objMarker))
        {
            long tempOffset = currentOffset - 1;
            source.Seek(tempOffset);
            int genID = source.Peek();

            // is the next char a digit?
            if (ReadHelper.IsDigit(genID))
            {
                // convert the ASCII digit to its numeric value (only one generation digit is read)
                genID -= '0';
                tempOffset--;
                source.Seek(tempOffset);
                if (ReadHelper.IsSpace(source))
                {
                    // skip the whitespace between object number and generation
                    while (tempOffset > MINIMUM_SEARCH_OFFSET && ReadHelper.IsSpace(source))
                    {
                        source.Seek(--tempOffset);
                    }

                    // walk backwards over the digits of the object number
                    bool objectIDFound = false;
                    while (tempOffset > MINIMUM_SEARCH_OFFSET && ReadHelper.IsDigit(source))
                    {
                        source.Seek(--tempOffset);
                        objectIDFound = true;
                    }

                    if (objectIDFound)
                    {
                        // step forward over the non-digit byte the loop stopped on
                        source.Read();
                        long objectId = ObjectHelper.ReadObjectNumber(source);
                        if (lastObjOffset > 0)
                        {
                            // add the former object ID only if there was a subsequent object ID
                            bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
                        }

                        lastObjectId = objectId;
                        lastGenID = genID;
                        lastObjOffset = tempOffset + 1;
                        currentOffset += objMarker.Length - 1;
                        endobjFound = false;
                    }
                }
            }
        }
        else if (ReadHelper.IsString(source, endObjMarker))
        {
            endobjFound = true;
            currentOffset += endObjMarker.Length - 1;
        }

        currentOffset++;
    } while (currentOffset < lastEOFMarker && !source.IsEof());

    if ((lastEOFMarker < long.MaxValue || endobjFound) && lastObjOffset > 0)
    {
        // if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
        // the last object id has to be added here so that it can't get lost as there isn't any subsequent object id
        bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
    }

    // reestablish origin position
    source.Seek(originOffset);
}
/// <summary>
/// Parses the next direct object from <paramref name="reader"/>, choosing the concrete parser
/// from the first non-space character.
/// </summary>
/// <param name="reader">The source to read from.</param>
/// <param name="pool">The pool passed on to the dictionary and array parsers.</param>
/// <returns>
/// The parsed object; <c>null</c> when the reader has no more data or when an unrecognised,
/// non-empty token was read.
/// </returns>
/// <exception cref="IOException">
/// A token starting with 't' or 'f' is not "true"/"false", or an unrecognised token is empty.
/// </exception>
public CosBase Parse(IRandomAccessRead reader, CosObjectPool pool)
{
    ReadHelper.SkipSpaces(reader);

    int nextByte = reader.Peek();
    if (nextByte == -1)
    {
        return null;
    }

    char c = (char)nextByte;

    switch (c)
    {
        case '<':
        {
            // Look one character past the first bracket to tell "<<" (dictionary) from "<" (string),
            // then put the bracket back so the chosen parser sees the full token.
            int leftBracket = reader.Read();
            char following = (char)reader.Peek();
            reader.Unread(leftBracket);

            if (following != '<')
            {
                return stringParser.Parse(reader);
            }

            CosBase dictionary = dictionaryParser.Parse(reader, this, pool);
            ReadHelper.SkipSpaces(reader);
            return dictionary;
        }

        case '[':
            // array
            return arrayParser.Parse(reader, this, pool);

        case '(':
            return stringParser.Parse(reader);

        case '/':
            // name
            return nameParser.Parse(reader);

        case 'n':
            // null
            ReadHelper.ReadExpectedString(reader, "null");
            return CosNull.Null;

        case 't':
        {
            string truestring = OtherEncodings.BytesAsLatin1String(reader.ReadFully(4));
            if (!truestring.Equals("true"))
            {
                throw new IOException("expected true actual='" + truestring + "' " + reader + "' at offset " + reader.GetPosition());
            }

            return PdfBoolean.True;
        }

        case 'f':
        {
            string falsestring = OtherEncodings.BytesAsLatin1String(reader.ReadFully(5));
            if (!falsestring.Equals("false"))
            {
                throw new IOException("expected false actual='" + falsestring + "' " + reader + "' at offset " + reader.GetPosition());
            }

            return PdfBoolean.False;
        }

        case 'R':
            reader.Read();
            return new CosObject(null);

        default:
        {
            bool startsNumber = char.IsDigit(c) || c == '-' || c == '+' || c == '.';
            if (!startsNumber)
            {
                // This is not supposed to happen, but we will allow for it
                // so we are more compatible with PDF writers that don't
                // follow the spec
                string badstring = ReadHelper.ReadString(reader);
                if (badstring == string.Empty)
                {
                    int peek = reader.Peek();

                    // we can end up in an infinite loop otherwise
                    throw new IOException("Unknown dir object c='" + c + "' cInt=" + (int)c + " peek='" + (char)peek + "' peekInt=" + peek + " at offset " + reader.GetPosition());
                }

                // if it's an endstream/endobj, we want to put it back so the caller will see it
                if (string.Equals("endobj", badstring) || string.Equals("endstream", badstring))
                {
                    reader.Unread(OtherEncodings.StringAsLatin1Bytes(badstring));
                }

                return null;
            }

            // Collect every character that can belong to a number, including exponent markers.
            StringBuilder buf = new StringBuilder();
            int ic;
            for (ic = reader.Read(); ; ic = reader.Read())
            {
                char digit = (char)ic;
                bool partOfNumber = char.IsDigit(digit) || digit == '-' || digit == '+' || digit == '.'
                                    || digit == 'E' || digit == 'e';
                if (!partOfNumber)
                {
                    break;
                }

                buf.Append(digit);
            }

            // push back the terminating character unless the end of the data was reached
            if (ic != -1)
            {
                reader.Unread(ic);
            }

            return CosNumberFactory.get(buf.ToString()) as CosBase;
        }
    }
}