Example #1
0
        private long GetLastEndOfFileMarker()
        {
            var originalOffset = reader.GetPosition();

            var searchTerm = OtherEncodings.StringAsLatin1Bytes("%%EOF");

            var minimumEndOffset = reader.Length() - searchTerm.Length;

            reader.Seek(minimumEndOffset);

            while (reader.GetPosition() > 0)
            {
                if (ReadHelper.IsString(reader, searchTerm))
                {
                    var position = reader.GetPosition();
                    reader.Seek(originalOffset);
                    return(position);
                }

                reader.Seek(minimumEndOffset--);
            }

            reader.Seek(originalOffset);
            return(long.MaxValue);
        }
Example #2
0
        private bool validateStreamLength(IRandomAccessRead source, long streamLength, long fileLength)
        {
            bool streamLengthIsValid = true;
            long originOffset        = source.GetPosition();
            long expectedEndOfStream = originOffset + streamLength;

            if (expectedEndOfStream > fileLength)
            {
                streamLengthIsValid = false;
                //LOG.warn("The end of the stream is out of range, using workaround to read the stream, "
                //        + "stream start position: " + originOffset + ", length: " + streamLength
                //        + ", expected end position: " + expectedEndOfStream);
            }
            else
            {
                source.Seek(expectedEndOfStream);
                ReadHelper.SkipSpaces(source);
                if (!ReadHelper.IsString(source, "endstream"))
                {
                    streamLengthIsValid = false;
                    //LOG.warn("The end of the stream doesn't point to the correct offset, using workaround to read the stream, "
                    //        + "stream start position: " + originOffset + ", length: " + streamLength
                    //        + ", expected end position: " + expectedEndOfStream);
                }
                source.Seek(originOffset);
            }
            return(streamLengthIsValid);
        }
Example #3
0
        public IReadOnlyDictionary <CosObjectKey, long> GetObjectLocations()
        {
            if (objectLocations != null)
            {
                return(objectLocations);
            }

            var lastEndOfFile = GetLastEndOfFileMarker();

            var results = new Dictionary <CosObjectKey, long>();

            var originPosition = reader.GetPosition();

            long currentOffset    = MinimumSearchOffset;
            long lastObjectId     = long.MinValue;
            int  lastGenerationId = int.MinValue;
            long lastObjOffset    = long.MinValue;

            byte[] objString    = OtherEncodings.StringAsLatin1Bytes(" obj");
            byte[] endobjString = OtherEncodings.StringAsLatin1Bytes("endobj");

            bool endobjFound = false;

            do
            {
                reader.Seek(currentOffset);
                if (ReadHelper.IsString(reader, objString))
                {
                    long tempOffset = currentOffset - 1;
                    reader.Seek(tempOffset);
                    int generationId = reader.Peek();

                    // is the next char a digit?
                    if (ReadHelper.IsDigit(generationId))
                    {
                        generationId -= 48;
                        tempOffset--;
                        reader.Seek(tempOffset);
                        if (ReadHelper.IsSpace(reader))
                        {
                            while (tempOffset > MinimumSearchOffset && ReadHelper.IsSpace(reader))
                            {
                                reader.Seek(--tempOffset);
                            }

                            bool objectIdFound = false;
                            while (tempOffset > MinimumSearchOffset && ReadHelper.IsDigit(reader))
                            {
                                reader.Seek(--tempOffset);
                                objectIdFound = true;
                            }

                            if (objectIdFound)
                            {
                                reader.Read();
                                long objectId = ObjectHelper.ReadObjectNumber(reader);
                                if (lastObjOffset > 0)
                                {
                                    // add the former object ID only if there was a subsequent object ID
                                    results[new CosObjectKey(lastObjectId, lastGenerationId)] = lastObjOffset;
                                }
                                lastObjectId     = objectId;
                                lastGenerationId = generationId;
                                lastObjOffset    = tempOffset + 1;
                                currentOffset   += objString.Length - 1;
                                endobjFound      = false;
                            }
                        }
                    }
                }
                else if (ReadHelper.IsString(reader, "endobj"))
                {
                    endobjFound    = true;
                    currentOffset += endobjString.Length - 1;
                }
                currentOffset++;
            } while (currentOffset < lastEndOfFile && !reader.IsEof());
            if ((lastEndOfFile < long.MaxValue || endobjFound) && lastObjOffset > 0)
            {
                // if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
                // the last object id has to be added here so that it can't get lost as there isn't any subsequent object id
                results[new CosObjectKey(lastObjectId, lastGenerationId)] = lastObjOffset;
            }

            // reestablish origin position
            reader.Seek(originPosition);

            objectLocations = results;

            return(objectLocations);
        }