예제 #1
0
        /// <summary>
        /// Scans backwards from the end of the input for the last <c>%%EOF</c> marker.
        /// </summary>
        /// <returns>
        /// The value of <c>bytes.CurrentOffset</c> at the position where the marker matched, or
        /// <c>long.MaxValue</c> when no marker was found.
        /// </returns>
        private long GetLastEndOfFileMarker()
        {
            const string searchTerm = "%%EOF";

            var originalOffset = bytes.CurrentOffset;

            // The marker cannot start any later than this offset and still fit in the input.
            var candidateOffset = bytes.Length - searchTerm.Length;

            if (candidateOffset < 0)
            {
                // The input is shorter than the marker itself: report "not found" without
                // ever seeking to a negative offset. The position has not been moved.
                return long.MaxValue;
            }

            bytes.Seek(candidateOffset);

            // The loop stops once the reader reports offset 0, so offset 0 itself is never tested.
            while (bytes.CurrentOffset > 0)
            {
                if (ReadHelper.IsString(bytes, searchTerm))
                {
                    var position = bytes.CurrentOffset;

                    // Put the reader back where the caller left it before returning.
                    bytes.Seek(originalOffset);

                    return position;
                }

                // Pre-decrement so every pass tests a new offset; a post-decrement here would
                // seek to the starting offset a second time before moving backwards.
                bytes.Seek(--candidateOffset);
            }

            bytes.Seek(originalOffset);
            return long.MaxValue;
        }
예제 #2
0
        /// <summary>
        /// Reads an integer from the input and validates it as a generation number.
        /// </summary>
        /// <param name="bytes">The input to read the number from.</param>
        /// <returns>The value read, when it lies between 0 and <c>GenerationNumberThreshold</c> inclusive.</returns>
        /// <exception cref="FormatException">
        /// The value read is negative or greater than <c>GenerationNumberThreshold</c>.
        /// </exception>
        public static int ReadGenerationNumber(IInputBytes bytes)
        {
            int generation = ReadHelper.ReadInt(bytes);

            var withinRange = generation >= 0 && generation <= GenerationNumberThreshold;

            if (withinRange)
            {
                return generation;
            }

            throw new FormatException($"Generation Number '{generation}' has more than 5 digits");
        }
예제 #3
0
        /// <summary>
        /// Reads a long integer from the input and validates it as an object number.
        /// </summary>
        /// <param name="bytes">The input to read the number from.</param>
        /// <returns>The value read, when it is non-negative and below <c>ObjectNumberThreshold</c>.</returns>
        /// <exception cref="FormatException">
        /// The value read is negative, or is equal to or greater than <c>ObjectNumberThreshold</c>.
        /// </exception>
        public static long ReadObjectNumber(IInputBytes bytes)
        {
            long objectNumber = ReadHelper.ReadLong(bytes);

            var withinRange = objectNumber >= 0 && objectNumber < ObjectNumberThreshold;

            if (withinRange)
            {
                return objectNumber;
            }

            throw new FormatException($"Object Number '{objectNumber}' has more than 10 digits or is negative");
        }
예제 #4
0
        /// <summary>
        /// Tests whether the first four bytes of <paramref name="data"/> are a whitespace byte
        /// followed by the letters "obj" in any mix of upper and lower case.
        /// </summary>
        /// <param name="data">Buffer to inspect; indices 0 through 3 are read.</param>
        /// <returns><c>true</c> when the buffer starts with whitespace + "obj" (case-insensitive).</returns>
        private static bool IsStartObjMarker(byte[] data)
        {
            // Each check bails out as soon as a byte fails to match, so later
            // indices are only touched when all earlier ones matched.
            if (!ReadHelper.IsWhitespace(data[0]))
            {
                return false;
            }

            if (data[1] != 'o' && data[1] != 'O')
            {
                return false;
            }

            if (data[2] != 'b' && data[2] != 'B')
            {
                return false;
            }

            return data[3] == 'j' || data[3] == 'J';
        }
예제 #5
0
        /// <summary>
        /// Brute-force scans the input for indirect object headers of the form
        /// "&lt;object number&gt; &lt;generation&gt; obj" and records an offset for each one found.
        /// </summary>
        /// <remarks>
        /// The result is cached in <c>objectLocations</c>; later calls return the cached dictionary
        /// without rescanning. The reader's position is restored to its entry value once the scan completes.
        /// Candidates whose preceding bytes do not form a valid "number number" pair are skipped.
        /// </remarks>
        /// <returns>A map from each discovered indirect reference to the offset recorded for it.</returns>
        public IReadOnlyDictionary<IndirectReference, long> GetObjectLocations()
        {
            if (objectLocations != null)
            {
                return objectLocations;
            }

            var lastEndOfFile = GetLastEndOfFileMarker();

            var results = new Dictionary<IndirectReference, long>();

            var originPosition = bytes.CurrentOffset;

            long currentOffset = MinimumSearchOffset;

            var inObject = false;

            do
            {
                if (inObject)
                {
                    // Inside an object body: walk forward until the "endobj" keyword is reached.
                    var atEndObj = false;

                    if (bytes.CurrentByte == 'e')
                    {
                        var next = bytes.Peek();

                        atEndObj = next.HasValue && next == 'n' && ReadHelper.IsString(bytes, "endobj");
                    }

                    // Step over the whole keyword when it matched, otherwise over a single byte.
                    var step = atEndObj ? "endobj".Length : 1;

                    for (var i = 0; i < step; i++)
                    {
                        bytes.MoveNext();
                        currentOffset++;
                    }

                    if (atEndObj)
                    {
                        inObject = false;
                    }

                    continue;
                }

                bytes.Seek(currentOffset);

                if (!ReadHelper.IsString(bytes, " obj"))
                {
                    currentOffset++;
                    continue;
                }

                // Current byte is ' '[obj]
                var offset = currentOffset - 1;

                bytes.Seek(offset);

                // Walk backwards collecting the generation number digits.
                var generationBytes = new StringBuilder();
                while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
                {
                    generationBytes.Insert(0, (char)bytes.CurrentByte);
                    offset--;
                    bytes.Seek(offset);
                }

                // We should now be at the space between object and generation number.
                if (!ReadHelper.IsSpace(bytes.CurrentByte))
                {
                    // Not a real header. Advance before retrying: 'continue' in a do/while jumps
                    // straight to the loop condition, so without this the same offset would be
                    // tested again forever.
                    currentOffset++;
                    continue;
                }

                bytes.Seek(--offset);

                // Walk backwards collecting the object number digits.
                var objectNumberBytes = new StringBuilder();
                while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
                {
                    objectNumberBytes.Insert(0, (char)bytes.CurrentByte);
                    offset--;
                    bytes.Seek(offset);
                }

                if (!ReadHelper.IsWhitespace(bytes.CurrentByte))
                {
                    // Same reasoning as above: move on so the scan makes progress.
                    currentOffset++;
                    continue;
                }

                // Empty or overflowing digit runs are false positives for a brute-force scan,
                // so they are skipped rather than allowed to throw.
                long obj;
                int  generation;

                if (!long.TryParse(objectNumberBytes.ToString(), NumberStyles.Integer, CultureInfo.InvariantCulture, out obj) ||
                    !int.TryParse(generationBytes.ToString(), NumberStyles.Integer, CultureInfo.InvariantCulture, out generation))
                {
                    currentOffset++;
                    continue;
                }

                results[new IndirectReference(obj, generation)] = bytes.CurrentOffset + 1;

                inObject = true;

                currentOffset++;

                bytes.Seek(currentOffset);
            } while (currentOffset < lastEndOfFile && !bytes.IsAtEnd());

            // reestablish origin position
            bytes.Seek(originPosition);

            objectLocations = results;

            return objectLocations;
        }
예제 #6
0
        /// <summary>
        /// Brute-force scans the input for indirect object headers of the form
        /// "&lt;object number&gt; &lt;generation&gt; obj" and records an offset for each one found.
        /// </summary>
        /// <remarks>
        /// The reader's position is restored to its entry value once the scan completes.
        /// Candidate markers whose preceding bytes do not parse as two numbers are skipped.
        /// </remarks>
        /// <param name="bytes">The input to scan.</param>
        /// <returns>A map from each discovered indirect reference to the offset recorded for it.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is <c>null</c>.</exception>
        /// <exception cref="PdfDocumentFormatException">
        /// The loop-protection counter exceeded 10,000,000 iterations without being reset.
        /// </exception>
        public static IReadOnlyDictionary<IndirectReference, long> GetObjectLocations(IInputBytes bytes)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            var loopProtection = 0;

            var lastEndOfFile = GetLastEndOfFileMarker(bytes);

            var results = new Dictionary<IndirectReference, long>();

            // Reused across candidates to avoid reallocating; cleared at the start of every attempt.
            var generationBytes   = new StringBuilder();
            var objectNumberBytes = new StringBuilder();

            var originPosition = bytes.CurrentOffset;

            var currentOffset = (long)MinimumSearchOffset;

            var currentlyInObject = false;

            var objBuffer = new byte[4];

            do
            {
                if (loopProtection > 10_000_000)
                {
                    throw new PdfDocumentFormatException("Failed to brute-force search the file due to an infinite loop.");
                }

                loopProtection++;

                if (currentlyInObject)
                {
                    // Inside an object body: walk forward until the "endobj" keyword is reached.
                    if (bytes.CurrentByte == 'e')
                    {
                        var next = bytes.Peek();

                        if (next.HasValue && next == 'n' && ReadHelper.IsString(bytes, "endobj"))
                        {
                            currentlyInObject = false;
                            loopProtection    = 0;

                            for (var i = 0; i < "endobj".Length; i++)
                            {
                                bytes.MoveNext();
                                currentOffset++;
                            }
                        }
                        else
                        {
                            // An 'e' that does not start "endobj": step one byte. The protection
                            // counter is deliberately left running on this path.
                            bytes.MoveNext();
                            currentOffset++;
                        }
                    }
                    else
                    {
                        bytes.MoveNext();
                        currentOffset++;
                        loopProtection = 0;
                    }

                    continue;
                }

                bytes.Seek(currentOffset);

                bytes.Read(objBuffer);

                if (!IsStartObjMarker(objBuffer))
                {
                    currentOffset++;
                    continue;
                }

                // Start every candidate with empty digit buffers. Clearing here (rather than on
                // each exit path) guarantees that digits collected for a rejected candidate can
                // never be prepended to the numbers of a later one.
                generationBytes.Clear();
                objectNumberBytes.Clear();

                // Current byte is ' '[obj]
                var offset = currentOffset + 1;

                bytes.Seek(offset);

                // Step back over the whitespace separating the generation number from "obj".
                while (ReadHelper.IsWhitespace(bytes.CurrentByte) && offset >= MinimumSearchOffset)
                {
                    bytes.Seek(--offset);
                }

                // Walk backwards collecting the generation number digits.
                while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
                {
                    generationBytes.Insert(0, (char)bytes.CurrentByte);
                    offset--;
                    bytes.Seek(offset);
                }

                // We should now be at the space between object and generation number.
                if (!ReadHelper.IsWhitespace(bytes.CurrentByte))
                {
                    currentOffset++;
                    continue;
                }

                // Bounded like the other backward scans so it cannot run past the start of the search range.
                while (ReadHelper.IsWhitespace(bytes.CurrentByte) && offset >= MinimumSearchOffset)
                {
                    bytes.Seek(--offset);
                }

                // Walk backwards collecting the object number digits.
                while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
                {
                    objectNumberBytes.Insert(0, (char)bytes.CurrentByte);
                    offset--;
                    bytes.Seek(offset);
                }

                // Empty or overflowing digit runs are false positives for a brute-force scan,
                // so they are skipped rather than allowed to throw.
                if (!long.TryParse(objectNumberBytes.ToString(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var obj) ||
                    !int.TryParse(generationBytes.ToString(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var generation))
                {
                    currentOffset++;
                    continue;
                }

                results[new IndirectReference(obj, generation)] = bytes.CurrentOffset;

                currentlyInObject = true;

                currentOffset++;

                bytes.Seek(currentOffset);
                loopProtection = 0;
            } while (currentOffset < lastEndOfFile && !bytes.IsAtEnd());

            // reestablish origin position
            bytes.Seek(originPosition);

            return results;
        }