Exemplo n.º 1
0
        /// <summary>
        /// Brute-force scans the input bytes for indirect object headers of the form
        /// "{object number} {generation} obj" and records an offset for each one found.
        /// </summary>
        /// <remarks>
        /// The result is stored in <c>objectLocations</c>; once that field is non-null it is returned
        /// directly without rescanning. The position of <c>bytes</c> is restored before returning.
        /// Candidate headers that do not parse (missing digits, wrong separators, numeric overflow)
        /// are skipped rather than aborting the scan.
        /// </remarks>
        /// <returns>A map from each indirect reference found to the offset recorded for it.</returns>
        public IReadOnlyDictionary <IndirectReference, long> GetObjectLocations()
        {
            if (objectLocations != null)
            {
                return(objectLocations);
            }

            var lastEndOfFile = GetLastEndOfFileMarker();

            var results = new Dictionary <IndirectReference, long>();

            // Remember where the caller left the input so it can be restored afterwards.
            var originPosition = bytes.CurrentOffset;

            long currentOffset = MinimumSearchOffset;

            bool inObject = false;

            do
            {
                if (inObject)
                {
                    // Inside an object body: walk forward byte by byte until "endobj" is seen.
                    if (bytes.CurrentByte == 'e')
                    {
                        var next = bytes.Peek();

                        if (next.HasValue && next == 'n' && ReadHelper.IsString(bytes, "endobj"))
                        {
                            inObject = false;

                            // Step over the whole "endobj" keyword.
                            for (int i = 0; i < "endobj".Length; i++)
                            {
                                bytes.MoveNext();
                                currentOffset++;
                            }

                            continue;
                        }
                    }

                    bytes.MoveNext();
                    currentOffset++;

                    continue;
                }

                bytes.Seek(currentOffset);

                if (!ReadHelper.IsString(bytes, " obj"))
                {
                    currentOffset++;
                    continue;
                }

                // Current byte is ' '[obj]; read backwards from here to collect the generation number.
                var offset = currentOffset - 1;

                bytes.Seek(offset);

                var generationBytes = new StringBuilder();
                while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
                {
                    generationBytes.Insert(0, (char)bytes.CurrentByte);
                    offset--;
                    bytes.Seek(offset);
                }

                // We should now be at the space between object and generation number.
                if (!ReadHelper.IsSpace(bytes.CurrentByte))
                {
                    // Always advance before retrying: a bare 'continue' here would re-match the same
                    // " obj" at the same offset on every iteration and never terminate.
                    currentOffset++;
                    continue;
                }

                bytes.Seek(--offset);

                var objectNumberBytes = new StringBuilder();
                while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
                {
                    objectNumberBytes.Insert(0, (char)bytes.CurrentByte);
                    offset--;
                    bytes.Seek(offset);
                }

                if (!ReadHelper.IsWhitespace(bytes.CurrentByte))
                {
                    currentOffset++;
                    continue;
                }

                // Reject candidates with no digits or with digit runs too long for the target type,
                // instead of letting Parse throw on malformed input.
                long obj;
                int generation;
                if (!long.TryParse(objectNumberBytes.ToString(), NumberStyles.Integer, CultureInfo.InvariantCulture, out obj)
                    || !int.TryParse(generationBytes.ToString(), NumberStyles.Integer, CultureInfo.InvariantCulture, out generation))
                {
                    currentOffset++;
                    continue;
                }

                // The input was last sought to the whitespace preceding the object number.
                // NOTE(review): the +1 presumably moves the stored offset onto the first digit - confirm
                // against the semantics of CurrentOffset after Seek.
                results[new IndirectReference(obj, generation)] = bytes.CurrentOffset + 1;

                inObject = true;

                currentOffset++;

                bytes.Seek(currentOffset);
            } while (currentOffset < lastEndOfFile && !bytes.IsAtEnd());

            // reestablish origin position
            bytes.Seek(originPosition);

            objectLocations = results;

            return(objectLocations);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Brute-force scans <paramref name="bytes"/> for indirect object headers of the form
        /// "{object number} {generation} obj" and records an offset for each one found.
        /// </summary>
        /// <remarks>
        /// The position of <paramref name="bytes"/> is restored before returning. Candidate headers
        /// that do not parse (missing digits, wrong separator, numeric overflow) are skipped.
        /// </remarks>
        /// <param name="bytes">The input to scan.</param>
        /// <returns>A map from each indirect reference found to the offset recorded for it.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
        /// <exception cref="PdfDocumentFormatException">The loop-protection counter exceeded its limit.</exception>
        public static IReadOnlyDictionary <IndirectReference, long> GetObjectLocations(IInputBytes bytes)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            // NOTE(review): this counter is only reset when an object header is accepted, when "endobj"
            // is found, or on a non-'e' byte inside an object. More than 1,000,000 consecutive bytes
            // outside any object will therefore trip it even though every path advances the offset.
            var loopProtection = 0;

            var lastEndOfFile = GetLastEndOfFileMarker(bytes);

            var results = new Dictionary <IndirectReference, long>();

            // Reused across candidates to avoid reallocating; cleared at the start of every attempt.
            var generationBytes   = new StringBuilder();
            var objectNumberBytes = new StringBuilder();

            // Remember where the caller left the input so it can be restored afterwards.
            var originPosition = bytes.CurrentOffset;

            var currentOffset = (long)MinimumSearchOffset;

            var currentlyInObject = false;

            do
            {
                if (loopProtection > 1_000_000)
                {
                    throw new PdfDocumentFormatException("Failed to brute-force search the file due to an infinite loop.");
                }

                loopProtection++;

                if (currentlyInObject)
                {
                    // Inside an object body: walk forward byte by byte until "endobj" is seen.
                    if (bytes.CurrentByte == 'e')
                    {
                        var next = bytes.Peek();

                        if (next.HasValue && next == 'n' && ReadHelper.IsString(bytes, "endobj"))
                        {
                            currentlyInObject = false;
                            loopProtection    = 0;

                            // Step over the whole "endobj" keyword.
                            for (var i = 0; i < "endobj".Length; i++)
                            {
                                bytes.MoveNext();
                                currentOffset++;
                            }
                        }
                        else
                        {
                            bytes.MoveNext();
                            currentOffset++;
                        }
                    }
                    else
                    {
                        bytes.MoveNext();
                        currentOffset++;
                        loopProtection = 0;
                    }

                    continue;
                }

                bytes.Seek(currentOffset);

                if (!ReadHelper.IsString(bytes, " obj"))
                {
                    currentOffset++;
                    continue;
                }

                // Start every candidate with empty buffers. Without this, digits collected for a
                // candidate that is rejected below would leak into the next candidate's numbers.
                generationBytes.Clear();
                objectNumberBytes.Clear();

                // Current byte is ' '[obj]; read backwards from here to collect the generation number.
                var offset = currentOffset - 1;

                bytes.Seek(offset);

                while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
                {
                    generationBytes.Insert(0, (char)bytes.CurrentByte);
                    offset--;
                    bytes.Seek(offset);
                }

                // We should now be at the space between object and generation number.
                if (!ReadHelper.IsSpace(bytes.CurrentByte))
                {
                    currentOffset++;
                    continue;
                }

                bytes.Seek(--offset);

                while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
                {
                    objectNumberBytes.Insert(0, (char)bytes.CurrentByte);
                    offset--;
                    bytes.Seek(offset);
                }

                // TryParse rejects both empty digit runs and runs too long for the target type,
                // so malformed headers are skipped instead of throwing.
                if (!long.TryParse(objectNumberBytes.ToString(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var obj)
                    || !int.TryParse(generationBytes.ToString(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var generation))
                {
                    currentOffset++;
                    continue;
                }

                // The input was last sought to the byte preceding the object number's first digit.
                results[new IndirectReference(obj, generation)] = bytes.CurrentOffset;

                currentlyInObject = true;

                currentOffset++;

                bytes.Seek(currentOffset);
                loopProtection = 0;
            } while (currentOffset < lastEndOfFile && !bytes.IsAtEnd());

            // reestablish origin position
            bytes.Seek(originPosition);

            return(results);
        }