Ejemplo n.º 1
0
        // The sample document contains a commented "%AZ 0 obj" line between two real
        // object headers; the search is expected to report exactly the two real objects.
        public void ReaderEscapesUnexpectedObject()
        {
            const string pdf = @"%PDF-1.7
abcd

1 0 obj
<< /Type /Any >>

endobj

%AZ 0 obj
11 0 obj
769
endobj

%%EOF";

            // Offsets are taken from the text itself so the test does not hard-code positions.
            var firstObjectOffset = pdf.IndexOf("1 0 obj", StringComparison.OrdinalIgnoreCase);
            var secondObjectOffset = pdf.IndexOf("11 0 obj", StringComparison.OrdinalIgnoreCase);
            var expected = new long[] { firstObjectOffset, secondObjectOffset };

            var latin1 = OtherEncodings.StringAsLatin1Bytes(pdf);
            var found = BruteForceSearcher.GetObjectLocations(new ByteArrayInputBytes(latin1));

            Assert.Equal(2, found.Count);
            Assert.Equal(expected, found.Values);
        }
Ejemplo n.º 2
0
        // The sample document contains a "16-0 obj" line between two well-formed object
        // headers; the search is expected to report only objects 1 and 5.
        public void ReaderEscapesUnexpectedGenerationNumber()
        {
            const string pdf = @"%PDF-2.0
abcdefghijklmnop

1 0 obj
256
endobj

16-0 obj

5 0 obj
<< /IsEmpty false >>
endobj";

            // Offsets are taken from the text itself so the test does not hard-code positions.
            var firstObjectOffset = pdf.IndexOf("1 0 obj", StringComparison.OrdinalIgnoreCase);
            var secondObjectOffset = pdf.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase);
            var expected = new long[] { firstObjectOffset, secondObjectOffset };

            var latin1 = OtherEncodings.StringAsLatin1Bytes(pdf);
            var found = BruteForceSearcher.GetObjectLocations(new ByteArrayInputBytes(latin1));

            Assert.Equal(2, found.Count);
            Assert.Equal(expected, found.Values);
        }
Ejemplo n.º 3
0
        // Runs the brute-force search over the Inkscape sample document and checks the
        // number of objects found plus the offsets recorded for objects 1 to 9.
        public void BruteForceSearcherFileOffsetsCorrect()
        {
            var path = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");

            using (var stream = File.OpenRead(path))
            {
                var input = new StreamInputBytes(stream);

                var found = new BruteForceSearcher(input).GetObjectLocations();

                Assert.Equal(13, found.Count);

                // Expected offsets for objects 1..9 (generation 0); index i holds object i + 1.
                var expectedOffsets = new long[] { 6183, 244, 15, 222, 5766, 353, 581, 5068, 5091 };

                for (var i = 0; i < expectedOffsets.Length; i++)
                {
                    Assert.Equal(expectedOffsets[i], found[new IndirectReference(i + 1, 0)]);
                }

                // The text at the reported offset must begin with the object's own header.
                var text = GetStringAt(input, found[new IndirectReference(3, 0)]);
                Assert.StartsWith("3 0 obj", text);
            }
        }
Ejemplo n.º 4
0
        // Runs the brute-force search over the Open Office sample document and checks the
        // number of objects found plus the offsets recorded for objects 1 to 13.
        public void BruteForceSearcherFileOffsetsCorrectOpenOffice()
        {
            var path = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");
            var input = new ByteArrayInputBytes(File.ReadAllBytes(path));

            var found = BruteForceSearcher.GetObjectLocations(input);

            Assert.Equal(13, found.Count);

            // Expected offsets for objects 1..13 (generation 0); index i holds object i + 1.
            var expectedOffsets = new long[]
            {
                17, 249, 14291, 275, 382, 13283, 13309,
                13556, 13926, 14183, 14224, 14428, 14488
            };

            for (var i = 0; i < expectedOffsets.Length; i++)
            {
                Assert.Equal(expectedOffsets[i], found[new IndirectReference(i + 1, 0)]);
            }

            // The text at the reported offset must begin with the object's own header.
            var text = GetStringAt(input, found[new IndirectReference(12, 0)]);

            Assert.StartsWith("12 0 obj", text);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Looks up the offset recorded for <paramref name="reference"/>, checking the locally cached
        /// cross reference entries first and the brute-force searcher's locations second.
        /// </summary>
        /// <param name="reference">The object to locate.</param>
        /// <param name="offset">The offset that was found when the method returns <see langword="true"/>.</param>
        /// <returns><see langword="true"/> if either source contains the reference; otherwise <see langword="false"/>.</returns>
        public bool TryGetOffset(IndirectReference reference, out long offset)
        {
            // The table is only requested while it has not yet been copied into the cache;
            // a null table leaves the flag unset so the next call asks again.
            var table = loadedFromTable ? null : crossReferenceTable.Invoke();

            if (table != null)
            {
                foreach (var pair in table.ObjectOffsets)
                {
                    offsets[pair.Key] = pair.Value;
                }

                loadedFromTable = true;
            }

            // The searcher is only consulted when the cached entries do not contain the reference.
            return offsets.TryGetValue(reference, out offset)
                   || searcher.GetObjectLocations().TryGetValue(reference, out offset);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Returns the object identified by <paramref name="objectNumber"/> and <paramref name="objectGeneration"/>,
        /// using the pooled instance when it is already populated and otherwise parsing it from the location
        /// given by the cross reference table (or, in lenient mode, by the brute-force searcher).
        /// </summary>
        /// <param name="reader">The source passed on to the file and compressed-stream parsers.</param>
        /// <param name="objectNumber">The number of the object to load.</param>
        /// <param name="objectGeneration">The generation of the object to load.</param>
        /// <param name="pool">The pool consulted for an already-parsed object. Must not be null.</param>
        /// <param name="crossReferenceTable">The table of known offsets. Must not be null unless the pool already holds the object.</param>
        /// <param name="bruteForceSearcher">Used in lenient mode when the table has no entry for the object.</param>
        /// <param name="isLenient">Whether to fall back to the brute-force searcher for a missing entry.</param>
        /// <param name="requireExistingObject">Whether a missing or non-positive table entry is an error.</param>
        /// <returns>The parsed object, or <c>CosNull.Null</c> when no location could be found.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="pool"/> is null, or <paramref name="crossReferenceTable"/> is null and the object is not pooled.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="requireExistingObject"/> is set and the table entry is missing or not positive.
        /// </exception>
        public CosBase Parse(IRandomAccessRead reader, long objectNumber, int objectGeneration,
                             CosObjectPool pool, CrossReferenceTable crossReferenceTable,
                             BruteForceSearcher bruteForceSearcher,
                             bool isLenient,
                             bool requireExistingObject)
        {
            if (pool == null)
            {
                throw new ArgumentNullException(nameof(pool));
            }

            var key = new CosObjectKey(objectNumber, objectGeneration);

            var pooled = pool.GetOrCreateDefault(key);

            if (pooled.GetObject() != null)
            {
                return pooled.GetObject();
            }

            if (crossReferenceTable == null)
            {
                throw new ArgumentNullException(nameof(crossReferenceTable));
            }

            // Positive values are treated as file offsets; values of zero or below take the compressed-stream path.
            var location = TryGet(key, crossReferenceTable.ObjectOffsets);

            if (requireExistingObject && (location == null || location <= 0))
            {
                throw new InvalidOperationException("Object must be defined and not compressed: " + key);
            }

            if (location == null && isLenient)
            {
                location = TryGet(key, bruteForceSearcher.GetObjectLocations());

                if (location != null)
                {
                    // Record the recovered offset in the table.
                    crossReferenceTable.UpdateOffset(key, location.Value);
                }
            }

            if (location == null)
            {
                return CosNull.Null;
            }

            if (location > 0)
            {
                return ParseObjectFromFile(location.Value, reader, key, pool, isLenient);
            }

            // The negated value is handed to the compressed-stream parser.
            return ParseCompressedStreamObject(reader, -location.Value, objectNumber, pool, crossReferenceTable, bruteForceSearcher, isLenient);
        }
Ejemplo n.º 7
0
        // The input is moved to position 593 before searching; the reported offsets
        // are still expected to equal TestDataOffsets.
        public void BruteForceSearcherCorrectlyFindsAllObjectsWhenOffset()
        {
            var latin1 = OtherEncodings.StringAsLatin1Bytes(TestData);
            var bytes = new ByteArrayInputBytes(latin1);

            bytes.Seek(593);

            var found = BruteForceSearcher.GetObjectLocations(bytes);

            Assert.Equal(TestDataOffsets, found.Values);
        }
Ejemplo n.º 8
0
        // Calls GetObjectLocations twice on the same searcher and checks that the second
        // result has the same size and contains every key from the first.
        public void ReaderOnlyCallsOnce()
        {
            var reader = StringBytesTestConverter.Convert(TestData, false);

            var searcher = new BruteForceSearcher(reader.Bytes);

            var locations = searcher.GetObjectLocations();

            Assert.Equal(4, locations.Count);

            var newLocations = searcher.GetObjectLocations();

            // Verify the SECOND result here; re-checking the first one would assert nothing new.
            Assert.Equal(4, newLocations.Count);

            foreach (var keyValuePair in locations)
            {
                Assert.Contains(newLocations.Keys, x => x.Equals(keyValuePair.Key));
            }
        }
Ejemplo n.º 9
0
        // Searches TestData from the start and expects four objects at the offsets
        // listed in TestDataOffsets.
        public void SearcherFindsCorrectObjects()
        {
            var latin1 = OtherEncodings.StringAsLatin1Bytes(TestData);

            var found = BruteForceSearcher.GetObjectLocations(new ByteArrayInputBytes(latin1));

            Assert.Equal(4, found.Count);
            Assert.Equal(TestDataOffsets, found.Values);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Returns the object locations from the brute-force searcher, asking the searcher only
        /// while no result has been stored in <c>objectKeyOffsets</c>.
        /// </summary>
        /// <returns>The stored locations.</returns>
        private IReadOnlyDictionary <IndirectReference, long> getBFCosObjectOffsets()
        {
            if (objectKeyOffsets != null)
            {
                return objectKeyOffsets;
            }

            objectKeyOffsets = bruteForceSearcher.GetObjectLocations();

            return objectKeyOffsets;
        }
Ejemplo n.º 11
0
        // Calls GetObjectLocations twice, switching the reader's Throw flag on between the
        // calls, and checks the second result has the same size and the very same key instances.
        public void ReaderOnlyCallsOnce()
        {
            var bytes = OtherEncodings.StringAsLatin1Bytes(TestData);

            var reader = new ThrowingReader(new RandomAccessBuffer(bytes));

            var searcher = new BruteForceSearcher(reader);

            var locations = searcher.GetObjectLocations();

            Assert.Equal(4, locations.Count);

            reader.Throw = true;

            var newLocations = searcher.GetObjectLocations();

            // Verify the SECOND result here; re-checking the first one would assert nothing new.
            Assert.Equal(4, newLocations.Count);

            foreach (var keyValuePair in locations)
            {
                Assert.Contains(newLocations.Keys, x => ReferenceEquals(x, keyValuePair.Key));
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Returns the object identified by <paramref name="indirectReference"/>, using the pooled instance when
        /// it is already populated and otherwise parsing it from the location given by the cross reference
        /// table (or, in lenient mode, by the brute-force searcher).
        /// </summary>
        /// <param name="indirectReference">The object number and generation to load.</param>
        /// <param name="reader">The source passed on to the file and compressed-stream parsers.</param>
        /// <param name="isLenientParsing">
        /// Whether to fall back to the brute-force searcher for a missing entry and to return
        /// <c>CosNull.Null</c> instead of throwing when the object cannot be located.
        /// </param>
        /// <param name="requireExistingObject">Whether a missing or non-positive table entry is an error.</param>
        /// <returns>The parsed object, or <c>CosNull.Null</c> when lenient and no location could be found.</returns>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="requireExistingObject"/> is set and the table entry is missing or not positive,
        /// or parsing is not lenient and no location was found.
        /// </exception>
        public CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false)
        {
            var key = new CosObjectKey(indirectReference.ObjectNumber, indirectReference.Generation);

            var pooled = objectPool.GetOrCreateDefault(key);

            if (pooled.GetObject() != null)
            {
                return pooled.GetObject();
            }

            // Positive values are treated as file offsets; values of zero or below take the compressed-stream path.
            var location = TryGet(key, crossReferenceTable.ObjectOffsets);

            if (requireExistingObject && (location == null || location <= 0))
            {
                throw new InvalidOperationException("Object must be defined and not compressed: " + key);
            }

            if (location == null && isLenientParsing)
            {
                location = TryGet(key, bruteForceSearcher.GetObjectLocations());

                if (location != null)
                {
                    // Record the recovered offset in the table.
                    crossReferenceTable.UpdateOffset(key, location.Value);
                }
            }

            if (location == null)
            {
                if (!isLenientParsing)
                {
                    throw new InvalidOperationException($"Could not locate the object {key.Number} which was not found in the cross reference table.");
                }

                return CosNull.Null;
            }

            if (location > 0)
            {
                return ParseObjectFromFile(location.Value, reader, key, objectPool, isLenientParsing);
            }

            // The negated value is handed to the compressed-stream parser.
            return ParseCompressedStreamObject(reader, -location.Value, indirectReference.ObjectNumber, isLenientParsing);
        }
Ejemplo n.º 13
0
        // Searches TestData and expects four objects whose offsets match the positions
        // of their headers in the text.
        public void SearcherFindsCorrectObjects()
        {
            var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(TestData));

            var searcher = new BruteForceSearcher(input);

            var locations = searcher.GetObjectLocations();

            Assert.Equal(4, locations.Count);

            var expectedLocations = new long[]
            {
                TestData.IndexOf("2 17 obj", StringComparison.OrdinalIgnoreCase),
                TestData.IndexOf("3 0 obj", StringComparison.OrdinalIgnoreCase),
                TestData.IndexOf("4 0 obj", StringComparison.OrdinalIgnoreCase),
                TestData.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase)
            };

            // Expected value goes first so a failure message labels the two sides correctly.
            Assert.Equal(expectedLocations, locations.Values);
        }
Ejemplo n.º 14
0
        // Searches TestData through a RandomAccessBuffer and expects four objects whose
        // offsets match the positions of their headers in the text.
        public void SearcherFindsCorrectObjects()
        {
            var bytes = OtherEncodings.StringAsLatin1Bytes(TestData);

            var reader = new RandomAccessBuffer(bytes);

            var searcher = new BruteForceSearcher(reader);

            var locations = searcher.GetObjectLocations();

            Assert.Equal(4, locations.Count);

            var expectedLocations = new long[]
            {
                TestData.IndexOf("2 0 obj", StringComparison.OrdinalIgnoreCase),
                TestData.IndexOf("3 0 obj", StringComparison.OrdinalIgnoreCase),
                TestData.IndexOf("4 0 obj", StringComparison.OrdinalIgnoreCase),
                TestData.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase)
            };

            // Expected value goes first so a failure message labels the two sides correctly.
            Assert.Equal(expectedLocations, locations.Values);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Check that the offsets in the cross reference are correct.
        /// </summary>
        /// <param name="bytes">The input that is validated against and, on failure, searched by brute force.</param>
        /// <param name="crossReferenceTable">The table whose object offsets are checked.</param>
        /// <param name="log">Passed through to the offset validation.</param>
        /// <param name="actualOffsets">
        /// The table's own offsets when validation succeeds or the brute-force search finds nothing;
        /// otherwise the table's negative entries combined with the brute-force results, with the
        /// brute-force value taking precedence for a key present in both.
        /// </param>
        /// <returns><see langword="true"/> if the table's offsets validated; otherwise <see langword="false"/>.</returns>
        public static bool ValidateCrossReferenceOffsets(IInputBytes bytes, CrossReferenceTable crossReferenceTable, ILog log,
                                                         out IReadOnlyDictionary <IndirectReference, long> actualOffsets)
        {
            actualOffsets = crossReferenceTable.ObjectOffsets;

            if (ValidateXrefOffsets(bytes, crossReferenceTable.ObjectOffsets, log))
            {
                return true;
            }

            var bruteForceOffsets = BruteForceSearcher.GetObjectLocations(bytes);

            if (bruteForceOffsets.Count == 0)
            {
                // Nothing better is available; the caller keeps the table's offsets.
                return false;
            }

            var builderOffsets = new Dictionary<IndirectReference, long>();

            foreach (var entry in crossReferenceTable.ObjectOffsets)
            {
                if (entry.Value < 0)
                {
                    // Trust stream offsets for now.
                    // TODO: more validation of streams.
                    builderOffsets[entry.Key] = entry.Value;
                }
            }

            // Copied once, after the table entries, so brute-force values win on a shared key
            // and are still used when the table has no entries at all.
            foreach (var item in bruteForceOffsets)
            {
                builderOffsets[item.Key] = item.Value;
            }

            actualOffsets = builderOffsets;

            return false;
        }
Ejemplo n.º 16
0
        // Passing null to the static search must raise ArgumentNullException.
        public void ReaderNull_Throws()
        {
            Action searchWithNullInput = () => BruteForceSearcher.GetObjectLocations(null);

            Assert.Throws<ArgumentNullException>(searchWithNullInput);
        }
        /// <summary>
        /// Check that the offsets in the cross reference are correct.
        /// </summary>
        /// <param name="bytes">The input that is validated against and, on failure, searched by brute force.</param>
        /// <param name="crossReferenceTable">The table whose object offsets are checked.</param>
        /// <param name="log">Passed through to the offset validation.</param>
        /// <param name="actualOffsets">
        /// The table's own offsets when validation succeeds or the brute-force search finds nothing;
        /// otherwise a new map holding only the brute-force results.
        /// </param>
        /// <returns><see langword="true"/> if the table's offsets validated; otherwise <see langword="false"/>.</returns>
        public static bool ValidateCrossReferenceOffsets(IInputBytes bytes, CrossReferenceTable crossReferenceTable, ILog log,
                                                         out IReadOnlyDictionary <IndirectReference, long> actualOffsets)
        {
            actualOffsets = crossReferenceTable.ObjectOffsets;

            if (ValidateXrefOffsets(bytes, crossReferenceTable.ObjectOffsets, log))
            {
                return true;
            }

            var bruteForceOffsets = BruteForceSearcher.GetObjectLocations(bytes);

            if (bruteForceOffsets.Count == 0)
            {
                // Nothing better is available; the caller keeps the table's offsets.
                return false;
            }

            // TODO: reconcile objects that live inside object streams with the brute-force results.
            // That step is not implemented: negative table entries are not carried into the replacement
            // map, and brute-force hits for objects contained in a found object stream are not removed.

            // Copied once, independent of how many entries the table has, so the brute-force
            // results are still used when the table is empty.
            var builderOffsets = new Dictionary<IndirectReference, long>();

            foreach (var item in bruteForceOffsets)
            {
                builderOffsets[item.Key] = item.Value;
            }

            actualOffsets = builderOffsets;

            return false;
        }