/// <summary>
/// The input contains the fragment "%AZ 0 obj" between two well-formed objects;
/// only the two well-formed objects ("1 0 obj" and "11 0 obj") are expected in the result.
/// </summary>
public void ReaderEscapesUnexpectedObject()
{
    const string pdf = @"%PDF-1.7 abcd 1 0 obj << /Type /Any >> endobj %AZ 0 obj 11 0 obj 769 endobj %%EOF";

    var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(pdf));

    var found = BruteForceSearcher.GetObjectLocations(input);

    Assert.Equal(2, found.Count);

    // Expected offsets are the positions of the object headers within the input text.
    long firstObject = pdf.IndexOf("1 0 obj", StringComparison.OrdinalIgnoreCase);
    long secondObject = pdf.IndexOf("11 0 obj", StringComparison.OrdinalIgnoreCase);

    Assert.Equal(new[] { firstObject, secondObject }, found.Values);
}
/// <summary>
/// The input contains the fragment "16-0 obj" between two well-formed objects;
/// only "1 0 obj" and "5 0 obj" are expected in the result.
/// </summary>
public void ReaderEscapesUnexpectedGenerationNumber()
{
    const string pdf = @"%PDF-2.0 abcdefghijklmnop 1 0 obj 256 endobj 16-0 obj 5 0 obj << /IsEmpty false >> endobj";

    var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(pdf));

    var found = BruteForceSearcher.GetObjectLocations(input);

    Assert.Equal(2, found.Count);

    // Expected offsets are the positions of the object headers within the input text.
    long firstObject = pdf.IndexOf("1 0 obj", StringComparison.OrdinalIgnoreCase);
    long secondObject = pdf.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase);

    Assert.Equal(new[] { firstObject, secondObject }, found.Values);
}
/// <summary>
/// Runs the brute-force search over the Inkscape sample document (read through a stream)
/// and checks the object count plus the offsets of objects 1 to 9.
/// </summary>
public void BruteForceSearcherFileOffsetsCorrect()
{
    var path = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");

    using (var fs = File.OpenRead(path))
    {
        var bytes = new StreamInputBytes(fs);

        var locations = new BruteForceSearcher(bytes).GetObjectLocations();

        Assert.Equal(13, locations.Count);

        // Index i holds the expected offset of object number i + 1 (generation 0).
        var expectedOffsets = new long[] { 6183, 244, 15, 222, 5766, 353, 581, 5068, 5091 };

        for (var objectNumber = 1; objectNumber <= expectedOffsets.Length; objectNumber++)
        {
            Assert.Equal(expectedOffsets[objectNumber - 1], locations[new IndirectReference(objectNumber, 0)]);
        }

        // The reported offset must point at the start of the object header text.
        var text = GetStringAt(bytes, locations[new IndirectReference(3, 0)]);
        Assert.StartsWith("3 0 obj", text);
    }
}
/// <summary>
/// Runs the brute-force search over the Open Office sample document (loaded fully into memory)
/// and checks the object count plus the offsets of objects 1 to 13.
/// </summary>
public void BruteForceSearcherFileOffsetsCorrectOpenOffice()
{
    var path = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");
    var bytes = new ByteArrayInputBytes(File.ReadAllBytes(path));

    var locations = BruteForceSearcher.GetObjectLocations(bytes);

    Assert.Equal(13, locations.Count);

    // Index i holds the expected offset of object number i + 1 (generation 0).
    var expectedOffsets = new long[]
    {
        17, 249, 14291, 275, 382, 13283, 13309, 13556, 13926, 14183, 14224, 14428, 14488
    };

    for (var objectNumber = 1; objectNumber <= expectedOffsets.Length; objectNumber++)
    {
        Assert.Equal(expectedOffsets[objectNumber - 1], locations[new IndirectReference(objectNumber, 0)]);
    }

    // The reported offset must point at the start of the object header text.
    var text = GetStringAt(bytes, locations[new IndirectReference(12, 0)]);
    Assert.StartsWith("12 0 obj", text);
}
/// <summary>
/// Looks up the offset recorded for <paramref name="reference"/>.
/// </summary>
/// <remarks>
/// Until a non-null cross reference table has been obtained, each call asks for the table and,
/// once available, copies its offsets into the local cache. The cache is consulted first;
/// the brute-force searcher is only used when the cache has no entry.
/// </remarks>
/// <param name="reference">The object to locate.</param>
/// <param name="offset">The offset found for the object, when the method returns true.</param>
/// <returns>True if either the cache or the brute-force search contains the reference.</returns>
public bool TryGetOffset(IndirectReference reference, out long offset)
{
    if (!loadedFromTable)
    {
        var table = crossReferenceTable.Invoke();

        if (table != null)
        {
            foreach (var pair in table.ObjectOffsets)
            {
                offsets[pair.Key] = pair.Value;
            }

            loadedFromTable = true;
        }
    }

    if (offsets.TryGetValue(reference, out offset))
    {
        return true;
    }

    // Not in the cache: fall back to whatever the brute-force search located.
    return searcher.GetObjectLocations().TryGetValue(reference, out offset);
}
/// <summary>
/// Resolves the object identified by <paramref name="objectNumber"/> and
/// <paramref name="objectGeneration"/>, returning the pooled instance when it is already loaded.
/// </summary>
/// <remarks>
/// The location comes from the cross reference table; when <paramref name="isLenient"/> is set
/// and the table has no entry, the brute-force search result is used instead and written back
/// to the table. A positive location is parsed directly from the file; a non-positive one is
/// negated and handed to the compressed object stream parser.
/// </remarks>
/// <returns>The parsed object, or <c>CosNull.Null</c> when no location could be found.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="pool"/> is null, or the object is not pooled and
/// <paramref name="crossReferenceTable"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">
/// <paramref name="requireExistingObject"/> is set and the table has no positive offset for the object.
/// </exception>
public CosBase Parse(IRandomAccessRead reader, long objectNumber, int objectGeneration, CosObjectPool pool,
    CrossReferenceTable crossReferenceTable, BruteForceSearcher bruteForceSearcher, bool isLenient,
    bool requireExistingObject)
{
    if (pool == null)
    {
        throw new ArgumentNullException(nameof(pool));
    }

    var key = new CosObjectKey(objectNumber, objectGeneration);

    var pooled = pool.GetOrCreateDefault(key);
    if (pooled.GetObject() != null)
    {
        return pooled.GetObject();
    }

    // The table is only required once we know the object is not already loaded.
    if (crossReferenceTable == null)
    {
        throw new ArgumentNullException(nameof(crossReferenceTable));
    }

    var location = TryGet(key, crossReferenceTable.ObjectOffsets);

    if (requireExistingObject && (location == null || location <= 0))
    {
        throw new InvalidOperationException("Object must be defined and not compressed: " + key);
    }

    if (location == null && isLenient)
    {
        location = TryGet(key, bruteForceSearcher.GetObjectLocations());

        if (location != null)
        {
            crossReferenceTable.UpdateOffset(key, location.Value);
        }
    }

    if (location == null)
    {
        return CosNull.Null;
    }

    if (location > 0)
    {
        return ParseObjectFromFile(location.Value, reader, key, pool, isLenient);
    }

    // Non-positive values are negated before being passed to the object stream parser.
    return ParseCompressedStreamObject(reader, -location.Value, objectNumber, pool, crossReferenceTable,
        bruteForceSearcher, isLenient);
}
/// <summary>
/// Seeks the input to position 593 before searching and expects the same
/// offsets as <c>TestDataOffsets</c>.
/// </summary>
public void BruteForceSearcherCorrectlyFindsAllObjectsWhenOffset()
{
    var data = OtherEncodings.StringAsLatin1Bytes(TestData);
    var input = new ByteArrayInputBytes(data);

    // Move the input away from the start before the search runs.
    input.Seek(593);

    var found = BruteForceSearcher.GetObjectLocations(input);

    Assert.Equal(TestDataOffsets, found.Values);
}
/// <summary>
/// Calls <c>GetObjectLocations</c> twice on the same searcher and checks that both results
/// hold four entries and that every key from the first result is present in the second.
/// </summary>
public void ReaderOnlyCallsOnce()
{
    var reader = StringBytesTestConverter.Convert(TestData, false);

    var searcher = new BruteForceSearcher(reader.Bytes);

    var locations = searcher.GetObjectLocations();

    Assert.Equal(4, locations.Count);

    var newLocations = searcher.GetObjectLocations();

    // Verify the second result itself; previously this re-asserted the first result's count.
    Assert.Equal(4, newLocations.Count);

    foreach (var keyValuePair in locations)
    {
        Assert.Contains(newLocations.Keys, x => x.Equals(keyValuePair.Key));
    }
}
/// <summary>
/// Searches <c>TestData</c> from the start and expects four objects at <c>TestDataOffsets</c>.
/// </summary>
public void SearcherFindsCorrectObjects()
{
    var data = OtherEncodings.StringAsLatin1Bytes(TestData);

    var found = BruteForceSearcher.GetObjectLocations(new ByteArrayInputBytes(data));

    Assert.Equal(4, found.Count);
    Assert.Equal(TestDataOffsets, found.Values);
}
/// <summary>
/// Returns the object locations found by the brute-force searcher, asking the searcher
/// only when no result has been stored yet and reusing the stored result afterwards.
/// </summary>
private IReadOnlyDictionary<IndirectReference, long> getBFCosObjectOffsets()
{
    if (objectKeyOffsets != null)
    {
        return objectKeyOffsets;
    }

    objectKeyOffsets = bruteForceSearcher.GetObjectLocations();

    return objectKeyOffsets;
}
/// <summary>
/// Calls <c>GetObjectLocations</c> twice, setting <c>Throw</c> on the reader before the second
/// call, and checks that both results hold four entries and that the second result contains
/// the very same key instances as the first.
/// </summary>
/// <remarks>
/// NOTE(review): presumably <c>ThrowingReader</c> fails on any read once <c>Throw</c> is set,
/// so the second call must not touch the reader - confirm against its implementation.
/// </remarks>
public void ReaderOnlyCallsOnce()
{
    var bytes = OtherEncodings.StringAsLatin1Bytes(TestData);

    var reader = new ThrowingReader(new RandomAccessBuffer(bytes));

    var searcher = new BruteForceSearcher(reader);

    var locations = searcher.GetObjectLocations();

    Assert.Equal(4, locations.Count);

    reader.Throw = true;

    var newLocations = searcher.GetObjectLocations();

    // Verify the second result itself; previously this re-asserted the first result's count.
    Assert.Equal(4, newLocations.Count);

    foreach (var keyValuePair in locations)
    {
        Assert.Contains(newLocations.Keys, x => ReferenceEquals(x, keyValuePair.Key));
    }
}
/// <summary>
/// Resolves the object identified by <paramref name="indirectReference"/>, returning the
/// pooled instance when it is already loaded.
/// </summary>
/// <remarks>
/// The location comes from the cross reference table; when <paramref name="isLenientParsing"/>
/// is set and the table has no entry, the brute-force search result is used instead and written
/// back to the table. A positive location is parsed directly from the file; a non-positive one
/// is negated and handed to the compressed object stream parser.
/// </remarks>
/// <returns>
/// The parsed object, or <c>CosNull.Null</c> when no location is found and parsing is lenient.
/// </returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="requireExistingObject"/> is set and the table has no positive offset for the
/// object, or no location is found and parsing is not lenient.
/// </exception>
public CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false)
{
    var key = new CosObjectKey(indirectReference.ObjectNumber, indirectReference.Generation);

    var pooled = objectPool.GetOrCreateDefault(key);
    if (pooled.GetObject() != null)
    {
        return pooled.GetObject();
    }

    var location = TryGet(key, crossReferenceTable.ObjectOffsets);

    if (requireExistingObject && (location == null || location <= 0))
    {
        throw new InvalidOperationException("Object must be defined and not compressed: " + key);
    }

    if (location == null && isLenientParsing)
    {
        location = TryGet(key, bruteForceSearcher.GetObjectLocations());

        if (location != null)
        {
            crossReferenceTable.UpdateOffset(key, location.Value);
        }
    }

    if (location == null)
    {
        if (!isLenientParsing)
        {
            throw new InvalidOperationException($"Could not locate the object {key.Number} which was not found in the cross reference table.");
        }

        return CosNull.Null;
    }

    if (location > 0)
    {
        return ParseObjectFromFile(location.Value, reader, key, objectPool, isLenientParsing);
    }

    // Non-positive values are negated before being passed to the object stream parser.
    return ParseCompressedStreamObject(reader, -location.Value, indirectReference.ObjectNumber, isLenientParsing);
}
/// <summary>
/// Searches <c>TestData</c> and expects four objects whose offsets are the positions
/// of their header text within <c>TestData</c>.
/// </summary>
public void SearcherFindsCorrectObjects()
{
    var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(TestData));

    var searcher = new BruteForceSearcher(input);

    var locations = searcher.GetObjectLocations();

    Assert.Equal(4, locations.Count);

    var expectedLocations = new long[]
    {
        TestData.IndexOf("2 17 obj", StringComparison.OrdinalIgnoreCase),
        TestData.IndexOf("3 0 obj", StringComparison.OrdinalIgnoreCase),
        TestData.IndexOf("4 0 obj", StringComparison.OrdinalIgnoreCase),
        TestData.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase)
    };

    // Expected value goes first so a failure reports expected/actual the right way round.
    Assert.Equal(expectedLocations, locations.Values);
}
/// <summary>
/// Searches <c>TestData</c> through a <c>RandomAccessBuffer</c> and expects four objects whose
/// offsets are the positions of their header text within <c>TestData</c>.
/// </summary>
public void SearcherFindsCorrectObjects()
{
    var bytes = OtherEncodings.StringAsLatin1Bytes(TestData);

    var reader = new RandomAccessBuffer(bytes);

    var searcher = new BruteForceSearcher(reader);

    var locations = searcher.GetObjectLocations();

    Assert.Equal(4, locations.Count);

    var expectedLocations = new long[]
    {
        TestData.IndexOf("2 0 obj", StringComparison.OrdinalIgnoreCase),
        TestData.IndexOf("3 0 obj", StringComparison.OrdinalIgnoreCase),
        TestData.IndexOf("4 0 obj", StringComparison.OrdinalIgnoreCase),
        TestData.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase)
    };

    // Expected value goes first so a failure reports expected/actual the right way round.
    Assert.Equal(expectedLocations, locations.Values);
}
/// <summary>
/// Check that the offsets in the cross reference are correct.
/// </summary>
/// <param name="bytes">The input to validate against and, on failure, to brute-force search.</param>
/// <param name="crossReferenceTable">The table whose object offsets are checked.</param>
/// <param name="log">Passed through to the offset validation.</param>
/// <param name="actualOffsets">
/// The table's own offsets when validation succeeds or the brute-force search finds nothing;
/// otherwise the table's negative (stream) entries overlaid with the brute-force locations.
/// </param>
/// <returns>True if the table's offsets validated; false otherwise.</returns>
public static bool ValidateCrossReferenceOffsets(IInputBytes bytes, CrossReferenceTable crossReferenceTable, ILog log,
    out IReadOnlyDictionary<IndirectReference, long> actualOffsets)
{
    actualOffsets = crossReferenceTable.ObjectOffsets;

    if (ValidateXrefOffsets(bytes, crossReferenceTable.ObjectOffsets, log))
    {
        return true;
    }

    var bruteForceOffsets = BruteForceSearcher.GetObjectLocations(bytes);

    if (bruteForceOffsets.Count > 0)
    {
        var builderOffsets = new Dictionary<IndirectReference, long>();

        // Keep the entries with negative values (object stream references).
        foreach (var entry in crossReferenceTable.ObjectOffsets)
        {
            if (entry.Value < 0)
            {
                // Trust stream offsets for now.
                // TODO: more validation of streams.
                builderOffsets[entry.Key] = entry.Value;
            }
        }

        // Overlay the brute-force locations once, after the stream entries, so they win for any
        // shared key. This used to run inside the loop above: it repeated the full copy for every
        // table entry and was skipped entirely when the table had no entries.
        foreach (var item in bruteForceOffsets)
        {
            builderOffsets[item.Key] = item.Value;
        }

        actualOffsets = builderOffsets;
    }

    return false;
}
/// <summary>
/// Passing null to <c>BruteForceSearcher.GetObjectLocations</c> must raise
/// <see cref="ArgumentNullException"/>.
/// </summary>
public void ReaderNull_Throws()
{
    // Statement-bodied lambda so the call is treated as an Action, as in the original.
    Assert.Throws<ArgumentNullException>(() => { BruteForceSearcher.GetObjectLocations(null); });
}
/// <summary>
/// Check that the offsets in the cross reference are correct.
/// </summary>
/// <param name="bytes">The input to validate against and, on failure, to brute-force search.</param>
/// <param name="crossReferenceTable">The table whose object offsets are checked.</param>
/// <param name="log">Passed through to the offset validation.</param>
/// <param name="actualOffsets">
/// The table's own offsets when validation succeeds or the brute-force search finds nothing;
/// otherwise the brute-force locations.
/// </param>
/// <returns>True if the table's offsets validated; false otherwise.</returns>
public static bool ValidateCrossReferenceOffsets(IInputBytes bytes, CrossReferenceTable crossReferenceTable, ILog log,
    out IReadOnlyDictionary<IndirectReference, long> actualOffsets)
{
    actualOffsets = crossReferenceTable.ObjectOffsets;

    if (ValidateXrefOffsets(bytes, crossReferenceTable.ObjectOffsets, log))
    {
        return true;
    }

    var bruteForceOffsets = BruteForceSearcher.GetObjectLocations(bytes);

    if (bruteForceOffsets.Count > 0)
    {
        var builderOffsets = new Dictionary<IndirectReference, long>();

        // A set makes the duplicate check constant time instead of scanning a list.
        var objStreams = new HashSet<IndirectReference>();

        // find all object streams
        foreach (var entry in crossReferenceTable.ObjectOffsets)
        {
            var offset = entry.Value;
            if (offset < 0)
            {
                objStreams.Add(new IndirectReference(-offset, 0));
            }
        }

        // remove all found object streams
        // NOTE: both branches are placeholders; the removal logic has not been ported yet,
        // so this loop currently changes nothing.
        foreach (var key in objStreams)
        {
            if (bruteForceOffsets.ContainsKey(key))
            {
                // remove all parsed objects which are part of an object stream
                //ISet<long> objects = xrefTrailerResolver
                //        .getContainedObjectNumbers((int)(key.Number));
                //foreach (long objNr in objects)
                //{
                //    CosObjectKey streamObjectKey = new CosObjectKey(objNr, 0);
                //    if (bfCOSObjectKeyOffsets.TryGetValue(streamObjectKey, out long streamObjectOffset) && streamObjectOffset > 0)
                //    {
                //        bfCOSObjectKeyOffsets.Remove(streamObjectKey);
                //    }
                //}
            }
            else
            {
                // remove all objects which are part of an object stream which wasn't found
                //ISet<long> objects = xrefTrailerResolver
                //        .getContainedObjectNumbers((int)(key.Number));
                //foreach (long objNr in objects)
                //{
                //    xrefOffset.Remove(new CosObjectKey(objNr, 0));
                //}
            }
        }

        // Copy the brute-force locations once. This used to run inside the per-entry loop:
        // it repeated the full copy for every table entry and was skipped entirely when the
        // table had no entries.
        foreach (var item in bruteForceOffsets)
        {
            builderOffsets[item.Key] = item.Value;
        }

        actualOffsets = builderOffsets;
    }

    return false;
}