Exemplo n.º 1
0
        public void ReaderEscapesUnexpectedObject()
        {
            // Fixture: objects "1 0" and "11 0" plus a "%AZ 0 obj" line, which is not expected in the results.
            const string content = @"%PDF-1.7
abcd

1 0 obj
<< /Type /Any >>

endobj

%AZ 0 obj
11 0 obj
769
endobj

%%EOF";

            // Expected offsets are the positions of the two object headers within the fixture text.
            var expected = new long[]
            {
                content.IndexOf("1 0 obj", StringComparison.OrdinalIgnoreCase),
                content.IndexOf("11 0 obj", StringComparison.OrdinalIgnoreCase)
            };

            var latin1 = OtherEncodings.StringAsLatin1Bytes(content);
            var source = new ByteArrayInputBytes(latin1);

            var found = BruteForceSearcher.GetObjectLocations(source);

            Assert.Equal(2, found.Count);
            Assert.Equal(expected, found.Values);
        }
Exemplo n.º 2
0
        public void ReaderEscapesUnexpectedGenerationNumber()
        {
            // Fixture: objects "1 0" and "5 0" plus a "16-0 obj" line, which is not expected in the results.
            const string content = @"%PDF-2.0
abcdefghijklmnop

1 0 obj
256
endobj

16-0 obj

5 0 obj
<< /IsEmpty false >>
endobj";

            // Expected offsets are the positions of the two object headers within the fixture text.
            var expected = new long[]
            {
                content.IndexOf("1 0 obj", StringComparison.OrdinalIgnoreCase),
                content.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase)
            };

            var latin1 = OtherEncodings.StringAsLatin1Bytes(content);
            var source = new ByteArrayInputBytes(latin1);

            var found = BruteForceSearcher.GetObjectLocations(source);

            Assert.Equal(2, found.Count);
            Assert.Equal(expected, found.Values);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Registers <paramref name="fontName"/> as a Standard 14 font name, maps it to the AFM resource
        /// named by <paramref name="afmName"/>, and loads the metrics from that embedded resource.
        /// </summary>
        /// <param name="fontName">The font name to register; used as the key in the name, mapping and metrics collections.</param>
        /// <param name="afmName">The AFM file name (without the ".afm" extension) used to build the embedded resource name.</param>
        /// <param name="type">When supplied, the loaded metrics are also stored against this Standard 14 font type.</param>
        /// <exception cref="InvalidOperationException">
        /// The embedded resource is missing or any other failure occurs while reading or parsing it;
        /// the underlying failure is available as the inner exception.
        /// </exception>
        private static void AddAdobeFontMetrics(string fontName, string afmName, Standard14Font? type = null)
        {
            Standard14Names.Add(fontName);
            Standard14Mapping.Add(fontName, afmName);

            // NOTE(review): this entry is overwritten below whenever parsing succeeds, so it only
            // persists if loading throws - confirm whether an early return was intended here.
            if (Standard14AfmMap.TryGetValue(afmName, out var metrics))
            {
                Standard14AfmMap[fontName] = metrics;
            }

            try
            {
                var assembly = typeof(Standard14).Assembly;

                var name = $"UglyToad.PdfPig.Resources.AdobeFontMetrics.{afmName}.afm";

                IInputBytes bytes;
                using (var memory = new MemoryStream())
                using (var resource = assembly.GetManifestResourceStream(name))
                {
                    // GetManifestResourceStream returns null for an unknown resource name; fail with a
                    // descriptive message instead of a NullReferenceException from CopyTo.
                    if (resource == null)
                    {
                        throw new InvalidOperationException($"The embedded resource '{name}' was not found in the assembly.");
                    }

                    resource.CopyTo(memory);
                    bytes = new ByteArrayInputBytes(memory.ToArray());
                }

                Standard14AfmMap[fontName] = Parser.Parse(bytes, true);
                if (type.HasValue)
                {
                    Standard14AfmTypeMap[type.Value] = Standard14AfmMap[fontName];
                }
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException($"Could not load {fontName} from the AFM files.", ex);
            }
        }
Exemplo n.º 4
0
        public void ReadsStreamWithoutBreakBeforeEndstream()
        {
            // Fixture: object 7 0 is a stream whose data is followed directly by the "endstream"
            // keyword on the same line, with no line break in between. The literal holds raw stream
            // bytes and must not be reformatted.
            const string s          = @"
1 0 obj
12
endobj

7 0 obj
<< /Length 288
   /Filter /FlateDecode >>
stream
xœ]‘ËjÃ0E÷ÿÃ,ÓEð#NÒ€1¤N^ôA~€-]A-YYøï+Ï4¡t#qfîFWQY*­Dïv5:è”–§ñjB‹½Òa¤ •p7¤K	ƒÈûëyr8Tº!Ïà  úð‚ÉÙVG9¶ø@Å7+Ñ*ÝÃ곬¹T_ùƵƒ8Š$vË̗Ƽ6BDöu%½B¹yí$—Ù ¤\Hx71JœL#Ð6ºÇ0È㸀ü|. µüßõÏ""WÛ‰¯Æ.êÄ«ã8;¤iL°!Ø %É`K°ßì¸ÃöÜáÜ)	[‚#CFðÄ°#(yƒg^ÿ¶æò
ÿž“¸Zë#¢?¢h–P”Æû?šÑï÷ø¯‰Šendstream
endobj

9 0 obj
16
endobj";
            var          inputBytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(s));

            var scanner = new PdfTokenScanner(inputBytes, new TestObjectLocationProvider(), new TestFilterProvider());

            // The second token read (index 1) is expected to be the stream object, number 7.
            var token = ReadToEnd(scanner)[1];

            Assert.Equal(7, token.Number.ObjectNumber);
        }
Exemplo n.º 5
0
        internal static (CoreTokenScanner scanner, IInputBytes bytes) Scanner(string s)
        {
            // Encode the text as Latin-1 and hand back both the scanner and the byte source it reads from.
            var latin1 = OtherEncodings.StringAsLatin1Bytes(s);
            var source = new ByteArrayInputBytes(latin1);

            return (new CoreTokenScanner(source), source);
        }
Exemplo n.º 6
0
        public void BruteForceSearcherFileOffsetsCorrectOpenOffice()
        {
            var path = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");
            var bytes = new ByteArrayInputBytes(File.ReadAllBytes(path));

            var found = BruteForceSearcher.GetObjectLocations(bytes);

            Assert.Equal(13, found.Count);

            // Expected offsets for objects 1..13 (generation 0); index i holds the offset of object i + 1.
            var expectedOffsets = new long[]
            {
                17, 249, 14291, 275, 382, 13283, 13309,
                13556, 13926, 14183, 14224, 14428, 14488
            };

            for (var i = 0; i < expectedOffsets.Length; i++)
            {
                Assert.Equal(expectedOffsets[i], found[new IndirectReference(i + 1, 0)]);
            }

            // The text at the recorded offset should begin with the matching object header.
            var textAtOffset = GetStringAt(bytes, found[new IndirectReference(12, 0)]);

            Assert.StartsWith("12 0 obj", textAtOffset);
        }
Exemplo n.º 7
0
        public void DoesNotStartWithOpenBracket_ReturnsFalse(char firstByte)
        {
            // The same byte is used both as the tokenizer's current byte and as the only input byte.
            var first = (byte)firstByte;
            var source = new ByteArrayInputBytes(new[] { first });

            var succeeded = tokenizer.TryTokenize(first, source, out var token);

            Assert.False(succeeded);
            Assert.Null(token);
        }
Exemplo n.º 8
0
        public void ArrayAndStreamBehaveTheSame()
        {
            var data = OtherEncodings.StringAsLatin1Bytes(TestData);

            var fromArray = new ByteArrayInputBytes(data);

            using (var backing = new MemoryStream(data))
            {
                var fromStream = new StreamInputBytes(backing);

                // Both sources report the full length and start at offset zero.
                Assert.Equal(data.Length, fromArray.Length);
                Assert.Equal(data.Length, fromStream.Length);

                Assert.Equal(0, fromArray.CurrentOffset);
                Assert.Equal(0, fromStream.CurrentOffset);

                // Seeking to the same position must leave both in the same state.
                fromArray.Seek(5);
                fromStream.Seek(5);

                Assert.Equal(fromArray.CurrentOffset, fromStream.CurrentOffset);

                Assert.Equal((byte)'5', fromArray.CurrentByte);
                Assert.Equal(fromArray.CurrentByte, fromStream.CurrentByte);

                Assert.Equal(fromArray.Peek(), fromStream.Peek());

                // Seeking back to the start resets the current byte to zero on both.
                fromArray.Seek(0);
                fromStream.Seek(0);

                Assert.Equal(0, fromArray.CurrentByte);
                Assert.Equal(fromArray.CurrentByte, fromStream.CurrentByte);

                // Read the remainder of each source from offset 7 and compare what was read.
                fromArray.Seek(7);
                fromStream.Seek(7);

                var arrayText = string.Empty;
                for (; fromArray.MoveNext();)
                {
                    arrayText = arrayText + (char)fromArray.CurrentByte;
                }

                var streamText = string.Empty;
                for (; fromStream.MoveNext();)
                {
                    streamText = streamText + (char)fromStream.CurrentByte;
                }

                Assert.Equal("89", streamText);

                Assert.Equal(arrayText, streamText);

                Assert.True(fromStream.IsAtEnd());
                Assert.True(fromArray.IsAtEnd());
            }
        }
Exemplo n.º 9
0
        public void BruteForceSearcherCorrectlyFindsAllObjectsWhenOffset()
        {
            var data = OtherEncodings.StringAsLatin1Bytes(TestData);
            var source = new ByteArrayInputBytes(data);

            // Begin the search with the input already positioned away from the start.
            source.Seek(593);

            var found = BruteForceSearcher.GetObjectLocations(source);

            Assert.Equal(TestDataOffsets, found.Values);
        }
Exemplo n.º 10
0
        public void CanParseHelveticaAfmFile()
        {
            // Load the embedded Helvetica AFM resource and check the parser yields a result.
            var afmBytes = GetResourceBytes("UglyToad.PdfPig.Fonts.Resources.AdobeFontMetrics.Helvetica.afm");
            var source = new ByteArrayInputBytes(afmBytes);

            var parsed = AdobeFontMetricsParser.Parse(source, false);

            Assert.NotNull(parsed);
        }
Exemplo n.º 11
0
        public void CanParseAllPredefinedCMaps(string resourceName)
        {
            // Trace the resource being parsed so a failing case can be identified from debug output.
            Debug.WriteLine("Parsing: " + resourceName);

            var resourceBytes = ReadResourceBytes(resourceName);
            var source = new ByteArrayInputBytes(resourceBytes);

            var parsed = cMapParser.Parse(source, false);

            Assert.NotNull(parsed);
        }
Exemplo n.º 12
0
        public void SearcherFindsCorrectObjects()
        {
            var data = OtherEncodings.StringAsLatin1Bytes(TestData);
            var source = new ByteArrayInputBytes(data);

            var found = BruteForceSearcher.GetObjectLocations(source);

            // Four objects are expected, at the offsets recorded in TestDataOffsets.
            Assert.Equal(4, found.Count);
            Assert.Equal(TestDataOffsets, found.Values);
        }
Exemplo n.º 13
0
        public void Issue334()
        {
            // Header line followed by a comment line of non-ASCII characters before the first object.
            var raw = OtherEncodings.StringAsLatin1Bytes("%PDF-1.7\r\n%âãÏÓ\r\n1 0 obj\r\n<</Lang(en-US)>>\r\nendobj");
            var source = new ByteArrayInputBytes(raw);

            var scanner = new CoreTokenScanner(source, ScannerScope.None);

            var header = FileHeaderParser.Parse(scanner, source, false, log);

            Assert.Equal(1.7m, header.Version);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Merge the set of PDF documents.
        /// </summary>
        /// <param name="files">The bytes of each PDF document; documents are appended in list order.</param>
        /// <param name="pagesBundle">
        /// Optional per-file page selections. The entry at index <c>i</c> is passed to the merger for file <c>i</c>;
        /// files with no corresponding entry are appended with a <see langword="null"/> selection.
        /// </param>
        /// <returns>The bytes built by the document merger after every file has been appended.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="files"/> is <see langword="null"/>.</exception>
        /// <exception cref="PdfDocumentEncryptedException">A document contains an encryption dictionary.</exception>
        public static byte[] Merge(IReadOnlyList<byte[]> files, IReadOnlyList<IReadOnlyList<int>> pagesBundle = null)
        {
            if (files == null)
            {
                throw new ArgumentNullException(nameof(files));
            }

            const bool isLenientParsing = false;

            var merger = new DocumentMerger();

            for (var fileIndex = 0; fileIndex < files.Count; fileIndex++)
            {
                // Use the page selection for this file when one was supplied, otherwise null.
                IReadOnlyList<int> pages = pagesBundle != null && fileIndex < pagesBundle.Count
                    ? pagesBundle[fileIndex]
                    : null;

                var inputBytes = new ByteArrayInputBytes(files[fileIndex]);
                var coreScanner = new CoreTokenScanner(inputBytes);

                var version = FileHeaderParser.Parse(coreScanner, isLenientParsing, Log);

                var streamParser = new Parser.Parts.CrossReference.CrossReferenceStreamParser(FilterProvider);
                var crossReferenceParser = new CrossReferenceParser(Log, new XrefOffsetValidator(Log), streamParser);

                // Declared before the location provider so the lambda below can observe the value
                // assigned once the cross reference table has been parsed.
                CrossReferenceTable crossReference = null;

                // ReSharper disable once AccessToModifiedClosure
                var locationProvider = new ObjectLocationProvider(() => crossReference, inputBytes);

                var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, FilterProvider, NoOpEncryptionHandler.Instance);

                var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, coreScanner, isLenientParsing);
                crossReference = crossReferenceParser.Parse(inputBytes, isLenientParsing, crossReferenceOffset, version.OffsetInFile, pdfScanner, coreScanner);

                var catalogDictionaryToken = ParseCatalog(crossReference, pdfScanner, out var encryptionDictionary);
                if (encryptionDictionary != null)
                {
                    throw new PdfDocumentEncryptedException("Unable to merge document with password");
                }

                var documentCatalog = CatalogFactory.Create(crossReference.Trailer.Root, catalogDictionaryToken, pdfScanner, isLenientParsing);

                merger.AppendDocument(documentCatalog, version.Version, pdfScanner, pages);
            }

            return merger.Build();
        }
Exemplo n.º 15
0
        public void HandlesUtf16BigEndianStrings()
        {
            // NOTE(review): 0xFF 0xFE is the UTF-16 little-endian byte order mark and the characters
            // that follow are stored low byte first, despite the "BigEndian" in the test name.
            // The final 0x29 is ')' and 0x28 passed below is '('.
            byte[] payload =
            {
                0xFF, 0xFE, 0x4D, 0x00, 0x69, 0x00, 0x63,
                0x00, 0x29
            };
            var source = new ByteArrayInputBytes(payload);

            var succeeded = tokenizer.TryTokenize(0x28, source, out var token);

            Assert.True(succeeded);

            Assert.Equal(@"Mic", AssertStringToken(token).Data);
        }
        private void Run(byte[] bytes, bool checkHeaderChecksum, bool checkWholeFileChecksum)
        {
            // Parse the font from one byte source, then use a fresh source for the checksum calculations.
            var font = TrueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(bytes)));

            var checksumSource = new ByteArrayInputBytes(bytes);

            foreach (var entry in font.TableHeaders)
            {
                if (entry.Key != "head")
                {
                    // Checksum computed from the table's slice of the raw bytes.
                    var tableBytes = bytes.Skip((int)entry.Value.Offset).Take((int)entry.Value.Length);

                    var sliceChecksum = TrueTypeChecksumCalculator.Calculate(tableBytes);

                    Assert.Equal(entry.Value.CheckSum, sliceChecksum);

                    // Checksum computed by locating the table through its header.
                    var tableChecksum = TrueTypeChecksumCalculator.Calculate(checksumSource, entry.Value);

                    Assert.Equal(entry.Value.CheckSum, tableChecksum);
                }
                else if (checkHeaderChecksum)
                {
                    // The "head" table acts as the whole table checksum, so it is only verified on request.
                    var headChecksum = TrueTypeChecksumCalculator.Calculate(checksumSource, entry.Value);

                    Assert.Equal(entry.Value.CheckSum, headChecksum);
                }
            }

            if (!checkWholeFileChecksum)
            {
                return;
            }

            // The recorded adjustment should equal the constant minus the whole-font checksum, and vice versa.
            const uint checksumConstant = 0xB1B0AFBA;

            var headHeader = font.TableHeaders["head"];
            var wholeFontChecksum = TrueTypeChecksumCalculator.CalculateWholeFontChecksum(checksumSource, headHeader);
            var computedAdjustment = checksumConstant - wholeFontChecksum;
            var recordedAdjustment = font.TableRegister.HeaderTable.CheckSumAdjustment;

            Assert.Equal(recordedAdjustment, computedAdjustment);

            var expectedWholeFontChecksum = checksumConstant - recordedAdjustment;

            Assert.Equal(expectedWholeFontChecksum, wholeFontChecksum);
        }
Exemplo n.º 17
0
        public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null)
        {
            // Options are optional: the logger may be null and lenient parsing defaults to true.
            var container = Bootstrapper.GenerateContainer(options?.Logger);
            var lenient = options?.UseLenientParsing ?? true;

            // The same byte array backs both the random access reader and the scanner input.
            var reader = new RandomAccessBuffer(fileBytes);
            var source = new ByteArrayInputBytes(fileBytes);
            var scanner = new CoreTokenScanner(source);

            return OpenDocument(reader, source, scanner, container, lenient);
        }
Exemplo n.º 18
0
        public void ReadUnsignedInt()
        {
            // 220, 43, 250, 6 is 0xDC2BFA06 when combined most significant byte first, i.e. 3693869574.
            byte[] raw = { 220, 43, 250, 6 };

            var data = new TrueTypeDataBytes(new ByteArrayInputBytes(raw));

            var value = data.ReadUnsignedInt();

            Assert.Equal(3693869574L, value);
        }
Exemplo n.º 19
0
        public void SearcherFindsCorrectObjects()
        {
            var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(TestData));

            var searcher = new BruteForceSearcher(input);

            var locations = searcher.GetObjectLocations();

            Assert.Equal(4, locations.Count);

            // Expected offsets are the positions of each object header within the test data.
            var expectedLocations = new long[]
            {
                TestData.IndexOf("2 17 obj", StringComparison.OrdinalIgnoreCase),
                TestData.IndexOf("3 0 obj", StringComparison.OrdinalIgnoreCase),
                TestData.IndexOf("4 0 obj", StringComparison.OrdinalIgnoreCase),
                TestData.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase)
            };

            // Assert.Equal takes (expected, actual); passing them in that order keeps failure
            // messages labelled correctly.
            Assert.Equal(expectedLocations, locations.Values);
        }
Exemplo n.º 20
0
        public static Result Convert(string s, bool readFirst = true)
        {
            // Encode the text as UTF-8 and wrap it as an input byte source.
            var utf8 = Encoding.UTF8.GetBytes(s);
            var source = new ByteArrayInputBytes(utf8);

            // Optionally advance onto the first byte and capture it; otherwise the first byte stays zero.
            byte first = 0;
            if (readFirst)
            {
                source.MoveNext();
                first = source.CurrentByte;
            }

            return new Result
            {
                First = first,
                Bytes = source
            };
        }
Exemplo n.º 21
0
        public void CanParseIdentityHorizontalCMap()
        {
            var resourceBytes = ReadResourceBytes("UglyToad.Pdf.Resources.CMap.Identity-H");
            var source = new ByteArrayInputBytes(resourceBytes);

            var parsed = cMapParser.Parse(source, false);

            // A single two-byte codespace range covering 0..65535 is expected.
            Assert.Equal(1, parsed.CodespaceRanges.Count);

            var onlyRange = parsed.CodespaceRanges[0];

            Assert.Equal(0, onlyRange.StartInt);
            Assert.Equal(65535, onlyRange.EndInt);

            Assert.Equal(2, onlyRange.CodeLength);

            Assert.Equal(256, parsed.CidRanges.Count);

            Assert.Equal("10.003", parsed.Version);
        }
Exemplo n.º 22
0
        public void BruteForceSearcherBytesFileOffsetsCorrect()
        {
            var path = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");
            var bytes = new ByteArrayInputBytes(File.ReadAllBytes(path));

            var found = BruteForceSearcher.GetObjectLocations(bytes);

            Assert.Equal(13, found.Count);

            // Expected offsets for objects 1..9 (generation 0); index i holds the offset of object i + 1.
            var expectedOffsets = new long[] { 6183, 244, 15, 222, 5766, 353, 581, 5068, 5091 };

            for (var i = 0; i < expectedOffsets.Length; i++)
            {
                Assert.Equal(expectedOffsets[i], found[new IndirectReference(i + 1, 0)]);
            }

            // The text at the recorded offset should begin with the matching object header.
            var textAtOffset = GetStringAt(bytes, found[new IndirectReference(3, 0)]);

            Assert.StartsWith("3 0 obj", textAtOffset);
        }
Exemplo n.º 23
0
        public void ColorspaceParserError()
        {
            // Input declares one codespace range but ends the block without providing any.
            var source = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes("1 begincodespacerange\nendcodespacerange"));
            var scanner = new CoreTokenScanner(source);
            var rangeParser = new CodespaceRangeParser();

            // First token: the numeric count.
            Assert.True(scanner.MoveNext());
            var count = scanner.CurrentToken as NumericToken;
            Assert.True(count != null);

            // Second token: the operator that opens the block.
            Assert.True(scanner.MoveNext());
            var op = scanner.CurrentToken as OperatorToken;
            Assert.True(op != null);

            Assert.Equal("begincodespacerange", op.Data);

            var builder = new CharacterMapBuilder();

            rangeParser.Parse(count, scanner, builder);

            Assert.Empty(builder.CodespaceRanges);
        }
Exemplo n.º 24
0
        /// <summary>
        /// Parses an embedded Adobe Type 1 font file.
        /// </summary>
        /// <remarks>
        /// When the input begins with the PFB file indicator byte, the PFB header is read first and the
        /// ASCII segment is scanned while the binary segment is used as the eexec portion. Otherwise the
        /// eexec portion is collected from the bytes following the eexec operator.
        /// </remarks>
        /// <param name="inputBytes">The bytes of the font program.</param>
        /// <param name="length1">The length in bytes of the clear text portion of the font program. NOTE(review): not referenced in this method body.</param>
        /// <param name="length2">The length in bytes of the encrypted portion of the font program. NOTE(review): not referenced in this method body.</param>
        /// <returns>The parsed type 1 font.</returns>
        /// <exception cref="InvalidFontFormatException">The first token is not a comment beginning with '!'.</exception>
        public Type1FontProgram Parse(IInputBytes inputBytes, int length1, int length2)
        {
            // Sometimes the entire PFB file including the header bytes can be included which prevents parsing in the normal way.
            var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator;

            IReadOnlyList <byte> eexecPortion = new byte[0];

            if (isEntirePfbFile)
            {
                var(ascii, binary) = ReadPfbHeader(inputBytes);

                // From here on only the ASCII segment is scanned; the binary segment is the eexec data.
                eexecPortion = binary;
                inputBytes   = new ByteArrayInputBytes(ascii);
            }

            var scanner = new CoreTokenScanner(inputBytes);

            if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!"))
            {
                throw new InvalidFontFormatException("The Type1 program did not start with '%!'.");
            }

            // The name is the second space-separated part of the header comment, but only when the
            // comment has exactly three parts; anything else falls back to "Unknown".
            string name;
            var    parts = comment.Data.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

            if (parts.Length == 3)
            {
                name = parts[1];
            }
            else
            {
                name = "Unknown";
            }

            // Collect the run of comment tokens that follows the header. The loop stops on the first
            // non-comment token, which remains the scanner's current token.
            // NOTE(review): this list is not read again within this method.
            var comments = new List <string>();

            while (scanner.MoveNext() && scanner.CurrentToken is CommentToken commentToken)
            {
                comments.Add(commentToken.Data);
            }

            var dictionaries = new List <DictionaryToken>();

            // Override arrays and names since type 1 handles these differently.
            var arrayTokenizer = new Type1ArrayTokenizer();
            var nameTokenizer  = new Type1NameTokenizer();

            scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
            scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);

            try
            {
                var tempEexecPortion = new List <byte>();
                var tokenSet         = new PreviousTokenSet();
                // Seed the token history with the token the comment loop stopped on.
                tokenSet.Add(scanner.CurrentToken);
                while (scanner.MoveNext())
                {
                    if (scanner.CurrentToken is OperatorToken operatorToken)
                    {
                        if (Equals(scanner.CurrentToken, OperatorToken.Eexec))
                        {
                            // Read raw bytes after the eexec operator until every character of
                            // ClearToMark has been matched consecutively. 'offset' is the number of
                            // marker characters matched so far.
                            int offset = 0;

                            while (inputBytes.MoveNext())
                            {
                                if (inputBytes.CurrentByte == (byte)ClearToMark[offset])
                                {
                                    offset++;
                                }
                                else
                                {
                                    // A partial match failed: the marker characters that were held
                                    // back are part of the data, so emit them before resetting.
                                    // NOTE(review): the byte that broke the match is appended below
                                    // without being re-tested against the start of the marker.
                                    if (offset > 0)
                                    {
                                        for (int i = 0; i < offset; i++)
                                        {
                                            tempEexecPortion.Add((byte)ClearToMark[i]);
                                        }
                                    }

                                    offset = 0;
                                }

                                // Full marker matched: stop reading; the marker itself is not stored.
                                if (offset == ClearToMark.Length)
                                {
                                    break;
                                }

                                // Mid-match: hold the byte back until the match completes or fails.
                                if (offset > 0)
                                {
                                    continue;
                                }

                                tempEexecPortion.Add(inputBytes.CurrentByte);
                            }
                        }
                        else
                        {
                            HandleOperator(operatorToken, scanner, tokenSet, dictionaries);
                        }
                    }

                    tokenSet.Add(scanner.CurrentToken);
                }

                // For PFB input the binary segment read from the header is kept instead.
                if (!isEntirePfbFile)
                {
                    eexecPortion = tempEexecPortion;
                }
            }
            finally
            {
                // Always remove the Type 1 specific tokenizers, even if parsing threw.
                scanner.DeregisterCustomTokenizer(arrayTokenizer);
                scanner.DeregisterCustomTokenizer(nameTokenizer);
            }

            var encoding    = GetEncoding(dictionaries);
            var matrix      = GetFontMatrix(dictionaries);
            var boundingBox = GetBoundingBox(dictionaries);

            var(privateDictionary, charStrings) = encryptedPortionParser.Parse(eexecPortion, false);

            // A missing bounding box is replaced with a default PdfRectangle.
            return(new Type1FontProgram(name, encoding, matrix, boundingBox ?? new PdfRectangle(), privateDictionary, charStrings));
        }
Exemplo n.º 25
0
        /// <inheritdoc />
        public void Run(IOperationContext operationContext)
        {
            // Text, when present, takes precedence and is encoded as Latin-1; otherwise the raw bytes are used.
            var payload = Text == null ? Bytes : OtherEncodings.StringAsLatin1Bytes(Text);

            operationContext.ShowText(new ByteArrayInputBytes(payload));
        }
Exemplo n.º 26
0
        public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null)
        {
            // Wrap the array as an input byte source and hand off to the Open overload that accepts it.
            var source = new ByteArrayInputBytes(fileBytes);

            return Open(source, options);
        }