public void ReaderEscapesUnexpectedObject()
{
    // The malformed "%AZ 0 obj" entry must be skipped; only the two valid objects are reported.
    const string document = @"%PDF-1.7 abcd 1 0 obj << /Type /Any >> endobj %AZ 0 obj 11 0 obj 769 endobj %%EOF";

    var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(document));

    var found = BruteForceSearcher.GetObjectLocations(input);

    Assert.Equal(2, found.Count);

    var expected = new long[]
    {
        document.IndexOf("1 0 obj", StringComparison.OrdinalIgnoreCase),
        document.IndexOf("11 0 obj", StringComparison.OrdinalIgnoreCase)
    };

    Assert.Equal(expected, found.Values);
}
public void ReaderEscapesUnexpectedGenerationNumber()
{
    // "16-0 obj" has a malformed generation number and must not appear in the results.
    const string document = @"%PDF-2.0 abcdefghijklmnop 1 0 obj 256 endobj 16-0 obj 5 0 obj << /IsEmpty false >> endobj";

    var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(document));

    var found = BruteForceSearcher.GetObjectLocations(input);

    Assert.Equal(2, found.Count);

    var expected = new long[]
    {
        document.IndexOf("1 0 obj", StringComparison.OrdinalIgnoreCase),
        document.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase)
    };

    Assert.Equal(expected, found.Values);
}
/// <summary>
/// Registers a Standard 14 font name, records its AFM file mapping and stores the parsed
/// Adobe Font Metrics for the name (and optionally for the given Standard 14 type).
/// </summary>
/// <param name="fontName">The font name (or alias) to register.</param>
/// <param name="afmName">The name of the embedded AFM resource file for this font.</param>
/// <param name="type">Optional Standard 14 font type to also map to these metrics.</param>
private static void AddAdobeFontMetrics(string fontName, string afmName, Standard14Font? type = null)
{
    Standard14Names.Add(fontName);
    Standard14Mapping.Add(fontName, afmName);

    // Several font names alias the same AFM file; when the metrics were already parsed
    // under the canonical name, reuse them instead of re-reading the embedded resource.
    // (The original code copied the cached value but then re-parsed and overwrote it,
    // making the cache lookup dead work.)
    if (Standard14AfmMap.TryGetValue(afmName, out var metrics))
    {
        Standard14AfmMap[fontName] = metrics;

        if (type.HasValue)
        {
            Standard14AfmTypeMap[type.Value] = metrics;
        }

        return;
    }

    try
    {
        var assembly = typeof(Standard14).Assembly;
        var name = $"UglyToad.PdfPig.Resources.AdobeFontMetrics.{afmName}.afm";

        IInputBytes bytes;
        using (var memory = new MemoryStream())
        using (var resource = assembly.GetManifestResourceStream(name))
        {
            // GetManifestResourceStream returns null for a missing resource; fail with a
            // clear message instead of a NullReferenceException on CopyTo.
            if (resource == null)
            {
                throw new InvalidOperationException($"No embedded resource found with the name: {name}.");
            }

            resource.CopyTo(memory);
            bytes = new ByteArrayInputBytes(memory.ToArray());
        }

        Standard14AfmMap[fontName] = Parser.Parse(bytes, true);

        if (type.HasValue)
        {
            Standard14AfmTypeMap[type.Value] = Standard14AfmMap[fontName];
        }
    }
    catch (Exception ex)
    {
        throw new InvalidOperationException($"Could not load {fontName} from the AFM files.", ex);
    }
}
public void ReadsStreamWithoutBreakBeforeEndstream()
{
    // The compressed stream data runs directly into the "endstream" keyword with no
    // end-of-line marker before it; the scanner must still terminate the stream and
    // continue tokenizing the objects that follow.
    const string s = @" 1 0 obj 12 endobj 7 0 obj << /Length 288 /Filter /FlateDecode >> stream xœ]‘ËjÃ0E÷ÿÃ,ÓEð#NÒ€1¤N^ôA~€-]A-YYøï+Ï4¡t#qfîFWQY*Dïv5:è”–§ñjB‹½Òa¤ •p7¤K ƒÈûëyr8Tº!Ïà úð‚ÉÙVG9¶ø@Å7+Ñ*ÝÃ곬¹T_ùƵƒ8Š$vË̗Ƽ6BDöu%½B¹yí$—Ù ¤\Hx71JœL#Ð6ºÇ0È㸀ü|. µüßõÏ""WÛ‰¯Æ.êÄ«ã8;¤iL°!Ø %É`K°ßì¸ÃöÜáÜ) [‚#CFðÄ°#(yƒg^ÿ¶æò ÿž“¸Zë#¢?¢h–P”Æû?šÑï÷ø¯‰Šendstream endobj 9 0 obj 16 endobj";

    var inputBytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(s));

    var scanner = new PdfTokenScanner(inputBytes, new TestObjectLocationProvider(), new TestFilterProvider());

    // The second object read should be the stream object "7 0 obj".
    var token = ReadToEnd(scanner)[1];

    Assert.Equal(7, token.Number.ObjectNumber);
}
// Builds a token scanner over the Latin-1 bytes of the supplied text, also returning
// the underlying bytes so callers can inspect the reader state.
internal static (CoreTokenScanner scanner, IInputBytes bytes) Scanner(string s)
{
    var bytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(s));

    return (new CoreTokenScanner(bytes), bytes);
}
public void BruteForceSearcherFileOffsetsCorrectOpenOffice()
{
    var bytes = new ByteArrayInputBytes(File.ReadAllBytes(IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf")));

    var locations = BruteForceSearcher.GetObjectLocations(bytes);

    Assert.Equal(13, locations.Count);

    // Expected byte offset for each object number 1..13 (all generation 0).
    var expectedOffsets = new long[]
    {
        17, 249, 14291, 275, 382, 13283, 13309, 13556, 13926, 14183, 14224, 14428, 14488
    };

    for (var objectNumber = 1; objectNumber <= expectedOffsets.Length; objectNumber++)
    {
        Assert.Equal(expectedOffsets[objectNumber - 1], locations[new IndirectReference(objectNumber, 0)]);
    }

    var text = GetStringAt(bytes, locations[new IndirectReference(12, 0)]);
    Assert.StartsWith("12 0 obj", text);
}
public void DoesNotStartWithOpenBracket_ReturnsFalse(char firstByte)
{
    var single = (byte)firstByte;
    var input = new ByteArrayInputBytes(new[] { single });

    var success = tokenizer.TryTokenize(single, input, out var token);

    // Any initial byte other than the opening bracket must be rejected without a token.
    Assert.False(success);
    Assert.Null(token);
}
public void ArrayAndStreamBehaveTheSame()
{
    // Both IInputBytes implementations are driven through the same sequence of
    // operations and must agree at every step.
    var raw = OtherEncodings.StringAsLatin1Bytes(TestData);

    var arrayInput = new ByteArrayInputBytes(raw);

    using (var memoryStream = new MemoryStream(raw))
    {
        var streamInput = new StreamInputBytes(memoryStream);

        Assert.Equal(raw.Length, arrayInput.Length);
        Assert.Equal(raw.Length, streamInput.Length);

        Assert.Equal(0, arrayInput.CurrentOffset);
        Assert.Equal(0, streamInput.CurrentOffset);

        arrayInput.Seek(5);
        streamInput.Seek(5);

        Assert.Equal(arrayInput.CurrentOffset, streamInput.CurrentOffset);
        Assert.Equal((byte)'5', arrayInput.CurrentByte);
        Assert.Equal(arrayInput.CurrentByte, streamInput.CurrentByte);
        Assert.Equal(arrayInput.Peek(), streamInput.Peek());

        // Seeking back to offset 0 leaves the reader before the first byte.
        arrayInput.Seek(0);
        streamInput.Seek(0);

        Assert.Equal(0, arrayInput.CurrentByte);
        Assert.Equal(arrayInput.CurrentByte, streamInput.CurrentByte);

        arrayInput.Seek(7);
        streamInput.Seek(7);

        var fromArray = string.Empty;
        var fromStream = string.Empty;

        while (arrayInput.MoveNext())
        {
            fromArray += (char)arrayInput.CurrentByte;
        }

        while (streamInput.MoveNext())
        {
            fromStream += (char)streamInput.CurrentByte;
        }

        Assert.Equal("89", fromStream);
        Assert.Equal(fromArray, fromStream);

        Assert.True(streamInput.IsAtEnd());
        Assert.True(arrayInput.IsAtEnd());
    }
}
public void BruteForceSearcherCorrectlyFindsAllObjectsWhenOffset()
{
    var bytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(TestData));

    // The search result must not depend on where the reader currently sits.
    bytes.Seek(593);

    var found = BruteForceSearcher.GetObjectLocations(bytes);

    Assert.Equal(TestDataOffsets, found.Values);
}
public void CanParseHelveticaAfmFile()
{
    var resource = GetResourceBytes("UglyToad.PdfPig.Fonts.Resources.AdobeFontMetrics.Helvetica.afm");

    var metrics = AdobeFontMetricsParser.Parse(new ByteArrayInputBytes(resource), false);

    Assert.NotNull(metrics);
}
public void CanParseAllPredefinedCMaps(string resourceName)
{
    Debug.WriteLine("Parsing: " + resourceName);

    var bytes = new ByteArrayInputBytes(ReadResourceBytes(resourceName));

    var result = cMapParser.Parse(bytes, false);

    Assert.NotNull(result);
}
public void SearcherFindsCorrectObjects()
{
    var bytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(TestData));

    var found = BruteForceSearcher.GetObjectLocations(bytes);

    // All four objects in the fixture are located at the known offsets.
    Assert.Equal(4, found.Count);
    Assert.Equal(TestDataOffsets, found.Values);
}
public void Issue334()
{
    // Regression test: header followed by a binary-marker comment line and an object.
    var raw = OtherEncodings.StringAsLatin1Bytes("%PDF-1.7\r\n%âãÏÓ\r\n1 0 obj\r\n<</Lang(en-US)>>\r\nendobj");

    var bytes = new ByteArrayInputBytes(raw);
    var scanner = new CoreTokenScanner(bytes, ScannerScope.None);

    var header = FileHeaderParser.Parse(scanner, bytes, false, log);

    Assert.Equal(1.7m, header.Version);
}
/// <summary>
/// Merge the set of PDF documents.
/// </summary>
/// <param name="files">The raw bytes of each PDF file to merge, in order.</param>
/// <param name="pagesBundle">Optional per-file page selections; entry i lists the pages
/// to take from files[i]. Files without a corresponding entry contribute all pages.</param>
/// <returns>The bytes of the merged PDF document.</returns>
public static byte[] Merge(IReadOnlyList<byte[]> files, IReadOnlyList<IReadOnlyList<int>> pagesBundle = null)
{
    if (files == null)
    {
        throw new ArgumentNullException(nameof(files));
    }

    const bool isLenientParsing = false;

    var documentBuilder = new DocumentMerger();

    foreach (var fileIndex in Enumerable.Range(0, files.Count))
    {
        var file = files[fileIndex];

        // Page selection for this file, when one was supplied.
        IReadOnlyList<int> pages = null;
        if (pagesBundle != null && fileIndex < pagesBundle.Count)
        {
            pages = pagesBundle[fileIndex];
        }

        var inputBytes = new ByteArrayInputBytes(file);
        var coreScanner = new CoreTokenScanner(inputBytes);

        var version = FileHeaderParser.Parse(coreScanner, isLenientParsing, Log);

        var crossReferenceParser = new CrossReferenceParser(Log, new XrefOffsetValidator(Log), new Parser.Parts.CrossReference.CrossReferenceStreamParser(FilterProvider));

        CrossReferenceTable crossReference = null;

        // The provider captures crossReference by reference on purpose so that it sees
        // the table once it is parsed a few lines below.
        // ReSharper disable once AccessToModifiedClosure
        var locationProvider = new ObjectLocationProvider(() => crossReference, inputBytes);

        var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, FilterProvider, NoOpEncryptionHandler.Instance);

        var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, coreScanner, isLenientParsing);
        crossReference = crossReferenceParser.Parse(inputBytes, isLenientParsing, crossReferenceOffset, version.OffsetInFile, pdfScanner, coreScanner);

        var catalogDictionaryToken = ParseCatalog(crossReference, pdfScanner, out var encryptionDictionary);
        if (encryptionDictionary != null)
        {
            // No password input exists on this API, so encrypted input cannot be merged.
            throw new PdfDocumentEncryptedException("Unable to merge document with password");
        }

        var documentCatalog = CatalogFactory.Create(crossReference.Trailer.Root, catalogDictionaryToken, pdfScanner, isLenientParsing);

        documentBuilder.AppendDocument(documentCatalog, version.Version, pdfScanner, pages);
    }

    return (documentBuilder.Build());
}
public void HandlesUtf16BigEndianStrings()
{
    // NOTE(review): the leading bytes 0xFF 0xFE are the UTF-16 *little*-endian byte
    // order mark; the method name says big-endian — confirm which is intended.
    // 0x4D 0x00, 0x69 0x00, 0x63 0x00 decode to 'M', 'i', 'c'.
    var input = new ByteArrayInputBytes(new byte[]
    {
        0xFF, 0xFE, 0x4D, 0x00, 0x69, 0x00, 0x63, 0x00, 0x29
    });

    var success = tokenizer.TryTokenize(0x28, input, out var token);

    Assert.True(success);
    Assert.Equal(@"Mic", AssertStringToken(token).Data);
}
// Parses a TrueType font from the raw bytes and verifies every table's recorded
// checksum, optionally including the 'head' table's own checksum and the whole-font
// checksum adjustment.
private void Run(byte[] bytes, bool checkHeaderChecksum, bool checkWholeFileChecksum)
{
    var inputBytes = new ByteArrayInputBytes(bytes);
    var font = TrueTypeFontParser.Parse(new TrueTypeDataBytes(inputBytes));

    // Re-wrap the bytes so checksum calculation starts from a fresh reader.
    inputBytes = new ByteArrayInputBytes(bytes);

    foreach (var header in font.TableHeaders)
    {
        // Acts as the whole table checksum
        if (header.Key == "head")
        {
            // The 'head' table embeds the checksum adjustment for the whole font, so
            // its own checksum is only verified when explicitly requested.
            if (checkHeaderChecksum)
            {
                var headerChecksum = TrueTypeChecksumCalculator.Calculate(inputBytes, header.Value);
                Assert.Equal(header.Value.CheckSum, headerChecksum);
            }

            continue;
        }

        // Verify the checksum computed from the raw sliced bytes...
        var input = bytes.Skip((int)header.Value.Offset).Take((int)header.Value.Length);
        var checksum = TrueTypeChecksumCalculator.Calculate(input);
        Assert.Equal(header.Value.CheckSum, checksum);

        // ...and via the table-aware overload; both must match the recorded value.
        var checksumByTable = TrueTypeChecksumCalculator.Calculate(inputBytes, header.Value);
        Assert.Equal(header.Value.CheckSum, checksumByTable);
    }

    if (checkWholeFileChecksum)
    {
        var headerActual = font.TableHeaders["head"];

        var wholeFontChecksum = TrueTypeChecksumCalculator.CalculateWholeFontChecksum(inputBytes, headerActual);

        // 0xB1B0AFBA is the TrueType checkSumAdjustment magic constant: the recorded
        // adjustment should equal the magic minus the computed whole-font checksum.
        var adjustment = 0xB1B0AFBA - wholeFontChecksum;

        var adjustmentRecorded = font.TableRegister.HeaderTable.CheckSumAdjustment;

        Assert.Equal(adjustmentRecorded, adjustment);

        var expectedWholeFontChecksum = 0xB1B0AFBA - adjustmentRecorded;

        Assert.Equal(expectedWholeFontChecksum, wholeFontChecksum);
    }
}
public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null)
{
    // Lenient parsing is the default unless the caller explicitly opts out.
    var isLenientParsing = options?.UseLenientParsing ?? true;

    var container = Bootstrapper.GenerateContainer(options?.Logger);

    var reader = new RandomAccessBuffer(fileBytes);
    var inputBytes = new ByteArrayInputBytes(fileBytes);
    var scanner = new CoreTokenScanner(inputBytes);

    return OpenDocument(reader, inputBytes, scanner, container, isLenientParsing);
}
public void ReadUnsignedInt()
{
    // Bytes 0xDC 0x2B 0xFA 0x06 interpreted most-significant-first give 3693869574.
    var data = new TrueTypeDataBytes(new ByteArrayInputBytes(new byte[] { 220, 43, 250, 6 }));

    var value = data.ReadUnsignedInt();

    Assert.Equal(3693869574L, value);
}
public void SearcherFindsCorrectObjects()
{
    var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(TestData));

    var searcher = new BruteForceSearcher(input);

    var locations = searcher.GetObjectLocations();

    Assert.Equal(4, locations.Count);

    // xUnit's Assert.Equal signature is (expected, actual); the arguments were reversed
    // here, which produced misleading failure messages. Expected now comes first,
    // matching the sibling tests in this file.
    var expected = new long[]
    {
        TestData.IndexOf("2 17 obj", StringComparison.OrdinalIgnoreCase),
        TestData.IndexOf("3 0 obj", StringComparison.OrdinalIgnoreCase),
        TestData.IndexOf("4 0 obj", StringComparison.OrdinalIgnoreCase),
        TestData.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase)
    };

    Assert.Equal(expected, locations.Values);
}
// Wraps the UTF-8 bytes of the given text in input bytes, optionally advancing past
// and capturing the first byte.
public static Result Convert(string s, bool readFirst = true)
{
    var bytes = new ByteArrayInputBytes(Encoding.UTF8.GetBytes(s));

    byte first = 0;

    if (readFirst)
    {
        bytes.MoveNext();
        first = bytes.CurrentByte;
    }

    return new Result
    {
        First = first,
        Bytes = bytes
    };
}
public void CanParseIdentityHorizontalCMap()
{
    var bytes = new ByteArrayInputBytes(ReadResourceBytes("UglyToad.Pdf.Resources.CMap.Identity-H"));

    var cmap = cMapParser.Parse(bytes, false);

    // Identity-H declares one two-byte codespace range spanning 0x0000-0xFFFF.
    Assert.Equal(1, cmap.CodespaceRanges.Count);

    var range = cmap.CodespaceRanges[0];

    Assert.Equal(0, range.StartInt);
    Assert.Equal(65535, range.EndInt);
    Assert.Equal(2, range.CodeLength);

    Assert.Equal(256, cmap.CidRanges.Count);
    Assert.Equal("10.003", cmap.Version);
}
public void BruteForceSearcherBytesFileOffsetsCorrect()
{
    var bytes = new ByteArrayInputBytes(File.ReadAllBytes(IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf")));

    var locations = BruteForceSearcher.GetObjectLocations(bytes);

    Assert.Equal(13, locations.Count);

    // Known offsets for objects 1-9 (all generation 0).
    var expectedByObjectNumber = new (int ObjectNumber, long Offset)[]
    {
        (1, 6183), (2, 244), (3, 15), (4, 222), (5, 5766),
        (6, 353), (7, 581), (8, 5068), (9, 5091)
    };

    foreach (var (objectNumber, offset) in expectedByObjectNumber)
    {
        Assert.Equal(offset, locations[new IndirectReference(objectNumber, 0)]);
    }

    var text = GetStringAt(bytes, locations[new IndirectReference(3, 0)]);
    Assert.StartsWith("3 0 obj", text);
}
public void ColorspaceParserError()
{
    var parser = new CodespaceRangeParser();

    // The declared range count (1) exceeds the actual entries (0); parsing should
    // yield no codespace ranges rather than throw.
    var bytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes("1 begincodespacerange\nendcodespacerange"));
    var scanner = new CoreTokenScanner(bytes);

    Assert.True(scanner.MoveNext());
    Assert.True(scanner.CurrentToken is NumericToken);
    var countToken = (NumericToken)scanner.CurrentToken;

    Assert.True(scanner.MoveNext());
    Assert.True(scanner.CurrentToken is OperatorToken);
    var keyword = (OperatorToken)scanner.CurrentToken;
    Assert.Equal("begincodespacerange", keyword.Data);

    var builder = new CharacterMapBuilder();

    parser.Parse(countToken, scanner, builder);

    Assert.Empty(builder.CodespaceRanges);
}
/// <summary>
/// Parses an embedded Adobe Type 1 font file.
/// </summary>
/// <param name="inputBytes">The bytes of the font program.</param>
/// <param name="length1">The length in bytes of the clear text portion of the font program.</param>
/// <param name="length2">The length in bytes of the encrypted portion of the font program.</param>
/// <returns>The parsed type 1 font.</returns>
public Type1FontProgram Parse(IInputBytes inputBytes, int length1, int length2)
{
    // Sometimes the entire PFB file including the header bytes can be included which prevents parsing in the normal way.
    var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator;

    IReadOnlyList<byte> eexecPortion = new byte[0];

    if (isEntirePfbFile)
    {
        // For a full PFB the binary (eexec) segment comes from the PFB record headers
        // and the clear-text ASCII segment replaces the input for token scanning.
        var (ascii, binary) = ReadPfbHeader(inputBytes);

        eexecPortion = binary;
        inputBytes = new ByteArrayInputBytes(ascii);
    }

    var scanner = new CoreTokenScanner(inputBytes);

    // A Type 1 program must open with a "%!..." comment line.
    if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!"))
    {
        throw new InvalidFontFormatException("The Type1 program did not start with '%!'.");
    }

    // The first comment usually has the form "!FontType1-<version> <name> <version>";
    // the middle part is taken as the font name when that shape matches.
    string name;
    var parts = comment.Data.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

    if (parts.Length == 3)
    {
        name = parts[1];
    }
    else
    {
        name = "Unknown";
    }

    // Consume any further leading comment lines.
    var comments = new List<string>();
    while (scanner.MoveNext() && scanner.CurrentToken is CommentToken commentToken)
    {
        comments.Add(commentToken.Data);
    }

    var dictionaries = new List<DictionaryToken>();

    // Override arrays and names since type 1 handles these differently.
    var arrayTokenizer = new Type1ArrayTokenizer();
    var nameTokenizer = new Type1NameTokenizer();
    scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
    scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);

    try
    {
        var tempEexecPortion = new List<byte>();
        var tokenSet = new PreviousTokenSet();
        tokenSet.Add(scanner.CurrentToken);

        while (scanner.MoveNext())
        {
            if (scanner.CurrentToken is OperatorToken operatorToken)
            {
                if (Equals(scanner.CurrentToken, OperatorToken.Eexec))
                {
                    // After "eexec", copy raw bytes into tempEexecPortion until the
                    // ClearToMark sentinel is seen. 'offset' tracks how much of the
                    // sentinel has been matched so far; on a failed partial match the
                    // already-matched sentinel prefix is flushed into the output.
                    int offset = 0;

                    while (inputBytes.MoveNext())
                    {
                        if (inputBytes.CurrentByte == (byte)ClearToMark[offset])
                        {
                            offset++;
                        }
                        else
                        {
                            if (offset > 0)
                            {
                                // Partial sentinel match failed; the consumed prefix
                                // belongs to the data after all.
                                for (int i = 0; i < offset; i++)
                                {
                                    tempEexecPortion.Add((byte)ClearToMark[i]);
                                }
                            }

                            offset = 0;
                        }

                        if (offset == ClearToMark.Length)
                        {
                            // Full sentinel matched: end of the eexec portion.
                            break;
                        }

                        if (offset > 0)
                        {
                            // Mid-sentinel: withhold the byte until the match resolves.
                            continue;
                        }

                        tempEexecPortion.Add(inputBytes.CurrentByte);
                    }
                }
                else
                {
                    HandleOperator(operatorToken, scanner, tokenSet, dictionaries);
                }
            }

            tokenSet.Add(scanner.CurrentToken);
        }

        // For a full PFB file the eexec bytes already came from the PFB records.
        if (!isEntirePfbFile)
        {
            eexecPortion = tempEexecPortion;
        }
    }
    finally
    {
        scanner.DeregisterCustomTokenizer(arrayTokenizer);
        scanner.DeregisterCustomTokenizer(nameTokenizer);
    }

    var encoding = GetEncoding(dictionaries);
    var matrix = GetFontMatrix(dictionaries);
    var boundingBox = GetBoundingBox(dictionaries);

    var (privateDictionary, charStrings) = encryptedPortionParser.Parse(eexecPortion, false);

    return (new Type1FontProgram(name, encoding, matrix, boundingBox ?? new PdfRectangle(), privateDictionary, charStrings));
}
/// <inheritdoc />
public void Run(IOperationContext operationContext)
{
    // Prefer the text form when present; otherwise fall back to the raw bytes.
    var data = Text != null
        ? OtherEncodings.StringAsLatin1Bytes(Text)
        : Bytes;

    operationContext.ShowText(new ByteArrayInputBytes(data));
}
public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null)
{
    // Wrap the raw bytes and delegate to the IInputBytes overload.
    return Open(new ByteArrayInputBytes(fileBytes), options);
}