/// <summary>
/// Builds a <see cref="Type0Font"/> from its font dictionary.
/// </summary>
/// <remarks>
/// Reads the encoding CMap, resolves and parses the first descendant font dictionary,
/// obtains the UCS2 CMap and, when a ToUnicode entry is present, loads the ToUnicode CMap
/// either from a stream or by name.
/// </remarks>
/// <param name="dictionary">The font dictionary to read.</param>
/// <param name="isLenientParsing">Passed through to the descendant font parser and the CMap parser.</param>
/// <returns>The constructed <see cref="Type0Font"/>.</returns>
/// <exception cref="InvalidFontFormatException">
/// No descendant font was declared, or the declared descendant is neither an indirect reference nor a dictionary.
/// </exception>
/// <exception cref="PdfDocumentFormatException">The ToUnicode entry is neither a stream nor a name.</exception>
public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
{
    var baseFont = dictionary.GetNameOrDefault(NameToken.BaseFont);

    var cMap = ReadEncoding(dictionary, out var isCMapPredefined);

    if (!TryGetFirstDescendant(dictionary, out var descendantObject))
    {
        throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
    }

    DictionaryToken descendantFontDictionary;
    if (descendantObject is IndirectReferenceToken reference)
    {
        descendantFontDictionary = DirectObjectFinder.Get<DictionaryToken>(reference, scanner);
    }
    else if (descendantObject is DictionaryToken directDictionary)
    {
        descendantFontDictionary = directDictionary;
    }
    else
    {
        // A blind cast here would surface malformed input as an InvalidCastException;
        // report it as a font format problem instead.
        throw new InvalidFontFormatException($"The descendant font declared for this Type 0 font was not a dictionary. Got: {descendantObject}. " + dictionary);
    }

    ICidFont cidFont = ParseDescendant(descendantFontDictionary, isLenientParsing);

    var (ucs2CMap, isChineseJapaneseOrKorean) = GetUcs2CMap(dictionary, isCMapPredefined, cidFont);

    CMap toUnicodeCMap = null;

    // Single lookup instead of ContainsKey followed by the indexer.
    if (dictionary.TryGet(NameToken.ToUnicode, out var toUnicodeValue))
    {
        if (DirectObjectFinder.TryGet<StreamToken>(toUnicodeValue, scanner, out var toUnicodeStream))
        {
            var decodedUnicodeCMap = toUnicodeStream?.Decode(filterProvider);

            if (decodedUnicodeCMap != null)
            {
                toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
            }
        }
        else if (DirectObjectFinder.TryGet<NameToken>(toUnicodeValue, scanner, out var toUnicodeName))
        {
            // The entry is a name rather than an embedded stream: fetch the CMap by that name.
            toUnicodeCMap = CMapCache.Get(toUnicodeName.Data);
        }
        else
        {
            throw new PdfDocumentFormatException($"Invalid type of toUnicode CMap encountered. Got: {toUnicodeValue}.");
        }
    }

    return new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap, ucs2CMap, isChineseJapaneseOrKorean);
}
/// <summary>
/// Builds a Type 1 font from its font dictionary.
/// </summary>
/// <remarks>
/// When the dictionary lacks either the FirstChar or the Widths entry, the font is built purely from
/// the metrics looked up for its BaseFont name. Otherwise the widths, descriptor, font program,
/// ToUnicode CMap and encoding are read from the dictionary.
/// </remarks>
/// <param name="dictionary">The font dictionary to read.</param>
/// <param name="isLenientParsing">Passed through to the helper readers and parsers.</param>
/// <returns>A <see cref="Type1Standard14Font"/> or a <see cref="Type1FontSimple"/>.</returns>
/// <exception cref="InvalidFontFormatException">
/// FirstChar or Widths is missing and BaseFont is absent, is not a name, or yields no metrics.
/// </exception>
public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
{
    var usingStandard14Only = !dictionary.ContainsKey(NameToken.FirstChar) || !dictionary.ContainsKey(NameToken.Widths);

    if (usingStandard14Only)
    {
        // TODO: some fonts combine standard 14 font with other metrics.
        if (!dictionary.TryGet(NameToken.BaseFont, out var baseFontToken) || !(baseFontToken is NameToken standard14Name))
        {
            throw new InvalidFontFormatException($"The Type 1 font did not contain a first character entry but also did not reference a standard 14 font: {dictionary}");
        }

        var metrics = Standard14.GetAdobeFontMetrics(standard14Name.Data);

        // Fail here with a descriptive error rather than handing null metrics to the font.
        // NOTE(review): assumes a null result means the name is not a known standard 14 font - confirm.
        if (metrics == null)
        {
            throw new InvalidFontFormatException($"The Type 1 font did not contain a first character entry but also did not reference a standard 14 font: {dictionary}");
        }

        return new Type1Standard14Font(metrics);
    }

    var firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);

    var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);

    var widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary, isLenientParsing);

    var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, fontDescriptorFactory, dictionary, isLenientParsing);

    var font = ParseType1Font(descriptor, isLenientParsing);

    var name = FontDictionaryAccessHelper.GetName(pdfScanner, dictionary, descriptor, isLenientParsing);

    CMap toUnicodeCMap = null;
    if (dictionary.TryGet(NameToken.ToUnicode, out var toUnicodeObj))
    {
        var toUnicode = DirectObjectFinder.Get<StreamToken>(toUnicodeObj, pdfScanner);

        var decodedUnicodeCMap = toUnicode?.Decode(filterProvider);

        if (decodedUnicodeCMap != null)
        {
            toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
        }
    }

    Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor);

    // Fall back to the encoding carried by the parsed font program when the dictionary gave none.
    // Both the font and its encoding may be missing, so guard each step.
    if (encoding == null && font?.Encoding?.Count > 0)
    {
        encoding = new BuiltInEncoding(font.Encoding);
    }

    return new Type1FontSimple(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap);
}
public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation, long offsetCorrection, IPdfTokenScanner pdfScanner, ISeekableTokenScanner tokenScanner) { long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceLocation, tokenScanner, bytes, isLenientParsing); if (fixedOffset > -1) { crossReferenceLocation = fixedOffset; log.Debug($"Found the first cross reference table or stream at {fixedOffset}."); } var table = new CrossReferenceTableBuilder(); var prevSet = new HashSet <long>(); long previousCrossReferenceLocation = crossReferenceLocation; var missedAttempts = 0; // Parse all cross reference tables and streams. while (previousCrossReferenceLocation > 0 && missedAttempts < 100) { log.Debug($"Reading cross reference table or stream at {previousCrossReferenceLocation}."); if (previousCrossReferenceLocation >= bytes.Length) { break; } // seek to xref table tokenScanner.Seek(previousCrossReferenceLocation); tokenScanner.MoveNext(); if (tokenScanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref") { missedAttempts = 0; log.Debug("Element was cross reference table."); CrossReferenceTablePart tablePart = CrossReferenceTableParser.Parse(tokenScanner, previousCrossReferenceLocation, isLenientParsing); var nextOffset = tablePart.GetPreviousOffset(); if (nextOffset >= 0) { nextOffset += offsetCorrection; } previousCrossReferenceLocation = nextOffset; DictionaryToken tableDictionary = tablePart.Dictionary; CrossReferenceTablePart streamPart = null; // check for a XRef stream, it may contain some object ids of compressed objects if (tableDictionary.ContainsKey(NameToken.XrefStm)) { log.Debug("Cross reference table contained referenced to stream. 
Reading the stream."); int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int; // check the xref stream reference fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing); if (fixedOffset > -1 && fixedOffset != streamOffset) { log.Warn($"/XRefStm offset {streamOffset} is incorrect, corrected to {fixedOffset}"); streamOffset = (int)fixedOffset; // Update the cross reference table to be a stream instead. tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset)); tablePart = new CrossReferenceTablePart(tablePart.ObjectOffsets, streamOffset, tablePart.Previous, tableDictionary, tablePart.Type); } // Read the stream from the table. if (streamOffset > 0) { try { TryParseCrossReferenceStream(streamOffset, pdfScanner, out streamPart); } catch (InvalidOperationException ex) { if (isLenientParsing) { log.Error("Failed to parse /XRefStm at offset " + streamOffset, ex); } else { throw; } } } else { if (isLenientParsing) { log.Error("Skipped XRef stream due to a corrupt offset:" + streamOffset); } else { throw new PdfDocumentFormatException("Skipped XRef stream due to a corrupt offset:" + streamOffset); } } } table.Add(tablePart); if (streamPart != null) { table.Add(streamPart); } }
/// <summary>
/// Merges the collected cross reference parts into a single <see cref="CrossReferenceTable"/>.
/// </summary>
/// <remarks>
/// Starting from the part located at <paramref name="firstCrossReferenceOffset"/>, the chain of previous
/// offsets is followed and the parts are merged oldest first, so entries from later parts overwrite
/// earlier ones. If no part exists at the starting offset, every part is merged in ascending offset order.
/// </remarks>
/// <param name="firstCrossReferenceOffset">Offset of the part the chain should start from.</param>
/// <param name="log">Receives warnings when an expected part cannot be found.</param>
/// <returns>The merged table with its trailer dictionary and the offsets of all known parts.</returns>
public CrossReferenceTable Build(long firstCrossReferenceOffset, ILog log)
{
    var tableType = CrossReferenceType.Table;
    var mergedTrailer = new DictionaryToken(new Dictionary<NameToken, IToken>());
    var mergedOffsets = new Dictionary<IndirectReference, long>();
    var orderedPartOffsets = new List<long>();

    var cursor = parts.FirstOrDefault(x => x.Offset == firstCrossReferenceOffset);

    if (cursor != null)
    {
        // The starting part dictates the type of the resulting table.
        tableType = cursor.Type;

        // Walk the chain of previous offsets, newest part first.
        orderedPartOffsets.Add(firstCrossReferenceOffset);

        while (cursor.Dictionary != null)
        {
            long previousOffset = cursor.GetPreviousOffset();

            if (previousOffset == -1)
            {
                break;
            }

            cursor = parts.FirstOrDefault(x => x.Offset == previousOffset);

            if (cursor == null)
            {
                log.Warn("Did not found XRef object pointed to by 'Prev' key at position " + previousOffset);
                break;
            }

            orderedPartOffsets.Add(previousOffset);

            // A chain can never be longer than the number of parts; stop to avoid looping forever.
            if (orderedPartOffsets.Count >= parts.Count)
            {
                break;
            }
        }

        // Oldest first, so that newer parts win when merging below.
        orderedPartOffsets.Reverse();
    }
    else
    {
        // Nothing lives at the requested start offset.
        log.Warn("Did not found XRef object at specified startxref position " + firstCrossReferenceOffset);

        // Fall back to every part in ascending byte position (later entries overwrite earlier ones).
        orderedPartOffsets.AddRange(parts.Select(x => x.Offset));
        orderedPartOffsets.Sort();
    }

    // Fold trailer entries and object offsets of each selected part into the result.
    foreach (var partOffset in orderedPartOffsets)
    {
        var part = parts.First(x => x.Offset == partOffset);

        if (part.Dictionary != null)
        {
            foreach (var pair in part.Dictionary.Data)
            {
                // Once a Size entry has been recorded, Size entries from subsequent trailers are
                // ignored (linearized files: the first Size already covers all of the objects).
                var isSizeEntry = pair.Key.Equals("Size", StringComparison.OrdinalIgnoreCase);

                if (isSizeEntry && mergedTrailer.ContainsKey(NameToken.Size))
                {
                    continue;
                }

                mergedTrailer = mergedTrailer.With(pair.Key, pair.Value);
            }
        }

        foreach (var pair in part.ObjectOffsets)
        {
            mergedOffsets[pair.Key] = pair.Value;
        }
    }

    var trailer = new TrailerDictionary(mergedTrailer);

    // Record the offset of every known part together with its previous offset (null when negative).
    var partLocations = parts
        .Select(x =>
        {
            var previous = x.GetPreviousOffset();
            return new CrossReferenceTable.CrossReferenceOffset(x.Offset, previous >= 0 ? previous : default(long?));
        })
        .ToList();

    return new CrossReferenceTable(tableType, mergedOffsets, trailer, partLocations);
}
/// <summary>
/// Builds a Type 1 font from its font dictionary.
/// </summary>
/// <remarks>
/// When FirstChar or Widths is missing, or when no FontDescriptor entry is present, the font is built from
/// the metrics looked up for its BaseFont name, provided such metrics are found. Otherwise the descriptor,
/// font program, ToUnicode CMap and encoding are read from the dictionary.
/// </remarks>
/// <param name="dictionary">The font dictionary to read.</param>
/// <returns>A <see cref="Type1Standard14Font"/> or a <see cref="Type1FontSimple"/>.</returns>
/// <exception cref="InvalidFontFormatException">
/// FirstChar or Widths is missing and BaseFont is absent or is not a name.
/// </exception>
public IFont Generate(DictionaryToken dictionary)
{
    var usingStandard14Only = !dictionary.ContainsKey(NameToken.FirstChar) || !dictionary.ContainsKey(NameToken.Widths);

    if (usingStandard14Only)
    {
        // TODO: some fonts combine standard 14 font with other metrics.
        if (!dictionary.TryGet(NameToken.BaseFont, out var baseFontToken) || !(baseFontToken is NameToken standard14Name))
        {
            throw new InvalidFontFormatException($"The Type 1 font did not contain a first character entry but also did not reference a standard 14 font: {dictionary}");
        }

        var metrics = Standard14.GetAdobeFontMetrics(standard14Name.Data);

        if (metrics != null)
        {
            var overrideEncoding = encodingReader.Read(dictionary);

            return new Type1Standard14Font(metrics, overrideEncoding);
        }
    }

    int firstCharacter, lastCharacter;
    double[] widths;
    if (!usingStandard14Only)
    {
        firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);

        lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);

        widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary);
    }
    else
    {
        // No character range or widths were declared in the dictionary.
        firstCharacter = 0;
        lastCharacter = 0;
        widths = EmptyArray<double>.Instance;
    }

    if (!dictionary.TryGet(NameToken.FontDescriptor, out var _))
    {
        if (dictionary.TryGet(NameToken.BaseFont, out var baseFontToken)
            && DirectObjectFinder.TryGet(baseFontToken, pdfScanner, out NameToken baseFontName))
        {
            var metrics = Standard14.GetAdobeFontMetrics(baseFontName.Data);

            // Same guard as the branch above: never construct the font from missing metrics.
            // When nothing is found, continue to the descriptor-based path below.
            if (metrics != null)
            {
                var overrideEncoding = encodingReader.Read(dictionary);

                return new Type1Standard14Font(metrics, overrideEncoding);
            }
        }
    }

    var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, dictionary);

    var font = ParseFontProgram(descriptor);

    var name = FontDictionaryAccessHelper.GetName(pdfScanner, dictionary, descriptor);

    CMap toUnicodeCMap = null;
    if (dictionary.TryGet(NameToken.ToUnicode, out var toUnicodeObj))
    {
        var toUnicode = DirectObjectFinder.Get<StreamToken>(toUnicodeObj, pdfScanner);

        var decodedUnicodeCMap = toUnicode?.Decode(filterProvider, pdfScanner);

        if (decodedUnicodeCMap != null)
        {
            toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap));
        }
    }

    // Encoding supplied by the embedded font program, if one was parsed.
    var fromFont = default(Encoding);
    if (font != null)
    {
        if (font.TryGetFirst(out var t1Font))
        {
            fromFont = t1Font.Encoding != null ? new BuiltInEncoding(t1Font.Encoding) : default(Encoding);
        }
        else if (font.TryGetSecond(out var cffFont))
        {
            fromFont = cffFont.FirstFont?.Encoding;
        }
    }

    var encoding = encodingReader.Read(dictionary, descriptor, fromFont);

    // NOTE(review): unlike the fromFont computation above, this fallback does not check the
    // font program's Encoding for null before wrapping it - confirm BuiltInEncoding tolerates null.
    if (encoding == null && font != null && font.TryGetFirst(out var t1FontReplacement))
    {
        encoding = new BuiltInEncoding(t1FontReplacement.Encoding);
    }

    return new Type1FontSimple(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap, font);
}