예제 #1
0
        public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
        {
            var baseFont = dictionary.GetNameOrDefault(NameToken.BaseFont);

            var cMap = ReadEncoding(dictionary, out var isCMapPredefined);

            ICidFont cidFont;

            if (TryGetFirstDescendant(dictionary, out var descendantObject))
            {
                DictionaryToken descendantFontDictionary;

                if (descendantObject is IndirectReferenceToken obj)
                {
                    var parsed = DirectObjectFinder.Get<DictionaryToken>(obj, scanner);

                    descendantFontDictionary = parsed;
                }
                else
                {
                    descendantFontDictionary = (DictionaryToken) descendantObject;
                }

                cidFont = ParseDescendant(descendantFontDictionary, isLenientParsing);
            }
            else
            {
                throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
            }

            var (ucs2CMap, isChineseJapaneseOrKorean) = GetUcs2CMap(dictionary, isCMapPredefined, cidFont);

            CMap toUnicodeCMap = null;
            if (dictionary.ContainsKey(NameToken.ToUnicode))
            {
                var toUnicodeValue = dictionary.Data[NameToken.ToUnicode];

                if (DirectObjectFinder.TryGet<StreamToken>(toUnicodeValue, scanner, out var toUnicodeStream))
                {
                    var decodedUnicodeCMap = toUnicodeStream?.Decode(filterProvider);

                    if (decodedUnicodeCMap != null)
                    {
                        toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
                    }
                }
                else if (DirectObjectFinder.TryGet<NameToken>(toUnicodeValue, scanner, out var toUnicodeName))
                {
                    toUnicodeCMap = CMapCache.Get(toUnicodeName.Data);
                }
                else
                {
                    throw new PdfDocumentFormatException($"Invalid type of toUnicode CMap encountered. Got: {toUnicodeValue}.");
                }
            }

            var font = new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap, ucs2CMap, isChineseJapaneseOrKorean);

            return font;
        }
예제 #2
0
        public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
        {
            var usingStandard14Only = !dictionary.ContainsKey(NameToken.FirstChar) || !dictionary.ContainsKey(NameToken.Widths);

            if (usingStandard14Only)
            {
                // TODO: some fonts combine standard 14 font with other metrics.
                if (!dictionary.TryGet(NameToken.BaseFont, out var baseFontToken) ||
                    !(baseFontToken is NameToken standard14Name))
                {
                    throw new InvalidFontFormatException($"The Type 1 font did not contain a first character entry but also did not reference a standard 14 font: {dictionary}");
                }

                var metrics = Standard14.GetAdobeFontMetrics(standard14Name.Data);

                return(new Type1Standard14Font(metrics));
            }

            var firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);

            var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);

            var widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary, isLenientParsing);

            var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, fontDescriptorFactory, dictionary, isLenientParsing);

            var font = ParseType1Font(descriptor, isLenientParsing);

            var name = FontDictionaryAccessHelper.GetName(pdfScanner, dictionary, descriptor, isLenientParsing);

            CMap toUnicodeCMap = null;

            if (dictionary.TryGet(NameToken.ToUnicode, out var toUnicodeObj))
            {
                var toUnicode = DirectObjectFinder.Get <StreamToken>(toUnicodeObj, pdfScanner);

                var decodedUnicodeCMap = toUnicode?.Decode(filterProvider);

                if (decodedUnicodeCMap != null)
                {
                    toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
                }
            }

            Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor);

            if (encoding == null && font?.Encoding.Count > 0)
            {
                encoding = new BuiltInEncoding(font.Encoding);
            }

            return(new Type1FontSimple(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap));
        }
예제 #3
0
        public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation,
                                         long offsetCorrection,
                                         IPdfTokenScanner pdfScanner,
                                         ISeekableTokenScanner tokenScanner)
        {
            long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceLocation, tokenScanner, bytes, isLenientParsing);

            if (fixedOffset > -1)
            {
                crossReferenceLocation = fixedOffset;

                log.Debug($"Found the first cross reference table or stream at {fixedOffset}.");
            }

            var table = new CrossReferenceTableBuilder();

            var  prevSet = new HashSet <long>();
            long previousCrossReferenceLocation = crossReferenceLocation;

            var missedAttempts = 0;

            // Parse all cross reference tables and streams.
            while (previousCrossReferenceLocation > 0 && missedAttempts < 100)
            {
                log.Debug($"Reading cross reference table or stream at {previousCrossReferenceLocation}.");

                if (previousCrossReferenceLocation >= bytes.Length)
                {
                    break;
                }

                // seek to xref table
                tokenScanner.Seek(previousCrossReferenceLocation);

                tokenScanner.MoveNext();

                if (tokenScanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref")
                {
                    missedAttempts = 0;
                    log.Debug("Element was cross reference table.");

                    CrossReferenceTablePart tablePart = CrossReferenceTableParser.Parse(tokenScanner,
                                                                                        previousCrossReferenceLocation, isLenientParsing);

                    var nextOffset = tablePart.GetPreviousOffset();

                    if (nextOffset >= 0)
                    {
                        nextOffset += offsetCorrection;
                    }

                    previousCrossReferenceLocation = nextOffset;

                    DictionaryToken tableDictionary = tablePart.Dictionary;

                    CrossReferenceTablePart streamPart = null;

                    // check for a XRef stream, it may contain some object ids of compressed objects
                    if (tableDictionary.ContainsKey(NameToken.XrefStm))
                    {
                        log.Debug("Cross reference table contained referenced to stream. Reading the stream.");

                        int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int;

                        // check the xref stream reference
                        fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing);
                        if (fixedOffset > -1 && fixedOffset != streamOffset)
                        {
                            log.Warn($"/XRefStm offset {streamOffset} is incorrect, corrected to {fixedOffset}");

                            streamOffset = (int)fixedOffset;

                            // Update the cross reference table to be a stream instead.
                            tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset));
                            tablePart       = new CrossReferenceTablePart(tablePart.ObjectOffsets, streamOffset,
                                                                          tablePart.Previous, tableDictionary, tablePart.Type);
                        }

                        // Read the stream from the table.
                        if (streamOffset > 0)
                        {
                            try
                            {
                                TryParseCrossReferenceStream(streamOffset, pdfScanner, out streamPart);
                            }
                            catch (InvalidOperationException ex)
                            {
                                if (isLenientParsing)
                                {
                                    log.Error("Failed to parse /XRefStm at offset " + streamOffset, ex);
                                }
                                else
                                {
                                    throw;
                                }
                            }
                        }
                        else
                        {
                            if (isLenientParsing)
                            {
                                log.Error("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                            }
                            else
                            {
                                throw new PdfDocumentFormatException("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                            }
                        }
                    }

                    table.Add(tablePart);

                    if (streamPart != null)
                    {
                        table.Add(streamPart);
                    }
                }
예제 #4
0
        public CrossReferenceTable Build(long firstCrossReferenceOffset, ILog log)
        {
            CrossReferenceType type = CrossReferenceType.Table;
            DictionaryToken    trailerDictionary = new DictionaryToken(new Dictionary <NameToken, IToken>());
            Dictionary <IndirectReference, long> objectOffsets = new Dictionary <IndirectReference, long>();

            List <long> xrefSeqBytePos = new List <long>();

            var currentPart = parts.FirstOrDefault(x => x.Offset == firstCrossReferenceOffset);

            if (currentPart == null)
            {
                // no XRef at given position
                log.Warn("Did not found XRef object at specified startxref position " + firstCrossReferenceOffset);

                // use all objects in byte position order (last entries overwrite previous ones)
                xrefSeqBytePos.AddRange(parts.Select(x => x.Offset));
                xrefSeqBytePos.Sort();
            }
            else
            {
                // copy xref type
                type = currentPart.Type;


                // found starting Xref object
                // add this and follow chain defined by 'Prev' keys
                xrefSeqBytePos.Add(firstCrossReferenceOffset);

                while (currentPart.Dictionary != null)
                {
                    long prevBytePos = currentPart.GetPreviousOffset();
                    if (prevBytePos == -1)
                    {
                        break;
                    }

                    currentPart = parts.FirstOrDefault(x => x.Offset == prevBytePos);
                    if (currentPart == null)
                    {
                        log.Warn("Did not found XRef object pointed to by 'Prev' key at position " + prevBytePos);
                        break;
                    }

                    xrefSeqBytePos.Add(prevBytePos);

                    // sanity check to prevent infinite loops
                    if (xrefSeqBytePos.Count >= parts.Count)
                    {
                        break;
                    }
                }

                // have to reverse order so that later XRefs will overwrite previous ones
                xrefSeqBytePos.Reverse();
            }

            // merge used and sorted XRef/trailer
            foreach (long bPos in xrefSeqBytePos)
            {
                var currentObject = parts.First(x => x.Offset == bPos);
                if (currentObject.Dictionary != null)
                {
                    foreach (var entry in currentObject.Dictionary.Data)
                    {
                        /*
                         * If we're at a second trailer, we have a linearized pdf file, meaning that the first Size entry represents
                         * all of the objects so we don't need to grab the second.
                         */
                        if (!entry.Key.Equals("Size", StringComparison.OrdinalIgnoreCase) ||
                            !trailerDictionary.ContainsKey(NameToken.Size))
                        {
                            trailerDictionary = trailerDictionary.With(entry.Key, entry.Value);
                        }
                    }
                }

                foreach (var item in currentObject.ObjectOffsets)
                {
                    objectOffsets[item.Key] = item.Value;
                }
            }

            return(new CrossReferenceTable(type, objectOffsets, new TrailerDictionary(trailerDictionary),
                                           parts.Select(x =>
            {
                var prev = x.GetPreviousOffset();

                return new CrossReferenceTable.CrossReferenceOffset(x.Offset, prev >= 0 ? prev : default(long?));
            }).ToList()));
        }
예제 #5
0
        public IFont Generate(DictionaryToken dictionary)
        {
            var usingStandard14Only = !dictionary.ContainsKey(NameToken.FirstChar) || !dictionary.ContainsKey(NameToken.Widths);

            if (usingStandard14Only)
            {
                // TODO: some fonts combine standard 14 font with other metrics.
                if (!dictionary.TryGet(NameToken.BaseFont, out var baseFontToken) ||
                    !(baseFontToken is NameToken standard14Name))
                {
                    throw new InvalidFontFormatException($"The Type 1 font did not contain a first character entry but also did not reference a standard 14 font: {dictionary}");
                }

                var metrics = Standard14.GetAdobeFontMetrics(standard14Name.Data);

                if (metrics != null)
                {
                    var overrideEncoding = encodingReader.Read(dictionary);

                    return(new Type1Standard14Font(metrics, overrideEncoding));
                }
            }

            int firstCharacter, lastCharacter;

            double[] widths;
            if (!usingStandard14Only)
            {
                firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);

                lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);

                widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary);
            }
            else
            {
                firstCharacter = 0;
                lastCharacter  = 0;
                widths         = EmptyArray <double> .Instance;
            }

            if (!dictionary.TryGet(NameToken.FontDescriptor, out var _))
            {
                if (dictionary.TryGet(NameToken.BaseFont, out var baseFontToken) &&
                    DirectObjectFinder.TryGet(baseFontToken, pdfScanner, out NameToken baseFontName))
                {
                    var metrics = Standard14.GetAdobeFontMetrics(baseFontName.Data);

                    var overrideEncoding = encodingReader.Read(dictionary);

                    return(new Type1Standard14Font(metrics, overrideEncoding));
                }
            }

            var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, dictionary);

            var font = ParseFontProgram(descriptor);

            var name = FontDictionaryAccessHelper.GetName(pdfScanner, dictionary, descriptor);

            CMap toUnicodeCMap = null;

            if (dictionary.TryGet(NameToken.ToUnicode, out var toUnicodeObj))
            {
                var toUnicode = DirectObjectFinder.Get <StreamToken>(toUnicodeObj, pdfScanner);

                var decodedUnicodeCMap = toUnicode?.Decode(filterProvider, pdfScanner);

                if (decodedUnicodeCMap != null)
                {
                    toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap));
                }
            }


            var fromFont = default(Encoding);

            if (font != null)
            {
                if (font.TryGetFirst(out var t1Font))
                {
                    fromFont = t1Font.Encoding != null ? new BuiltInEncoding(t1Font.Encoding) : default(Encoding);
                }
                else if (font.TryGetSecond(out var cffFont))
                {
                    fromFont = cffFont.FirstFont?.Encoding;
                }
            }

            var encoding = encodingReader.Read(dictionary, descriptor, fromFont);

            if (encoding == null && font != null && font.TryGetFirst(out var t1FontReplacment))
            {
                encoding = new BuiltInEncoding(t1FontReplacment.Encoding);
            }

            return(new Type1FontSimple(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap, font));
        }