Ejemplo n.º 1
0
        public IFont Get(DictionaryToken dictionary, bool isLenientParsing)
        {
            var type = dictionary.GetNameOrDefault(NameToken.Type);

            if (type != null && !type.Equals(NameToken.Font))
            {
                var message = "The font dictionary did not have type 'Font'. " + dictionary;

                if (isLenientParsing)
                {
                    log?.Error(message);
                }
                else
                {
                    throw new InvalidFontFormatException(message);
                }
            }

            var subtype = dictionary.GetNameOrDefault(NameToken.Subtype);

            if (handlers.TryGetValue(subtype, out var handler))
            {
                return(handler.Generate(dictionary, isLenientParsing));
            }

            throw new NotImplementedException($"Parsing not implemented for fonts of type: {subtype}, please submit a pull request or an issue.");
        }
Ejemplo n.º 2
0
        public ICidFont Generate(DictionaryToken dictionary, bool isLenientParsing)
        {
            var type = dictionary.GetNameOrDefault(NameToken.Type);

            if (!NameToken.Font.Equals(type))
            {
                throw new InvalidFontFormatException($"Expected \'Font\' dictionary but found \'{type}\'");
            }

            var widths = ReadWidths(dictionary);

            var defaultWidth = default(double?);

            if (dictionary.TryGet(NameToken.Dw, pdfScanner, out NumericToken defaultWidthToken))
            {
                defaultWidth = defaultWidthToken.Double;
            }

            var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);

            FontDescriptor descriptor = null;

            if (TryGetFontDescriptor(dictionary, out var descriptorDictionary))
            {
                descriptor = descriptorFactory.Generate(descriptorDictionary, pdfScanner, isLenientParsing);
            }

            var fontProgram = ReadDescriptorFile(descriptor);

            var baseFont = dictionary.GetNameOrDefault(NameToken.BaseFont);

            var systemInfo = GetSystemInfo(dictionary);

            var subType = dictionary.GetNameOrDefault(NameToken.Subtype);

            if (NameToken.CidFontType0.Equals(subType))
            {
                return(new Type0CidFont(fontProgram, type, subType, baseFont, systemInfo, descriptor, verticalWritingMetrics, widths, defaultWidth));
            }

            if (NameToken.CidFontType2.Equals(subType))
            {
                var cidToGid = GetCharacterIdentifierToGlyphIndexMap(dictionary, isLenientParsing);

                return(new Type2CidFont(type, subType, baseFont, systemInfo, descriptor, fontProgram, verticalWritingMetrics, widths, defaultWidth, cidToGid));
            }

            return(null);
        }
Ejemplo n.º 3
0
        private static CMap GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, bool usesDescendantAdobeFont)
        {
            if (!isCMapPredefined)
            {
                return(null);
            }

            /*
             * If the font is a composite font that uses one of the predefined CMaps except Identity–H and Identity–V or whose descendant
             * CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or Adobe-Korea1 character collection use a UCS2 CMap.
             */

            var encodingName = dictionary.GetNameOrDefault(NameToken.Encoding);

            if (encodingName == null)
            {
                return(null);
            }

            var isPredefinedIdentityMap = encodingName.Equals(NameToken.IdentityH) || encodingName.Equals(NameToken.IdentityV);

            if (isPredefinedIdentityMap && !usesDescendantAdobeFont)
            {
                return(null);
            }

            throw new NotSupportedException("Support for UCS2 CMaps are not implemented yet. Please raise an issue.");
        }
Ejemplo n.º 4
0
        public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
        {
            var baseFont = dictionary.GetNameOrDefault(NameToken.BaseFont);

            var cMap = ReadEncoding(dictionary, out var isCMapPredefined);

            ICidFont cidFont;

            if (TryGetFirstDescendant(dictionary, out var descendantObject))
            {
                DictionaryToken descendantFontDictionary;

                if (descendantObject is IndirectReferenceToken obj)
                {
                    var parsed = DirectObjectFinder.Get<DictionaryToken>(obj, scanner);

                    descendantFontDictionary = parsed;
                }
                else
                {
                    descendantFontDictionary = (DictionaryToken) descendantObject;
                }

                cidFont = ParseDescendant(descendantFontDictionary, isLenientParsing);
            }
            else
            {
                throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
            }

            var (ucs2CMap, isChineseJapaneseOrKorean) = GetUcs2CMap(dictionary, isCMapPredefined, cidFont);

            CMap toUnicodeCMap = null;
            if (dictionary.ContainsKey(NameToken.ToUnicode))
            {
                var toUnicodeValue = dictionary.Data[NameToken.ToUnicode];

                if (DirectObjectFinder.TryGet<StreamToken>(toUnicodeValue, scanner, out var toUnicodeStream))
                {
                    var decodedUnicodeCMap = toUnicodeStream?.Decode(filterProvider);

                    if (decodedUnicodeCMap != null)
                    {
                        toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
                    }
                }
                else if (DirectObjectFinder.TryGet<NameToken>(toUnicodeValue, scanner, out var toUnicodeName))
                {
                    toUnicodeCMap = CMapCache.Get(toUnicodeName.Data);
                }
                else
                {
                    throw new PdfDocumentFormatException($"Invalid type of toUnicode CMap encountered. Got: {toUnicodeValue}.");
                }
            }

            var font = new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap, ucs2CMap, isChineseJapaneseOrKorean);

            return font;
        }
Ejemplo n.º 5
0
        public bool FindPage(DictionaryToken currentPageDictionary, int soughtPageNumber, List <int> pageNumbersObserved, PageTreeMembers pageTreeMembers)
        {
            var type = currentPageDictionary.GetNameOrDefault(NameToken.Type);

            if (type?.Equals(NameToken.Page) == true)
            {
                var pageNumber = GetNextPageNumber(pageNumbersObserved);

                bool found = pageNumber == soughtPageNumber;

                locatedPages[pageNumber] = currentPageDictionary;
                pageNumbersObserved.Add(pageNumber);

                return(found);
            }

            if (type?.Equals(NameToken.Pages) != true)
            {
                log.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);

                return(false);
            }

            if (currentPageDictionary.TryGet(NameToken.MediaBox, out var token))
            {
                var mediaBox = DirectObjectFinder.Get <ArrayToken>(token, pdfScanner);

                pageTreeMembers.MediaBox = new MediaBox(new PdfRectangle(mediaBox.GetNumeric(0).Data,
                                                                         mediaBox.GetNumeric(1).Data,
                                                                         mediaBox.GetNumeric(2).Data,
                                                                         mediaBox.GetNumeric(3).Data));
            }

            if (!currentPageDictionary.TryGet(NameToken.Kids, out var kids) ||
                !(kids is ArrayToken kidsArray))
            {
                return(false);
            }

            pageFactory.LoadResources(currentPageDictionary, isLenientParsing);

            bool childFound = false;

            foreach (var kid in kidsArray.Data)
            {
                // todo: exit early
                var child = DirectObjectFinder.Get <DictionaryToken>(kid, pdfScanner);

                var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved, pageTreeMembers);

                if (thisPageMatches)
                {
                    childFound = true;
                    break;
                }
            }

            return(childFound);
        }
Ejemplo n.º 6
0
        private static (CMap, bool isChineseJapaneseOrKorean) GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, ICidFont cidFont)
        {
            if (!isCMapPredefined)
            {
                return(null, false);
            }

            /*
             * If the font is a composite font that uses one of the predefined CMaps except Identity–H and Identity–V or whose descendant
             * CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or Adobe-Korea1 character collection use a UCS2 CMap.
             */

            var encodingName = dictionary.GetNameOrDefault(NameToken.Encoding);

            if (encodingName == null)
            {
                return(null, false);
            }

            var isChineseJapaneseOrKorean = false;

            if (cidFont != null && string.Equals(cidFont.SystemInfo.Registry, "Adobe", StringComparison.OrdinalIgnoreCase))
            {
                isChineseJapaneseOrKorean = string.Equals(cidFont.SystemInfo.Ordering, "GB1", StringComparison.OrdinalIgnoreCase) ||
                                            string.Equals(cidFont.SystemInfo.Ordering, "CNS1", StringComparison.OrdinalIgnoreCase) ||
                                            string.Equals(cidFont.SystemInfo.Ordering, "Japan1", StringComparison.OrdinalIgnoreCase) ||
                                            string.Equals(cidFont.SystemInfo.Ordering, "Korea1", StringComparison.OrdinalIgnoreCase);
            }


            var isPredefinedIdentityMap = encodingName.Equals(NameToken.IdentityH) || encodingName.Equals(NameToken.IdentityV);

            if (isPredefinedIdentityMap && !isChineseJapaneseOrKorean)
            {
                return(null, false);
            }

            if (!isChineseJapaneseOrKorean)
            {
                return(null, false);
            }

            var fullCmapName   = cidFont.SystemInfo.ToString();
            var nonUnicodeCMap = CMapCache.Get(fullCmapName);

            if (nonUnicodeCMap == null)
            {
                return(null, true);
            }

            var unicodeCMapName = $"{nonUnicodeCMap.Info.Registry}-{nonUnicodeCMap.Info.Ordering}-UCS2";

            return(CMapCache.Get(unicodeCMapName), true);
        }
Ejemplo n.º 7
0
        private ICidFont ParseDescendant(DictionaryToken dictionary, bool isLenientParsing)
        {
            var type = dictionary.GetNameOrDefault(NameToken.Type);
            if (type?.Equals(NameToken.Font) != true)
            {
                throw new InvalidFontFormatException($"Expected \'Font\' dictionary but found \'{type}\'");
            }

            var result = cidFontFactory.Generate(dictionary, isLenientParsing);

            return result;
        }
Ejemplo n.º 8
0
        public ICidFont Generate(DictionaryToken dictionary, bool isLenientParsing)
        {
            var type = dictionary.GetNameOrDefault(NameToken.Type);

            if (!NameToken.Font.Equals(type))
            {
                throw new InvalidFontFormatException($"Expected \'Font\' dictionary but found \'{type}\'");
            }

            var widths = ReadWidths(dictionary);
            var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);

            FontDescriptor descriptor = null;

            if (TryGetFontDescriptor(dictionary, out var descriptorDictionary))
            {
                descriptor = descriptorFactory.Generate(descriptorDictionary, isLenientParsing);
            }

            var fontProgram = ReadDescriptorFile(descriptor);

            var baseFont = dictionary.GetNameOrDefault(NameToken.BaseFont);

            var systemInfo = GetSystemInfo(dictionary);

            var subType = dictionary.GetNameOrDefault(NameToken.Subtype);

            if (NameToken.CidFontType0.Equals(subType))
            {
                //return new PDCIDFontType0(dictionary, parent);
            }

            if (NameToken.CidFontType2.Equals(subType))
            {
                return(new Type2CidFont(type, subType, baseFont, systemInfo, descriptor, fontProgram, verticalWritingMetrics, widths));
            }

            return(null);
        }
Ejemplo n.º 9
0
        public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers,
                           bool isLenientParsing)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetNameOrDefault(NameToken.Type);

            if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
            {
                throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
            }

            var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);

            if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
            {
                rotation = new PageRotationDegrees(rotateToken.Int);
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
            CropBox  cropBox  = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            var stackDepth = 0;

            while (pageTreeMembers.ParentResources.Count > 0)
            {
                var resource = pageTreeMembers.ParentResources.Dequeue();

                resourceStore.LoadResourceDictionary(resource, isLenientParsing);
                stackDepth++;
            }

            if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
            {
                resourceStore.LoadResourceDictionary(resources, isLenientParsing);
                stackDepth++;
            }

            UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

            PageContent content = default(PageContent);

            if (!dictionary.TryGet(NameToken.Contents, out var contents))
            {
                // ignored for now, is it possible? check the spec...
            }
            else if (DirectObjectFinder.TryGet <ArrayToken>(contents, pdfScanner, out var array))
            {
                var bytes = new List <byte>();

                for (var i = 0; i < array.Data.Count; i++)
                {
                    var item = array.Data[i];

                    if (!(item is IndirectReferenceToken obj))
                    {
                        throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                    }

                    var contentStream = DirectObjectFinder.Get <StreamToken>(obj, pdfScanner);

                    if (contentStream == null)
                    {
                        throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                    }

                    bytes.AddRange(contentStream.Decode(filterProvider));

                    if (i < array.Data.Count - 1)
                    {
                        bytes.Add((byte)'\n');
                    }
                }

                content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
            }
            else
            {
                var contentStream = DirectObjectFinder.Get <StreamToken>(contents, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                }

                var bytes = contentStream.Decode(filterProvider);

                content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
            }

            var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content,
                                new AnnotationProvider(pdfScanner, dictionary, isLenientParsing),
                                pdfScanner);

            for (var i = 0; i < stackDepth; i++)
            {
                resourceStore.UnloadResourceDictionary();
            }

            return(page);
        }
Ejemplo n.º 10
0
        public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool clipPaths)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetNameOrDefault(NameToken.Type);

            if (type != null && !type.Equals(NameToken.Page))
            {
                log?.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
            CropBox  cropBox  = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);

            if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
            {
                rotation = new PageRotationDegrees(rotateToken.Int);
            }

            var stackDepth = 0;

            while (pageTreeMembers.ParentResources.Count > 0)
            {
                var resource = pageTreeMembers.ParentResources.Dequeue();

                resourceStore.LoadResourceDictionary(resource);
                stackDepth++;
            }

            if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
            {
                resourceStore.LoadResourceDictionary(resources);
                stackDepth++;
            }

            // Apply rotation.
            if (rotation.SwapsAxis)
            {
                mediaBox = new MediaBox(new PdfRectangle(mediaBox.Bounds.Bottom,
                                                         mediaBox.Bounds.Left,
                                                         mediaBox.Bounds.Top,
                                                         mediaBox.Bounds.Right));
                cropBox = new CropBox(new PdfRectangle(cropBox.Bounds.Bottom,
                                                       cropBox.Bounds.Left,
                                                       cropBox.Bounds.Top,
                                                       cropBox.Bounds.Right));
            }

            UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

            PageContent content;

            if (!dictionary.TryGet(NameToken.Contents, out var contents))
            {
                content = new PageContent(EmptyArray <IGraphicsStateOperation> .Instance,
                                          EmptyArray <Letter> .Instance,
                                          EmptyArray <PdfPath> .Instance,
                                          EmptyArray <Union <XObjectContentRecord, InlineImage> > .Instance,
                                          EmptyArray <MarkedContentElement> .Instance,
                                          pdfScanner,
                                          filterProvider,
                                          resourceStore);
                // ignored for now, is it possible? check the spec...
            }
            else if (DirectObjectFinder.TryGet <ArrayToken>(contents, pdfScanner, out var array))
            {
                var bytes = new List <byte>();

                for (var i = 0; i < array.Data.Count; i++)
                {
                    var item = array.Data[i];

                    if (!(item is IndirectReferenceToken obj))
                    {
                        throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                    }

                    var contentStream = DirectObjectFinder.Get <StreamToken>(obj, pdfScanner);

                    if (contentStream == null)
                    {
                        throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                    }

                    bytes.AddRange(contentStream.Decode(filterProvider, pdfScanner));

                    if (i < array.Data.Count - 1)
                    {
                        bytes.Add((byte)'\n');
                    }
                }

                content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
            }
            else
            {
                var contentStream = DirectObjectFinder.Get <StreamToken>(contents, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                }

                var bytes = contentStream.Decode(filterProvider, pdfScanner);

                content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox);
            }

            var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content,
                                new AnnotationProvider(pdfScanner, dictionary),
                                pdfScanner);

            for (var i = 0; i < stackDepth; i++)
            {
                resourceStore.UnloadResourceDictionary();
            }

            return(page);
        }
Ejemplo n.º 11
0
        public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers,
                           bool isLenientParsing)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetNameOrDefault(NameToken.Type);

            if (type != null && !type.Equals(NameToken.Page) && !isLenientParsing)
            {
                throw new InvalidOperationException($"Page {number} had its type was specified as {type} rather than 'Page'.");
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
            CropBox  cropBox  = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

            LoadResources(dictionary, isLenientParsing);

            PageContent content = default(PageContent);

            if (!dictionary.TryGet(NameToken.Contents, out var contents))
            {
                // ignored for now, is it possible? check the spec...
            }
            else if (DirectObjectFinder.TryGet <ArrayToken>(contents, pdfScanner, out var array))
            {
                var bytes = new List <byte>();

                foreach (var item in array.Data)
                {
                    if (!(item is IndirectReferenceToken obj))
                    {
                        throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                    }

                    var contentStream = DirectObjectFinder.Get <StreamToken>(obj, pdfScanner);

                    if (contentStream == null)
                    {
                        throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                    }

                    bytes.AddRange(contentStream.Decode(filterProvider));
                }

                content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
            }
            else
            {
                var contentStream = DirectObjectFinder.Get <StreamToken>(contents, pdfScanner);

                if (contentStream == null)
                {
                    throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                }

                var bytes = contentStream.Decode(filterProvider);

                content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
            }

            var page = new Page(number, mediaBox, cropBox, content);

            return(page);
        }