Ejemplo n.º 1
0
        /// <summary>
        /// Produces a font from a font dictionary by dispatching to the handler registered for the dictionary's subtype.
        /// </summary>
        /// <param name="dictionary">The font dictionary to read the type and subtype from.</param>
        /// <param name="reader">Passed through unchanged to the selected handler.</param>
        /// <param name="isLenientParsing">
        /// When true, a missing or unexpected type is logged and parsing continues; when false it throws.
        /// </param>
        /// <returns>The result of the matching handler's <c>Generate</c> call.</returns>
        /// <exception cref="InvalidFontFormatException">
        /// The type entry is missing or is not 'Font' and <paramref name="isLenientParsing"/> is false.
        /// </exception>
        /// <exception cref="NotImplementedException">No handler is registered for the subtype.</exception>
        public IFont Get(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
        {
            var type = dictionary.GetName(CosName.TYPE);

            // Compare from the constant side: a dictionary without a type entry yields null here, and calling
            // Equals on that null would throw before the lenient/strict handling below could run.
            if (!CosName.FONT.Equals(type))
            {
                var message = "The font dictionary did not have type 'Font'. " + dictionary;

                if (isLenientParsing)
                {
                    log?.Error(message);
                }
                else
                {
                    throw new InvalidFontFormatException(message);
                }
            }

            var subtype = dictionary.GetName(CosName.SUBTYPE);

            if (handlers.TryGetValue(subtype, out var handler))
            {
                return handler.Generate(dictionary, reader, isLenientParsing);
            }

            throw new NotImplementedException($"Parsing not implemented for fonts of type: {subtype}, please submit a pull request or an issue.");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Decides whether a UCS2 CMap is required for the font. Either returns null (none required) or throws,
        /// because loading UCS2 CMaps is not implemented.
        /// </summary>
        /// <param name="dictionary">The font dictionary whose encoding name is inspected.</param>
        /// <param name="isCMapPredefined">Whether the font's CMap is a predefined one; when false no lookup happens.</param>
        /// <param name="usesDescendantAdobeFont">Whether the descendant CIDFont uses one of the Adobe character collections.</param>
        /// <returns>Always null when it returns.</returns>
        /// <exception cref="NotSupportedException">A UCS2 CMap would be required.</exception>
        private static CMap GetUcs2CMap(PdfDictionary dictionary, bool isCMapPredefined, bool usesDescendantAdobeFont)
        {
            // A UCS2 CMap applies to a composite font that uses a predefined CMap other than Identity-H / Identity-V,
            // or whose descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1 or Adobe-Korea1 collection.
            if (isCMapPredefined)
            {
                var encoding = dictionary.GetName(CosName.ENCODING);

                if (encoding != null)
                {
                    var isIdentity = encoding.Equals(CosName.IDENTITY_H) || encoding.Equals(CosName.IDENTITY_V);

                    if (usesDescendantAdobeFont || !isIdentity)
                    {
                        throw new NotSupportedException("Support for UCS2 CMaps are not implemented yet. Please raise an issue.");
                    }
                }
            }

            return null;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds a CID font from its dictionary. Only the CIDFontType2 subtype produces a result here.
        /// </summary>
        /// <param name="dictionary">The CID font dictionary.</param>
        /// <param name="reader">Used to locate the font descriptor and read the descriptor's font file.</param>
        /// <param name="isLenientParsing">Forwarded to the descriptor factory and the descriptor file reader.</param>
        /// <returns>
        /// A <see cref="Type2CidFont"/> when the subtype is CIDFontType2; otherwise null (including CIDFontType0,
        /// which is not built by this method).
        /// </returns>
        /// <exception cref="InvalidFontFormatException">The type entry is missing or is not 'Font'.</exception>
        public ICidFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
        {
            var type = dictionary.GetName(CosName.TYPE);

            if (!CosName.FONT.Equals(type))
            {
                // type is null when the entry is absent; use ?. so the intended exception is raised
                // instead of a NullReferenceException while building its message.
                throw new InvalidFontFormatException($"Expected 'Font' dictionary but found '{type?.Name}'");
            }

            var widths = ReadWidths(dictionary);
            var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);

            // The descriptor stays null when no descriptor dictionary can be found.
            FontDescriptor descriptor = null;

            if (TryGetFontDescriptor(dictionary, reader, out var descriptorDictionary))
            {
                descriptor = descriptorFactory.Generate(descriptorDictionary, isLenientParsing);
            }

            var fontProgram = ReadDescriptorFile(descriptor, reader, isLenientParsing);

            var baseFont = dictionary.GetName(CosName.BASE_FONT);

            var systemInfo = GetSystemInfo(dictionary);

            var subType = dictionary.GetName(CosName.SUBTYPE);

            if (CosName.CID_FONT_TYPE2.Equals(subType))
            {
                return new Type2CidFont(type, subType, baseFont, systemInfo, descriptor, fontProgram, verticalWritingMetrics, widths);
            }

            // TODO: CIDFontType0 is not constructed yet; it and any other subtype yield null.
            return null;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Checks that a descendant font dictionary is of type 'Font' and hands it to the CID font factory.
        /// </summary>
        /// <param name="dictionary">The descendant font dictionary.</param>
        /// <param name="reader">Forwarded to the CID font factory.</param>
        /// <param name="isLenientParsing">Forwarded to the CID font factory.</param>
        /// <returns>Whatever the CID font factory generates for the dictionary.</returns>
        /// <exception cref="InvalidFontFormatException">The type entry is missing or is not 'Font'.</exception>
        private ICidFont ParseDescendant(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
        {
            var type = dictionary.GetName(CosName.TYPE);

            if (!CosName.FONT.Equals(type))
            {
                // A missing type entry gives null; ?. keeps the message construction from throwing
                // a NullReferenceException in place of the intended exception.
                throw new InvalidFontFormatException($"Expected 'Font' dictionary but found '{type?.Name}'");
            }

            return cidFontFactory.Generate(dictionary, reader, isLenientParsing);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Creates a <see cref="Page"/> from a page dictionary: resolves its boxes and user space unit, loads its
        /// resources and, when the Contents entry is an indirect object, decodes and processes the content stream.
        /// </summary>
        /// <param name="number">The page number, used when constructing the page and in error messages.</param>
        /// <param name="dictionary">The page dictionary. Must not be null.</param>
        /// <param name="pageTreeMembers">Passed to the media box and crop box lookups.</param>
        /// <param name="reader">Used for resource loading and for parsing the content stream object.</param>
        /// <param name="isLenientParsing">When true, a type other than 'Page' is tolerated.</param>
        /// <returns>The page; its content is the default value when no content object was found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The type is present, is not 'Page' and parsing is strict; or the content object did not parse to a raw stream.
        /// </exception>
        public Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader,
                           bool isLenientParsing)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetName(CosName.TYPE);

            // A missing type is accepted; only an explicit, wrong type is rejected in strict mode.
            if (type != null && !type.Equals(CosName.PAGE) && !isLenientParsing)
            {
                throw new InvalidOperationException($"Page {number} had its type was specified as {type} rather than 'Page'.");
            }

            MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
            CropBox  cropBox  = GetCropBox(dictionary, pageTreeMembers, mediaBox);

            UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

            LoadResources(dictionary, reader, isLenientParsing);

            PageContent content = default(PageContent);

            // Only a Contents entry that is a CosObject is handled; any other shape leaves content at its default.
            var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;

            if (contentObject != null)
            {
                // NOTE(review): lenient parsing is hard-coded to false here rather than forwarding isLenientParsing - confirm intent.
                var contentStream = pdfObjectParser.Parse(contentObject.ToIndirectReference(), reader, false) as PdfRawStream;

                if (contentStream == null)
                {
                    throw new InvalidOperationException("Failed to parse the content for the page: " + number);
                }

                var contents = contentStream.Decode(filterProvider);

                var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));

                var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);

                content = context.Process(operations);
            }

            return new Page(number, mediaBox, cropBox, content);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds a Type 0 (composite) font from its dictionary: reads the encoding CMap, parses the first
        /// descendant CID font and, when a ToUnicode entry exists, parses its CMap.
        /// </summary>
        /// <param name="dictionary">The Type 0 font dictionary.</param>
        /// <param name="reader">Used to resolve the descendant font and the ToUnicode stream.</param>
        /// <param name="isLenientParsing">Forwarded to the object lookups and CMap parsing.</param>
        /// <returns>A <see cref="Type0Font"/> combining the base font name, CID font and CMaps.</returns>
        /// <exception cref="InvalidFontFormatException">
        /// No descendant font was declared, or the descendant did not resolve to a dictionary.
        /// </exception>
        public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
        {
            var baseFont = dictionary.GetName(CosName.BASE_FONT);

            var cMap = ReadEncoding(dictionary, out var isCMapPredefined);

            if (!TryGetFirstDescendant(dictionary, out var descendantObject))
            {
                throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
            }

            var resolved = DirectObjectFinder.Find<PdfDictionary>(descendantObject, pdfObjectParser, reader, isLenientParsing);

            if (!(resolved is PdfDictionary descendantFontDictionary))
            {
                throw new InvalidFontFormatException("Expected to find a Descendant Font dictionary, instead it was: " + resolved);
            }

            ICidFont cidFont = ParseDescendant(descendantFontDictionary, reader, isLenientParsing);

            // The returned value is not used by this method; the call is kept so that any exception
            // it raises still surfaces to the caller.
            GetUcs2CMap(dictionary, isCMapPredefined, false);

            CMap toUnicodeCMap = null;

            if (dictionary.ContainsKey(CosName.TO_UNICODE))
            {
                var reference = ((CosObject)dictionary[CosName.TO_UNICODE]).ToIndirectReference();

                var toUnicodeStream = pdfObjectParser.Parse(reference, reader, isLenientParsing) as PdfRawStream;

                // Stays null when the referenced object was not a raw stream.
                var cMapBytes = toUnicodeStream?.Decode(filterProvider);

                if (cMapBytes != null)
                {
                    toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(cMapBytes), isLenientParsing);
                }
            }

            return new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Walks the page tree depth-first from the given node, recording every page dictionary it numbers,
        /// and stops as soon as the sought page number has been reached.
        /// </summary>
        /// <param name="currentPageDictionary">A page tree node: either a 'Page' leaf or a 'Pages' node with kids.</param>
        /// <param name="soughtPageNumber">The page number being searched for.</param>
        /// <param name="pageNumbersObserved">Page numbers assigned so far; each visited leaf appends its number.</param>
        /// <returns>True when the sought page was found at or below this node; otherwise false.</returns>
        public bool FindPage(PdfDictionary currentPageDictionary, int soughtPageNumber, List<int> pageNumbersObserved)
        {
            var type = currentPageDictionary.GetName(CosName.TYPE);

            // Compare from the constant side so a node without a type entry (null) falls through to the
            // warning below rather than throwing a NullReferenceException.
            if (CosName.PAGE.Equals(type))
            {
                var pageNumber = GetNextPageNumber(pageNumbersObserved);

                locatedPages[pageNumber] = currentPageDictionary;
                pageNumbersObserved.Add(pageNumber);

                return pageNumber == soughtPageNumber;
            }

            if (!CosName.PAGES.Equals(type))
            {
                log.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);

                return false;
            }

            var kids = currentPageDictionary.GetDictionaryObject(CosName.KIDS) as COSArray;

            pageFactory.LoadResources(currentPageDictionary, reader, isLenientParsing);

            // The cast yields null when the Kids entry is absent or is not an array; enumerating it would throw.
            if (kids == null)
            {
                log.Warn("The Pages dictionary did not contain a Kids array: " + currentPageDictionary);

                return false;
            }

            foreach (var kid in kids.OfType<CosObject>())
            {
                var child = pdfObjectParser.Parse(kid.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;

                // A kid that does not resolve to a dictionary cannot be descended into; skip it instead of crashing.
                if (child == null)
                {
                    log.Warn("A child of the Pages dictionary could not be read as a dictionary: " + kid);

                    continue;
                }

                if (FindPage(child, soughtPageNumber, pageNumbersObserved))
                {
                    return true;
                }
            }

            return false;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Reads a stream object's data from the reader, starting at the 'stream' keyword, and returns it
        /// together with its dictionary as a <see cref="PdfRawStream"/>.
        /// </summary>
        /// <param name="reader">The reader positioned at the 'stream' keyword.</param>
        /// <param name="streamDictionary">The dictionary that precedes the stream; supplies the Length and Type entries.</param>
        /// <param name="isLenientParsing">
        /// When true, a trailing 'endobj' or an 'endstream' followed by extra characters is logged and recovered from.
        /// </param>
        /// <param name="parser">Passed to the length lookup.</param>
        /// <returns>The raw stream bytes paired with <paramref name="streamDictionary"/>.</returns>
        /// <exception cref="InvalidOperationException">
        /// The token after the data is not 'endstream' and none of the lenient recoveries applied.
        /// </exception>
        public PdfRawStream Parse(IRandomAccessRead reader, PdfDictionary streamDictionary, bool isLenientParsing, IPdfObjectParser parser)
        {
            PdfRawStream result;

            // Consume the 'stream' keyword; per the original note its presence was already tested in parseObjectsDynamically().
            ReadHelper.ReadExpectedString(reader, "stream");

            skipWhiteSpaces(reader);

            // The raw item is fetched (rather than a resolved value) because, while parsing, the underlying object might still be null.
            ICosNumber streamLength = GetLength(reader, streamDictionary.GetItemOrDefault(CosName.LENGTH), streamDictionary.GetName(CosName.TYPE), isLenientParsing, parser);

            // NOTE(review): this helper and the lower-case validateStreamLength used below differ only by case
            // and are defined outside this block - confirm they are distinct methods with distinct purposes.
            ValidateStreamLength(reader, isLenientParsing, streamLength);

            // Copy the stream data into an in-memory buffer.
            using (var stream = new MemoryStream())
                using (var writer = new BinaryWriter(stream))
                {
                    // Use the declared length only when one exists and it passes validation against the reader's
                    // total length; otherwise fall back to scanning for the end of the stream.
                    if (streamLength != null && validateStreamLength(reader, streamLength.AsLong(), reader.Length()))
                    {
                        ReadValidStream(reader, writer, streamLength);
                    }
                    else
                    {
                        ReadUntilEndStream(reader, writer);
                    }

                    result = new PdfRawStream(stream.ToArray(), streamDictionary);
                }

            // The next token is expected to be the 'endstream' keyword.
            String endStream = ReadHelper.ReadString(reader);

            if (endStream.Equals("endobj") && isLenientParsing)
            {
                log.Warn($"stream ends with \'endobj\' instead of \'endstream\' at offset {reader.GetPosition()}");

                // Push 'endobj' back so the caller does not issue a follow-up warning about a missing endobj.
                reader.Rewind("endobj".Length);
            }
            else if (endStream.Length > 9 && isLenientParsing && endStream.Substring(0, 9).Equals("endstream"))
            {
                // 9 is the length of "endstream": the token starts with the keyword but has trailing characters.
                log.Warn("stream ends with '" + endStream + "' instead of 'endstream' at offset " + reader.GetPosition());
                // Unread the "extra" bytes that followed the keyword.
                reader.Rewind(OtherEncodings.StringAsLatin1Bytes(endStream.Substring(9)).Length);
            }
            else if (!endStream.Equals("endstream"))
            {
                throw new InvalidOperationException("Error reading stream, expected='endstream' actual='"
                                                    + endStream + "' at offset " + reader.GetPosition());
            }

            return(result);
        }