Пример #1
0
        /// <summary>
        /// Reads a dictionary delimited by <c>&lt;&lt;</c> and <c>&gt;&gt;</c> from the reader's current position.
        /// </summary>
        /// <param name="reader">Source positioned at the opening <c>&lt;&lt;</c>.</param>
        /// <param name="baseParser">Parser used for the entry values.</param>
        /// <param name="pool">Object pool handed to the value parser.</param>
        /// <returns>
        /// The populated dictionary, or a new empty dictionary when an unexpected character is met
        /// and <c>ReadUntilEnd</c> reports <c>true</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        public PdfDictionary Parse(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
        {
            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader));
            }

            if (baseParser == null)
            {
                throw new ArgumentNullException(nameof(baseParser));
            }

            if (pool == null)
            {
                throw new ArgumentNullException(nameof(pool));
            }

            // Opening delimiter.
            ReadHelper.ReadExpectedChar(reader, '<');
            ReadHelper.ReadExpectedChar(reader, '<');
            ReadHelper.SkipSpaces(reader);

            var result = new PdfDictionary();

            while (true)
            {
                ReadHelper.SkipSpaces(reader);

                var next = (char)reader.Peek();

                if (next == '>')
                {
                    // Start of the closing delimiter: all entries have been read.
                    break;
                }

                if (next == '/')
                {
                    var (entryKey, entryValue) = ParseCosDictionaryNameValuePair(reader, baseParser, pool);

                    // Incomplete pairs are dropped rather than stored.
                    if (entryKey != null && entryValue != null)
                    {
                        result.Set(entryKey, entryValue);
                    }

                    continue;
                }

                // Anything else is unexpected inside a dictionary.
                if (ReadUntilEnd(reader))
                {
                    return new PdfDictionary();
                }
            }

            ReadHelper.ReadExpectedString(reader, ">>");

            return result;
        }
Пример #2
0
        /// <summary>
        /// Reads the characters of a number token up to, but not including, the first terminating byte.
        /// </summary>
        /// <param name="reader">Source to read from; the terminating byte is pushed back unless it is end-of-input (-1).</param>
        /// <returns>The characters read, in order.</returns>
        /// <exception cref="IOException">The token grows longer than the decimal form of <see cref="long.MaxValue"/>.</exception>
        private static StringBuilder ReadStringNumber(IRandomAccessRead reader)
        {
            // Upper bound on the token length; computed once instead of once per character read.
            int maxLength = long.MaxValue.ToString("D").Length;

            int           lastByte;
            StringBuilder buffer = new StringBuilder();

            while ((lastByte = reader.Read()) != ' ' &&
                   lastByte != AsciiLineFeed &&
                   lastByte != AsciiCarriageReturn &&
                   lastByte != '<' && // see sourceforge bug 1714707
                   lastByte != '[' && // PDFBOX-1845
                   lastByte != '(' && // PDFBOX-2579
                   lastByte != 0 &&   // see sourceforge bug 853328
                   lastByte != -1)
            {
                buffer.Append((char)lastByte);
                if (buffer.Length > maxLength)
                {
                    throw new IOException("Number '" + buffer + "' is getting too long, stop reading at offset " + reader.GetPosition());
                }
            }

            // Leave the terminator for the caller; there is nothing to push back at end-of-input.
            if (lastByte != -1)
            {
                reader.Unread(lastByte);
            }

            return buffer;
        }
Пример #3
0
        /// <summary>
        /// Reads one <c>/Name value</c> entry of a dictionary.
        /// </summary>
        /// <param name="reader">Source positioned at the entry's name.</param>
        /// <param name="baseParser">Parser used for the value.</param>
        /// <param name="pool">Object pool handed to the value parser.</param>
        /// <returns>
        /// The key and value, with the value flagged as direct; <c>(null, null)</c> when no value could be parsed.
        /// </returns>
        private (CosName key, CosBase value) ParseCosDictionaryNameValuePair(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
        {
            var key   = nameParser.Parse(reader);
            var value = ParseValue(reader, baseParser, pool);

            ReadHelper.SkipSpaces(reader);

            if ((char)reader.Peek() == 'd')
            {
                // A trailing 'def' means we are parsing a cmap stream and the keyword is ignored.
                // Any other token starting with 'd' is pushed back unchanged.
                var potentialDef = ReadHelper.ReadString(reader);
                if (potentialDef.Equals("def"))
                {
                    ReadHelper.SkipSpaces(reader);
                }
                else
                {
                    reader.Unread(OtherEncodings.StringAsLatin1Bytes(potentialDef));
                }
            }

            if (value == null)
            {
                // Read the offending token before logging. Inside `log?.Warn(...)` the read would be
                // skipped whenever no logger is attached, making the reader position depend on logging.
                var badToken = ReadHelper.ReadString(reader);
                log?.Warn("Bad Dictionary Declaration " + badToken);
                return (null, null);
            }

            // label this item as direct, to avoid signature problems.
            value.Direct = true;

            return (key, value);
        }
Пример #4
0
        /// <summary>
        /// Parses a single value; when the value is the start of an indirect reference
        /// (<c>number generation R</c>) the referenced object is fetched from the pool instead.
        /// </summary>
        /// <param name="reader">Source positioned at the value.</param>
        /// <param name="baseParser">Parser used for the value and, if present, the generation number.</param>
        /// <param name="pool">Pool used to look up indirect references.</param>
        /// <returns>The parsed value, or the pool entry for the indirect reference.</returns>
        /// <exception cref="InvalidOperationException">
        /// The object number or the generation number of a reference is not a <c>CosInt</c>.
        /// </exception>
        private static CosBase ParseValue(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
        {
            var numOffset = reader.GetPosition();
            var value     = baseParser.Parse(reader, pool);

            ReadHelper.SkipSpaces(reader);

            // Only a number followed by another number can start an indirect reference.
            if (!(value is ICosNumber) || !ReadHelper.IsDigit(reader))
            {
                return value;
            }

            // read the remaining information of the object number
            var genOffset        = reader.GetPosition();
            var generationNumber = baseParser.Parse(reader, pool);

            ReadHelper.SkipSpaces(reader);
            ReadHelper.ReadExpectedChar(reader, 'R');

            if (!(value is CosInt objectNumber))
            {
                throw new InvalidOperationException("expected number, actual=" + value + " at offset " + numOffset);
            }

            if (!(generationNumber is CosInt generation))
            {
                // Report the generation token itself; the original message repeated the object number here.
                throw new InvalidOperationException("expected number, actual=" + generationNumber + " at offset " + genOffset);
            }

            var key = new CosObjectKey(objectNumber.AsLong(), generation.AsInt());

            // dereference the object
            return pool.Get(key);
        }
Пример #5
0
        /// <summary>
        /// Advances the reader past whitespace and '%' comments, leaving it on the first other byte.
        /// </summary>
        /// <param name="reader">Source to advance. The first byte that is neither whitespace nor part of a
        /// comment is pushed back, unless end-of-input (-1) was reached.</param>
        public static void SkipSpaces(IRandomAccessRead reader)
        {
            const int commentCharacter = 37; // ASCII '%'

            int current = reader.Read();

            while (true)
            {
                if (current == commentCharacter)
                {
                    // Discard the rest of the comment; stop on the end-of-line byte or end-of-input.
                    do
                    {
                        current = reader.Read();
                    }
                    while (!IsEndOfLine(current) && current != -1);
                }
                else if (IsWhitespace(current))
                {
                    current = reader.Read();
                }
                else
                {
                    break;
                }
            }

            if (current != -1)
            {
                reader.Unread(current);
            }
        }
Пример #6
0
        /// <summary>
        /// Parses the stream that follows an already parsed dictionary, then reads the keyword line after it.
        /// </summary>
        /// <param name="reader">Source positioned at the <c>stream</c> keyword.</param>
        /// <param name="currentBase">The object parsed just before the stream; must be a <c>PdfDictionary</c>.</param>
        /// <param name="offset">Offset of the enclosing object, used only in the error message.</param>
        /// <param name="isLenientParsing">Passed through to the stream parser.</param>
        /// <param name="endObjectKey">Receives the text read after the stream, with one redundant
        /// leading <c>endstream</c> removed.</param>
        /// <returns>The parsed stream.</returns>
        /// <exception cref="InvalidOperationException"><paramref name="currentBase"/> is not a dictionary.</exception>
        private CosBase ReadNormalObjectStream(IRandomAccessRead reader, CosBase currentBase, long offset,
                                               bool isLenientParsing,
                                               out string endObjectKey)
        {
            const string endStreamKeyword = "endstream";

            // A stream object is a dictionary followed by stream/endstream; anything else is illegal.
            if (!(currentBase is PdfDictionary dictionary))
            {
                throw new InvalidOperationException($"Stream not preceded by dictionary (offset: {offset}).");
            }

            PdfRawStream stream = streamParser.Parse(reader, dictionary, isLenientParsing, this);

            ReadHelper.SkipSpaces(reader);
            endObjectKey = ReadHelper.ReadLine(reader);

            // Some files carry a second 'endstream' before 'endobj'. Ordinal comparison is required:
            // the text comes from raw bytes, and a culture-sensitive StartsWith may ignore characters
            // such as U+00AD, which would make the fixed-length Substring below cut at the wrong place.
            if (!endObjectKey.StartsWith("endobj", StringComparison.Ordinal)
                && endObjectKey.StartsWith(endStreamKeyword, StringComparison.Ordinal))
            {
                endObjectKey = endObjectKey.Substring(endStreamKeyword.Length).Trim();
                if (endObjectKey.Length == 0)
                {
                    // Nothing else on the extra 'endstream' line: the keyword is on the next line.
                    endObjectKey = ReadHelper.ReadLine(reader);
                }
            }

            return stream;
        }
Пример #7
0
        /// <summary>
        /// Reads characters up to the next end-of-line marker or end-of-input.
        /// </summary>
        /// <param name="reader">Source to read from.</param>
        /// <returns>The line content without its terminator.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">The reader is already at end-of-file.</exception>
        public static string ReadLine(IRandomAccessRead reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader));
            }

            if (reader.IsEof())
            {
                throw new InvalidOperationException("Error: End-of-File, expected line");
            }

            var line = new StringBuilder(11);

            // CR and LF each terminate a line on their own.
            int current = reader.Read();
            while (current != -1 && !IsEndOfLine(current))
            {
                line.Append((char)current);
                current = reader.Read();
            }

            // CR immediately followed by LF counts as a single terminator: consume the LF too.
            if (IsCarriageReturn(current) && IsLineFeed(reader.Peek()))
            {
                reader.Read();
            }

            return line.ToString();
        }
Пример #8
0
        /// <summary>
        /// Checks whether the object header for <paramref name="objectKey"/> is present at <paramref name="offset"/>.
        /// </summary>
        /// <param name="source">Source to inspect; its position is restored before returning.</param>
        /// <param name="objectKey">Object number and generation expected at the offset.</param>
        /// <param name="offset">Candidate offset of the object.</param>
        /// <returns><c>true</c> when the expected object string is found at the offset; otherwise <c>false</c>.</returns>
        private bool checkObjectKeys(IRandomAccessRead source, CosObjectKey objectKey, long offset)
        {
            // there can't be any object at the very beginning of a pdf
            if (offset < MINIMUM_SEARCH_OFFSET)
            {
                return false;
            }

            long   objectNr     = objectKey.Number;
            long   objectGen    = objectKey.Generation;
            long   originOffset = source.GetPosition();
            string objectString = ObjectHelper.createObjectString(objectNr, objectGen);

            try
            {
                source.Seek(offset);
                return ReadHelper.IsString(source, OtherEncodings.StringAsLatin1Bytes(objectString));
            }
            catch (InvalidOperationException)
            {
                // Deliberately treated as "no valid object at this offset".
                return false;
            }
            finally
            {
                // Single place where the original position is restored, on every path.
                source.Seek(originOffset);
            }
        }
Пример #9
0
 /// <summary>
 /// Returns the cached brute-force object offsets, triggering the search on first use.
 /// </summary>
 /// <param name="reader">Source handed to the search when the cache is still empty.</param>
 /// <returns>The value of <c>bfSearchCOSObjectKeyOffsets</c> after the optional search.</returns>
 private Dictionary <CosObjectKey, long> getBFCosObjectOffsets(IRandomAccessRead reader)
 {
     if (bfSearchCOSObjectKeyOffsets != null)
     {
         return bfSearchCOSObjectKeyOffsets;
     }

     // Presumably fills bfSearchCOSObjectKeyOffsets; the field is returned as-is afterwards.
     bfSearchForObjects(reader);

     return bfSearchCOSObjectKeyOffsets;
 }
Пример #10
0
        /// <summary>
        /// Resolves the object identified by number and generation, parsing it from the file when the pool
        /// does not hold it yet.
        /// </summary>
        /// <param name="reader">Source to parse from.</param>
        /// <param name="objectNumber">Object number to resolve.</param>
        /// <param name="objectGeneration">Generation number to resolve.</param>
        /// <param name="pool">Pool consulted first.</param>
        /// <param name="crossReferenceTable">Table mapping object keys to offsets; updated when brute-force search finds the object.</param>
        /// <param name="bruteForceSearcher">Fallback locator, used only in lenient mode when the table has no entry.</param>
        /// <param name="isLenient">Enables the brute-force fallback and is passed on to the parsers.</param>
        /// <param name="requireExistingObject">When <c>true</c>, a missing or non-positive table entry is an error.</param>
        /// <returns>The resolved object, or <c>CosNull.Null</c> when no location is known.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="pool"/> or <paramref name="crossReferenceTable"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">The object is required but missing or compressed.</exception>
        public CosBase Parse(IRandomAccessRead reader, long objectNumber, int objectGeneration,
                             CosObjectPool pool, CrossReferenceTable crossReferenceTable,
                             BruteForceSearcher bruteForceSearcher,
                             bool isLenient,
                             bool requireExistingObject)
        {
            if (pool == null)
            {
                throw new ArgumentNullException(nameof(pool));
            }

            var key = new CosObjectKey(objectNumber, objectGeneration);

            // Already resolved earlier: reuse it.
            var pooled = pool.GetOrCreateDefault(key);
            if (pooled.GetObject() != null)
            {
                return pooled.GetObject();
            }

            if (crossReferenceTable == null)
            {
                throw new ArgumentNullException(nameof(crossReferenceTable));
            }

            // Positive values are used as file offsets; values <= 0 are negated and handed to the
            // compressed-object path below.
            var location = TryGet(key, crossReferenceTable.ObjectOffsets);

            if (requireExistingObject)
            {
                if (location == null || location <= 0)
                {
                    throw new InvalidOperationException("Object must be defined and not compressed: " + key);
                }
            }

            if (location == null && isLenient)
            {
                location = TryGet(key, bruteForceSearcher.GetObjectLocations());

                if (location != null)
                {
                    crossReferenceTable.UpdateOffset(key, location.Value);
                }
            }

            if (location == null)
            {
                return CosNull.Null;
            }

            if (location > 0)
            {
                return ParseObjectFromFile(location.Value, reader, key, pool, isLenient);
            }

            return ParseCompressedStreamObject(reader, -location.Value, objectNumber, pool, crossReferenceTable, bruteForceSearcher, isLenient);
        }
Пример #11
0
        /// <summary>
        /// Builds a <c>TrueTypeSimpleFont</c> from a font dictionary.
        /// </summary>
        /// <param name="dictionary">The font dictionary.</param>
        /// <param name="reader">Source used to resolve referenced objects.</param>
        /// <param name="isLenientParsing">Passed through to the parsers.</param>
        /// <returns>The constructed font.</returns>
        /// <exception cref="InvalidFontFormatException">The encoding entry names an unrecognised encoding.</exception>
        /// <exception cref="NotImplementedException">The encoding entry is present but is not a name.</exception>
        public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
        {
            var firstCharacter = GetFirstCharacter(dictionary);
            var lastCharacter  = GetLastCharacter(dictionary);
            var widths         = GetWidths(dictionary);
            var descriptor     = GetFontDescriptor(dictionary, reader, isLenientParsing);

            // NOTE(review): the parsed font program is not used below; the call is kept because it may
            // validate or have side effects - confirm before removing.
            ParseTrueTypeFont(descriptor, reader, isLenientParsing);

            var name = GetName(dictionary, descriptor);

            CMap toUnicodeCMap = null;

            if (dictionary.TryGetItemOfType(CosName.TO_UNICODE, out CosObject toUnicodeObj))
            {
                var toUnicodeStream = pdfObjectParser.Parse(toUnicodeObj.ToIndirectReference(), reader, isLenientParsing) as PdfRawStream;
                var cMapBytes       = toUnicodeStream?.Decode(filterProvider);

                if (cMapBytes != null)
                {
                    toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(cMapBytes), isLenientParsing);
                }
            }

            Encoding encoding = null;

            if (dictionary.TryGetValue(CosName.ENCODING, out var encodingBase))
            {
                // Symbolic fonts default to standard encoding.
                // NOTE(review): every branch below either overwrites this value or throws, so the default
                // never reaches the constructed font - confirm the intended behaviour.
                if (descriptor.Flags.HasFlag(FontFlags.Symbolic))
                {
                    encoding = StandardEncoding.Instance;
                }

                switch (encodingBase)
                {
                    case CosName encodingName:
                        if (!Encoding.TryGetNamedEncoding(encodingName, out encoding))
                        {
                            // TODO: PDFBox would not throw here.
                            throw new InvalidFontFormatException($"Unrecognised encoding name: {encodingName}");
                        }

                        break;

                    case CosDictionary _:
                        throw new NotImplementedException("No support for reading encoding from dictionary yet.");

                    default:
                        throw new NotImplementedException("No support for reading encoding from font yet.");
                }
            }

            return new TrueTypeSimpleFont(name, firstCharacter, lastCharacter, widths, descriptor, toUnicodeCMap, encoding);
        }
Пример #12
0
        /// <summary>
        /// Consumes one byte from the reader and verifies that it is the expected character.
        /// </summary>
        /// <param name="reader">Source to read from.</param>
        /// <param name="ec">The character that must come next.</param>
        /// <exception cref="InvalidOperationException">
        /// The next byte is a different character, or the end of the input was reached.
        /// </exception>
        public static void ReadExpectedChar(IRandomAccessRead reader, char ec)
        {
            int read = reader.Read();

            // A negative result means end-of-input. Casting it to char would wrap to U+FFFF, giving a
            // garbled message (and a false match if U+FFFF were ever the expected character).
            if (read < 0)
            {
                throw new InvalidOperationException($"expected=\'{ec}\' actual=end-of-file at offset {reader.GetPosition()}");
            }

            char c = (char)read;

            if (c != ec)
            {
                throw new InvalidOperationException($"expected=\'{ec}\' actual=\'{c}\' at offset {reader.GetPosition()}");
            }
        }
Пример #13
0
 /// <summary>
 /// Bundles the collaborators needed while parsing. <c>Log</c> is initialised to a <c>NoOpLog</c>.
 /// </summary>
 /// <exception cref="ArgumentNullException">Any reference argument is <c>null</c>.</exception>
 public ParsingArguments(IRandomAccessRead reader, CrossReferenceTable crossReferenceTable, ParsingCachingProviders cachingProviders, IContainer container, bool isLenientParsing)
 {
     // Validate everything first, in parameter order, then assign.
     if (reader is null)
     {
         throw new ArgumentNullException(nameof(reader));
     }

     if (crossReferenceTable is null)
     {
         throw new ArgumentNullException(nameof(crossReferenceTable));
     }

     if (cachingProviders is null)
     {
         throw new ArgumentNullException(nameof(cachingProviders));
     }

     if (container is null)
     {
         throw new ArgumentNullException(nameof(container));
     }

     Reader              = reader;
     CrossReferenceTable = crossReferenceTable;
     CachingProviders    = cachingProviders;
     Container           = container;
     IsLenientParsing    = isLenientParsing;
     Log                 = new NoOpLog();
 }
Пример #14
0
 /// <summary>
 /// Creates the validator and stores the supplied collaborators. No argument is validated here.
 /// </summary>
 /// <param name="log">Log stored for later use.</param>
 /// <param name="source">Input stored for later use.</param>
 /// <param name="dictionaryParser">Dictionary parser stored for later use.</param>
 /// <param name="baseParser">Base parser stored for later use.</param>
 /// <param name="pool">Object pool stored for later use.</param>
 public XrefOffsetValidator(ILog log, IRandomAccessRead source, CosDictionaryParser dictionaryParser,
                            CosBaseParser baseParser,
                            CosObjectPool pool)
 {
     // Parsing collaborators.
     this.dictionaryParser = dictionaryParser;
     this.baseParser = baseParser;
     this.pool = pool;

     // Input and diagnostics.
     this.source = source;
     this.log = log;
 }
Пример #15
0
        /// <summary>
        /// Reads a generation number and checks that it lies in the accepted range.
        /// </summary>
        /// <param name="reader">Source positioned at the generation number.</param>
        /// <returns>The generation number, between 0 and <c>GenerationNumberThreshold</c> inclusive.</returns>
        /// <exception cref="FormatException">The value is negative or exceeds <c>GenerationNumberThreshold</c>.</exception>
        public static int ReadGenerationNumber(IRandomAccessRead reader)
        {
            int retval = ReadHelper.ReadInt(reader);

            if (retval < 0 || retval > GenerationNumberThreshold)
            {
                // The check also rejects negative values, so the message says so (as ReadObjectNumber does).
                throw new FormatException($"Generation Number \'{retval}\' has more than 5 digits or is negative");
            }

            return retval;
        }
Пример #16
0
        /// <summary>
        /// Reads an object number and checks that it lies in the accepted range.
        /// </summary>
        /// <param name="reader">Source positioned at the object number.</param>
        /// <returns>The object number, at least 0 and below <c>ObjectNumberThreshold</c>.</returns>
        /// <exception cref="FormatException">The value is negative or not below <c>ObjectNumberThreshold</c>.</exception>
        public static long ReadObjectNumber(IRandomAccessRead reader)
        {
            long objectNumber = ReadHelper.ReadLong(reader);

            bool inRange = objectNumber >= 0 && objectNumber < ObjectNumberThreshold;

            if (inRange)
            {
                return objectNumber;
            }

            throw new FormatException($"Object Number \'{objectNumber}\' has more than 10 digits or is negative");
        }
Пример #17
0
        /// <summary>
        /// Convenience overload: resolves <paramref name="obj"/> by its object and generation numbers.
        /// </summary>
        /// <param name="obj">Object whose numbers identify what to resolve.</param>
        /// <returns>The result of the number-based overload.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="obj"/> is <c>null</c>.</exception>
        public CosBase Parse(IRandomAccessRead reader, CosObject obj, CosObjectPool pool,
                             CrossReferenceTable crossReferenceTable, BruteForceSearcher bruteForceSearcher, bool isLenient, bool requireExistingObject)
        {
            if (obj == null)
            {
                throw new ArgumentNullException(nameof(obj));
            }

            var number     = obj.GetObjectNumber();
            var generation = obj.GetGenerationNumber();

            return Parse(reader, number, generation, pool, crossReferenceTable, bruteForceSearcher, isLenient, requireExistingObject);
        }
Пример #18
0
        /// <summary>
        /// Reads the body of a stream object, from the <c>stream</c> keyword through to <c>endstream</c>.
        /// </summary>
        /// <param name="reader">Source positioned at the <c>stream</c> keyword.</param>
        /// <param name="streamDictionary">Dictionary preceding the stream; supplies the length entry.</param>
        /// <param name="isLenientParsing">When <c>true</c>, a missing length and some malformed terminators are logged instead of rejected.</param>
        /// <param name="parser">Used to resolve an indirect length entry.</param>
        /// <returns>The raw stream bytes together with <paramref name="streamDictionary"/>.</returns>
        /// <exception cref="InvalidOperationException">The stream is not terminated by <c>endstream</c> and the deviation is not tolerated.</exception>
        public PdfRawStream Parse(IRandomAccessRead reader, PdfDictionary streamDictionary, bool isLenientParsing, IPdfObjectParser parser)
        {
            // read 'stream'; this was already tested in parseObjectsDynamically()
            ReadHelper.ReadExpectedString(reader, "stream");

            skipWhiteSpaces(reader);

            // The length is taken from the raw dictionary item because, while parsing, the underlying
            // object might still be null.
            ICosNumber streamLength = GetLength(reader,
                                                streamDictionary.GetItemOrDefault(CosName.LENGTH),
                                                streamDictionary.GetName(CosName.TYPE),
                                                isLenientParsing,
                                                parser);

            ValidateStreamLength(reader, isLenientParsing, streamLength);

            PdfRawStream result;

            // Copy the stream data into memory.
            using (var buffer = new MemoryStream())
            using (var writer = new BinaryWriter(buffer))
            {
                var lengthIsUsable = streamLength != null
                                     && validateStreamLength(reader, streamLength.AsLong(), reader.Length());

                if (lengthIsUsable)
                {
                    ReadValidStream(reader, writer, streamLength);
                }
                else
                {
                    ReadUntilEndStream(reader, writer);
                }

                result = new PdfRawStream(buffer.ToArray(), streamDictionary);
            }

            var terminator = ReadHelper.ReadString(reader);

            // Well-formed terminator: nothing more to do.
            if (terminator.Equals("endstream"))
            {
                return result;
            }

            if (isLenientParsing)
            {
                if (terminator.Equals("endobj"))
                {
                    log.Warn($"stream ends with \'endobj\' instead of \'endstream\' at offset {reader.GetPosition()}");

                    // avoid follow-up warning about missing endobj
                    reader.Rewind("endobj".Length);

                    return result;
                }

                if (terminator.Length > 9 && terminator.StartsWith("endstream", StringComparison.Ordinal))
                {
                    log.Warn("stream ends with '" + terminator + "' instead of 'endstream' at offset " + reader.GetPosition());

                    // unread the "extra" bytes
                    reader.Rewind(OtherEncodings.StringAsLatin1Bytes(terminator.Substring(9)).Length);

                    return result;
                }
            }

            throw new InvalidOperationException("Error reading stream, expected='endstream' actual='"
                                                + terminator + "' at offset " + reader.GetPosition());
        }
Пример #19
0
        /// <summary>
        /// Builds the CID font described by a descendant font dictionary.
        /// </summary>
        /// <param name="dictionary">Descendant dictionary; its type entry must be <c>Font</c>.</param>
        /// <param name="reader">Source passed to the CID font factory.</param>
        /// <param name="isLenientParsing">Passed through to the CID font factory.</param>
        /// <returns>The font produced by the CID font factory.</returns>
        /// <exception cref="InvalidFontFormatException">The dictionary's type is missing or is not <c>Font</c>.</exception>
        private ICidFont ParseDescendant(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
        {
            var type = dictionary.GetName(CosName.TYPE);

            if (CosName.FONT.Equals(type))
            {
                return cidFontFactory.Generate(dictionary, reader, isLenientParsing);
            }

            // 'type' may be null when the entry is absent; use ?. so the intended format exception is
            // raised instead of a NullReferenceException while building the message.
            throw new InvalidFontFormatException($"Expected \'Font\' dictionary but found \'{type?.Name}\'");
        }
Пример #20
0
        /// <summary>
        /// Resolves the length entry of a stream dictionary to a number.
        /// </summary>
        /// <param name="source">Source used when an indirect length has to be parsed; its position is restored afterwards.</param>
        /// <param name="lengthBaseObj">The raw length entry: a number, an indirect object, or <c>null</c>.</param>
        /// <param name="streamType">Not used by this method.</param>
        /// <param name="isLenientParsing">Passed through to the object parser.</param>
        /// <param name="parser">Parser used to load an indirect length that has no object yet.</param>
        /// <returns>The length, or <c>null</c> when <paramref name="lengthBaseObj"/> is <c>null</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// The entry, or the object it refers to, is not a number; or the parser is required but <c>null</c>.
        /// </exception>
        private ICosNumber GetLength(IRandomAccessRead source, CosBase lengthBaseObj, CosName streamType, bool isLenientParsing, IPdfObjectParser parser)
        {
            if (lengthBaseObj == null)
            {
                return null;
            }

            // Length is given directly in the stream dictionary
            if (lengthBaseObj is ICosNumber direct)
            {
                return direct;
            }

            if (!(lengthBaseObj is CosObject lengthObj))
            {
                throw new InvalidOperationException($"Wrong type of length object: {lengthBaseObj.GetType().Name}");
            }

            // length in referenced object
            var referenced = lengthObj.GetObject();

            if (referenced is ICosNumber alreadyLoaded)
            {
                return alreadyLoaded;
            }

            if (referenced != null)
            {
                throw new InvalidOperationException("Wrong type of referenced length object " + lengthObj
                                                    + ": " + lengthObj.GetObject().GetType().Name);
            }

            // The referenced object has not been loaded yet: parse it now.
            if (parser == null)
            {
                throw new InvalidOperationException("This method required access to the PDF object parser but it was not created yet. Figure out how to fix this.");
            }

            // Remember where we are so the caller can continue reading the stream afterwards.
            var resumeAt = source.GetPosition();

            var parsedLength = parser.Parse(lengthObj.ToIndirectReference(), source, isLenientParsing);

            source.Seek(resumeAt);

            if (parsedLength is ICosNumber referenceNumber)
            {
                return referenceNumber;
            }

            throw new InvalidOperationException("Length object content was not read.");
        }
Пример #21
0
        /// <summary>
        /// Parses a PDF hex string with fail-fast semantics: reading of digits stops at the first
        /// character that is not allowed, and the rest is skipped up to the closing bracket.
        /// This makes it possible to detect malformed input and move on to the next object start.
        /// The opening '&lt;' is assumed to have been read already.
        /// </summary>
        /// <param name="reader">Source positioned just after the opening bracket.</param>
        /// <returns>The string built from the hex digits collected.</returns>
        /// <exception cref="IOException">The end of the input is reached before the closing bracket.</exception>
        private static CosString ParseHexString(IRandomAccessRead reader)
        {
            var hex = new StringBuilder();

            for (;;)
            {
                int current = reader.Read();

                if (ReadHelper.IsHexDigit((char)current))
                {
                    hex.Append((char)current);
                    continue;
                }

                if (current == '>')
                {
                    break;
                }

                if (current < 0)
                {
                    throw new IOException("Missing closing bracket for hex string. Reached EOS.");
                }

                // These characters are ignored between the digits.
                var isIgnorable = current == ' ' || current == '\n' || current == '\t'
                                  || current == '\r' || current == '\b' || current == '\f';

                if (isIgnorable)
                {
                    continue;
                }

                // Invalid character: drop the last hex digit if it does not complete a pair.
                if (hex.Length % 2 != 0)
                {
                    hex.Length--;
                }

                // Skip ahead to the closing bracket. 'current' is neither '>' nor negative here,
                // so at least one more byte is always read.
                while (current != '>' && current >= 0)
                {
                    current = reader.Read();
                }

                // End of input while looking for the closing bracket happens for malformed PDFs only;
                // fail instead of looping forever.
                if (current < 0)
                {
                    throw new IOException("Missing closing bracket for hex string. Reached EOS.");
                }

                break;
            }

            return CosString.ParseHex(hex.ToString());
        }
Пример #22
0
        /// <summary>
        /// Parses the object stored at <paramref name="offset"/> and checks that it is the one identified by <paramref name="key"/>.
        /// </summary>
        /// <param name="offset">Offset of the object header.</param>
        /// <param name="reader">Source to parse from; it is moved to <paramref name="offset"/>.</param>
        /// <param name="key">Object number and generation expected at the offset.</param>
        /// <param name="pool">Object pool handed to the base parser.</param>
        /// <param name="isLenientParsing">When <c>true</c>, a missing <c>endobj</c> is logged instead of rejected.</param>
        /// <returns>The parsed object; a stream when the object body is followed by <c>stream</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// The header does not match <paramref name="key"/>, or the object does not end with <c>endobj</c> in strict mode.
        /// </exception>
        private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader,
                                            CosObjectKey key,
                                            CosObjectPool pool,
                                            bool isLenientParsing)
        {
            reader.Seek(offset);

            var objectNumber     = ObjectHelper.ReadObjectNumber(reader);
            var objectGeneration = ObjectHelper.ReadGenerationNumber(reader);

            ReadHelper.ReadExpectedString(reader, "obj", true);

            var headerMatchesKey = objectNumber == key.Number && objectGeneration == key.Generation;

            if (!headerMatchesKey)
            {
                throw new InvalidOperationException($"Xref for {key} points to object {objectNumber} {objectGeneration} at {offset}");
            }

            ReadHelper.SkipSpaces(reader);

            var parsed = baseParser.Parse(reader, pool);

            var trailingKeyword = ReadHelper.ReadString(reader);

            if (string.Equals(trailingKeyword, "stream"))
            {
                // Put the keyword back so the stream reader sees it.
                reader.Rewind(OtherEncodings.StringAsLatin1Bytes(trailingKeyword).Length);

                parsed = ReadNormalObjectStream(reader, parsed, offset, isLenientParsing, out trailingKeyword);
            }

            if (string.Equals(trailingKeyword, "endobj"))
            {
                return parsed;
            }

            var message =
                $"Object ({objectNumber}:{objectGeneration}) at offset {offset} does not end with \'endobj\' but with \'{trailingKeyword}\'";

            if (!isLenientParsing)
            {
                throw new InvalidOperationException(message);
            }

            log.Warn(message);

            return parsed;
        }
Пример #23
0
        /// <summary>
        /// Reads the given pattern from the reader, optionally skipping whitespace before and after it.
        /// </summary>
        /// <param name="reader">Source to read from.</param>
        /// <param name="expectedstring">Pattern that must come next.</param>
        /// <param name="skipSpaces">When <c>true</c>, whitespace before and after the pattern is skipped.</param>
        /// <exception cref="IOException">The pattern could not be read.</exception>
        public static void ReadExpectedString(IRandomAccessRead reader, string expectedstring, bool skipSpaces)
        {
            // Honour the flag: previously whitespace was skipped unconditionally, so passing false
            // had no effect and bytes following the pattern could be consumed unexpectedly.
            if (skipSpaces)
            {
                SkipSpaces(reader);
            }

            foreach (var c in expectedstring)
            {
                if (reader.Read() != c)
                {
                    throw new IOException($"Expected string \'{expectedstring}\' but missed character \'{c}\' at offset {reader.GetPosition()}");
                }
            }

            if (skipSpaces)
            {
                SkipSpaces(reader);
            }
        }
Пример #24
0
        /// <summary>
        /// Resolves an indirect reference, parsing the object from the file when the pool does not hold it yet.
        /// </summary>
        /// <param name="indirectReference">Reference to resolve.</param>
        /// <param name="reader">Source to parse from.</param>
        /// <param name="isLenientParsing">Enables the brute-force fallback; a missing object then yields <c>CosNull.Null</c>.</param>
        /// <param name="requireExistingObject">When <c>true</c>, a missing or non-positive table entry is an error.</param>
        /// <returns>The resolved object, or <c>CosNull.Null</c> when it cannot be located in lenient mode.</returns>
        /// <exception cref="InvalidOperationException">
        /// The object is required but missing or compressed, or it cannot be located in strict mode.
        /// </exception>
        public CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false)
        {
            var key = new CosObjectKey(indirectReference.ObjectNumber, indirectReference.Generation);

            // Already resolved earlier: reuse it.
            var pooled = objectPool.GetOrCreateDefault(key);
            if (pooled.GetObject() != null)
            {
                return pooled.GetObject();
            }

            // Positive values are used as file offsets; values <= 0 are negated and handed to the
            // compressed-object path below.
            var location = TryGet(key, crossReferenceTable.ObjectOffsets);

            if (requireExistingObject)
            {
                if (location == null || location <= 0)
                {
                    throw new InvalidOperationException("Object must be defined and not compressed: " + key);
                }
            }

            if (location == null && isLenientParsing)
            {
                location = TryGet(key, bruteForceSearcher.GetObjectLocations());

                if (location != null)
                {
                    crossReferenceTable.UpdateOffset(key, location.Value);
                }
            }

            if (location == null)
            {
                if (!isLenientParsing)
                {
                    throw new InvalidOperationException($"Could not locate the object {key.Number} which was not found in the cross reference table.");
                }

                return CosNull.Null;
            }

            if (location > 0)
            {
                return ParseObjectFromFile(location.Value, reader, key, objectPool, isLenientParsing);
            }

            return ParseCompressedStreamObject(reader, -location.Value, indirectReference.ObjectNumber, isLenientParsing);
        }
Пример #25
0
        /// <summary>
        /// Builds a <c>Type0Font</c> from a font dictionary and its first descendant font.
        /// </summary>
        /// <param name="dictionary">The Type 0 font dictionary.</param>
        /// <param name="reader">Source used to resolve referenced objects.</param>
        /// <param name="isLenientParsing">Passed through to the parsers.</param>
        /// <returns>The constructed font.</returns>
        /// <exception cref="InvalidFontFormatException">
        /// No descendant font is declared, or the descendant is not a dictionary.
        /// </exception>
        public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
        {
            var baseFont = dictionary.GetName(CosName.BASE_FONT);

            var cMap = ReadEncoding(dictionary, out var isCMapPredefined);

            if (!TryGetFirstDescendant(dictionary, out var descendantObject))
            {
                throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
            }

            var parsed = DirectObjectFinder.Find <PdfDictionary>(descendantObject, pdfObjectParser, reader, isLenientParsing);

            if (!(parsed is PdfDictionary descendantFontDictionary))
            {
                throw new InvalidFontFormatException("Expected to find a Descendant Font dictionary, instead it was: " + parsed);
            }

            ICidFont cidFont = ParseDescendant(descendantFontDictionary, reader, isLenientParsing);

            // NOTE(review): the result is not used below; the call is kept because it may have
            // side effects - confirm before removing.
            GetUcs2CMap(dictionary, isCMapPredefined, false);

            CMap toUnicodeCMap = null;

            // One typed lookup instead of ContainsKey + indexer + hard cast: an entry that is not an
            // indirect reference is now skipped rather than failing with InvalidCastException.
            if (dictionary.TryGetItemOfType(CosName.TO_UNICODE, out CosObject toUnicodeObj))
            {
                var toUnicode = pdfObjectParser.Parse(toUnicodeObj.ToIndirectReference(), reader, isLenientParsing) as PdfRawStream;

                var decodedUnicodeCMap = toUnicode?.Decode(filterProvider);

                if (decodedUnicodeCMap != null)
                {
                    toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
                }
            }

            return new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap);
        }
Пример #26
0
        /// <summary>
        /// Reacts to a missing stream length: logs a warning in lenient mode, fails otherwise.
        /// Does nothing when a length is present.
        /// </summary>
        /// <param name="reader">Source whose position is included in the warning.</param>
        /// <param name="isLenientParsing">Selects between warning and failure.</param>
        /// <param name="streamLength">The resolved length, or <c>null</c> when none was found.</param>
        /// <exception cref="InvalidOperationException">The length is missing and parsing is not lenient.</exception>
        private void ValidateStreamLength(IRandomAccessRead reader, bool isLenientParsing, ICosNumber streamLength)
        {
            if (streamLength != null)
            {
                return;
            }

            if (!isLenientParsing)
            {
                throw new InvalidOperationException("Missing length for stream.");
            }

            log.Warn("The stream doesn't provide any stream length, using fallback readUntilEnd, at offset " +
                     reader.GetPosition());
        }
Пример #27
0
        /// <summary>
        /// Tests whether the bytes at the reader's current position equal <paramref name="str"/>,
        /// without moving the reader.
        /// </summary>
        /// <param name="reader">Source to inspect; it is sought back to its starting position afterwards.</param>
        /// <param name="str">Bytes expected next.</param>
        /// <returns><c>true</c> when every byte matches; <c>false</c> at the first mismatch.</returns>
        public static bool IsString(IRandomAccessRead reader, IEnumerable <byte> str)
        {
            bool matches = true;
            long start   = reader.GetPosition();

            using (var expected = str.GetEnumerator())
            {
                // Stop at the first mismatch without pulling further expected bytes.
                while (matches && expected.MoveNext())
                {
                    matches = reader.Read() == expected.Current;
                }
            }

            reader.Seek(start);

            return matches;
        }
Пример #28
0
        /// <summary>
        /// Resolves the font descriptor referenced by a font dictionary.
        /// </summary>
        /// <param name="dictionary">Font dictionary holding the descriptor reference.</param>
        /// <param name="reader">Source used to resolve the reference.</param>
        /// <param name="isLenientParsing">Passed through to the parser and the descriptor factory.</param>
        /// <returns>The descriptor built by the font descriptor factory.</returns>
        /// <exception cref="InvalidFontFormatException">
        /// The reference is missing, or it does not resolve to a dictionary.
        /// </exception>
        private FontDescriptor GetFontDescriptor(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
        {
            CosObject descriptorReference;

            if (!dictionary.TryGetItemOfType(CosName.FONT_DESC, out descriptorReference))
            {
                throw new InvalidFontFormatException($"No font descriptor indirect reference found in the TrueType font: {dictionary}.");
            }

            var parsed = pdfObjectParser.Parse(descriptorReference.ToIndirectReference(), reader, isLenientParsing);

            if (parsed is PdfDictionary descriptorDictionary)
            {
                return fontDescriptorFactory.Generate(descriptorDictionary, isLenientParsing);
            }

            throw new InvalidFontFormatException($"Expected a font descriptor dictionary but instead found {parsed}.");
        }
Пример #29
0
        /// <summary>
        /// Copies exactly the declared number of stream bytes from the reader to the output, in chunks.
        /// </summary>
        /// <param name="reader">Source positioned at the first stream byte.</param>
        /// <param name="output">Destination for the copied bytes.</param>
        /// <param name="streamLengthObj">Number of bytes to copy.</param>
        /// <exception cref="InvalidOperationException">A read returns no bytes before the declared length is reached.</exception>
        private void ReadValidStream(IRandomAccessRead reader, BinaryWriter output, ICosNumber streamLengthObj)
        {
            long remaining = streamLengthObj.AsLong();

            while (remaining > 0)
            {
                // Never ask for more than the shared copy buffer can hold.
                int requested = (int)Math.Min(remaining, STREAMCOPYBUFLEN);
                int received  = reader.Read(streamCopyBuf, 0, requested);

                if (received <= 0)
                {
                    // shouldn't happen, the stream length has already been validated
                    throw new InvalidOperationException(
                              $"read error at offset {reader.GetPosition()}: expected {requested} bytes, but read() returns {received}");
                }

                output.Write(streamCopyBuf, 0, received);
                remaining -= received;
            }
        }
Пример #30
0
 /// <summary>
 /// Creates the document from already parsed parts and builds its <c>Pages</c> accessor.
 /// </summary>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="reader"/>, <paramref name="version"/>, <paramref name="crossReferenceTable"/>,
 /// <paramref name="cachingProviders"/>, <paramref name="information"/> or <paramref name="catalog"/> is <c>null</c>.
 /// </exception>
 internal PdfDocument(ILog log, IRandomAccessRead reader, HeaderVersion version, CrossReferenceTable crossReferenceTable,
                      bool isLenientParsing,
                      ParsingCachingProviders cachingProviders,
                      IPageFactory pageFactory,
                      IPdfObjectParser pdfObjectParser,
                      Catalog catalog,
                      DocumentInformation information)
 {
     // Validate first, in the order the values are stored below.
     if (reader is null)
     {
         throw new ArgumentNullException(nameof(reader));
     }

     if (version is null)
     {
         throw new ArgumentNullException(nameof(version));
     }

     if (crossReferenceTable is null)
     {
         throw new ArgumentNullException(nameof(crossReferenceTable));
     }

     if (cachingProviders is null)
     {
         throw new ArgumentNullException(nameof(cachingProviders));
     }

     if (information is null)
     {
         throw new ArgumentNullException(nameof(information));
     }

     if (catalog is null)
     {
         throw new ArgumentNullException(nameof(catalog));
     }

     // log, pageFactory and pdfObjectParser are stored or passed on without a null check.
     this.log                 = log;
     this.reader              = reader;
     this.version             = version;
     this.crossReferenceTable = crossReferenceTable;
     this.isLenientParsing    = isLenientParsing;
     this.cachingProviders    = cachingProviders;
     Information              = information;
     Catalog                  = catalog;
     Pages                    = new Pages(log, Catalog, pdfObjectParser, pageFactory, reader, isLenientParsing);
 }