示例#1
0
        /// <summary>
        /// Reads a dictionary delimited by double angle brackets from the reader's current position.
        /// </summary>
        /// <param name="reader">Source positioned on the opening brackets.</param>
        /// <param name="baseParser">Parser used for the entry values.</param>
        /// <param name="pool">Pool handed on to the value parser.</param>
        /// <returns>
        /// The populated dictionary, or a fresh empty dictionary when an unexpected character
        /// is met and <c>ReadUntilEnd</c> reports <c>true</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        public PdfDictionary Parse(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
        {
            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader));
            }

            if (baseParser == null)
            {
                throw new ArgumentNullException(nameof(baseParser));
            }

            if (pool == null)
            {
                throw new ArgumentNullException(nameof(pool));
            }

            // Consume the two opening brackets.
            ReadHelper.ReadExpectedChar(reader, '<');
            ReadHelper.ReadExpectedChar(reader, '<');
            ReadHelper.SkipSpaces(reader);

            var result = new PdfDictionary();

            while (true)
            {
                ReadHelper.SkipSpaces(reader);

                var next = (char)reader.Peek();

                if (next == '>')
                {
                    // Start of the closing brackets: all entries have been read.
                    break;
                }

                if (next == '/')
                {
                    var entry = ParseCosDictionaryNameValuePair(reader, baseParser, pool);

                    // Entries with a missing name or value are dropped rather than stored.
                    if (entry.key != null && entry.value != null)
                    {
                        result.Set(entry.key, entry.value);
                    }

                    continue;
                }

                // Anything else is unexpected inside a dictionary.
                if (ReadUntilEnd(reader))
                {
                    return new PdfDictionary();
                }
            }

            ReadHelper.ReadExpectedString(reader, ">>");

            return result;
        }
示例#2
0
        /// <summary>
        /// Resolves the object identified by number and generation, using the pool as a cache
        /// and the cross-reference table (or the brute-force searcher in lenient mode) to locate it.
        /// </summary>
        /// <returns>
        /// The pooled or freshly parsed object, or <c>CosNull.Null</c> when no location is known.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="pool"/> is <c>null</c>, or <paramref name="crossReferenceTable"/> is
        /// <c>null</c> and the object was not already pooled.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="requireExistingObject"/> is set and the table has no positive offset for the key.
        /// </exception>
        public CosBase Parse(IRandomAccessRead reader, long objectNumber, int objectGeneration,
                             CosObjectPool pool, CrossReferenceTable crossReferenceTable,
                             BruteForceSearcher bruteForceSearcher,
                             bool isLenient,
                             bool requireExistingObject)
        {
            if (pool == null)
            {
                throw new ArgumentNullException(nameof(pool));
            }

            var key = new CosObjectKey(objectNumber, objectGeneration);
            var pooled = pool.GetOrCreateDefault(key);

            // Already loaded earlier: hand back the cached value.
            if (pooled.GetObject() != null)
            {
                return pooled.GetObject();
            }

            if (crossReferenceTable == null)
            {
                throw new ArgumentNullException(nameof(crossReferenceTable));
            }

            // Positive values are treated as file offsets, non-positive ones as the
            // negated number of the containing object stream.
            var location = TryGet(key, crossReferenceTable.ObjectOffsets);

            if (requireExistingObject)
            {
                if (location == null || location <= 0)
                {
                    throw new InvalidOperationException("Object must be defined and not compressed: " + key);
                }
            }

            if (location == null && isLenient)
            {
                // Fall back to the brute-force scan and remember what it found.
                location = TryGet(key, bruteForceSearcher.GetObjectLocations());

                if (location != null)
                {
                    crossReferenceTable.UpdateOffset(key, location.Value);
                }
            }

            if (location == null)
            {
                return CosNull.Null;
            }

            if (location <= 0)
            {
                return ParseCompressedStreamObject(reader, -location.Value, objectNumber, pool, crossReferenceTable, bruteForceSearcher, isLenient);
            }

            return ParseObjectFromFile(location.Value, reader, key, pool, isLenient);
        }
示例#3
0
        /// <summary>
        /// Decodes an object stream and returns the objects it contains, each wrapped in a
        /// <c>CosObject</c> carrying generation 0 and the object number listed in the stream header.
        /// </summary>
        /// <param name="stream">The raw object stream; its dictionary supplies the /N count.</param>
        /// <param name="pool">Pool handed on to the base parser.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
        public IReadOnlyList<CosObject> Parse(PdfRawStream stream, CosObjectPool pool)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            var declaredCount = stream.Dictionary.GetIntOrDefault(CosName.N);
            var objectNumbers = new List<long>(declaredCount);
            var results = new List<CosObject>(declaredCount);

            var reader = new RandomAccessBuffer(stream.Decode(filterProvider));

            // The header is a run of (object number, offset) pairs; only the numbers are kept.
            while (objectNumbers.Count < declaredCount)
            {
                long number = ObjectHelper.ReadObjectNumber(reader);
                ReadHelper.ReadLong(reader);
                objectNumbers.Add(number);
            }

            var index = 0;

            while (true)
            {
                CosBase parsed = baseParser.Parse(reader, pool);

                if (parsed == null)
                {
                    break;
                }

                var wrapper = new CosObject(parsed);
                wrapper.SetGenerationNumber(0);

                if (index >= objectNumbers.Count)
                {
                    log.Error("/ObjStm (object stream) has more objects than /N " + declaredCount);
                    break;
                }

                wrapper.SetObjectNumber(objectNumbers[index]);
                results.Add(wrapper);

                // Objects in an object stream should not be wrapped in obj/endobj, but some
                // files do it anyway; swallow the line when it starts with 'e'.
                if (!reader.IsEof() && reader.Peek() == 'e')
                {
                    ReadHelper.ReadLine(reader);
                }

                index++;
            }

            return results;
        }
示例#4
0
 /// <summary>
 /// Stores the collaborators used when validating cross-reference offsets.
 /// No argument is validated here.
 /// </summary>
 public XrefOffsetValidator(
     ILog log,
     IRandomAccessRead source,
     CosDictionaryParser dictionaryParser,
     CosBaseParser baseParser,
     CosObjectPool pool)
 {
     this.source = source;
     this.pool = pool;
     this.log = log;

     this.baseParser = baseParser;
     this.dictionaryParser = dictionaryParser;
 }
示例#5
0
        /// <summary>
        /// Convenience overload: resolves the object identified by the number and generation
        /// stored on <paramref name="obj"/>.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="obj"/> is <c>null</c>.</exception>
        public CosBase Parse(IRandomAccessRead reader, CosObject obj, CosObjectPool pool,
                             CrossReferenceTable crossReferenceTable, BruteForceSearcher bruteForceSearcher, bool isLenient, bool requireExistingObject)
        {
            if (obj == null)
            {
                throw new ArgumentNullException(nameof(obj));
            }

            var number = obj.GetObjectNumber();
            var generation = obj.GetGenerationNumber();

            return Parse(
                reader,
                number,
                generation,
                pool,
                crossReferenceTable,
                bruteForceSearcher,
                isLenient,
                requireExistingObject);
        }
示例#6
0
 /// <summary>
 /// Creates the object parser. A missing log is replaced by a <c>NoOpLog</c>;
 /// every other dependency is mandatory.
 /// </summary>
 /// <exception cref="ArgumentNullException">Any dependency other than <paramref name="log"/> is <c>null</c>.</exception>
 public PdfObjectParser(ILog log, CosBaseParser baseParser, CosStreamParser streamParser, CrossReferenceTable crossReferenceTable,
                        BruteForceSearcher bruteForceSearcher,
                        CosObjectPool objectPool,
                        ObjectStreamParser objectStreamParser)
 {
     this.log = log ?? new NoOpLog();

     // Validate in parameter order so the first missing dependency is the one reported.
     if (baseParser == null)
     {
         throw new ArgumentNullException(nameof(baseParser));
     }

     if (streamParser == null)
     {
         throw new ArgumentNullException(nameof(streamParser));
     }

     if (crossReferenceTable == null)
     {
         throw new ArgumentNullException(nameof(crossReferenceTable));
     }

     if (bruteForceSearcher == null)
     {
         throw new ArgumentNullException(nameof(bruteForceSearcher));
     }

     if (objectPool == null)
     {
         throw new ArgumentNullException(nameof(objectPool));
     }

     if (objectStreamParser == null)
     {
         throw new ArgumentNullException(nameof(objectStreamParser));
     }

     this.baseParser = baseParser;
     this.streamParser = streamParser;
     this.crossReferenceTable = crossReferenceTable;
     this.bruteForceSearcher = bruteForceSearcher;
     this.objectPool = objectPool;
     this.objectStreamParser = objectStreamParser;
 }
示例#7
0
        /// <summary>
        /// Parses the indirect object located at <paramref name="offset"/> and checks that its
        /// header matches <paramref name="key"/>.
        /// </summary>
        /// <returns>The parsed object, or the stream built from it when a "stream" keyword follows.</returns>
        /// <exception cref="InvalidOperationException">
        /// The object header does not match the key, or (when not lenient) the object is not
        /// terminated by "endobj".
        /// </exception>
        private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader,
                                            CosObjectKey key,
                                            CosObjectPool pool,
                                            bool isLenientParsing)
        {
            reader.Seek(offset);

            var number = ObjectHelper.ReadObjectNumber(reader);
            var generation = ObjectHelper.ReadGenerationNumber(reader);

            ReadHelper.ReadExpectedString(reader, "obj", true);

            var headerMatchesKey = number == key.Number && generation == key.Generation;

            if (!headerMatchesKey)
            {
                throw new InvalidOperationException($"Xref for {key} points to object {number} {generation} at {offset}");
            }

            ReadHelper.SkipSpaces(reader);

            var parsed = baseParser.Parse(reader, pool);
            var terminator = ReadHelper.ReadString(reader);

            if (string.Equals(terminator, "stream"))
            {
                // Step back over the keyword so the stream reader sees it again.
                reader.Rewind(OtherEncodings.StringAsLatin1Bytes(terminator).Length);

                parsed = ReadNormalObjectStream(reader, parsed, offset, isLenientParsing, out terminator);
            }

            if (string.Equals(terminator, "endobj"))
            {
                return parsed;
            }

            var message =
                $"Object ({number}:{generation}) at offset {offset} does not end with \'endobj\' but with \'{terminator}\'";

            if (!isLenientParsing)
            {
                throw new InvalidOperationException(message);
            }

            log.Warn(message);

            return parsed;
        }
示例#8
0
 /// <summary>
 /// Bundles the caching collaborators used while parsing; all three are mandatory.
 /// </summary>
 /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
 public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher, IResourceStore resourceContainer)
 {
     if (objectPool == null)
     {
         throw new ArgumentNullException(nameof(objectPool));
     }

     if (bruteForceSearcher == null)
     {
         throw new ArgumentNullException(nameof(bruteForceSearcher));
     }

     if (resourceContainer == null)
     {
         throw new ArgumentNullException(nameof(resourceContainer));
     }

     ObjectPool = objectPool;
     BruteForceSearcher = bruteForceSearcher;
     ResourceContainer = resourceContainer;
 }
示例#9
0
        /// <summary>
        /// Loads the object stream <paramref name="streamObjectNumber"/>, registers its contents
        /// in the pool and returns the entry whose number equals <paramref name="requestedNumber"/>.
        /// </summary>
        /// <returns>
        /// The matching stream entry, or <c>CosNull.Null</c> when the container is not a raw
        /// stream or holds no object with the requested number.
        /// </returns>
        private CosBase ParseCompressedStreamObject(IRandomAccessRead reader, long streamObjectNumber, long requestedNumber, CosObjectPool objectPool, CrossReferenceTable crossReferenceTable, BruteForceSearcher bruteForceSearcher, bool isLenientParsing)
        {
            // The containing stream must itself be an existing, uncompressed object.
            var container = Parse(reader, streamObjectNumber, 0, objectPool, crossReferenceTable, bruteForceSearcher,
                                  isLenientParsing, true);

            if (!(container is PdfRawStream stream))
            {
                log.Warn($"Could not find a stream for the object number, defaults to returning CosNull: {streamObjectNumber}");

                return CosNull.Null;
            }

            var contained = objectStreamParser.Parse(stream, objectPool);

            // Only register objects that the cross-reference table attributes to this stream.
            foreach (var entry in contained)
            {
                var entryKey = new CosObjectKey(entry);
                var recordedLocation = TryGet(entryKey, crossReferenceTable.ObjectOffsets);

                if (recordedLocation != -streamObjectNumber)
                {
                    continue;
                }

                objectPool.Get(entryKey).SetObject(entry.GetObject());
            }

            foreach (var candidate in contained)
            {
                if (candidate.GetObjectNumber() == requestedNumber)
                {
                    return candidate;
                }
            }

            log.Error($"Could not find the object {requestedNumber} in the stream for object {streamObjectNumber}. Returning CosNull.");

            return CosNull.Null;
        }
示例#10
0
 /// <summary>
 /// Stub implementation: ignores both arguments and always yields <c>CosNull.Null</c>.
 /// </summary>
 public CosBase Parse(IRandomAccessRead reader, CosObjectPool pool) => CosNull.Null;
示例#11
0
        /// <summary>
        /// Parses every indirect object referenced directly from the trailer dictionary and
        /// returns the parsed /Root object.
        /// </summary>
        /// <exception cref="InvalidOperationException">The trailer dictionary has no root entry.</exception>
        private static CosBase ParseTrailer(IRandomAccessRead reader, CrossReferenceTable crossReferenceTable,
                                            DynamicParser dynamicParser, BruteForceSearcher bruteForceSearcher, CosObjectPool pool, bool isLenientParsing)
        {
            foreach (var entry in crossReferenceTable.Dictionary.Values)
            {
                if (!(entry is CosObject indirect))
                {
                    continue;
                }

                // Result is discarded: the call is made so the object is available from the pool later.
                dynamicParser.Parse(reader, indirect, pool, crossReferenceTable, bruteForceSearcher,
                                    isLenientParsing, false);
            }

            var root = (CosObject)crossReferenceTable.Dictionary.GetItemOrDefault(CosName.ROOT);

            if (root == null)
            {
                throw new InvalidOperationException("Missing root object specification in trailer.");
            }

            return dynamicParser.Parse(reader, root, pool, crossReferenceTable, bruteForceSearcher,
                                       isLenientParsing, false);
        }
示例#12
0
        /// <summary>
        /// Parses a dictionary value. A plain value is returned as parsed; a number followed by
        /// another number and 'R' is treated as an indirect reference and resolved through the pool.
        /// </summary>
        /// <param name="reader">Source positioned at the start of the value.</param>
        /// <param name="baseParser">Parser used for the value and, if present, the generation number.</param>
        /// <param name="pool">Pool that supplies the object for an indirect reference.</param>
        /// <returns>The direct value, or the pooled object for an indirect reference.</returns>
        /// <exception cref="InvalidOperationException">
        /// The object number or the generation number of a reference is not an integer.
        /// </exception>
        private static CosBase ParseValue(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
        {
            var numOffset = reader.GetPosition();
            var value     = baseParser.Parse(reader, pool);

            ReadHelper.SkipSpaces(reader);

            // Only a number followed by another digit can be the start of "<num> <gen> R".
            if (!(value is ICosNumber) || !ReadHelper.IsDigit(reader))
            {
                return value;
            }

            // Read the generation part of the reference.
            var genOffset        = reader.GetPosition();
            var generationNumber = baseParser.Parse(reader, pool);

            ReadHelper.SkipSpaces(reader);
            ReadHelper.ReadExpectedChar(reader, 'R');

            if (!(value is CosInt objectNumber))
            {
                throw new InvalidOperationException("expected number, actual=" + value + " at offset " + numOffset);
            }

            if (!(generationNumber is CosInt generation))
            {
                // Report the offending generation token, not the (valid) object number.
                throw new InvalidOperationException("expected number, actual=" + generationNumber + " at offset " + genOffset);
            }

            var key = new CosObjectKey(objectNumber.AsLong(), generation.AsInt());

            // Dereference the object.
            return pool.Get(key);
        }
示例#13
0
        /// <summary>
        /// Reads one dictionary entry: a name followed by its value.
        /// </summary>
        /// <returns>
        /// The name and value, with the value flagged as direct; <c>(null, null)</c> when no
        /// value could be parsed.
        /// </returns>
        private (CosName key, CosBase value) ParseCosDictionaryNameValuePair(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
        {
            var name = nameParser.Parse(reader);
            var parsedValue = ParseValue(reader, baseParser, pool);

            ReadHelper.SkipSpaces(reader);

            if ((char)reader.Peek() == 'd')
            {
                // A trailing "def" (as seen in cmap streams) is consumed and ignored;
                // any other token is pushed back for the caller to deal with.
                var token = ReadHelper.ReadString(reader);

                if (token.Equals("def"))
                {
                    ReadHelper.SkipSpaces(reader);
                }
                else
                {
                    reader.Unread(OtherEncodings.StringAsLatin1Bytes(token));
                }
            }

            if (parsedValue == null)
            {
                log?.Warn("Bad Dictionary Declaration " + ReadHelper.ReadString(reader));

                return (null, null);
            }

            // Label this item as direct, to avoid signature problems.
            parsedValue.Direct = true;

            return (name, parsedValue);
        }
示例#14
0
        /// <summary>
        /// Parses an array starting at '[' and returns its elements.
        /// </summary>
        /// <remarks>
        /// When the base parser yields a <c>CosObject</c>, the two preceding integer elements
        /// are taken as "number generation" and replaced by the pooled object for that key.
        /// If the preceding elements are not two integers the reference is discarded. Parsing
        /// stops early when "endobj" or "endstream" is met after an unparseable element.
        /// </remarks>
        /// <param name="reader">Source positioned on the opening bracket.</param>
        /// <param name="baseParser">Parser used for each element.</param>
        /// <param name="pool">Pool that supplies the objects for indirect references.</param>
        public COSArray Parse(IRandomAccessRead reader, CosBaseParser baseParser, CosObjectPool pool)
        {
            ReadHelper.ReadExpectedChar(reader, '[');
            var     po = new COSArray();
            CosBase pbo;

            ReadHelper.SkipSpaces(reader);
            int i;

            while (((i = reader.Peek()) > 0) && ((char)i != ']'))
            {
                pbo = baseParser.Parse(reader, pool);
                if (pbo is CosObject)
                {
                    // We have to check if the expected values are there or not PDFBOX-385.
                    // The size guards stop an index of -1 being used when fewer than two
                    // elements precede the reference marker.
                    if (po.size() > 0 && po.get(po.size() - 1) is CosInt)
                    {
                        var genNumber = (CosInt)po.remove(po.size() - 1);
                        if (po.size() > 0 && po.get(po.size() - 1) is CosInt)
                        {
                            var          number = (CosInt)po.remove(po.size() - 1);
                            CosObjectKey key    = new CosObjectKey(number.AsLong(), genNumber.AsInt());
                            pbo = pool.Get(key);
                        }
                        else
                        {
                            // the object reference is somehow wrong
                            pbo = null;
                        }
                    }
                    else
                    {
                        pbo = null;
                    }
                }
                if (pbo != null)
                {
                    po.add(pbo);
                }
                else
                {
                    // It could be a bad object in the array, which is just skipped.
                    // This could also be an "endobj" or "endstream" which means we can assume that
                    // the array has ended. The token is pushed back either way.
                    string isThisTheEnd = ReadHelper.ReadString(reader);
                    reader.Unread(OtherEncodings.StringAsLatin1Bytes(isThisTheEnd));
                    if (string.Equals(isThisTheEnd, "endobj") || string.Equals(isThisTheEnd, "endstream"))
                    {
                        return(po);
                    }
                }

                ReadHelper.SkipSpaces(reader);
            }
            // read ']'
            reader.Read();
            ReadHelper.SkipSpaces(reader);
            return(po);
        }
示例#15
0
 /// <summary>
 /// Stub implementation: ignores all arguments and always yields a new, empty dictionary.
 /// </summary>
 public PdfDictionary Parse(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool) => new PdfDictionary();
            /// <summary>
            /// Attempts to read the "trailer" keyword and the dictionary that follows it.
            /// </summary>
            /// <param name="source">Source positioned where the trailer is expected.</param>
            /// <param name="isLenientParsing">When set, leading lines that start with a digit are skipped first.</param>
            /// <param name="pool">Pool handed on to the dictionary parser.</param>
            /// <param name="trailer">The parsed dictionary on success; <c>null</c> otherwise.</param>
            /// <returns><c>true</c> when a trailer keyword was found and its dictionary parsed.</returns>
            private bool TryParseTrailer(IRandomAccessRead source, bool isLenientParsing, CosObjectPool pool, out PdfDictionary trailer)
            {
                trailer = null;

                // PDFBOX-1739 skip extra xref entries in RegisSTAR documents
                if (isLenientParsing)
                {
                    int nextCharacter = source.Peek();
                    while (nextCharacter != 't' && ReadHelper.IsDigit(nextCharacter))
                    {
                        ReadHelper.ReadLine(source);
                        nextCharacter = source.Peek();
                    }
                }

                if (source.Peek() != 't')
                {
                    return(false);
                }

                // read "trailer"
                long   currentOffset = source.GetPosition();
                string nextLine      = ReadHelper.ReadLine(source);

                if (!nextLine.Trim().Equals("trailer"))
                {
                    // In some cases the EOL is missing and the trailer immediately continues with
                    // "<<" or a blank. This does not comply with the PDF reference but is tolerated.
                    // The keyword is matched ordinally so the result does not depend on the current culture.
                    if (nextLine.StartsWith("trailer", System.StringComparison.Ordinal))
                    {
                        // We can't just unread part of the line because the EOL may be 1 or 2 bytes,
                        // so jump back to right after "trailer" instead.
                        int len = "trailer".Length;
                        source.Seek(currentOffset + len);
                    }
                    else
                    {
                        return(false);
                    }
                }

                // The EOL may be missing, with the trailer continuing as " <<".
                ReadHelper.SkipSpaces(source);

                PdfDictionary parsedTrailer = dictionaryParser.Parse(source, baseParser, pool);

                trailer = parsedTrailer;

                ReadHelper.SkipSpaces(source);
                return(true);
            }
            /// <summary>
            /// Attempts to read a cross-reference table ("xref" keyword, one or more subsections,
            /// then the trailer) from the current position of <paramref name="source"/>.
            /// </summary>
            /// <param name="source">Source positioned where the "xref" keyword is expected.</param>
            /// <param name="offset">Recorded on the builder and used in log/exception messages.</param>
            /// <param name="isLenientParsing">
            /// When set, an unreadable subsection definition ends the subsection loop instead of
            /// failing, and is also passed on to the trailer parsing.
            /// </param>
            /// <param name="pool">Pool handed on to the trailer parsing.</param>
            /// <param name="builder">The populated builder on success; may be non-null on a <c>false</c> return.</param>
            /// <returns><c>true</c> when the table and its trailer were read.</returns>
            /// <exception cref="InvalidOperationException">
            /// An in-use entry points inside this table, an entry has a malformed number or an
            /// unknown indicator, or the trailer could not be read.
            /// </exception>
            public bool TryParse(IRandomAccessRead source, long offset, bool isLenientParsing, CosObjectPool pool, out CrossReferenceTablePartBuilder builder)
            {
                builder = null;

                // Remembered so entries pointing back into this table can be detected below.
                var tableStartOffset = source.GetPosition();

                if (source.Peek() != 'x')
                {
                    return(false);
                }

                var xref = ReadHelper.ReadString(source);

                if (!xref.Trim().Equals("xref"))
                {
                    return(false);
                }

                // check for trailer after xref
                var str = ReadHelper.ReadString(source);

                byte[] b = OtherEncodings.StringAsLatin1Bytes(str);

                // The token was only read to look ahead; step back over it.
                source.Rewind(b.Length);

                if (str.StartsWith("trailer"))
                {
                    log.Warn("skipping empty xref table");
                    return(false);
                }

                builder = new CrossReferenceTablePartBuilder
                {
                    Offset   = offset,
                    XRefType = CrossReferenceType.Table
                };

                // Tables can have multiple sections. Each starts with a starting object id and a count.
                while (true)
                {
                    if (!TableSubsectionDefinition.TryRead(log, source, out var subsectionDefinition))
                    {
                        log.Warn($"Unexpected subsection definition in the cross-reference table at offset {offset}");

                        if (isLenientParsing)
                        {
                            // Lenient mode: keep what was read so far and go on to the trailer.
                            break;
                        }

                        return(false);
                    }

                    var currentObjectId = subsectionDefinition.FirstNumber;

                    ReadHelper.SkipSpaces(source);
                    for (var i = 0; i < subsectionDefinition.Count; i++)
                    {
                        if (source.IsEof() || ReadHelper.IsEndOfName((char)source.Peek()))
                        {
                            break;
                        }

                        // A 't' here is taken as the start of the trailer: the subsection ended early.
                        if (source.Peek() == 't')
                        {
                            break;
                        }

                        // Read one entry line and split it into its space-separated fields.
                        var currentLine = ReadHelper.ReadLine(source);
                        var splitString = currentLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                        if (splitString.Length < 3)
                        {
                            log.Warn("invalid xref line: " + currentLine);
                            break;
                        }

                        // This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n)
                        // The in-use indicator is therefore looked for in the LAST field, whereas the
                        // free indicator below is checked in the third field.
                        if (splitString[splitString.Length - 1].Equals(InUseEntry))
                        {
                            try
                            {
                                var objectOffset = long.Parse(splitString[0]);

                                if (objectOffset >= tableStartOffset && objectOffset <= source.GetPosition())
                                {
                                    // PDFBOX-3923: offset points inside this table - that can't be good
                                    throw new InvalidOperationException(
                                              $"Object offset {objectOffset} is within its own cross-reference table for object {currentObjectId}");
                                }

                                var generation = int.Parse(splitString[1]);
                                builder.Add(currentObjectId, generation, objectOffset);
                            }
                            catch (FormatException e)
                            {
                                // NOTE(review): only FormatException is wrapped; an OverflowException from
                                // long.Parse/int.Parse would propagate unchanged.
                                throw new InvalidOperationException("Bad", e);
                            }
                        }
                        else if (!splitString[2].Equals(FreeEntry))
                        {
                            throw new InvalidOperationException(
                                      $"Corrupt cross-reference table entry for object {currentObjectId}. The indicator was not 'n' or 'f' but {splitString[2]}.");
                        }

                        // Free entries are not added to the builder but still consume an object id.
                        currentObjectId++;

                        ReadHelper.SkipSpaces(source);
                    }

                    // Another subsection follows only if the next token starts with a digit.
                    ReadHelper.SkipSpaces(source);
                    if (!ReadHelper.IsDigit(source))
                    {
                        break;
                    }
                }

                if (!TryParseTrailer(source, isLenientParsing, pool, out var trailer))
                {
                    throw new InvalidOperationException($"Something went wrong trying to read the XREF table at {offset}.");
                }

                builder.Dictionary = trailer;
                builder.Previous   = trailer.GetLongOrDefault(CosName.PREV);

                return(true);
            }
示例#18
0
        /// <summary>
        /// Checks whether <paramref name="startXRefOffset"/> points at an object whose dictionary
        /// is of type XRef, i.e. a cross-reference stream.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the check is skipped (non-lenient mode or offset 0) or an XRef-typed
        /// dictionary is found; <c>false</c> otherwise.
        /// </returns>
        private bool CheckXRefStreamOffset(IRandomAccessRead source, long startXRefOffset, bool isLenient, CosObjectPool pool)
        {
            // Repair mode isn't available in non-lenient mode.
            if (!isLenient || startXRefOffset == 0)
            {
                return true;
            }

            // Inspect the byte just before the offset: it has to be whitespace...
            source.Seek(startXRefOffset - 1);
            int preceding = source.Read();

            if (!ReadHelper.IsWhitespace(preceding))
            {
                return false;
            }

            // ...and the object itself has to begin with a digit.
            ReadHelper.SkipSpaces(source);

            if (!ReadHelper.IsDigit(source))
            {
                return false;
            }

            try
            {
                // Read the "<number> <generation> obj" header of the candidate object.
                ObjectHelper.ReadObjectNumber(source);
                ObjectHelper.ReadGenerationNumber(source);

                ReadHelper.ReadExpectedString(source, "obj", true);

                // Check the dictionary to avoid false positives.
                PdfDictionary candidate = dictionaryParser.Parse(source, baseParser, pool);
                source.Seek(startXRefOffset);

                return candidate.IsType(CosName.XREF);
            }
            catch (Exception ex)
            {
                log.Error("Couldn't read the xref stream object.", ex);

                // There wasn't an xref stream object here; restore the position.
                source.Seek(startXRefOffset);

                return false;
            }
        }
示例#19
0
        /// <summary>
        /// Wires up the parsers and factories needed to open a document: reads the header
        /// version, locates and validates the cross-reference offset, parses the cross-reference
        /// data and the trailer root, then builds the <c>PdfDocument</c>.
        /// </summary>
        /// <exception cref="InvalidOperationException">The parsed root is not a dictionary.</exception>
        private static PdfDocument OpenDocument(IRandomAccessRead reader, IInputBytes inputBytes, ISeekableTokenScanner scanner, IContainer container, bool isLenientParsing)
        {
            var log = container.Get<ILog>();

            var headerParser = container.Get<FileHeaderParser>();
            var version = headerParser.Parse(scanner, isLenientParsing);

            var fileTrailerParser = container.Get<FileTrailerParser>();
            var crossReferenceOffset = fileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);

            var pool = new CosObjectPool();

            // TODO: make this use the scanner.
            var validatorDictionaryParser = container.Get<CosDictionaryParser>();
            var validatorBaseParser = container.Get<CosBaseParser>();
            var xrefOffsetValidator = new XrefOffsetValidator(log, reader, validatorDictionaryParser, validatorBaseParser, pool);
            var validator = new CrossReferenceOffsetValidator(xrefOffsetValidator);

            crossReferenceOffset = validator.Validate(crossReferenceOffset, isLenientParsing);

            var crossReferenceTable = container.Get<CrossReferenceParser>()
                                               .Parse(reader, isLenientParsing, crossReferenceOffset, pool);

            // Result is not used here.
            container.Get<CrossReferenceParser>().ParseNew(crossReferenceOffset, scanner, isLenientParsing);

            var filterProvider = container.Get<IFilterProvider>();
            var bruteForceSearcher = new BruteForceSearcher(reader);

            var objectParserLog = container.Get<ILog>();
            var objectParserBaseParser = container.Get<CosBaseParser>();
            var streamParser = container.Get<CosStreamParser>();
            var objectStreamParser = container.Get<ObjectStreamParser>();
            var pdfObjectParser = new PdfObjectParser(
                objectParserLog,
                objectParserBaseParser,
                streamParser,
                crossReferenceTable,
                bruteForceSearcher,
                pool,
                objectStreamParser);

            // Font infrastructure.
            var trueTypeFontParser = new TrueTypeFontParser();
            var fontDescriptorFactory = new FontDescriptorFactory();
            var cidFontFactory = new CidFontFactory(fontDescriptorFactory, trueTypeFontParser, pdfObjectParser, filterProvider);
            var cMapCache = new CMapCache(new CMapParser());

            var type0Handler = new Type0FontHandler(cidFontFactory, cMapCache, filterProvider, pdfObjectParser);
            var trueTypeHandler = new TrueTypeFontHandler(pdfObjectParser, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser);
            var fontFactory = new FontFactory(log, type0Handler, trueTypeHandler);

            var dynamicParser = container.Get<DynamicParser>();
            var resourceContainer = new ResourceContainer(pdfObjectParser, fontFactory);

            // Page, information and catalog factories.
            var operationFactory = new ReflectionGraphicsStateOperationFactory();
            var pageContentParser = new PageContentParser(operationFactory);
            var pageFactory = new PageFactory(resourceContainer, pdfObjectParser, filterProvider, pageContentParser);
            var informationFactory = new DocumentInformationFactory();
            var catalogFactory = new CatalogFactory(pdfObjectParser);

            var root = ParseTrailer(reader, crossReferenceTable, dynamicParser, bruteForceSearcher, pool,
                                    isLenientParsing);

            if (!(root is PdfDictionary rootDictionary))
            {
                throw new InvalidOperationException("Expected root dictionary, but got this: " + root);
            }

            // In some pdfs the type value "Catalog" is missing in the root object.
            if (isLenientParsing && !rootDictionary.ContainsKey(CosName.TYPE))
            {
                rootDictionary.Set(CosName.TYPE, CosName.CATALOG);
            }

            var information = informationFactory.Create(pdfObjectParser, crossReferenceTable.Dictionary, reader, isLenientParsing);
            var catalog = catalogFactory.Create(rootDictionary, reader, isLenientParsing);
            var caching = new ParsingCachingProviders(pool, bruteForceSearcher, resourceContainer);

            return new PdfDocument(log, reader, version, crossReferenceTable, isLenientParsing, caching, pageFactory, pdfObjectParser, catalog, information);
        }
示例#20
0
        /// <summary>
        /// Reads the next direct object from <paramref name="reader"/>, choosing the
        /// concrete parser from the first non-space character.
        /// </summary>
        /// <param name="reader">The source to read from; leading spaces are skipped first.</param>
        /// <param name="pool">Object pool handed through to the dictionary and array parsers.</param>
        /// <returns>
        /// The parsed object, or <c>null</c> when the reader has no more data or when an
        /// unrecognised, non-empty token was encountered.
        /// </returns>
        /// <exception cref="IOException">
        /// Thrown when a <c>true</c>/<c>false</c> literal is malformed, or when an unknown
        /// token could not be read at all (which would otherwise stall the caller).
        /// </exception>
        public CosBase Parse(IRandomAccessRead reader, CosObjectPool pool)
        {
            ReadHelper.SkipSpaces(reader);

            int peeked = reader.Peek();
            if (peeked == -1)
            {
                // Nothing left to read.
                return null;
            }

            char first = (char)peeked;

            switch (first)
            {
            case '<':
            {
                // Take the first '<' off, look at what follows, then put it back so the
                // chosen parser sees the complete opening delimiter.
                int opening = reader.Read();
                bool startsDictionary = (char)reader.Peek() == '<';
                reader.Unread(opening);

                if (!startsDictionary)
                {
                    // A single '<' introduces a string.
                    return stringParser.Parse(reader);
                }

                CosBase dictionary = dictionaryParser.Parse(reader, this, pool);
                ReadHelper.SkipSpaces(reader);
                return dictionary;
            }

            case '[':
                // array
                return arrayParser.Parse(reader, this, pool);

            case '(':
                return stringParser.Parse(reader);

            case '/':
                // name
                return nameParser.Parse(reader);

            case 'n':
                // null
                ReadHelper.ReadExpectedString(reader, "null");
                return CosNull.Null;

            case 't':
            {
                string literal = OtherEncodings.BytesAsLatin1String(reader.ReadFully(4));
                if (!literal.Equals("true"))
                {
                    throw new IOException("expected true actual='" + literal + "' " + reader +
                                          "' at offset " + reader.GetPosition());
                }

                return PdfBoolean.True;
            }

            case 'f':
            {
                string literal = OtherEncodings.BytesAsLatin1String(reader.ReadFully(5));
                if (!literal.Equals("false"))
                {
                    throw new IOException("expected false actual='" + literal + "' " + reader +
                                          "' at offset " + reader.GetPosition());
                }

                return PdfBoolean.False;
            }

            case 'R':
                // Consume the 'R' marker and hand back an object wrapping null.
                reader.Read();
                return new CosObject(null);

            default:
                // Numbers and unknown tokens are handled below the switch.
                break;
            }

            if (char.IsDigit(first) || first == '-' || first == '+' || first == '.')
            {
                var numberText = new StringBuilder();
                int current;

                // Accumulate every character that can belong to a numeric token.
                while (true)
                {
                    current = reader.Read();
                    char ch = (char)current;

                    bool partOfNumber = char.IsDigit(ch)
                                        || ch == '-'
                                        || ch == '+'
                                        || ch == '.'
                                        || ch == 'E'
                                        || ch == 'e';
                    if (!partOfNumber)
                    {
                        break;
                    }

                    numberText.Append(ch);
                }

                // The terminating character is not part of the number; return it to the
                // reader unless the end of the input was reached.
                if (current != -1)
                {
                    reader.Unread(current);
                }

                return CosNumberFactory.get(numberText.ToString()) as CosBase;
            }

            // Not expected in a well-formed file, but tolerated so that output from
            // writers that do not follow the spec can still be read.
            string token = ReadHelper.ReadString(reader);
            if (token == string.Empty)
            {
                // Failing here prevents the caller from looping forever on the same byte.
                int next = reader.Peek();
                throw new IOException("Unknown dir object c='" + first +
                                      "' cInt=" + (int)first + " peek='" + (char)next
                                      + "' peekInt=" + next + " at offset " + reader.GetPosition());
            }

            // An endobj/endstream keyword is pushed back so the caller will see it.
            if (string.Equals("endobj", token) || string.Equals("endstream", token))
            {
                reader.Unread(OtherEncodings.StringAsLatin1Bytes(token));
            }

            return null;
        }