Example #1
0
        /// <summary>
        /// Reads one dictionary entry from <paramref name="reader"/>: a name followed by its value.
        /// </summary>
        /// <param name="reader">Source to read the entry from.</param>
        /// <param name="baseParser">Forwarded to the value parser.</param>
        /// <param name="pool">Forwarded to the value parser.</param>
        /// <returns>
        /// The parsed name and value, or <c>(null, null)</c> when no value could be parsed.
        /// </returns>
        private (CosName key, CosBase value) ParseCosDictionaryNameValuePair(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
        {
            var entryName  = nameParser.Parse(reader);
            var entryValue = ParseValue(reader, baseParser, pool);

            ReadHelper.SkipSpaces(reader);

            // A trailing 'def' token appears when parsing a cmap stream: it is consumed and dropped.
            // Any other token starting with 'd' is pushed back onto the reader untouched.
            if ((char)reader.Peek() == 'd')
            {
                var token = ReadHelper.ReadString(reader);
                if (token.Equals("def"))
                {
                    ReadHelper.SkipSpaces(reader);
                }
                else
                {
                    reader.Unread(OtherEncodings.StringAsLatin1Bytes(token));
                }
            }

            if (entryValue == null)
            {
                // No value was produced: log the next token and signal the failure with an empty pair.
                log?.Warn("Bad Dictionary Declaration " + ReadHelper.ReadString(reader));
                return (null, null);
            }

            // Mark the item as direct, to avoid signature problems.
            entryValue.Direct = true;

            return (entryName, entryValue);
        }
Example #2
0
        /// <summary>
        /// Reads a stream body, starting at the 'stream' keyword, and returns its raw bytes together
        /// with <paramref name="streamDictionary"/>.
        /// </summary>
        /// <param name="reader">Source positioned at the 'stream' keyword.</param>
        /// <param name="streamDictionary">Dictionary belonging to the stream; its Length and Type entries are looked up.</param>
        /// <param name="isLenientParsing">When true, an 'endobj' terminator or trailing characters after 'endstream' are logged and tolerated.</param>
        /// <param name="parser">Forwarded to the length lookup.</param>
        /// <returns>The raw stream built from the bytes that were read.</returns>
        /// <exception cref="InvalidOperationException">The token following the data is not an accepted terminator.</exception>
        public PdfRawStream Parse(IRandomAccessRead reader, PdfDictionary streamDictionary, bool isLenientParsing, IPdfObjectParser parser)
        {
            // Consume the 'stream' keyword (the caller has already checked that it is there).
            ReadHelper.ReadExpectedString(reader, "stream");
            skipWhiteSpaces(reader);

            // The length is fetched with GetItemOrDefault because, while parsing, the underlying
            // object might still be null.
            var lengthEntry = streamDictionary.GetItemOrDefault(CosName.LENGTH);
            var typeName    = streamDictionary.GetName(CosName.TYPE);
            ICosNumber streamLength = GetLength(reader, lengthEntry, typeName, isLenientParsing, parser);

            ValidateStreamLength(reader, isLenientParsing, streamLength);

            PdfRawStream rawStream;

            // Copy the stream data into an in-memory buffer.
            using (var buffer = new MemoryStream())
            using (var output = new BinaryWriter(buffer))
            {
                if (streamLength == null || !validateStreamLength(reader, streamLength.AsLong(), reader.Length()))
                {
                    // No usable length: scan forward for the end marker instead.
                    ReadUntilEndStream(reader, output);
                }
                else
                {
                    ReadValidStream(reader, output, streamLength);
                }

                rawStream = new PdfRawStream(buffer.ToArray(), streamDictionary);
            }

            string terminator = ReadHelper.ReadString(reader);

            // The well-formed case.
            if (terminator.Equals("endstream"))
            {
                return rawStream;
            }

            if (isLenientParsing)
            {
                if (terminator.Equals("endobj"))
                {
                    log.Warn($"stream ends with \'endobj\' instead of \'endstream\' at offset {reader.GetPosition()}");

                    // Step back over 'endobj' so the caller does not also warn about a missing endobj.
                    reader.Rewind("endobj".Length);
                    return rawStream;
                }

                if (terminator.Length > 9 && terminator.StartsWith("endstream", StringComparison.Ordinal))
                {
                    log.Warn("stream ends with '" + terminator + "' instead of 'endstream' at offset " + reader.GetPosition());

                    // Step back over the characters that followed 'endstream'.
                    var surplus = OtherEncodings.StringAsLatin1Bytes(terminator.Substring(9));
                    reader.Rewind(surplus.Length);
                    return rawStream;
                }
            }

            throw new InvalidOperationException("Error reading stream, expected='endstream' actual='"
                                                + terminator + "' at offset " + reader.GetPosition());
        }
Example #3
0
        /// <summary>
        /// Parses an array: consumes the opening '[', then reads elements with
        /// <paramref name="baseParser"/> until ']' or the end of input is peeked.
        /// </summary>
        /// <param name="reader">Source positioned at the opening '['.</param>
        /// <param name="baseParser">Parser used for each element.</param>
        /// <param name="pool">Pool used to resolve object references found in the array.</param>
        /// <returns>
        /// The elements that could be parsed. Elements that come back null are skipped. If the
        /// token after a skipped element is 'endobj' or 'endstream' the array is returned
        /// immediately, with that token left unread for the caller.
        /// </returns>
        public COSArray Parse(IRandomAccessRead reader, CosBaseParser baseParser, CosObjectPool pool)
        {
            ReadHelper.ReadExpectedChar(reader, '[');
            var     po = new COSArray();
            CosBase pbo;

            ReadHelper.SkipSpaces(reader);
            int i;

            while (((i = reader.Peek()) > 0) && ((char)i != ']'))
            {
                pbo = baseParser.Parse(reader, pool);
                if (pbo is CosObject)
                {
                    // A CosObject here marks an object reference whose object number and generation
                    // number should be the two integers already added to the array (PDFBOX-385).
                    // The size checks keep a malformed array such as "[R]" or "[0 R]" from
                    // indexing at -1; the reference is then treated as bad and skipped.
                    if (po.size() > 0 && po.get(po.size() - 1) is CosInt)
                    {
                        var genNumber = (CosInt)po.remove(po.size() - 1);
                        if (po.size() > 0 && po.get(po.size() - 1) is CosInt)
                        {
                            var          number = (CosInt)po.remove(po.size() - 1);
                            CosObjectKey key    = new CosObjectKey(number.AsLong(), genNumber.AsInt());
                            pbo = pool.Get(key);
                        }
                        else
                        {
                            // the object reference is somehow wrong
                            pbo = null;
                        }
                    }
                    else
                    {
                        pbo = null;
                    }
                }
                if (pbo != null)
                {
                    po.add(pbo);
                }
                else
                {
                    // It could be a bad object in the array, which is just skipped.
                    // It could also be an "endobj" or "endstream", in which case we assume that
                    // the array has ended. The token is only inspected: it is always pushed back.
                    string isThisTheEnd = ReadHelper.ReadString(reader);
                    reader.Unread(OtherEncodings.StringAsLatin1Bytes(isThisTheEnd));
                    if (string.Equals(isThisTheEnd, "endobj") || string.Equals(isThisTheEnd, "endstream"))
                    {
                        return(po);
                    }
                }

                ReadHelper.SkipSpaces(reader);
            }
            // read ']'
            reader.Read();
            ReadHelper.SkipSpaces(reader);
            return(po);
        }
Example #4
0
        /// <summary>
        /// Parses the next object from <paramref name="reader"/>, choosing the parser from the first
        /// character found after skipping spaces.
        /// </summary>
        /// <param name="reader">Source to read from.</param>
        /// <param name="pool">Forwarded to the dictionary and array parsers.</param>
        /// <returns>
        /// The parsed object. Null when the reader is at its end, or when an unrecognised token
        /// was read in place of an object.
        /// </returns>
        /// <exception cref="IOException">
        /// A token starting with 't' or 'f' is not 'true' / 'false', or an unrecognised character
        /// yields an empty token.
        /// </exception>
        public CosBase Parse(IRandomAccessRead reader, CosObjectPool pool)
        {
            ReadHelper.SkipSpaces(reader);

            int peeked = reader.Peek();
            if (peeked == -1)
            {
                return null;
            }

            CosBase parsed = null;
            char    first  = (char)peeked;

            switch (first)
            {
                case '<':
                {
                    // Take the first '<' off, look at what follows, then put the '<' back:
                    // '<<' opens a dictionary, a single '<' opens a string.
                    int  opening      = reader.Read();
                    bool isDictionary = (char)reader.Peek() == '<';
                    reader.Unread(opening);

                    if (!isDictionary)
                    {
                        parsed = stringParser.Parse(reader);
                        break;
                    }

                    parsed = dictionaryParser.Parse(reader, this, pool);
                    ReadHelper.SkipSpaces(reader);
                    break;
                }

                case '[':
                    parsed = arrayParser.Parse(reader, this, pool);
                    break;

                case '(':
                    parsed = stringParser.Parse(reader);
                    break;

                case '/':
                    parsed = nameParser.Parse(reader);
                    break;

                case 'n':
                    ReadHelper.ReadExpectedString(reader, "null");
                    parsed = CosNull.Null;
                    break;

                case 't':
                {
                    string literal = OtherEncodings.BytesAsLatin1String(reader.ReadFully(4));
                    if (!literal.Equals("true"))
                    {
                        throw new IOException("expected true actual='" + literal + "' " + reader +
                                              "' at offset " + reader.GetPosition());
                    }

                    parsed = PdfBoolean.True;
                    break;
                }

                case 'f':
                {
                    string literal = OtherEncodings.BytesAsLatin1String(reader.ReadFully(5));
                    if (!literal.Equals("false"))
                    {
                        throw new IOException("expected false actual='" + literal + "' " + reader +
                                              "' at offset " + reader.GetPosition());
                    }

                    parsed = PdfBoolean.False;
                    break;
                }

                case 'R':
                    // Consume the 'R' and hand back a CosObject wrapping null.
                    reader.Read();
                    parsed = new CosObject(null);
                    break;

                default:
                {
                    if (char.IsDigit(first) || "-+.".IndexOf(first) >= 0)
                    {
                        // Number: gather every digit, sign, dot or exponent marker that follows.
                        var text    = new StringBuilder();
                        int current = reader.Read();

                        for (char ch = (char)current;
                             char.IsDigit(ch) || "-+.Ee".IndexOf(ch) >= 0;
                             ch = (char)current)
                        {
                            text.Append(ch);
                            current = reader.Read();
                        }

                        // Put back the character that ended the number, unless input ran out.
                        if (current != -1)
                        {
                            reader.Unread(current);
                        }

                        parsed = CosNumberFactory.get(text.ToString()) as CosBase;
                        break;
                    }

                    // This is not supposed to happen, but it is allowed so that output from
                    // PDF writers that do not follow the spec can still be read.
                    string unknown = ReadHelper.ReadString(reader);
                    if (unknown == string.Empty)
                    {
                        // Nothing was consumed; fail here, we can end up in an infinite loop otherwise.
                        int upcoming = reader.Peek();
                        throw new IOException("Unknown dir object c='" + first +
                                              "' cInt=" + (int)first + " peek='" + (char)upcoming
                                              + "' peekInt=" + upcoming + " at offset " + reader.GetPosition());
                    }

                    // An endstream/endobj token is put back so the caller will see it.
                    if (string.Equals("endobj", unknown) || string.Equals("endstream", unknown))
                    {
                        reader.Unread(OtherEncodings.StringAsLatin1Bytes(unknown));
                    }

                    break;
                }
            }

            return parsed;
        }