Beispiel #1
0
        /// <summary>
        /// Advances <paramref name="reader"/> past any run of whitespace and '%' comments.
        /// </summary>
        /// <remarks>
        /// A comment starts at '%' and runs up to the next end-of-line character or the end of input.
        /// The first byte that is neither whitespace nor a comment start is pushed back with
        /// <c>Unread</c>; nothing is pushed back when the end of input (-1) was reached.
        /// </remarks>
        /// <param name="reader">The source to consume from.</param>
        public static void SkipSpaces(IRandomAccessRead reader)
        {
            // ASCII code of '%', which opens a comment
            const int percentSign = 37;

            int current = reader.Read();

            while (IsWhitespace(current) || current == percentSign)
            {
                if (current != percentSign)
                {
                    // plain whitespace: just move on to the next byte
                    current = reader.Read();
                    continue;
                }

                // inside a comment: discard bytes up to the line end or the end of input
                do
                {
                    current = reader.Read();
                }
                while (!IsEndOfLine(current) && current != -1);
            }

            if (current == -1)
            {
                return;
            }

            // give back the first byte that does not belong to the skipped run
            reader.Unread(current);
        }
Beispiel #2
0
        /// <summary>
        /// Consumes the optional spaces and the end-of-line marker that follow the <c>stream</c> keyword.
        /// </summary>
        /// <remarks>
        /// PDF Ref 3.2.7: a stream must be followed by either CRLF or LF but nothing else.
        /// Parsing is lenient: leading spaces are skipped, a lone CR is accepted, and any other
        /// byte is pushed back so that it is treated as the first byte of the stream data.
        /// The end-of-input sentinel (-1) is never pushed back.
        /// </remarks>
        /// <param name="reader">The source positioned directly after the <c>stream</c> keyword.</param>
        protected void skipWhiteSpaces(IRandomAccessRead reader)
        {
            const int endOfInput = -1;

            int whitespace = reader.Read();

            // see brother_scan_cover.pdf, it adds whitespaces
            // after the stream but before the start of the
            // data, so just read those first
            while (whitespace == ' ')
            {
                whitespace = reader.Read();
            }

            if (whitespace == ReadHelper.AsciiCarriageReturn)
            {
                whitespace = reader.Read();

                // A CR that is not followed by LF is invalid per the spec but happens in the
                // real world, so the byte after the CR is handed back as stream data.
                // Nothing is handed back at the end of input: -1 is a sentinel, not data.
                if (whitespace != ReadHelper.AsciiLineFeed && whitespace != endOfInput)
                {
                    reader.Unread(whitespace);
                }
            }
            else if (whitespace != ReadHelper.AsciiLineFeed && whitespace != endOfInput)
            {
                // Neither CR nor LF: this is an error, but parsing stays lenient and assumes
                // the byte already belongs to the stream data.
                reader.Unread(whitespace);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Reads the characters up to the next end-of-line marker and returns them without the marker.
        /// </summary>
        /// <remarks>
        /// CR, LF and the pair CR+LF all terminate a line; the terminator is consumed.
        /// Reading also stops at the end of input.
        /// </remarks>
        /// <param name="reader">The source to read from.</param>
        /// <returns>The line content, possibly empty.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The reader is already at the end of the file.</exception>
        public static string ReadLine(IRandomAccessRead reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader));
            }

            if (reader.IsEof())
            {
                throw new InvalidOperationException("Error: End-of-File, expected line");
            }

            var line = new StringBuilder(11);

            int current = reader.Read();

            // collect everything up to (not including) the first CR/LF or the end of input
            while (current != -1 && !IsEndOfLine(current))
            {
                line.Append((char)current);
                current = reader.Read();
            }

            // a CR directly followed by LF counts as one terminator, so swallow the LF too
            if (IsCarriageReturn(current))
            {
                if (IsLineFeed(reader.Peek()))
                {
                    reader.Read();
                }
            }

            return line.ToString();
        }
Beispiel #4
0
 /// <summary>
 /// Reads from the wrapped reader, unless the <c>Throw</c> flag is set.
 /// </summary>
 /// <returns>Whatever the wrapped reader's <c>Read()</c> returns.</returns>
 /// <exception cref="InvalidOperationException"><c>Throw</c> is true.</exception>
 public int Read()
 {
     if (!Throw)
     {
         return reader.Read();
     }

     throw new InvalidOperationException();
 }
Beispiel #5
0
        /// <summary>
        /// Parses a PDF hex string with fail-fast semantics: collecting stops at the first character that is not allowed.
        /// This makes it possible to detect malformed input and skip ahead to the closing bracket.
        /// The opening '&lt;' must already have been consumed by the caller.
        /// </summary>
        /// <param name="reader">The source positioned directly after the opening '&lt;'.</param>
        /// <returns>The string produced by <c>CosString.ParseHex</c> from the collected hex digits.</returns>
        /// <exception cref="IOException">The end of input is reached before the closing '&gt;'.</exception>
        private static CosString ParseHexString(IRandomAccessRead reader)
        {
            var hexDigits = new StringBuilder();

            for (;;)
            {
                int current = reader.Read();
                char asChar = (char)current;

                if (ReadHelper.IsHexDigit(asChar))
                {
                    hexDigits.Append(asChar);
                    continue;
                }

                if (current == '>')
                {
                    // regular end of the hex string
                    break;
                }

                if (current < 0)
                {
                    throw new IOException("Missing closing bracket for hex string. Reached EOS.");
                }

                bool isSkippable = current == ' ' || current == '\n' || current == '\t' ||
                                   current == '\r' || current == '\b' || current == '\f';
                if (isSkippable)
                {
                    // these characters between hex digits are ignored
                    continue;
                }

                // An invalid character was found: drop a dangling half pair so that
                // only complete two-digit pairs are kept.
                if (hexDigits.Length % 2 != 0)
                {
                    hexDigits.Length -= 1;
                }

                // Discard input up to the closing bracket. The current character is neither
                // '>' nor end of input here, so at least one more byte is always read.
                while (current != '>' && current >= 0)
                {
                    current = reader.Read();
                }

                // Malformed PDFs may end before the bracket; fail instead of looping forever.
                if (current < 0)
                {
                    throw new IOException("Missing closing bracket for hex string. Reached EOS.");
                }

                break;
            }

            return CosString.ParseHex(hexDigits.ToString());
        }
Beispiel #6
0
        /// <summary>
        /// Collects the characters of a number token into a <see cref="StringBuilder"/>.
        /// </summary>
        /// <remarks>
        /// Reading stops at a space, LF, CR, '&lt;' (60), '[', '(', a NUL byte or the end of input.
        /// The terminating byte is pushed back unless it is the end of input (-1).
        /// </remarks>
        /// <param name="reader">The source to read from.</param>
        /// <returns>The collected characters; the content is not validated as numeric here.</returns>
        /// <exception cref="IOException">More characters were collected than <c>long.MaxValue</c> has digits.</exception>
        private static StringBuilder ReadStringNumber(IRandomAccessRead reader)
        {
            var digits = new StringBuilder();

            // number of decimal digits of the largest long; anything longer cannot be a valid number
            int maxDigits = long.MaxValue.ToString("D").Length;

            int current;

            while (true)
            {
                current = reader.Read();

                bool isTerminator = current == ' ' ||
                                    current == AsciiLineFeed ||
                                    current == AsciiCarriageReturn ||
                                    current == 60 ||  // '<', see sourceforge bug 1714707
                                    current == '[' || // PDFBOX-1845
                                    current == '(' || // PDFBOX-2579
                                    current == 0 ||   // see sourceforge bug 853328
                                    current == -1;
                if (isTerminator)
                {
                    break;
                }

                digits.Append((char)current);

                if (digits.Length > maxDigits)
                {
                    throw new IOException("Number '" + digits + "' is getting too long, stop reading at offset " + reader.GetPosition());
                }
            }

            // the terminator belongs to the next token, so hand it back (except at end of input)
            if (current != -1)
            {
                reader.Unread(current);
            }

            return digits;
        }
Beispiel #7
0
        /// <summary>
        /// Looks ahead three bytes to decide whether a literal string has really ended,
        /// even though its parentheses are unbalanced.
        /// </summary>
        /// <remarks>
        /// The bytes that were read are always pushed back, so the reader position is unchanged.
        /// The following byte sequences are treated as the end of the string:
        /// 1. CR + LF + '/' (the next line contains another object),
        /// 2. CR + LF + '&gt;' (the dictionary ends on the next line),
        /// 3. CR + '/',
        /// 4. CR + '&gt;'.
        /// The check only runs when all three bytes are available.
        /// </remarks>
        /// <param name="reader">The source positioned directly after a ')'.</param>
        /// <param name="bracesParameter">The current count of open braces.</param>
        /// <returns>0 when one of the end markers above follows; otherwise <paramref name="bracesParameter"/> unchanged.</returns>
        private static int CheckForEndOfString(IRandomAccessRead reader, int bracesParameter)
        {
            int braces = bracesParameter;

            byte[] nextThreeBytes = new byte[3];
            int    amountRead     = reader.Read(nextThreeBytes);

            if (amountRead == 3 && nextThreeBytes[0] == ReadHelper.AsciiCarriageReturn)
            {
                byte second = nextThreeBytes[1];
                byte third  = nextThreeBytes[2];

                // Cases 1 and 2: the third byte only counts when the second byte is LF.
                // Without the explicit grouping, "CR + any byte + '>'" was accepted as well.
                bool endsAfterCrLf = second == ReadHelper.AsciiLineFeed && (third == '/' || third == '>');

                // Cases 3 and 4: the marker follows the CR directly.
                bool endsAfterCr = second == '/' || second == '>';

                if (endsAfterCrLf || endsAfterCr)
                {
                    braces = 0;
                }
            }

            // this was only a look-ahead: give back everything that was read
            if (amountRead > 0)
            {
                reader.Unread(nextThreeBytes, 0, amountRead);
            }

            return braces;
        }
Beispiel #8
0
        /// <summary>
        /// Reads one character from <paramref name="reader"/> and verifies that it equals <paramref name="ec"/>.
        /// </summary>
        /// <param name="reader">The source to read from.</param>
        /// <param name="ec">The character that must come next.</param>
        /// <exception cref="InvalidOperationException">The character read differs from <paramref name="ec"/>.</exception>
        public static void ReadExpectedChar(IRandomAccessRead reader, char ec)
        {
            int  raw    = reader.Read();
            char actual = (char)raw;

            if (actual == ec)
            {
                return;
            }

            throw new InvalidOperationException($"expected=\'{ec}\' actual=\'{actual}\' at offset {reader.GetPosition()}");
        }
Beispiel #9
0
        /// <summary>
        /// Skips leading whitespace and comments, then reads characters until <c>IsEndOfName</c>
        /// reports a terminator or the end of input is reached.
        /// </summary>
        /// <param name="reader">The source to read from.</param>
        /// <returns>The characters read, possibly empty. The terminating byte is pushed back unless it is the end of input.</returns>
        public static string ReadString(IRandomAccessRead reader)
        {
            SkipSpaces(reader);

            var token = new StringBuilder();
            int current;

            for (current = reader.Read(); !IsEndOfName((char)current) && current != -1; current = reader.Read())
            {
                token.Append((char)current);
            }

            // the terminator is not part of the token; hand it back for the next read
            if (current != -1)
            {
                reader.Unread(current);
            }

            return token.ToString();
        }
Beispiel #10
0
        /// <summary>
        /// Reads the given pattern from <paramref name="reader"/>, optionally skipping whitespace before and after it.
        /// </summary>
        /// <param name="reader">The source to read from.</param>
        /// <param name="expectedstring">The exact character sequence that must come next.</param>
        /// <param name="skipSpaces">
        /// When true, whitespace and comments before and after the pattern are skipped.
        /// When false, the pattern must start at the current position and nothing after it is consumed.
        /// </param>
        /// <exception cref="IOException">The pattern could not be read.</exception>
        public static void ReadExpectedString(IRandomAccessRead reader, string expectedstring, bool skipSpaces)
        {
            // honour the flag: previously whitespace was skipped unconditionally
            if (skipSpaces)
            {
                SkipSpaces(reader);
            }

            foreach (var c in expectedstring)
            {
                if (reader.Read() != c)
                {
                    throw new IOException($"Expected string \'{expectedstring}\' but missed character \'{c}\' at offset {reader.GetPosition()}");
                }
            }

            if (skipSpaces)
            {
                SkipSpaces(reader);
            }
        }
Beispiel #11
0
        /// <summary>
        /// Tests whether the bytes at the current position of <paramref name="reader"/> equal <paramref name="str"/>.
        /// </summary>
        /// <remarks>
        /// This is a pure look-ahead: the reader is moved back to the position it had on entry.
        /// </remarks>
        /// <param name="reader">The source to compare against.</param>
        /// <param name="str">The expected byte sequence.</param>
        /// <returns>True when every byte matched; an empty sequence matches trivially.</returns>
        public static bool IsString(IRandomAccessRead reader, IEnumerable <byte> str)
        {
            long startPosition = reader.GetPosition();
            bool allMatched    = true;

            using (var expected = str.GetEnumerator())
            {
                // stop at the first mismatch without advancing the expected sequence any further
                while (allMatched && expected.MoveNext())
                {
                    allMatched = reader.Read() == expected.Current;
                }
            }

            reader.Seek(startPosition);

            return allMatched;
        }
Beispiel #12
0
        /// <summary>
        /// Copies exactly the number of bytes given by <paramref name="streamLengthObj"/> from
        /// <paramref name="reader"/> to <paramref name="output"/>, in chunks of at most <c>STREAMCOPYBUFLEN</c> bytes.
        /// </summary>
        /// <param name="reader">The source positioned at the first byte of the stream data.</param>
        /// <param name="output">The writer that receives the stream data.</param>
        /// <param name="streamLengthObj">The stream length.</param>
        /// <exception cref="InvalidOperationException">The reader delivered no bytes before the requested length was copied.</exception>
        private void ReadValidStream(IRandomAccessRead reader, BinaryWriter output, ICosNumber streamLengthObj)
        {
            for (long outstanding = streamLengthObj.AsLong(); outstanding > 0;)
            {
                // never request more than the shared copy buffer holds
                int requested = (int)Math.Min(outstanding, (long)STREAMCOPYBUFLEN);
                int received  = reader.Read(streamCopyBuf, 0, requested);

                if (received <= 0)
                {
                    // shouldn't happen, the stream length has already been validated
                    throw new InvalidOperationException(
                              $"read error at offset {reader.GetPosition()}: expected {requested} bytes, but read() returns {received}");
                }

                output.Write(streamCopyBuf, 0, received);
                outstanding -= received;
            }
        }
Beispiel #13
0
        /// <summary>
        /// Checks whether <paramref name="startXRefOffset"/> points at an object whose dictionary is of type XRef.
        /// </summary>
        /// <remarks>
        /// The check is skipped (and reported as passed) in non-lenient mode and for offset 0.
        /// Once an object header has been recognised, the reader is left at
        /// <paramref name="startXRefOffset"/>, whether parsing succeeded or failed.
        /// </remarks>
        /// <param name="source">The source to inspect.</param>
        /// <param name="startXRefOffset">The offset to verify.</param>
        /// <param name="isLenient">Whether lenient (repairing) parsing is enabled.</param>
        /// <param name="pool">The object pool handed to the dictionary parser.</param>
        /// <returns>True when the check is skipped or an XRef-typed dictionary was found; otherwise false.</returns>
        private bool CheckXRefStreamOffset(IRandomAccessRead source, long startXRefOffset, bool isLenient, CosObjectPool pool)
        {
            // repair mode isn't available in non-lenient mode
            if (!isLenient || startXRefOffset == 0)
            {
                return true;
            }

            // look at the byte directly in front of the offset
            source.Seek(startXRefOffset - 1);
            int precedingByte = source.Read();

            // the object header must be preceded by whitespace ...
            if (!ReadHelper.IsWhitespace(precedingByte))
            {
                return false;
            }

            ReadHelper.SkipSpaces(source);

            // ... and has to start with a digit
            if (!ReadHelper.IsDigit(source))
            {
                return false;
            }

            try
            {
                // expect "<object number> <generation number> obj"
                ObjectHelper.ReadObjectNumber(source);
                ObjectHelper.ReadGenerationNumber(source);

                ReadHelper.ReadExpectedString(source, "obj", true);

                // check the dictionary to avoid false positives
                PdfDictionary dictionary = dictionaryParser.Parse(source, baseParser, pool);
                source.Seek(startXRefOffset);

                return dictionary.IsType(CosName.XREF);
            }
            catch (Exception ex)
            {
                log.Error("Couldn't read the xref stream object.", ex);

                // there wasn't an object of a xref stream
                source.Seek(startXRefOffset);
            }

            return false;
        }
Beispiel #14
0
 /// <summary>
 /// Brute-force search for the last "%%EOF" marker that is not followed by an object header.
 /// </summary>
 /// <remarks>
 /// The result is stored in <c>lastEOFMarker</c> and computed only while that field is still null.
 /// When no suitable marker is found, the field is set to <c>long.MaxValue</c>.
 /// The reader position is restored before returning.
 /// </remarks>
 /// <param name="source">The source to scan.</param>
 private void bfSearchForLastEOFMarker(IRandomAccessRead source)
 {
     if (lastEOFMarker != null)
     {
         // already determined by an earlier call
         return;
     }

     long startPosition = source.GetPosition();
     source.Seek(MINIMUM_SEARCH_OFFSET);

     // advance one byte per iteration until the end of the file
     for (; !source.IsEof(); source.Read())
     {
         if (!ReadHelper.IsString(source, "%%EOF"))
         {
             continue;
         }

         long markerPosition = source.GetPosition();

         // step over the 5 characters of "%%EOF"
         source.Seek(markerPosition + 5);
         try
         {
             // Valid pdf content after the marker most likely means that the pdf is
             // linearized, updated or just cut off somewhere in the middle.
             ReadHelper.SkipSpaces(source);
             ObjectHelper.ReadObjectNumber(source);
             ObjectHelper.ReadGenerationNumber(source);
         }
         catch (InvalidOperationException)
         {
             // what follows is most likely garbage, so remember this marker
             lastEOFMarker = markerPosition;
         }
     }

     source.Seek(startPosition);

     // no EOF marker found
     if (lastEOFMarker == null)
     {
         lastEOFMarker = long.MaxValue;
     }
 }
Beispiel #15
0
 /// <summary>
 /// Forwards to the wrapped reader's <c>Read()</c>.
 /// </summary>
 /// <returns>The value returned by the wrapped reader.</returns>
 public int read() => reader.Read();
Beispiel #16
0
        /// <summary>
        /// Brute-force scan of the reader for "&lt;object number&gt; &lt;generation&gt; obj" headers,
        /// returning the offset of each object found.
        /// </summary>
        /// <remarks>
        /// The result is cached in <c>objectLocations</c>; later calls return the cached dictionary.
        /// The reader position is restored before returning.
        /// Only the single character directly in front of " obj" is used as the generation number.
        /// </remarks>
        /// <returns>A map from object key (object number, generation) to the offset of the object number.</returns>
        public IReadOnlyDictionary <CosObjectKey, long> GetObjectLocations()
        {
            // reuse the result of an earlier scan
            if (objectLocations != null)
            {
                return(objectLocations);
            }

            var lastEndOfFile = GetLastEndOfFileMarker();

            var results = new Dictionary <CosObjectKey, long>();

            // remembered so that the reader can be put back at the end
            var originPosition = reader.GetPosition();

            long currentOffset    = MinimumSearchOffset;
            // the most recently found header; it is only committed to the results once the
            // next header is found, or after the loop
            long lastObjectId     = long.MinValue;
            int  lastGenerationId = int.MinValue;
            long lastObjOffset    = long.MinValue;

            byte[] objString    = OtherEncodings.StringAsLatin1Bytes(" obj");
            byte[] endobjString = OtherEncodings.StringAsLatin1Bytes("endobj");

            bool endobjFound = false;

            do
            {
                reader.Seek(currentOffset);
                if (ReadHelper.IsString(reader, objString))
                {
                    // step back to the character directly in front of " obj"
                    long tempOffset = currentOffset - 1;
                    reader.Seek(tempOffset);
                    int generationId = reader.Peek();

                    // is the next char a digit?
                    if (ReadHelper.IsDigit(generationId))
                    {
                        // convert the ASCII digit to its numeric value ('0' is 48)
                        generationId -= 48;
                        tempOffset--;
                        reader.Seek(tempOffset);
                        if (ReadHelper.IsSpace(reader))
                        {
                            // walk backwards over the spaces between object number and generation
                            while (tempOffset > MinimumSearchOffset && ReadHelper.IsSpace(reader))
                            {
                                reader.Seek(--tempOffset);
                            }

                            // walk backwards over the digits of the object number
                            bool objectIdFound = false;
                            while (tempOffset > MinimumSearchOffset && ReadHelper.IsDigit(reader))
                            {
                                reader.Seek(--tempOffset);
                                objectIdFound = true;
                            }

                            if (objectIdFound)
                            {
                                // the scan stopped one character in front of the first digit; step over it
                                reader.Read();
                                long objectId = ObjectHelper.ReadObjectNumber(reader);
                                if (lastObjOffset > 0)
                                {
                                    // add the former object ID only if there was a subsequent object ID
                                    results[new CosObjectKey(lastObjectId, lastGenerationId)] = lastObjOffset;
                                }
                                lastObjectId     = objectId;
                                lastGenerationId = generationId;
                                lastObjOffset    = tempOffset + 1;
                                // skip the rest of the matched " obj" marker
                                currentOffset   += objString.Length - 1;
                                endobjFound      = false;
                            }
                        }
                    }
                }
                else if (ReadHelper.IsString(reader, "endobj"))
                {
                    endobjFound    = true;
                    // skip the rest of the matched "endobj" marker
                    currentOffset += endobjString.Length - 1;
                }
                currentOffset++;
            } while (currentOffset < lastEndOfFile && !reader.IsEof());
            if ((lastEndOfFile < long.MaxValue || endobjFound) && lastObjOffset > 0)
            {
                // if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
                // the last object id has to be added here so that it can't get lost as there isn't any subsequent object id
                results[new CosObjectKey(lastObjectId, lastGenerationId)] = lastObjOffset;
            }

            // reestablish origin position
            reader.Seek(originPosition);

            objectLocations = results;

            return(objectLocations);
        }
Beispiel #17
0
        /// <summary>
        /// Copies bytes from <paramref name="source"/> to <paramref name="output"/> until the
        /// <c>ENDSTREAM</c> keyword (or, as a fallback, the <c>ENDOBJ</c> keyword) is found.
        /// </summary>
        /// <remarks>
        /// The input is processed in chunks through the shared buffer <c>strmBuf</c>. A keyword
        /// prefix matched at the end of one chunk is carried over to the start of the next one.
        /// When a keyword is found, the reader is rewound so that the keyword itself and
        /// everything after it remain unread. If the input ends first, all content read up to
        /// that point has been written, except a partially matched keyword prefix at the very end.
        /// </remarks>
        /// <param name="source">The source positioned at the start of the stream data.</param>
        /// <param name="output">The writer that receives the stream content.</param>
        private void ReadUntilEndStream(IRandomAccessRead source, BinaryWriter output)
        {
            int bufSize;
            // number of keyword bytes matched so far; survives chunk boundaries
            int charMatchCount = 0;

            // the keyword currently being matched
            byte[] keyw = ENDSTREAM;

            // last character position of shortest keyword ('endobj')
            int quickTestOffset = 5;

            // read next chunk into buffer; already matched chars are added to beginning of buffer
            while ((bufSize = source.Read(strmBuf, charMatchCount, STRMBUFLEN - charMatchCount)) > 0)
            {
                // bufSize now counts the carried-over prefix plus the freshly read bytes
                bufSize += charMatchCount;

                int bIdx = charMatchCount;
                int quickTestIdx;

                // iterate over buffer, trying to find keyword match
                for (int maxQuicktestIdx = bufSize - quickTestOffset; bIdx < bufSize; bIdx++)
                {
                    // reduce compare operations by first test last character we would have to
                    // match if current one matches; if it is not a character from keywords
                    // we can move behind the test character; this shortcut is inspired by the
                    // Boyer-Moore string search algorithm and can reduce parsing time by approx. 20%
                    quickTestIdx = bIdx + quickTestOffset;
                    if (charMatchCount == 0 && quickTestIdx < maxQuicktestIdx)
                    {
                        byte ch = strmBuf[quickTestIdx];
                        // bytes outside 'a'..'t' are treated as not belonging to either keyword
                        if ((ch > 't') || (ch < 'a'))
                        {
                            // last character we would have to match if current character would match
                            // is not a character from keywords -> jump behind and start over
                            bIdx = quickTestIdx;
                            continue;
                        }
                    }

                    // a C# byte is unsigned and cannot be negative; it is only compared against keyword bytes
                    byte ch1 = strmBuf[bIdx];

                    if (ch1 == keyw[charMatchCount])
                    {
                        if (++charMatchCount == keyw.Length)
                        {
                            // match found
                            bIdx++;
                            break;
                        }
                    }
                    else
                    {
                        // presumably the two keywords share their first three bytes ("end"), so the
                        // fourth byte decides between them - NOTE(review): confirm against ENDSTREAM/ENDOBJ
                        if ((charMatchCount == 3) && (ch1 == ENDOBJ[charMatchCount]))
                        {
                            // maybe ENDSTREAM is missing but we could have ENDOBJ
                            keyw = ENDOBJ;
                            charMatchCount++;
                        }
                        else
                        {
                            // no match; incrementing match start by 1 would be dumb since we already know
                            // matched chars depending on current char read we may already have beginning
                            // of a new match: 'e': first char matched; 'n': if we are at match position
                            // idx 7 we already read 'e' thus 2 chars matched for each other char we have
                            // to start matching first keyword char beginning with next read position
                            charMatchCount = (ch1 == 'e') ? 1 : ((ch1 == 'n') && (charMatchCount == 7)) ? 2 : 0;
                            // search again for 'endstream'
                            keyw = ENDSTREAM;
                        }
                    }
                }

                // everything in front of the (partially) matched keyword is stream content
                int contentBytes = Math.Max(0, bIdx - charMatchCount);

                // write buffer content until first matched char to output stream
                if (contentBytes > 0)
                {
                    output.Write(strmBuf, 0, contentBytes);
                }
                if (charMatchCount == keyw.Length)
                {
                    // keyword matched; unread matched keyword (endstream/endobj) and following buffered content
                    source.Rewind(bufSize - contentBytes);
                    break;
                }

                // copy matched chars at start of buffer
                Array.Copy(keyw, 0, strmBuf, 0, charMatchCount);
            }
            // this writes a lonely CR or drops trailing CR LF and LF
            // output.flush();
        }
Beispiel #18
0
        /// <summary>
        /// Brute-force scan of <paramref name="source"/> for "&lt;object number&gt; &lt;generation&gt; obj"
        /// headers. The offset of each object found is stored in <c>bfSearchCOSObjectKeyOffsets</c>.
        /// </summary>
        /// <remarks>
        /// The scan covers the range from <c>MINIMUM_SEARCH_OFFSET</c> up to <c>lastEOFMarker</c>,
        /// which is determined first. The reader position is restored before returning.
        /// Only the single character directly in front of " obj" is used as the generation number.
        /// </remarks>
        /// <param name="source">The source to scan.</param>
        private void bfSearchForObjects(IRandomAccessRead source)
        {
            bfSearchForLastEOFMarker(source);
            bfSearchCOSObjectKeyOffsets = new Dictionary <CosObjectKey, long>();
            long originOffset  = source.GetPosition();
            long currentOffset = MINIMUM_SEARCH_OFFSET;

            // the most recently found header; it is only committed to the results once the
            // next header is found, or after the loop
            long lastObjectId  = long.MinValue;
            int  lastGenID     = int.MinValue;
            long lastObjOffset = long.MinValue;

            // The marker includes the leading space. The backward scan below relies on the
            // character directly in front of the match being the generation digit, and the
            // offset skip relies on the marker length, so both must use this exact marker.
            const string objString    = " obj";
            const string endobjString = "endobj";
            bool         endobjFound  = false;

            do
            {
                source.Seek(currentOffset);
                if (ReadHelper.IsString(source, objString))
                {
                    // step back to the character directly in front of " obj"
                    long tempOffset = currentOffset - 1;
                    source.Seek(tempOffset);
                    int genID = source.Peek();

                    // is the next char a digit?
                    if (ReadHelper.IsDigit(genID))
                    {
                        // convert the ASCII digit to its numeric value ('0' is 48)
                        genID -= 48;
                        tempOffset--;
                        source.Seek(tempOffset);
                        if (ReadHelper.IsSpace(source))
                        {
                            // walk backwards over the spaces between object number and generation
                            while (tempOffset > MINIMUM_SEARCH_OFFSET && ReadHelper.IsSpace(source))
                            {
                                source.Seek(--tempOffset);
                            }

                            // walk backwards over the digits of the object number
                            bool objectIDFound = false;
                            while (tempOffset > MINIMUM_SEARCH_OFFSET && ReadHelper.IsDigit(source))
                            {
                                source.Seek(--tempOffset);
                                objectIDFound = true;
                            }

                            if (objectIDFound)
                            {
                                // the scan stopped one character in front of the first digit; step over it
                                source.Read();
                                long objectId = ObjectHelper.ReadObjectNumber(source);
                                if (lastObjOffset > 0)
                                {
                                    // add the former object ID only if there was a subsequent object ID
                                    bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
                                }
                                lastObjectId   = objectId;
                                lastGenID      = genID;
                                lastObjOffset  = tempOffset + 1;
                                // skip the rest of the matched " obj" marker
                                currentOffset += objString.Length - 1;
                                endobjFound    = false;
                            }
                        }
                    }
                }
                else if (ReadHelper.IsString(source, endobjString))
                {
                    endobjFound    = true;
                    // skip the rest of the matched "endobj" marker
                    currentOffset += endobjString.Length - 1;
                }
                currentOffset++;
            } while (currentOffset < lastEOFMarker && !source.IsEof());

            if ((lastEOFMarker < long.MaxValue || endobjFound) && lastObjOffset > 0)
            {
                // if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
                // the last object id has to be added here so that it can't get lost as there isn't any subsequent object id
                bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
            }

            // reestablish origin position
            source.Seek(originOffset);
        }
Beispiel #19
0
        /// <summary>
        /// Parses a PDF array. The reader must be positioned at the opening '['.
        /// </summary>
        /// <remarks>
        /// Elements are parsed with <paramref name="baseParser"/>. When an element comes back as a
        /// <c>CosObject</c>, it is treated as the tail of an indirect reference: the two integers
        /// added just before it are removed and used as object number and generation number to look
        /// the object up in <paramref name="pool"/>. If those integers are missing, the reference
        /// is dropped. Parsing is lenient: unparsable elements are skipped, and an "endobj" or
        /// "endstream" token ends the array early.
        /// </remarks>
        /// <param name="reader">The source to read from.</param>
        /// <param name="baseParser">The parser used for the individual elements.</param>
        /// <param name="pool">The pool used to resolve indirect references.</param>
        /// <returns>The parsed array.</returns>
        public COSArray Parse(IRandomAccessRead reader, CosBaseParser baseParser, CosObjectPool pool)
        {
            ReadHelper.ReadExpectedChar(reader, '[');
            var     po = new COSArray();
            CosBase pbo;

            ReadHelper.SkipSpaces(reader);
            int i;

            while (((i = reader.Peek()) > 0) && ((char)i != ']'))
            {
                pbo = baseParser.Parse(reader, pool);
                if (pbo is CosObject)
                {
                    // We have to check if the expected values are there or not (PDFBOX-385).
                    // The size checks keep a reference without preceding integers (e.g. "[R]")
                    // from indexing the array at -1; such a reference is skipped instead.
                    if (po.size() > 0 && po.get(po.size() - 1) is CosInt)
                    {
                        var genNumber = (CosInt)po.remove(po.size() - 1);
                        if (po.size() > 0 && po.get(po.size() - 1) is CosInt)
                        {
                            var          number = (CosInt)po.remove(po.size() - 1);
                            CosObjectKey key    = new CosObjectKey(number.AsLong(), genNumber.AsInt());
                            pbo = pool.Get(key);
                        }
                        else
                        {
                            // the object reference is somehow wrong
                            pbo = null;
                        }
                    }
                    else
                    {
                        pbo = null;
                    }
                }

                if (pbo != null)
                {
                    po.add(pbo);
                }
                else
                {
                    // It could be a bad object in the array, which is just skipped.
                    // It could also be an "endobj" or "endstream", which means we can assume
                    // that the array has ended. The token is pushed back either way.
                    string isThisTheEnd = ReadHelper.ReadString(reader);
                    reader.Unread(OtherEncodings.StringAsLatin1Bytes(isThisTheEnd));
                    if (string.Equals(isThisTheEnd, "endobj") || string.Equals(isThisTheEnd, "endstream"))
                    {
                        return po;
                    }
                }

                ReadHelper.SkipSpaces(reader);
            }

            // read ']'
            reader.Read();
            ReadHelper.SkipSpaces(reader);
            return po;
        }
Beispiel #20
0
        /// <summary>
        /// Skips input until a '/' or '&gt;' is found, or until the keyword "endstream" or "endobj"
        /// or the end of input is reached.
        /// </summary>
        /// <remarks>
        /// A byte that breaks a partial keyword match is tested again against all stop conditions
        /// instead of being discarded. A '/' or '&gt;' that directly follows e.g. an 'e' is
        /// therefore no longer skipped.
        /// The constants <c>E</c>, <c>N</c>, <c>D</c>, ... are presumably the character codes of the
        /// corresponding lower-case letters - NOTE(review): confirm against their declarations.
        /// </remarks>
        /// <param name="reader">The source to read from.</param>
        /// <returns>
        /// True when a keyword or the end of input was reached (the object is finished);
        /// false when a '/' or '&gt;' was found, which is pushed back for the caller.
        /// </returns>
        private static bool ReadUntilEnd(IRandomAccessRead reader)
        {
            // what has to follow "ends" and "endo" respectively to complete a keyword
            int[] streamTail = { T, R, E, A, M };
            int[] objTail    = { B, J };

            int c = reader.Read();

            while (c != -1 && c != '/' && c != '>')
            {
                if (c != E)
                {
                    c = reader.Read();
                    continue;
                }

                // Possible start of a keyword. On every mismatch below, `continue` hands the
                // offending byte back to the loop condition, so a terminator (or a new 'e')
                // is not lost.
                c = reader.Read();
                if (c != N)
                {
                    continue;
                }

                c = reader.Read();
                if (c != D)
                {
                    continue;
                }

                c = reader.Read();
                if (c != S && c != O)
                {
                    continue;
                }

                int[] tail    = c == S ? streamTail : objTail;
                bool  matched = true;

                foreach (int expected in tail)
                {
                    c = reader.Read();
                    if (c != expected)
                    {
                        matched = false;
                        break;
                    }
                }

                if (matched)
                {
                    // we're done reading this object!
                    return true;
                }
            }

            if (c == -1)
            {
                return true;
            }

            // hand the '/' or '>' back so that the caller can continue parsing from it
            reader.Unread(c);
            return false;
        }
Beispiel #21
0
        public CosString Parse(IRandomAccessRead seqSource)
        {
            char nextChar = (char)seqSource.Read();

            if (nextChar == '<')
            {
                return(ParseHexString(seqSource));
            }

            if (nextChar != '(')
            {
                throw new IOException("parseCOSstring string should start with '(' or '<' and not '" +
                                      nextChar + "' " + seqSource);
            }

            using (var memoryStream = new MemoryStream())
                using (var writer = new StreamWriter(memoryStream))
                {
                    // This is the number of braces read
                    int braces = 1;
                    int c      = seqSource.Read();
                    while (braces > 0 && c != -1)
                    {
                        char ch    = (char)c;
                        int  nextc = -2; // not yet read

                        if (ch == ')')
                        {
                            braces--;
                            braces = CheckForEndOfString(seqSource, braces);
                            if (braces != 0)
                            {
                                writer.Write(ch);
                            }
                        }
                        else if (ch == '(')
                        {
                            braces++;
                            writer.Write(ch);
                        }
                        else if (ch == '\\')
                        {
                            //patched by ram
                            char next = (char)seqSource.Read();
                            switch (next)
                            {
                            case 'n':
                                writer.Write('\n');
                                break;

                            case 'r':
                                writer.Write('\r');
                                break;

                            case 't':
                                writer.Write('\t');
                                break;

                            case 'b':
                                writer.Write('\b');
                                break;

                            case 'f':
                                writer.Write('\f');
                                break;

                            case ')':
                                // PDFBox 276 /Title (c:\)
                                braces = CheckForEndOfString(seqSource, braces);
                                if (braces != 0)
                                {
                                    writer.Write(next);
                                }
                                else
                                {
                                    writer.Write('\\');
                                }
                                break;

                            case '(':
                            case '\\':
                                writer.Write(next);
                                break;

                            case '0':
                            case '1':
                            case '2':
                            case '3':
                            case '4':
                            case '5':
                            case '6':
                            case '7':
                            {
                                var octal = new StringBuilder();
                                octal.Append(next);
                                c = seqSource.Read();
                                char digit = (char)c;
                                if (digit >= '0' && digit <= '7')
                                {
                                    octal.Append(digit);
                                    c     = seqSource.Read();
                                    digit = (char)c;
                                    if (digit >= '0' && digit <= '7')
                                    {
                                        octal.Append(digit);
                                    }
                                    else
                                    {
                                        nextc = c;
                                    }
                                }
                                else
                                {
                                    nextc = c;
                                }

                                int character;
                                try
                                {
                                    character = Convert.ToInt32(octal.ToString(), 8);
                                }
                                catch (FormatException e)
                                {
                                    throw new IOException("Error: Expected octal character, actual='" + octal + "'", e);
                                }

                                writer.Write(character);
                                break;
                            }

                            default:
                                if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed)
                                {
                                    // this is a break in the line so ignore it and the newline and continue
                                    c = seqSource.Read();
                                    while (ReadHelper.IsEndOfLine(c) && c != -1)
                                    {
                                        c = seqSource.Read();
                                    }

                                    nextc = c;

                                    break;
                                }
                                // dropping the backslash
                                // see 7.3.4.2 Literal strings for further information
                                writer.Write(next);
                                break;
                            }
                        }
                        else
                        {
                            writer.Write(ch);
                        }
                        if (nextc != -2)
                        {
                            c = nextc;
                        }
                        else
                        {
                            c = seqSource.Read();
                        }
                    }
                    if (c != -1)
                    {
                        seqSource.Unread(c);
                    }
                    writer.Flush();
                    return(new CosString(memoryStream.ToArray()));
                }
        }
Beispiel #22
0
 // Brute-force scan of the source for cross reference stream objects.
 //
 // The scan runs at most once: the results are cached in bfSearchXRefStreamsOffsets and a
 // non-null list short-circuits the method. Starting at MinimumSearchOffset, every position
 // up to end of input is tested for the text "xref". For each hit the method looks backwards,
 // in windows of 10 positions (up to 39 windows), for the " obj" marker and then checks that
 // the marker is preceded by a digit, a space and at least one more digit. When that shape is
 // found, the position just after the byte preceding that last run of digits is recorded.
 // The reader position on entry is restored before returning.
 private void BfSearchForXRefStreams()
 {
     // already searched once; keep the cached offsets
     if (bfSearchXRefStreamsOffsets != null)
     {
         return;
     }

     // a pdf may contain more than one /XRef entry
     bfSearchXRefStreamsOffsets = new List <long>();
     long resumePosition = source.GetPosition();
     source.Seek(MinimumSearchOffset);

     const string objectMarker = " obj";

     // advance one position per iteration until the end of the input
     for (; !source.IsEof(); source.Read())
     {
         if (!ReadHelper.IsString(source, "xref"))
         {
             continue;
         }

         // search backwards for the beginning of the stream
         long streamStart     = -1;
         long keywordPosition = source.GetPosition();
         bool markerLocated   = false;
         for (int window = 1; !markerLocated && window < 40; window++)
         {
             long probe = keywordPosition - (window * 10);
             if (probe <= 0)
             {
                 // this window would start at or before the beginning of the input
                 continue;
             }

             source.Seek(probe);
             for (int attempt = 0; attempt < 10; attempt++)
             {
                 if (!ReadHelper.IsString(source, objectMarker))
                 {
                     // no marker here, move on to the next position of this window
                     probe++;
                     source.Read();
                     continue;
                 }

                 // is the char right before the marker a digit?
                 long cursor = probe - 1;
                 source.Seek(cursor);
                 int generationDigit = source.Peek();
                 if (ReadHelper.IsDigit(generationDigit))
                 {
                     cursor--;
                     source.Seek(cursor);
                     if (ReadHelper.IsSpace(source))
                     {
                         // walk backwards over the digits in front of the space
                         int digitCount = 0;
                         cursor--;
                         source.Seek(cursor);
                         while (cursor > MinimumSearchOffset && ReadHelper.IsDigit(source))
                         {
                             cursor--;
                             source.Seek(cursor);
                             digitCount++;
                         }
                         if (digitCount > 0)
                         {
                             // step over the byte in front of the digits
                             source.Read();
                             streamStart = source.GetPosition();
                         }
                     }
                 }

                 // the marker was found, stop searching even if no valid offset was derived
                 markerLocated = true;
                 break;
             }
         }

         if (streamStart > -1)
         {
             bfSearchXRefStreamsOffsets.Add(streamStart);
         }

         // NOTE(review): the skip distance of 5 is one more than the length of "xref" - confirm intended
         source.Seek(keywordPosition + 5);
     }

     source.Seek(resumePosition);
 }
Beispiel #23
0
        // Reads a name token from the reader and converts it to a CosName.
        //
        // The leading '/' is consumed through ReadHelper.ReadExpectedChar. Bytes are then collected
        // until end of input or until ReadHelper.IsEndOfName reports a terminator; the terminator is
        // pushed back onto the reader. A '#' followed by two hex digits is decoded into one byte,
        // any other '#' is kept literally.
        //
        // reader:  the source to read from; must not be null.
        // returns: the name created from the collected bytes, decoded as UTF-8 when they pass
        //          ReadHelper.IsValidUtf8 and as windows-1252 otherwise.
        // throws:  ArgumentNullException when reader is null;
        //          IOException when the two characters after '#' cannot be converted from hex.
        public CosName Parse([NotNull] IRandomAccessRead reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader));
            }

            ReadHelper.ReadExpectedChar(reader, '/');

            using (var memoryStream = new MemoryStream())
                using (var writer = new BinaryWriter(memoryStream))
                {
                    int c = reader.Read();
                    while (c != -1)
                    {
                        byte ch = (byte)c;
                        if (ch == '#')
                        {
                            int ch1 = reader.Read();
                            int ch2 = reader.Read();
                            // Prior to PDF v1.2, the # was not a special character.  Also,
                            // it has been observed that various PDF tools do not follow the
                            // spec with respect to the # escape, even though they report
                            // PDF versions of 1.2 or later.  The solution here is that we
                            // interpret the # as an escape only when it is followed by two
                            // valid hex digits.
                            if (ReadHelper.IsHexDigit((char)ch1) && ReadHelper.IsHexDigit((char)ch2))
                            {
                                string hex = "" + (char)ch1 + (char)ch2;
                                try
                                {
                                    var byteToWrite = (byte)Convert.ToInt32(hex, 16);
                                    writer.Write(byteToWrite);
                                }
                                catch (FormatException e)
                                {
                                    throw new IOException("Error: expected hex digit, actual='" + hex + "'", e);
                                }
                                c = reader.Read();
                            }
                            else
                            {
                                // check for premature EOF
                                if (ch2 == -1 || ch1 == -1)
                                {
                                    //LOG.error("Premature EOF in BaseParser#parseCosName");
                                    c = -1;
                                    break;
                                }
                                // not an escape: keep the '#' itself, push the second character back
                                // and let the loop process the first one as an ordinary character
                                reader.Unread(ch2);
                                c = ch1;
                                writer.Write(ch);
                            }
                        }
                        else if (ReadHelper.IsEndOfName(ch))
                        {
                            break;
                        }
                        else
                        {
                            writer.Write(ch);
                            c = reader.Read();
                        }
                    }
                    if (c != -1)
                    {
                        // the terminator belongs to whatever follows the name
                        reader.Unread(c);
                    }

                    // Take one snapshot of the collected bytes and reuse it for both the UTF-8
                    // check and the decoding; every ToArray() call copies the whole buffer.
                    writer.Flush();
                    byte[]   bytes    = memoryStream.ToArray();
                    Encoding encoding = ReadHelper.IsValidUtf8(bytes) ? Encoding.UTF8 : Encoding.GetEncoding("windows-1252");
                    return(CosName.Create(encoding.GetString(bytes)));
                }
        }
Beispiel #24
0
        // Parses the next direct object from the reader, choosing the parser from the first
        // byte that follows any skipped whitespace.
        //
        // reader:  the source to read from.
        // pool:    handed on to the dictionary and array parsers.
        // returns: the parsed object; null when the input is exhausted or when an unrecognised
        //          token was read (an "endobj"/"endstream" token is pushed back first).
        // throws:  IOException when a token starting with 't' or 'f' is not "true"/"false", or
        //          when an unrecognised token turns out to be empty.
        public CosBase Parse(IRandomAccessRead reader, CosObjectPool pool)
        {
            ReadHelper.SkipSpaces(reader);

            int peeked = reader.Peek();
            if (peeked == -1)
            {
                return(null);
            }

            char lead = (char)peeked;
            switch (lead)
            {
            case '<':
            {
                // take the first left bracket off just long enough to inspect the byte behind it
                int  opening      = reader.Read();
                bool isDictionary = (char)reader.Peek() == '<';
                reader.Unread(opening);

                if (!isDictionary)
                {
                    // a single '<' is handed to the string parser
                    return(stringParser.Parse(reader));
                }

                CosBase dictionary = dictionaryParser.Parse(reader, this, pool);
                ReadHelper.SkipSpaces(reader);
                return(dictionary);
            }

            case '[':
                // array
                return(arrayParser.Parse(reader, this, pool));

            case '(':
                return(stringParser.Parse(reader));

            case '/':
                // name
                return(nameParser.Parse(reader));

            case 'n':
                // null
                ReadHelper.ReadExpectedString(reader, "null");
                return(CosNull.Null);

            case 't':
            {
                string literal = OtherEncodings.BytesAsLatin1String(reader.ReadFully(4));
                if (!literal.Equals("true"))
                {
                    throw new IOException("expected true actual='" + literal + "' " + reader +
                                          "' at offset " + reader.GetPosition());
                }
                return(PdfBoolean.True);
            }

            case 'f':
            {
                string literal = OtherEncodings.BytesAsLatin1String(reader.ReadFully(5));
                if (!literal.Equals("false"))
                {
                    throw new IOException("expected false actual='" + literal + "' " + reader +
                                          "' at offset " + reader.GetPosition());
                }
                return(PdfBoolean.False);
            }

            case 'R':
                reader.Read();
                return(new CosObject(null));

            default:
            {
                if (char.IsDigit(lead) || lead == '-' || lead == '+' || lead == '.')
                {
                    // gather every character that can belong to a number
                    var numberText = new StringBuilder();
                    int raw        = reader.Read();
                    while (char.IsDigit((char)raw) || "-+.Ee".IndexOf((char)raw) >= 0)
                    {
                        numberText.Append((char)raw);
                        raw = reader.Read();
                    }

                    // the first character that is not part of the number goes back to the reader
                    if (raw != -1)
                    {
                        reader.Unread(raw);
                    }
                    return(CosNumberFactory.get(numberText.ToString()) as CosBase);
                }

                //This is not supposed to happen, but we will allow for it
                //so we are more compatible with POS writers that don't
                //follow the spec
                string token = ReadHelper.ReadString(reader);
                if (token == string.Empty)
                {
                    int peek = reader.Peek();
                    // we can end up in an infinite loop otherwise
                    throw new IOException("Unknown dir object c='" + lead +
                                          "' cInt=" + (int)lead + " peek='" + (char)peek
                                          + "' peekInt=" + peek + " at offset " + reader.GetPosition());
                }

                // if it's an endstream/endobj, we want to put it back so the caller will see it
                if (token == "endobj" || token == "endstream")
                {
                    reader.Unread(OtherEncodings.StringAsLatin1Bytes(token));
                }
                return(null);
            }
            }
        }