Beispiel #1
0
        /// <summary>
        /// Parses the next value from <paramref name="reader"/>. When the value is a number that is
        /// directly followed by another number, the pair is treated as an indirect reference
        /// (<c>objectNumber generationNumber R</c>) and the referenced entry is fetched from
        /// <paramref name="pool"/>.
        /// </summary>
        /// <param name="reader">The input positioned at the start of the value.</param>
        /// <param name="baseParser">The parser used to read the individual tokens.</param>
        /// <param name="pool">The object pool used both while parsing and to look up the reference.</param>
        /// <returns>
        /// The parsed value, or the result of <c>pool.Get</c> for the key built from the two numbers.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Either the object number or the generation number of the reference is not a <c>CosInt</c>.
        /// </exception>
        private static CosBase ParseValue(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
        {
            var numOffset = reader.GetPosition();
            var value     = baseParser.Parse(reader, pool);

            ReadHelper.SkipSpaces(reader);

            // Only a number that is followed by a further number can start an indirect reference.
            if (!(value is ICosNumber) || !ReadHelper.IsDigit(reader))
            {
                return value;
            }

            // Read the second number of the reference (the generation number).
            var genOffset        = reader.GetPosition();
            var generationNumber = baseParser.Parse(reader, pool);

            ReadHelper.SkipSpaces(reader);
            ReadHelper.ReadExpectedChar(reader, 'R');

            if (!(value is CosInt))
            {
                throw new InvalidOperationException("expected number, actual=" + value + " at offset " + numOffset);
            }

            if (!(generationNumber is CosInt))
            {
                // Report the offending generation token, not the (valid) object number.
                throw new InvalidOperationException("expected number, actual=" + generationNumber + " at offset " + genOffset);
            }

            var key = new CosObjectKey(((CosInt)value).AsLong(), ((CosInt)generationNumber).AsInt());

            // Resolve the reference through the pool.
            return pool.Get(key);
        }
Beispiel #2
0
        /// <summary>
        /// Checks whether the object header string for <paramref name="objectKey"/> (as produced by
        /// <c>ObjectHelper.createObjectString</c>) is found at <paramref name="offset"/>.
        /// The position of <paramref name="source"/> is restored before returning.
        /// </summary>
        /// <param name="source">The input to inspect.</param>
        /// <param name="objectKey">The key whose number and generation are expected at the offset.</param>
        /// <param name="offset">The absolute offset at which the object is expected to start.</param>
        /// <returns>
        /// <c>true</c> if the expected header is found at the offset; <c>false</c> if the offset is below
        /// <c>MINIMUM_SEARCH_OFFSET</c>, the bytes do not match, or reading raised an
        /// <see cref="InvalidOperationException"/>.
        /// </returns>
        private bool checkObjectKeys(IRandomAccessRead source, CosObjectKey objectKey, long offset)
        {
            // there can't be any object at the very beginning of a pdf
            if (offset < MINIMUM_SEARCH_OFFSET)
            {
                return false;
            }

            long   objectNr     = objectKey.Number;
            long   objectGen    = objectKey.Generation;
            long   originOffset = source.GetPosition();
            string objectString = ObjectHelper.createObjectString(objectNr, objectGen);

            try
            {
                source.Seek(offset);
                return ReadHelper.IsString(source, OtherEncodings.StringAsLatin1Bytes(objectString));
            }
            catch (InvalidOperationException)
            {
                // Deliberate best effort: a read failure here just means there is no valid object header.
                return false;
            }
            finally
            {
                // Single place that restores the caller's position, on every exit path.
                source.Seek(originOffset);
            }
        }
Beispiel #3
0
 /// <summary>
 /// Brute-force scan for the last "%%EOF" marker that is not followed by something readable as an
 /// object number plus generation number. The result is stored in <c>lastEOFMarker</c>; when no
 /// such marker is found the field is set to <see cref="long.MaxValue"/>. Does nothing if the field
 /// is already set. The position of <paramref name="source"/> is restored after the scan.
 /// </summary>
 /// <param name="source">The input to scan, starting at <c>MINIMUM_SEARCH_OFFSET</c>.</param>
 private void bfSearchForLastEOFMarker(IRandomAccessRead source)
 {
     // Already computed on an earlier call.
     if (lastEOFMarker != null)
     {
         return;
     }

     long startPosition = source.GetPosition();

     // Walk the input one byte at a time; the step also runs after a marker has been examined.
     for (source.Seek(MINIMUM_SEARCH_OFFSET); !source.IsEof(); source.Read())
     {
         if (!ReadHelper.IsString(source, "%%EOF"))
         {
             continue;
         }

         long candidate = source.GetPosition();
         // Step over the marker itself.
         source.Seek(candidate + 5);

         try
         {
             // If valid pdf content follows, the pdf is most likely linearized, updated
             // or cut off somewhere in the middle, so this marker is not the last one.
             ReadHelper.SkipSpaces(source);
             ObjectHelper.ReadObjectNumber(source);
             ObjectHelper.ReadGenerationNumber(source);
         }
         catch (InvalidOperationException)
         {
             // What follows is most likely garbage: remember this marker.
             lastEOFMarker = candidate;
         }
     }

     source.Seek(startPosition);

     // no EOF marker found
     if (lastEOFMarker == null)
     {
         lastEOFMarker = long.MaxValue;
     }
 }
Beispiel #4
0
        /// <summary>
        /// Tests whether the bytes at the current position of <paramref name="reader"/> equal
        /// <paramref name="str"/>. The reader is moved back to its starting position afterwards.
        /// </summary>
        /// <param name="reader">The input to compare against.</param>
        /// <param name="str">The expected byte sequence.</param>
        /// <returns><c>true</c> if every expected byte was read in order; otherwise <c>false</c>.</returns>
        public static bool IsString(IRandomAccessRead reader, IEnumerable <byte> str)
        {
            long start   = reader.GetPosition();
            bool matched = true;

            foreach (byte expected in str)
            {
                if (reader.Read() == expected)
                {
                    continue;
                }

                // First mismatch decides the result; no need to read any further.
                matched = false;
                break;
            }

            // Comparison is non-consuming: rewind to where we started.
            reader.Seek(start);

            return matched;
        }
Beispiel #5
0
        /// <summary>
        /// Skips spaces and reads a <see cref="long"/> from <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">The input to read the number from.</param>
        /// <returns>The parsed value.</returns>
        /// <exception cref="InvalidOperationException">
        /// The characters collected by <c>ReadStringNumber</c> are not a valid integer or do not fit
        /// into a <see cref="long"/>. The collected characters are pushed back onto the reader first.
        /// </exception>
        public static long ReadLong(IRandomAccessRead reader)
        {
            SkipSpaces(reader);

            StringBuilder longBuffer = ReadStringNumber(reader);
            string        text       = longBuffer.ToString();

            try
            {
                // File content is machine data: parse independently of the current thread culture.
                return long.Parse(
                    text,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture);
            }
            catch (Exception e) when (e is FormatException || e is OverflowException)
            {
                // long.Parse reports out-of-range digit runs as OverflowException rather than
                // FormatException; both mean "not a long" and must surface as the same error.
                var bytesToReverse = OtherEncodings.StringAsLatin1Bytes(text);
                reader.Unread(bytesToReverse);

                throw new InvalidOperationException($"Error: Expected a long type at offset {reader.GetPosition()}, instead got \'{longBuffer}\'", e);
            }
        }
Beispiel #6
0
 /// <summary>
 /// Brute-force scan of <c>source</c> that fills <c>bfSearchXRefStreamsOffsets</c> with candidate
 /// offsets. For every match of the search keyword it looks backwards for a preceding " obj"
 /// and, if digits and a space are found in front of it, records the position reached after the
 /// backward digit scan. Runs only while the list is still <c>null</c>; the position of
 /// <c>source</c> is restored at the end.
 /// </summary>
 private void BfSearchForXRefStreams()
 {
     if (bfSearchXRefStreamsOffsets == null)
     {
         // a pdf may contain more than one /XRef entry
         bfSearchXRefStreamsOffsets = new List <long>();
         long originOffset = source.GetPosition();
         source.Seek(MinimumSearchOffset);
         // search for XRef streams
         var objString = " obj";
         while (!source.IsEof())
         {
             // NOTE(review): the comments here talk about "/XRef" entries and the skip further
             // down is 5 bytes (the length of "/XRef"), but the keyword searched for is the
             // 4-character "xref" - confirm which keyword is intended.
             if (ReadHelper.IsString(source, "xref"))
             {
                 // search backwards for the beginning of the stream
                 long newOffset  = -1;
                 long xrefOffset = source.GetPosition();
                 bool objFound   = false;
                 // Step back in 10-byte windows (i = 1..39), stopping at the first " obj" found.
                 for (int i = 1; i < 40 && !objFound; i++)
                 {
                     long currentOffset = xrefOffset - (i * 10);
                     if (currentOffset > 0)
                     {
                         source.Seek(currentOffset);
                         // Probe each of the 10 byte positions of the current window.
                         for (int j = 0; j < 10; j++)
                         {
                             if (ReadHelper.IsString(source, objString))
                             {
                                 // The byte just before " obj" is inspected as a (single digit) generation id.
                                 long tempOffset = currentOffset - 1;
                                 source.Seek(tempOffset);
                                 int genId = source.Peek();
                                 // is the next char a digit?
                                 if (ReadHelper.IsDigit(genId))
                                 {
                                     tempOffset--;
                                     source.Seek(tempOffset);
                                     // A space must separate the generation id from what precedes it.
                                     if (ReadHelper.IsSpace(source))
                                     {
                                         int length = 0;
                                         source.Seek(--tempOffset);
                                         // Walk backwards over the digits in front of the space,
                                         // never going below MinimumSearchOffset.
                                         while (tempOffset > MinimumSearchOffset && ReadHelper.IsDigit(source))
                                         {
                                             source.Seek(--tempOffset);
                                             length++;
                                         }
                                         if (length > 0)
                                         {
                                             // Step forward off the non-digit the scan stopped on;
                                             // presumably this is the first digit of the object number.
                                             source.Read();
                                             newOffset = source.GetPosition();
                                         }
                                     }
                                 }
                                 // " obj" was located, so the backward search ends even if no
                                 // offset could be derived from it.
                                 objFound = true;
                                 break;
                             }
                             else
                             {
                                 // Keep currentOffset in step with the reader position.
                                 currentOffset++;
                                 source.Read();
                             }
                         }
                     }
                 }
                 if (newOffset > -1)
                 {
                     bfSearchXRefStreamsOffsets.Add(newOffset);
                 }
                 // Continue the forward scan behind the keyword that was just handled.
                 source.Seek(xrefOffset + 5);
             }
             source.Read();
         }
         // Put the reader back where the caller left it.
         source.Seek(originOffset);
     }
 }
Beispiel #7
0
        /// <summary>
        /// Brute-force scan of <paramref name="source"/> for object headers of the form
        /// <c>objectNumber generation obj</c>. Every header found is stored in
        /// <c>bfSearchCOSObjectKeyOffsets</c>, keyed by object number and generation and mapped to the
        /// offset of the first digit of the object number. The scan starts at
        /// <c>MINIMUM_SEARCH_OFFSET</c> and stops at <c>lastEOFMarker</c> or the end of the input.
        /// The position of <paramref name="source"/> is restored afterwards.
        /// </summary>
        /// <remarks>
        /// Only the single digit directly in front of " obj" is used as the generation number.
        /// </remarks>
        /// <param name="source">The input to scan.</param>
        private void bfSearchForObjects(IRandomAccessRead source)
        {
            bfSearchForLastEOFMarker(source);
            bfSearchCOSObjectKeyOffsets = new Dictionary <CosObjectKey, long>();
            long originOffset  = source.GetPosition();
            long currentOffset = MINIMUM_SEARCH_OFFSET;
            long lastObjectId  = long.MinValue;
            int  lastGenID     = int.MinValue;
            long lastObjOffset = long.MinValue;

            // The marker includes the leading space: the backward scan below assumes that
            // currentOffset points at that space, so currentOffset - 1 is the generation digit.
            const string objString    = " obj";
            const string endobjString = "endobj";
            bool         endobjFound  = false;

            do
            {
                source.Seek(currentOffset);
                if (ReadHelper.IsString(source, objString))
                {
                    long tempOffset = currentOffset - 1;
                    source.Seek(tempOffset);
                    int genID = source.Peek();
                    // is the next char a digit?
                    if (ReadHelper.IsDigit(genID))
                    {
                        // convert the ASCII digit to its numeric value
                        genID -= 48;
                        tempOffset--;
                        source.Seek(tempOffset);
                        if (ReadHelper.IsSpace(source))
                        {
                            // walk backwards over the spaces separating object number and generation
                            while (tempOffset > MINIMUM_SEARCH_OFFSET && ReadHelper.IsSpace(source))
                            {
                                source.Seek(--tempOffset);
                            }
                            // walk backwards over the digits of the object number
                            bool objectIDFound = false;
                            while (tempOffset > MINIMUM_SEARCH_OFFSET && ReadHelper.IsDigit(source))
                            {
                                source.Seek(--tempOffset);
                                objectIDFound = true;
                            }
                            if (objectIDFound)
                            {
                                // step forward off the non-digit the backward scan stopped on
                                source.Read();
                                long objectId = ObjectHelper.ReadObjectNumber(source);
                                if (lastObjOffset > 0)
                                {
                                    // add the former object ID only if there was a subsequent object ID
                                    bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
                                }
                                lastObjectId   = objectId;
                                lastGenID      = genID;
                                lastObjOffset  = tempOffset + 1;
                                currentOffset += objString.Length - 1;
                                endobjFound    = false;
                            }
                        }
                    }
                }
                else if (ReadHelper.IsString(source, endobjString))
                {
                    endobjFound    = true;
                    currentOffset += endobjString.Length - 1;
                }
                currentOffset++;
            } while (currentOffset < lastEOFMarker && !source.IsEof());
            if ((lastEOFMarker < long.MaxValue || endobjFound) && lastObjOffset > 0)
            {
                // if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
                // the last object id has to be added here so that it can't get lost as there isn't any subsequent object id
                bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
            }

            // reestablish origin position
            source.Seek(originOffset);
        }
Beispiel #8
0
        /// <summary>
        /// Reads one direct object from <paramref name="reader"/>, dispatching on the first
        /// non-space character: dictionary or hex string (<c>&lt;</c>), array (<c>[</c>), literal
        /// string (<c>(</c>), name (<c>/</c>), <c>null</c>, <c>true</c>, <c>false</c>, the
        /// <c>R</c> keyword, or a number.
        /// </summary>
        /// <param name="reader">The input to read from.</param>
        /// <param name="pool">The object pool handed on to the dictionary and array parsers.</param>
        /// <returns>
        /// The parsed object; <c>null</c> at end of input or when an unrecognised, non-empty token
        /// was read.
        /// </returns>
        /// <exception cref="IOException">
        /// A token starting with 't' / 'f' is not "true" / "false", or an unknown token is empty.
        /// </exception>
        public CosBase Parse(IRandomAccessRead reader, CosObjectPool pool)
        {
            ReadHelper.SkipSpaces(reader);

            int peeked = reader.Peek();
            if (peeked == -1)
            {
                return null;
            }

            char first = (char)peeked;

            if (first == '<')
            {
                // Look one character ahead to tell "<<" (dictionary) from "<" (string),
                // then push the bracket back for the specialised parser.
                int  opening = reader.Read();
                char second  = (char)reader.Peek();
                reader.Unread(opening);

                if (second != '<')
                {
                    return stringParser.Parse(reader);
                }

                CosBase dictionary = dictionaryParser.Parse(reader, this, pool);
                ReadHelper.SkipSpaces(reader);
                return dictionary;
            }

            switch (first)
            {
            case '[':
                return arrayParser.Parse(reader, this, pool);

            case '(':
                return stringParser.Parse(reader);

            case '/':
                return nameParser.Parse(reader);

            case 'n':
                ReadHelper.ReadExpectedString(reader, "null");
                return CosNull.Null;

            case 't':
            {
                string literal = OtherEncodings.BytesAsLatin1String(reader.ReadFully(4));
                if (!literal.Equals("true"))
                {
                    throw new IOException("expected true actual='" + literal + "' " + reader +
                                          "' at offset " + reader.GetPosition());
                }
                return PdfBoolean.True;
            }

            case 'f':
            {
                string literal = OtherEncodings.BytesAsLatin1String(reader.ReadFully(5));
                if (!literal.Equals("false"))
                {
                    throw new IOException("expected false actual='" + literal + "' " + reader +
                                          "' at offset " + reader.GetPosition());
                }
                return PdfBoolean.False;
            }

            case 'R':
                reader.Read();
                return new CosObject(null);

            default:
                break;
            }

            bool startsNumber = char.IsDigit(first) || first == '-' || first == '+' || first == '.';
            if (startsNumber)
            {
                StringBuilder digits = new StringBuilder();

                // Collect every character that can belong to a number; the first one that
                // cannot is pushed back unless it signals end of input.
                for (int raw = reader.Read(); ; raw = reader.Read())
                {
                    char ch        = (char)raw;
                    bool isNumeric = char.IsDigit(ch) || ch == '-' || ch == '+' || ch == '.' ||
                                     ch == 'E' || ch == 'e';
                    if (!isNumeric)
                    {
                        if (raw != -1)
                        {
                            reader.Unread(raw);
                        }
                        break;
                    }

                    digits.Append(ch);
                }

                return CosNumberFactory.get(digits.ToString()) as CosBase;
            }

            // This is not supposed to happen, but it is tolerated to stay compatible
            // with writers that don't follow the spec.
            string token = ReadHelper.ReadString(reader);
            if (token == string.Empty)
            {
                int peek = reader.Peek();
                // we can end up in an infinite loop otherwise
                throw new IOException("Unknown dir object c='" + first +
                                      "' cInt=" + (int)first + " peek='" + (char)peek
                                      + "' peekInt=" + peek + " at offset " + reader.GetPosition());
            }

            // An endstream/endobj keyword is put back so the caller will see it.
            if (string.Equals("endobj", token) || string.Equals("endstream", token))
            {
                reader.Unread(OtherEncodings.StringAsLatin1Bytes(token));
            }

            return null;
        }