Example #1
0
        /// <summary>
        /// Orders two <see cref="PdfFileObject"/> instances by their <c>ObjID</c>.
        /// </summary>
        /// <param name="x">First object to compare; a <see cref="PdfFileObject"/> or null.</param>
        /// <param name="y">Second object to compare; a <see cref="PdfFileObject"/> or null.</param>
        /// <returns>
        /// A negative value when <paramref name="x"/> sorts before <paramref name="y"/>,
        /// zero when they are equal, a positive value otherwise. Null sorts before any
        /// non-null object.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// A non-null argument is not a <see cref="PdfFileObject"/>.
        /// </exception>
        public int Compare(object x, object y)
        {
            PdfFileObject a = x as PdfFileObject;
            PdfFileObject b = y as PdfFileObject;

            bool aMissing = object.ReferenceEquals(a, null);
            bool bMissing = object.ReferenceEquals(b, null);

            // A non-null argument that failed the cast has the wrong type and cannot be ordered.
            if ((x != null && aMissing) || (y != null && bMissing))
            {
                throw new ArgumentException("Both arguments must be PdfFileObject instances.");
            }

            // IComparer convention: null compares less than any non-null value.
            if (aMissing)
            {
                return(bMissing ? 0 : -1);
            }
            if (bMissing)
            {
                return(1);
            }

            return(a.ObjID.CompareTo(b.ObjID));
        }
Example #2
0
        /// <summary>
        /// Reads the trailer text of the document, starting at the given offset of the input stream.
        /// </summary>
        /// <param name="xref">Absolute position in <c>inputStream</c> at which reading starts.</param>
        /// <returns>
        /// An object of type <c>PdfObjectTypes.Trailer</c> with <c>ObjID</c> 0 whose
        /// <c>OriginalText</c> holds the trimmed lines from the last line starting with
        /// "trailer" up to (but not including) the "startxref" line, each followed by "\r".
        /// The text is empty if "startxref" is met before any "trailer" line.
        /// </returns>
        /// <exception cref="Exception">The stream ends before a "startxref" line is found.</exception>
        private PdfFileObject ParseTrailer(long xref)
        {
            PdfFileObject pfo = new PdfFileObject(this);

            pfo.OriginalText = "";
            pfo.ObjID        = 0;
            pfo.Type         = PdfObjectTypes.Trailer;

            this.inputStream.Seek(xref, SeekOrigin.Begin);

            // The reader is deliberately not disposed: disposing a StreamReader also closes
            // the underlying stream, which belongs to the parser rather than to this method.
            StreamReader sr = new StreamReader(this.inputStream);
            string       line;
            bool         istrailer = false;

            while ((line = sr.ReadLine()) != null)
            {
                line = line.Trim();
                if (line == "startxref")
                {
                    return(pfo);
                }

                // "trailer" is a file-format keyword, so compare ordinally rather than
                // with the current culture's linguistic rules.
                if (line.StartsWith("trailer", StringComparison.Ordinal))
                {
                    // A new trailer keyword discards anything collected so far.
                    pfo.OriginalText = "";
                    istrailer        = true;
                }
                if (istrailer)
                {
                    pfo.OriginalText += line + "\r";
                }
            }

            throw new Exception("Cannot find trailer");
        }
Example #3
0
 /// <summary>
 /// Adds this object to <paramref name="container"/> and then, recursively, every object
 /// referenced from its <c>OriginalText</c> (as found by <c>regex1</c>).
 /// </summary>
 /// <param name="container">
 /// Table keyed by object ID collecting the visited objects. An object whose ID is
 /// already present is not visited again.
 /// </param>
 internal void PopulateRelatedObjects(Hashtable container)
 {
     // Already collected: nothing more to do for this object.
     if (container.ContainsKey(this.ObjID))
     {
         return;
     }

     container.Add(this.ObjID, this);

     for (Match reference = regex1.Match(this.OriginalText); reference.Success; reference = reference.NextMatch())
     {
         int referencedId = int.Parse(reference.Groups["id"].Value);

         // Matches that captured a non-empty "parent" group are not followed.
         if (reference.Groups["parent"].Length != 0)
         {
             continue;
         }

         if (container.Contains(referencedId))
         {
             continue;
         }

         PdfFileObject referenced = this.pdfFileParser.ReadObject(referencedId);
         if (referenced != null)
         {
             referenced.PopulateRelatedObjects(container);
         }
     }
 }
Example #4
0
        /// <summary>
        /// Creates a <see cref="PdfFileObject"/> carrying the ID, position and length
        /// information recorded in this instance.
        /// </summary>
        /// <param name="analyzer">Parser handed to the new object's constructor.</param>
        /// <returns>The newly created file object.</returns>
        internal PdfFileObject CreateFileObject(PdfParser analyzer)
        {
            PdfFileObject fileObject = new PdfFileObject(analyzer);

            // Identity of the object.
            fileObject.ObjID = this.ObjID;

            // Where the whole object sits and how long it is.
            fileObject.OriginalObjectAbsolutePosition = this.Address;
            fileObject.OriginalObjectLength           = this.Length;

            // Where its stream data sits and how long that is.
            fileObject.StreamPosition = this.StreamPosition;
            fileObject.StreamLength   = this.StreamLength;

            return(fileObject);
        }
Example #5
0
        /// <summary>
        /// Extracts the objects packed inside this object stream.
        /// </summary>
        /// <remarks>
        /// The inflated stream text is expected to start with <c>N</c> pairs of integers
        /// (object ID, offset), followed by the object bodies. Each extracted body is
        /// wrapped as <c>"&lt;id&gt; 0 obj ...\nendobj\n"</c>.
        /// </remarks>
        /// <returns>
        /// One <see cref="PdfFileObject"/> per packed object. The list is empty when this
        /// object is not of type <c>ObjStm</c>, when the inflated content is unavailable,
        /// or when the ID/offset header cannot be recognized.
        /// </returns>
        internal List <PdfFileObject> GetStreamObjects()
        {
            List <PdfFileObject> objects = new List <PdfFileObject>();

            if (this.Type != PdfObjectTypes.ObjStm)
            {
                return(objects);
            }

            string txt = this.StreamContent2Inflated;

            if (txt == null)
            {
                return(objects);
            }

            int n = this.GetInt32Value("N").Value;

            // One pair of capture groups per packed object: (id) (offset).
            System.Text.StringBuilder pattern = new System.Text.StringBuilder();

            for (int x = 0; x < n; x++)
            {
                pattern.Append(@"(\d+)\s+(\d+)\s+");
            }

            Match m = Regex.Match(txt, pattern.ToString());

            // Without a recognizable header the groups are empty and cannot be parsed.
            if (!m.Success)
            {
                return(objects);
            }

            // Offsets are relative to the first byte after the header, wherever the header starts.
            int bodyStart = m.Index + m.Length;

            for (int x = 0; x < n; x++)
            {
                PdfFileObject pfo = new PdfFileObject(this.pdfFileParser);
                pfo.ObjID = int.Parse(m.Groups[x * 2 + 1].Value);
                int position = int.Parse(m.Groups[x * 2 + 2].Value);

                string body;
                if (x == n - 1)
                {
                    // The last object runs to the end of the text.
                    body = txt.Substring(bodyStart + position);
                }
                else
                {
                    // Any other object ends where the next one (group x * 2 + 4) begins.
                    int length = int.Parse(m.Groups[x * 2 + 4].Value) - position;
                    body = txt.Substring(bodyStart + position, length);
                }

                pfo.OriginalText = pfo.ObjID + " 0 obj " + body + "\nendobj\n";
                objects.Add(pfo);
            }

            return(objects);
        }
Example #6
0
        /// <summary>
        /// Produces <c>TransformatedTextPart</c> from <c>OriginalTextPart</c>, passing every
        /// "N 0 obj" / "N 0 R" occurrence through <c>FilterEval</c> and pointing the
        /// /Parent entry at object 2.
        /// </summary>
        /// <remarks>
        /// For a page without its own media box text, the first non-empty
        /// <c>MediaBoxText</c> found while walking up the parent chain is inserted right
        /// after "/Type /Page".
        /// </remarks>
        /// <param name="TransformationTable">
        /// Table indexed by original object ID; the entry for this object's ID is cast to
        /// <c>int</c> and stored as <c>TransformatedObjID</c>.
        /// </param>
        /// <exception cref="Exception">The object has already been transformed.</exception>
        internal virtual void Transform(Hashtable TransformationTable)
        {
            if (transformed)
            {
                throw new Exception("Already transformed!");
            }
            transformed = true;

            // Determine once whether this object is a page.
            bool isPage = this.Type == PdfObjectTypes.Page;

            this.TransformationTable = TransformationTable;
            this.TransformatedObjID  = (int)this.TransformationTable[this.ObjID];

            this.filterEval = new MatchEvaluator(this.FilterEval);

            this.TransformatedTextPart = this.OriginalTextPart;

            if (isPage && this.MediaBoxText == "")
            {
                // Walk up the ancestors until one of them supplies media box text.
                for (PdfFileObject ancestor = this.GetParent(); ancestor != null; ancestor = ancestor.GetParent())
                {
                    string mediaBox = ancestor.MediaBoxText;
                    if (mediaBox != "")
                    {
                        this.TransformatedTextPart = Regex.Replace(
                            this.TransformatedTextPart, @"/Type\s*/Page", "/Type /Page\r" + mediaBox);
                        break;
                    }
                }
            }

            // Hand every object ID (definition or reference) to the filter callback.
            this.TransformatedTextPart = Regex.Replace(
                this.TransformatedTextPart,
                @"(?'id'\d+)(?'rest' 0 (obj|R))(?'end'[^G])",
                this.filterEval,
                RegexOptions.ExplicitCapture);

            // Replace whatever parent reference is present with a fixed one.
            this.TransformatedTextPart = Regex.Replace(
                this.TransformatedTextPart,
                @"/Parent\s+(\d+ 0 R)*",
                "/Parent 2 0 R \r");
        }
        /// <summary>
        /// Extracts the objects packed inside this object stream.
        /// </summary>
        /// <remarks>
        /// The inflated stream text is expected to start with <c>N</c> pairs of integers
        /// (object ID, offset), followed by the object bodies. Each extracted body is
        /// wrapped as <c>"&lt;id&gt; 0 obj ...\nendobj\n"</c>.
        /// </remarks>
        /// <returns>
        /// One <see cref="PdfFileObject"/> per packed object. The list is empty when this
        /// object is not of type <c>ObjStm</c>, when the inflated content is unavailable,
        /// or when the ID/offset header cannot be recognized.
        /// </returns>
        internal List<PdfFileObject> GetStreamObjects()
        {
            List<PdfFileObject> objects = new List<PdfFileObject>();
            if (this.Type != PdfObjectTypes.ObjStm)
            {
                return objects;
            }

            string txt = this.StreamContent2Inflated;
            if (txt == null)
            {
                return objects;
            }

            int n = this.GetInt32Value("N").Value;

            // One pair of capture groups per packed object: (id) (offset).
            System.Text.StringBuilder pattern = new System.Text.StringBuilder();
            for (int x = 0; x < n; x++)
            {
                pattern.Append(@"(\d+)\s+(\d+)\s+");
            }

            Match m = Regex.Match(txt, pattern.ToString());

            // Without a recognizable header the groups are empty and cannot be parsed.
            if (!m.Success)
            {
                return objects;
            }

            // Offsets are relative to the first byte after the header, wherever the header starts.
            int bodyStart = m.Index + m.Length;

            for (int x = 0; x < n; x++)
            {
                PdfFileObject pfo = new PdfFileObject(this.pdfFileParser);
                pfo.ObjID = int.Parse(m.Groups[x * 2 + 1].Value);
                int position = int.Parse(m.Groups[x * 2 + 2].Value);

                string body;
                if (x == n - 1)
                {
                    // The last object runs to the end of the text.
                    body = txt.Substring(bodyStart + position);
                }
                else
                {
                    // Any other object ends where the next one (group x * 2 + 4) begins.
                    int length = int.Parse(m.Groups[x * 2 + 4].Value) - position;
                    body = txt.Substring(bodyStart + position, length);
                }

                pfo.OriginalText = pfo.ObjID + " 0 obj " + body + "\nendobj\n";
                objects.Add(pfo);
            }

            return objects;
        }
Example #8
0
        /// <summary>
        /// Scans <paramref name="s"/> byte by byte for "&lt;id&gt; 0 obj ... endobj" spans and
        /// returns the objects found, keyed by object ID.
        /// </summary>
        /// <remarks>
        /// The scan is a hand-written state machine driven by <c>step</c>:
        /// 0 looks for the first digit of an ID, 1-6 match the rest of "&lt;id&gt; 0 obj",
        /// 7 is the object body, 9-15 match "stream" and locate the stream data,
        /// 16-25 run through the stream data looking for "endstream", and 28-33 match
        /// "endobj". Only a generation number consisting of the single character '0' is
        /// recognized. After the scan, every object of type <c>ObjStm</c> is expanded and
        /// the objects it contains are added to (or replace entries in) the result.
        /// </remarks>
        /// <param name="s">Stream to scan; it is rewound to its beginning first.</param>
        /// <returns>Dictionary mapping object ID to the corresponding file object.</returns>
        internal Dictionary <int, PdfFileObject> LoadObjects(Stream s)
        {
            s.Seek(0, SeekOrigin.Begin);
            Dictionary <int, PdfFileObject> objects = new Dictionary <int, PdfFileObject>();
            int           c = 0, objIDCandidate = 0, streamLengthCandidate = -1;
            long          streamPositionCandidate = -2;
            long          addressCandidate = -1, step = 0, position = 0;
            // Set when "endobj" has just been seen; the object is stored once the
            // whitespace that follows it has been consumed.
            ObjectAddress oa = null;

            while ((c = s.ReadByte()) != -1)
            {
                switch (step)
                {
                // Outside any object: finish a pending object, then look for an ID.
                case 0:
                    if (oa != null)
                    {
                        if (c == ' ' || c == '\r' || c == '\n')
                        {
                            // Trailing whitespace is counted as part of the object.
                            oa.Length++;
                        }
                        else
                        {
                            PdfFileObject pfo = oa.CreateFileObject(this);
                            // The object located exactly at startxref is kept as the trailer.
                            if (pfo.OriginalObjectAbsolutePosition == this.startxref)
                            {
                                this.trailer = pfo;
                            }

                            objects[oa.ObjID] = pfo;
                            // NOTE(review): this seeks this.inputStream, not the parameter s. If both are
                            // the same stream, position + 1 is where the read already is - confirm intent.
                            this.inputStream.Seek(position + 1, SeekOrigin.Begin);
                            oa = null;
                            streamPositionCandidate = -2;
                            streamLengthCandidate   = -1;
                        }
                    }
                    // A digit '1'-'9' (48 is '0', so a leading zero is excluded) may start an object ID.
                    if (c > 48 && c < 58)
                    {
                        addressCandidate = position;
                        objIDCandidate   = c - 48;
                        step             = 1;
                    }
                    break;

                // Remaining digits of the ID, terminated by a single space.
                case 1:
                    if (c > 47 && c < 58)
                    {
                        objIDCandidate = objIDCandidate * 10 + c - 48;
                    }
                    else
                    {
                        step = (c == ' ') ? 2 : 0;
                    }
                    break;

                // Generation number: only the character '0' is accepted.
                case 2:
                    step = (c == '0') ? 3 : 0;
                    break;

                // Space between the generation number and "obj".
                case 3:
                    step = (c == ' ') ? 4 : 0;
                    break;

                // "obj", one character per state.
                case 4:
                    step = (c == 'o') ? 5 : 0;
                    break;

                case 5:
                    step = (c == 'b') ? 6 : 0;
                    break;

                case 6:
                    if (c == 'j')
                    {
                        step = 7;
                    }
                    else
                    {
                        step = 0;
                    }
                    break;

                // Inside the object body, before any stream.
                case 7:

                    // Records the ID of the object currently being scanned in the lastObjID field.
                    if (objIDCandidate != lastObjID)
                    {
                        lastObjID = objIDCandidate;
                    }

                    // No-op; looks like a leftover debugging hook for object 8.
                    if (objIDCandidate == 8)
                    {
                        if (c == '>')
                        {
                        }
                    }

                    if (c == 'e')
                    {
                        // possible beginning of "endobj" (the remaining "ndobj" is matched by 29-33)
                        step = 29;
                    }
                    if (c == 's')
                    {
                        // possible beginning of "stream"
                        step = 9;
                        break;
                    }
                    break;

                // No transition in this method sets step to 8, so this state is never entered.
                case 8:
                    if (c == 's')
                    {
                        step++;
                        break;
                    }
                    if (c == 'e')
                    {
                        step = 29;
                        break;
                    }
                    if (c == '\r' || c == '\n' || c == ' ')
                    {
                        break;
                    }
                    step = 7;
                    break;

                // "tream" (the 's' was consumed in state 7); any mismatch returns to the body.
                case 9:
                    step = (c == 't') ? 10 : 7;
                    break;

                case 10:
                    step = (c == 'r') ? 11 : 7;
                    break;

                case 11:
                    step = (c == 'e') ? 12 : 7;
                    break;

                case 12:
                    step = (c == 'a') ? 13 : 7;
                    break;

                case 13:
                    step = (c == 'm') ? 14 : 7;
                    break;

                // "stream" must be followed by whitespace.
                case 14:
                    step = (c == '\r' || c == '\n' || c == ' ') ? 15 : 7;
                    break;

                // Skip further whitespace; the first other byte marks the start of the stream data.
                case 15:
                    if (c == '\r' || c == '\n' || c == ' ')
                    {
                    }
                    else
                    {
                        step++;
                        streamPositionCandidate = position;
                    }
                    break;

                // Inside the stream data: extend the length until an 'e' might start "endstream"/"endobj".
                case 16:
                    if (c == 'e')
                    {
                        step++;
                        break;
                    }

                    streamLengthCandidate = (int)(position - streamPositionCandidate + 1);
                    break;

                // "nd" after the 'e'; any mismatch falls back to the stream data.
                case 17:
                    step = (c == 'n') ? 18 : 16;
                    break;

                case 18:
                    step = (c == 'd') ? 19 : 16;
                    break;

                // After "end": 'o' continues as "endobj" (state 32), 's' as "endstream".
                case 19:
                    if (c == 'o')
                    {
                        step = 32;
                        break;
                    }
                    if (c == 's')
                    {
                        step = 20;
                        break;
                    }
                    step = 16;
                    break;

                // "tream" of "endstream".
                case 20:
                    step = (c == 't') ? 21 : 16;
                    break;

                case 21:
                    step = (c == 'r') ? 22 : 16;
                    break;

                case 22:
                    step = (c == 'e') ? 23 : 16;
                    break;

                case 23:
                    step = (c == 'a') ? 24 : 16;
                    break;

                case 24:
                    step = (c == 'm') ? 25 : 16;
                    break;

                // "endstream" must be followed by whitespace.
                case 25:
                    step = (c == '\r' || c == '\n' || c == ' ') ? 28 : 16;
                    break;


                // "endobj", one character per state; any mismatch returns to the body.
                case 28:
                    step = (c == 'e') ? 29 : 7;
                    break;

                case 29:
                    step = (c == 'n') ? 30 : 7;
                    break;

                case 30:
                    step = (c == 'd') ? 31 : 7;
                    break;

                case 31:
                    step = (c == 'o') ? 32 : 7;
                    break;

                case 32:
                    step = (c == 'b') ? 33 : 7;
                    break;

                case 33:
                    if (c == 'j')
                    {
                        step = 0;
                        // No-op; looks like a leftover debugging hook for object 140.
                        if (objIDCandidate == 140)
                        {
                        }
                        // Object complete: its length runs from the first ID digit through this 'j'.
                        oa = new ObjectAddress(objIDCandidate, addressCandidate
                                               , (int)(position - addressCandidate + 1), streamPositionCandidate, streamLengthCandidate);
                    }
                    else
                    {
                        step = 7;
                    }
                    break;
                }
                position++;
            }
            // NOTE(review): an object still pending in oa when the stream ends (only whitespace,
            // or nothing, after its "endobj") is never added to the dictionary.

            // Second pass: expand object streams and register the objects they contain.
            List <PdfFileObject> ObjStms = new List <PdfFileObject>();

            foreach (PdfFileObject pfo in objects.Values)
            {
                if (pfo.Type == PdfObjectTypes.ObjStm)
                {
                    ObjStms.Add(pfo);
                }
            }
            // Done over a separate list because the dictionary is modified below.
            foreach (PdfFileObject ObjStm in ObjStms)
            {
                foreach (PdfFileObject pfo2 in ObjStm.GetStreamObjects())
                {
                    objects[pfo2.ObjID] = pfo2;
                }
            }
            return(objects);
        }
 /// <summary>
 /// Creates a <see cref="PdfFileObject"/> carrying the ID, position and length
 /// information recorded in this instance.
 /// </summary>
 /// <param name="analyzer">Parser handed to the new object's constructor.</param>
 /// <returns>The newly created file object.</returns>
 internal PdfFileObject CreateFileObject(PdfParser analyzer)
 {
     PdfFileObject fileObject = new PdfFileObject(analyzer);

     // Identity of the object.
     fileObject.ObjID = this.ObjID;

     // Where the whole object sits and how long it is.
     fileObject.OriginalObjectAbsolutePosition = this.Address;
     fileObject.OriginalObjectLength = this.Length;

     // Where its stream data sits and how long that is.
     fileObject.StreamPosition = this.StreamPosition;
     fileObject.StreamLength = this.StreamLength;

     return fileObject;
 }
        /// <summary>
        /// Scans <paramref name="s"/> byte by byte for "&lt;id&gt; 0 obj ... endobj" spans and
        /// returns the objects found, keyed by object ID.
        /// </summary>
        /// <remarks>
        /// The scan is a hand-written state machine driven by <c>step</c>:
        /// 0 looks for the first digit of an ID, 1-6 match the rest of "&lt;id&gt; 0 obj",
        /// 7 is the object body, 9-15 match "stream" and locate the stream data,
        /// 16-25 run through the stream data looking for "endstream", and 28-33 match
        /// "endobj". Only a generation number consisting of the single character '0' is
        /// recognized. After the scan, every object of type <c>ObjStm</c> is expanded and
        /// the objects it contains are added to (or replace entries in) the result.
        /// </remarks>
        /// <param name="s">Stream to scan; it is rewound to its beginning first.</param>
        /// <returns>Dictionary mapping object ID to the corresponding file object.</returns>
        internal Dictionary<int, PdfFileObject> LoadObjects(Stream s)
        {
            s.Seek(0, SeekOrigin.Begin);
            Dictionary<int, PdfFileObject> objects = new Dictionary<int, PdfFileObject>();
            int c = 0, objIDCandidate = 0, streamLengthCandidate = -1;
            long streamPositionCandidate = -2;
            long addressCandidate = -1, step = 0, position = 0;
            // Set when "endobj" has just been seen; the object is stored once the
            // whitespace that follows it has been consumed.
            ObjectAddress oa = null;
            while ((c = s.ReadByte()) != -1)
            {               
                switch (step)
                {
                    // Outside any object: finish a pending object, then look for an ID.
                    case 0:
                        if (oa != null)
                        {
                            if (c == ' ' || c == '\r' || c == '\n')
                            {
                                // Trailing whitespace is counted as part of the object.
                                oa.Length++;
                            }
                            else
                            {
                                PdfFileObject pfo = oa.CreateFileObject(this);
                                // The object located exactly at startxref is kept as the trailer.
                                if (pfo.OriginalObjectAbsolutePosition == this.startxref)
                                {
                                    this.trailer = pfo;
                                }

                                objects[oa.ObjID] = pfo;
                                // NOTE(review): this seeks this.inputStream, not the parameter s. If both are
                                // the same stream, position + 1 is where the read already is - confirm intent.
                                this.inputStream.Seek(position+1, SeekOrigin.Begin);
                                oa = null;
                                streamPositionCandidate = -2;
                                streamLengthCandidate = -1;
                            }
                        }
                        // A digit '1'-'9' (48 is '0', so a leading zero is excluded) may start an object ID.
                        if (c > 48 && c < 58)
                        {
                            addressCandidate = position;
                            objIDCandidate = c - 48;
                            step = 1;
                        }
                        break;
                    // Remaining digits of the ID, terminated by a single space.
                    case 1:
                        if (c > 47 && c < 58)
                        {
                            objIDCandidate = objIDCandidate * 10 + c - 48;                           
                        }
                        else
                        {
                            step = (c == ' ') ? 2 : 0;
                        }
                        break;
                    // Generation number: only the character '0' is accepted.
                    case 2:
                        step = (c == '0') ? 3 : 0;
                        break;
                    // Space between the generation number and "obj".
                    case 3:
                        step = (c == ' ') ? 4 : 0;
                        break;
                    // "obj", one character per state.
                    case 4:
                        step = (c == 'o') ? 5 : 0;
                        break;
                    case 5:
                        step = (c == 'b') ? 6 : 0;
                        break;
                    case 6:
                        if (c == 'j')
                        {
                            step = 7;
                           
                        }
                        else
                        {
                            step = 0;
                        }
                        break;
                    // Inside the object body, before any stream.
                    case 7:
                        
                        // Records the ID of the object currently being scanned in the lastObjID field.
                        if (objIDCandidate !=lastObjID)
                        {
                            lastObjID = objIDCandidate;
                            
                        }

                        // No-op; looks like a leftover debugging hook for object 8.
                        if (objIDCandidate == 8)
                        {
                            if (c == '>')
                            {

                            }
                        }
                       
                        if (c == 'e')
                        {
                            // possible beginning of "endobj" (the remaining "ndobj" is matched by 29-33)
                            step = 29;
                        }
                        if (c == 's')
                        {
                            // possible beginning of "stream"
                            step = 9;
                            break;
                        }
                        break;
                    // No transition in this method sets step to 8, so this state is never entered.
                    case 8:
                        if (c == 's')
                        {
                            step++;
                            break;
                        }
                        if (c == 'e')
                        {
                            step = 29;
                            break;
                        }
                        if (c == '\r' || c == '\n' || c == ' ')
                        {
                            break;
                        }
                        step = 7;
                        break;
                    // "tream" (the 's' was consumed in state 7); any mismatch returns to the body.
                    case 9:
                        step = (c == 't') ? 10 : 7;
                        break;
                    case 10:
                        step = (c == 'r') ? 11 : 7;
                        break;
                    case 11:
                        step = (c == 'e') ? 12 : 7;
                        break;
                    case 12:
                        step = (c == 'a') ? 13 : 7;
                        break;
                    case 13:
                        step = (c == 'm') ? 14 : 7;
                        break;
                    // "stream" must be followed by whitespace.
                    case 14:
                        step = (c == '\r' || c == '\n' || c == ' ') ? 15 : 7;
                        break;
                    // Skip further whitespace; the first other byte marks the start of the stream data.
                    case 15:
                        if (c == '\r' ||c  == '\n' || c == ' ')
                        {

                        }
                        else
                        {
                            step++;
                            streamPositionCandidate = position;
                        }
                        break;
                    // Inside the stream data: extend the length until an 'e' might start "endstream"/"endobj".
                    case 16:
                        if (c == 'e')
                        {
                            step++;
                            break;
                        }
                       
                        streamLengthCandidate = (int)(position - streamPositionCandidate + 1);
                        break;
                    // "nd" after the 'e'; any mismatch falls back to the stream data.
                    case 17:
                        step = (c == 'n') ? 18 : 16;
                        break;
                    case 18:
                        step = (c == 'd') ? 19 : 16;
                        break;
                    // After "end": 'o' continues as "endobj" (state 32), 's' as "endstream".
                    case 19:
                        if (c == 'o')
                        {
                            step = 32;
                            break;
                        }
                        if (c == 's')
                        {
                            step = 20;
                            break;
                        }
                        step = 16;
                        break;
                    // "tream" of "endstream".
                    case 20:
                        step = (c == 't') ? 21 : 16;
                        break;
                    case 21:
                        step = (c == 'r') ? 22 : 16;
                        break;
                    case 22:
                        step = (c == 'e') ? 23 : 16;
                        break;
                    case 23:
                        step = (c == 'a') ? 24 : 16;
                        break;
                    case 24:
                        step = (c == 'm') ? 25 : 16;
                        break;
                    // "endstream" must be followed by whitespace.
                    case 25:
                        step = (c == '\r' || c == '\n' || c == ' ') ? 28 : 16;
                        break;
                        

                    // "endobj", one character per state; any mismatch returns to the body.
                    case 28:
                        step = (c == 'e') ? 29 : 7;
                        break;
                    case 29:
                        step = (c == 'n') ? 30 : 7;
                        break;
                    case 30:
                        step = (c == 'd') ? 31 : 7;
                        break;
                    case 31:
                        step = (c == 'o') ? 32 : 7;
                        break;
                    case 32:
                        step = (c == 'b') ? 33 : 7;
                        break;
                    case 33:
                        if (c == 'j')
                        {
                            step = 0;
                            // No-op; looks like a leftover debugging hook for object 140.
                            if (objIDCandidate == 140)
                            {

                            }
                            // Object complete: its length runs from the first ID digit through this 'j'.
                            oa = new ObjectAddress(objIDCandidate, addressCandidate
                                ,(int)( position - addressCandidate+1),streamPositionCandidate,streamLengthCandidate);
                           
                        }
                        else
                        {
                            step = 7;
                        }
                        break;



                }
                position++;
            }
            // NOTE(review): an object still pending in oa when the stream ends (only whitespace,
            // or nothing, after its "endobj") is never added to the dictionary.

            // Second pass: expand object streams and register the objects they contain.
            List<PdfFileObject> ObjStms = new List<PdfFileObject>();
            foreach (PdfFileObject pfo in objects.Values)
            {
                if (pfo.Type == PdfObjectTypes.ObjStm)
                {
                    ObjStms.Add(pfo);
                }
            }
            // Done over a separate list because the dictionary is modified below.
            foreach (PdfFileObject ObjStm in ObjStms)
            {
                foreach (PdfFileObject pfo2 in ObjStm.GetStreamObjects())
                {
                    objects[pfo2.ObjID] = pfo2;
                }
            }
            return objects;
        }
        /// <summary>
        /// Reads the trailer text of the document, starting at the given offset of the input stream.
        /// </summary>
        /// <param name="xref">Absolute position in <c>inputStream</c> at which reading starts.</param>
        /// <returns>
        /// An object of type <c>PdfObjectTypes.Trailer</c> with <c>ObjID</c> 0 whose
        /// <c>OriginalText</c> holds the trimmed lines from the last line starting with
        /// "trailer" up to (but not including) the "startxref" line, each followed by "\r".
        /// The text is empty if "startxref" is met before any "trailer" line.
        /// </returns>
        /// <exception cref="Exception">The stream ends before a "startxref" line is found.</exception>
        private PdfFileObject ParseTrailer(long xref)
        {
            PdfFileObject pfo = new PdfFileObject(this);
            pfo.OriginalText = "";
            pfo.ObjID = 0;
            pfo.Type = PdfObjectTypes.Trailer;

            this.inputStream.Seek(xref, SeekOrigin.Begin);

            // The reader is deliberately not disposed: disposing a StreamReader also closes
            // the underlying stream, which belongs to the parser rather than to this method.
            StreamReader sr = new StreamReader(this.inputStream);
            string line;
            bool istrailer = false;
            while ((line = sr.ReadLine()) != null)
            {
                line = line.Trim();
                if (line == "startxref")
                {
                    return pfo;
                }

                // "trailer" is a file-format keyword, so compare ordinally rather than
                // with the current culture's linguistic rules.
                if (line.StartsWith("trailer", StringComparison.Ordinal))
                {
                    // A new trailer keyword discards anything collected so far.
                    pfo.OriginalText = "";
                    istrailer = true;
                }
                if (istrailer)
                {
                    pfo.OriginalText += line + "\r";
                }
            }

            throw new Exception("Cannot find trailer");
        }
Example #12
0
        /// <summary>
        /// Appends the selected pages of a parsed document, together with every object
        /// they reference, to the merge target.
        /// </summary>
        /// <remarks>
        /// Works in three passes: collect the objects reachable from the selected pages,
        /// assign each one a new ID (reusing the ID of an object with the same hash when
        /// <c>OptimizeStreams</c> is on), then transform and write the objects. The method
        /// returns early, without undoing earlier work, whenever <c>CancelPending</c> is set.
        /// </remarks>
        /// <param name="pdfParser">Parser of the source document.</param>
        /// <param name="PageNumbers">
        /// Indexes into the array returned by <c>GetAllPages</c>; null selects every page.
        /// </param>
        public void Add(PdfParser pdfParser, int[] PageNumbers)
        {
            if (this.CancelPending)
            {
                return;
            }

            this.PdfMergerProgress(
                this,
                new PdfMergerProgressEventArgs(this.elementCount, 0, 0, "Analyzing PDF Structure"));

            PdfFileObject[] pages         = pdfParser.GetAllPages();
            ArrayList       selectedPages = new ArrayList();

            #region gets needed objects

            Hashtable relatedObjects = new Hashtable();

            // No explicit selection means every page, in document order.
            if (PageNumbers == null)
            {
                PageNumbers = new int[pages.Length];
                for (int i = 0; i < PageNumbers.Length; i++)
                {
                    PageNumbers[i] = i;
                }
            }

            int pageCount = PageNumbers.Length;
            int step      = Math.Max(pageCount / 20, 10);

            for (int i = 0; i < pageCount; i++)
            {
                if (this.CancelPending)
                {
                    return;
                }

                // Progress is reported every "step" pages and on the last one.
                int  currentPageIndex = i + 1;
                bool reportPage       = currentPageIndex == pageCount || currentPageIndex % step == 0;
                if (reportPage)
                {
                    this.PdfMergerProgress(
                        this,
                        new PdfMergerProgressEventArgs(this.elementCount, currentPageIndex, pageCount, "Analyzing Page {0} of {1}"));
                }

                PdfFileObject selectedPage = pages[PageNumbers[i]];
                selectedPages.Add(selectedPage);
                selectedPage.PopulateRelatedObjects(relatedObjects);
            }

            ArrayList neededObjects = new ArrayList(relatedObjects.Values);
            neededObjects.Sort(new PdfFileObjectNumberComparer());

            #endregion

            #region creates IDs transformation table

            int objectCount = neededObjects.Count;

            step = Math.Max(objectCount / 20, 50);

            Hashtable transformationTable = new Hashtable();
            for (int i = 0; i < objectCount; i++)
            {
                PdfFileObject pfo = (PdfFileObject)neededObjects[i];

                if (this.CancelPending)
                {
                    return;
                }

                int  objectIndex  = i + 1;
                bool reportObject = objectIndex == objectCount || objectIndex % step == 0;
                if (reportObject)
                {
                    this.PdfMergerProgress(
                        this,
                        new PdfMergerProgressEventArgs(this.elementCount, objectIndex, objectCount, "Rebuilding Indexes for object {0} of {1}"));
                }

                // The hash is only consulted when stream optimization is enabled.
                string hash = null;
                if (OptimizeStreams)
                {
                    hash = pfo.Hash;
                }

                bool alreadyWritten = hash != null && this.alreadyUsedObjects.ContainsKey(hash);
                if (alreadyWritten)
                {
                    // Same hash as an earlier object: map to that object's ID and skip writing.
                    pfo.excludedByHashComparison = true;
                    transformationTable.Add(pfo.ObjID, (int)this.alreadyUsedObjects[hash]);
                }
                else
                {
                    number++;
                    transformationTable.Add(pfo.ObjID, number);
                    if (hash != null)
                    {
                        this.alreadyUsedObjects.Add(hash, number);
                    }
                }
            }

            #endregion

            for (int i = 0; i < objectCount; i++)
            {
                PdfFileObject pfo = (PdfFileObject)neededObjects[i];

                if (this.CancelPending)
                {
                    return;
                }

                int  objectIndex = i + 1;
                bool reportWrite = objectIndex == objectCount || objectIndex % step == 0;
                if (reportWrite)
                {
                    this.PdfMergerProgress(
                        this,
                        new PdfMergerProgressEventArgs(this.elementCount, objectIndex, objectCount, "Writing object {0} of {1}"));
                }

                pfo.Transform(transformationTable);
                if (!pfo.excludedByHashComparison)
                {
                    // Remember where the object starts before advancing the write position.
                    this.xrefs.Add(pos);
                    this.pos += pfo.WriteToStream(this.target);
                }
                // Drop the transformed text once it is no longer needed.
                pfo.TransformatedTextPart = null;
            }

            foreach (PdfFileObject selectedPage in selectedPages)
            {
                this.pageNumbers.Add(selectedPage.TransformatedObjID);
            }

            this.elementCount++;
        }