/// <summary>
/// Orders two <c>PdfFileObject</c> instances by ascending <c>ObjID</c>.
/// </summary>
/// <param name="x">First object to compare; may be null.</param>
/// <param name="y">Second object to compare; may be null.</param>
/// <returns>
/// A negative value when <paramref name="x"/> sorts before <paramref name="y"/>,
/// zero when they are equal, and a positive value otherwise. A null reference
/// sorts before any non-null object.
/// </returns>
/// <exception cref="System.ArgumentException">
/// Either argument is non-null but is not a <c>PdfFileObject</c>.
/// </exception>
public int Compare(object x, object y)
{
    if (ReferenceEquals(x, y))
    {
        return 0;
    }
    if (x == null)
    {
        return -1;
    }
    if (y == null)
    {
        return 1;
    }

    PdfFileObject a = x as PdfFileObject;
    PdfFileObject b = y as PdfFileObject;
    if (a == null || b == null)
    {
        // Previously this case surfaced as a NullReferenceException on a.ObjID / b.ObjID.
        throw new System.ArgumentException("Both arguments must be PdfFileObject instances.");
    }

    return a.ObjID.CompareTo(b.ObjID);
}
/// <summary>
/// Reads the text of the trailer section located at or after <paramref name="xref"/>.
/// </summary>
/// <param name="xref">Absolute offset in <c>inputStream</c> at which reading starts.</param>
/// <returns>
/// A <c>PdfFileObject</c> with <c>ObjID</c> 0 and type <c>PdfObjectTypes.Trailer</c>.
/// Its <c>OriginalText</c> holds every trimmed line from the last line starting with
/// "trailer" up to (not including) the "startxref" line, each terminated by "\r".
/// The text stays empty when "startxref" is reached without any "trailer" line.
/// </returns>
/// <exception cref="Exception">The stream ends before a "startxref" line is found.</exception>
private PdfFileObject ParseTrailer(long xref)
{
    PdfFileObject pfo = new PdfFileObject(this);
    pfo.OriginalText = "";
    pfo.ObjID = 0;
    pfo.Type = PdfObjectTypes.Trailer;

    this.inputStream.Seek(xref, SeekOrigin.Begin);

    // The reader is deliberately not disposed: disposing a StreamReader closes the
    // underlying stream, and inputStream is a field that is still used after this call.
    StreamReader sr = new StreamReader(this.inputStream);
    System.Text.StringBuilder trailerText = new System.Text.StringBuilder();
    bool istrailer = false;
    string line;

    while ((line = sr.ReadLine()) != null)
    {
        line = line.Trim();
        if (line == "startxref")
        {
            if (istrailer)
            {
                pfo.OriginalText = trailerText.ToString();
            }
            return pfo;
        }

        // Ordinal comparison: "trailer" is a file-format keyword, not linguistic text.
        if (line.StartsWith("trailer", System.StringComparison.Ordinal))
        {
            // A later "trailer" keyword replaces whatever was collected so far.
            trailerText.Length = 0;
            istrailer = true;
        }

        if (istrailer)
        {
            trailerText.Append(line).Append("\r");
        }
    }

    throw new Exception("Cannot find trailer");
}
/// <summary>
/// Adds this object to <paramref name="container"/> (keyed by <c>ObjID</c>) and then
/// recursively adds every object referenced from <c>OriginalText</c>.
/// </summary>
/// <param name="container">
/// Table of already collected objects. An object whose ID is already present is not
/// visited again, which also stops the recursion on reference cycles.
/// </param>
/// <remarks>
/// References are found with <c>regex1</c> (defined outside this method); it is expected
/// to expose the named groups "id" and "parent". Matches with a non-empty "parent" group
/// are not followed.
/// </remarks>
internal void PopulateRelatedObjects(Hashtable container)
{
    if (container.ContainsKey(this.ObjID))
    {
        return;
    }

    container.Add(this.ObjID, this);

    for (Match reference = regex1.Match(this.OriginalText); reference.Success; reference = reference.NextMatch())
    {
        int referencedId = int.Parse(reference.Groups["id"].Value);
        bool isParentReference = reference.Groups["parent"].Length != 0;
        if (isParentReference || container.Contains(referencedId))
        {
            continue;
        }

        // ReadObject may return null for an ID it cannot resolve; such references are skipped.
        PdfFileObject referenced = this.pdfFileParser.ReadObject(referencedId);
        if (referenced != null)
        {
            referenced.PopulateRelatedObjects(container);
        }
    }
}
/// <summary>
/// Builds a <c>PdfFileObject</c> that carries the location data recorded in this instance.
/// </summary>
/// <param name="analyzer">Parser handed to the <c>PdfFileObject</c> constructor.</param>
/// <returns>
/// A new object whose ID, absolute position, length, stream position and stream length
/// are copied from <c>ObjID</c>, <c>Address</c>, <c>Length</c>, <c>StreamPosition</c>
/// and <c>StreamLength</c> respectively.
/// </returns>
internal PdfFileObject CreateFileObject(PdfParser analyzer)
{
    PdfFileObject fileObject = new PdfFileObject(analyzer);

    // Identity and location of the whole object.
    fileObject.ObjID = ObjID;
    fileObject.OriginalObjectAbsolutePosition = Address;
    fileObject.OriginalObjectLength = Length;

    // Location of the embedded stream data.
    fileObject.StreamPosition = StreamPosition;
    fileObject.StreamLength = StreamLength;

    return fileObject;
}
/// <summary>
/// Extracts the objects packed inside this object stream.
/// </summary>
/// <returns>
/// One <c>PdfFileObject</c> per packed object, in header order, whose <c>OriginalText</c>
/// is the object's text wrapped as "&lt;id&gt; 0 obj ... endobj". The list is empty when
/// this object is not of type <c>PdfObjectTypes.ObjStm</c>, when the inflated stream
/// content is unavailable (null), or when the "N" entry is not positive.
/// </returns>
/// <exception cref="System.FormatException">
/// The inflated content does not contain a header of "N" object-number/offset pairs.
/// </exception>
/// <remarks>
/// The header is a run of whitespace-separated "objectNumber offset" pairs; offsets are
/// applied relative to the first character after the header.
/// NOTE(review): the "N" entry is read with <c>GetInt32Value("N").Value</c> exactly as
/// before; what happens when the entry is missing depends on that helper.
/// </remarks>
internal List<PdfFileObject> GetStreamObjects()
{
    List<PdfFileObject> objects = new List<PdfFileObject>();
    if (this.Type != PdfObjectTypes.ObjStm)
    {
        return objects;
    }

    string txt = this.StreamContent2Inflated;
    if (txt == null)
    {
        return objects;
    }

    int n = this.GetInt32Value("N").Value;
    if (n <= 0)
    {
        return objects;
    }

    // One quantified pattern instead of n concatenated copies: every repetition is
    // recorded in the Captures collection of groups 1 (object number) and 2 (offset).
    Match header = Regex.Match(txt, @"(?:(\d+)\s+(\d+)\s+){" + n + "}");
    if (!header.Success)
    {
        throw new System.FormatException(
            "Object stream " + this.ObjID + " does not contain a header of " + n + " object number/offset pairs.");
    }

    CaptureCollection ids = header.Groups[1].Captures;
    CaptureCollection offsets = header.Groups[2].Captures;

    // Object data starts right after the header; Index is included so that text
    // preceding the header does not shift every offset.
    int dataStart = header.Index + header.Length;

    for (int x = 0; x < n; x++)
    {
        PdfFileObject pfo = new PdfFileObject(this.pdfFileParser);
        pfo.ObjID = int.Parse(ids[x].Value);
        int position = int.Parse(offsets[x].Value);

        string body;
        if (x == n - 1)
        {
            // The last object runs to the end of the inflated content.
            body = txt.Substring(dataStart + position);
        }
        else
        {
            // Every other object ends where the next one begins.
            int length = int.Parse(offsets[x + 1].Value) - position;
            body = txt.Substring(dataStart + position, length);
        }

        pfo.OriginalText = pfo.ObjID + " 0 obj " + body + "\nendobj\n";
        objects.Add(pfo);
    }

    return objects;
}
/// <summary>
/// Produces <c>TransformatedTextPart</c> from <c>OriginalTextPart</c> by renumbering
/// object IDs according to <paramref name="TransformationTable"/>.
/// </summary>
/// <param name="TransformationTable">
/// Maps original object IDs (int) to new object IDs (int). It must contain an entry for
/// this object's <c>ObjID</c>.
/// </param>
/// <exception cref="Exception">The object has already been transformed.</exception>
/// <exception cref="System.ArgumentNullException"><paramref name="TransformationTable"/> is null.</exception>
/// <exception cref="System.ArgumentException">
/// The table has no integer entry for this object's <c>ObjID</c>.
/// </exception>
/// <remarks>
/// Steps, in order:
/// 1. For a page without its own media box text, the nearest ancestor's
///    <c>MediaBoxText</c> is inserted after "/Type /Page".
/// 2. Every "&lt;id&gt; 0 obj" / "&lt;id&gt; 0 R" occurrence is passed to <c>FilterEval</c>.
/// 3. Any "/Parent" entry is replaced with the fixed reference "/Parent 2 0 R".
/// Arguments are validated before the object is marked as transformed, so a rejected
/// call leaves the object untouched.
/// </remarks>
internal virtual void Transform(Hashtable TransformationTable)
{
    if (transformed)
    {
        throw new Exception("Already transformed!");
    }
    if (TransformationTable == null)
    {
        throw new System.ArgumentNullException("TransformationTable");
    }

    // Previously a missing entry surfaced as a NullReferenceException while unboxing.
    object mappedId = TransformationTable[this.ObjID];
    if (!(mappedId is int))
    {
        throw new System.ArgumentException(
            "The transformation table has no integer entry for object " + this.ObjID + ".",
            "TransformationTable");
    }

    transformed = true;
    this.TransformationTable = TransformationTable;
    this.TransformatedObjID = (int)mappedId;
    this.filterEval = new MatchEvaluator(this.FilterEval);
    this.TransformatedTextPart = this.OriginalTextPart;

    if (this.Type == PdfObjectTypes.Page && this.MediaBoxText == "")
    {
        // Walk up the parent chain and copy the first non-empty media box into the page.
        for (PdfFileObject ancestor = this.GetParent(); ancestor != null; ancestor = ancestor.GetParent())
        {
            string mb = ancestor.MediaBoxText;
            if (mb != "")
            {
                this.TransformatedTextPart = Regex.Replace(
                    this.TransformatedTextPart, @"/Type\s*/Page", "/Type /Page\r" + mb);
                break;
            }
        }
    }

    // Renumber object headers and indirect references through FilterEval.
    // NOTE(review): the trailing [^G] presumably keeps colour operators such as "0 RG"
    // from being read as references — confirm against FilterEval.
    this.TransformatedTextPart = Regex.Replace(
        this.TransformatedTextPart,
        @"(?'id'\d+)(?'rest' 0 (obj|R))(?'end'[^G])",
        this.filterEval,
        RegexOptions.ExplicitCapture);

    // Re-parent onto object 2 (hard-coded here; presumably the merged page tree root).
    this.TransformatedTextPart = Regex.Replace(
        this.TransformatedTextPart, @"/Parent\s+(\d+ 0 R)*", "/Parent 2 0 R \r");
}
/// <summary>
/// Unpacks the objects stored inside this object stream.
/// </summary>
/// <returns>
/// One <c>PdfFileObject</c> per packed object, in header order, with
/// <c>OriginalText</c> set to "&lt;id&gt; 0 obj ... endobj". Returns an empty list when this
/// object is not a <c>PdfObjectTypes.ObjStm</c>, when <c>StreamContent2Inflated</c> is
/// null, or when the "N" entry is not positive.
/// </returns>
/// <exception cref="System.FormatException">
/// The inflated content does not contain a header of "N" object-number/offset pairs.
/// </exception>
/// <remarks>
/// The header consists of whitespace-separated "objectNumber offset" pairs; each offset
/// is applied relative to the first character following the header.
/// NOTE(review): "N" is still read through <c>GetInt32Value("N").Value</c>; the outcome
/// for a missing entry depends on that helper.
/// </remarks>
internal List<PdfFileObject> GetStreamObjects()
{
    List<PdfFileObject> objects = new List<PdfFileObject>();
    if (this.Type != PdfObjectTypes.ObjStm)
    {
        return objects;
    }

    string txt = this.StreamContent2Inflated;
    if (txt == null)
    {
        return objects;
    }

    int n = this.GetInt32Value("N").Value;
    if (n <= 0)
    {
        return objects;
    }

    // A single quantified pattern replaces n concatenated copies; each repetition is
    // available through the Captures of group 1 (object number) and group 2 (offset).
    Match header = Regex.Match(txt, @"(?:(\d+)\s+(\d+)\s+){" + n + "}");
    if (!header.Success)
    {
        throw new System.FormatException(
            "Object stream " + this.ObjID + " does not contain a header of " + n + " object number/offset pairs.");
    }

    CaptureCollection ids = header.Groups[1].Captures;
    CaptureCollection offsets = header.Groups[2].Captures;

    // Offsets count from the end of the header; Index is added so that any text in
    // front of the header does not skew them.
    int dataStart = header.Index + header.Length;

    for (int x = 0; x < n; x++)
    {
        PdfFileObject pfo = new PdfFileObject(this.pdfFileParser);
        pfo.ObjID = int.Parse(ids[x].Value);
        int position = int.Parse(offsets[x].Value);

        string body;
        if (x == n - 1)
        {
            // Final object: everything up to the end of the content.
            body = txt.Substring(dataStart + position);
        }
        else
        {
            // Otherwise the object ends at the next object's offset.
            int length = int.Parse(offsets[x + 1].Value) - position;
            body = txt.Substring(dataStart + position, length);
        }

        pfo.OriginalText = pfo.ObjID + " 0 obj " + body + "\nendobj\n";
        objects.Add(pfo);
    }

    return objects;
}
/// <summary>
/// Scans <paramref name="s"/> byte by byte for "&lt;id&gt; 0 obj ... endobj" sections,
/// creates a <c>PdfFileObject</c> for each, and then adds the objects packed inside
/// object streams.
/// </summary>
/// <param name="s">Stream to scan; it is rewound to offset 0 before scanning.</param>
/// <returns>
/// The objects keyed by object ID. When an ID occurs more than once, the definition
/// found last wins; objects unpacked from object streams overwrite scanned ones.
/// </returns>
/// <remarks>
/// Only generation-0 headers ("&lt;id&gt; 0 obj") are recognised. While scanning, the object
/// whose absolute position equals <c>startxref</c> is stored in <c>trailer</c>, and
/// <c>lastObjID</c> is updated with the ID of the object being scanned.
/// The scanner is a hand-written state machine (variable <c>step</c>):
/// 0 = between objects, 1-6 = matching the "&lt;id&gt; 0 obj" header, 7 = inside the object
/// body, 9-14 = matching "stream", 15 = whitespace before stream data, 16 = inside stream
/// data, 17-25 = matching "endstream" (or "endobj") inside stream data, 28-33 = matching
/// "endobj". A failed keyword match falls back to the enclosing state without
/// re-examining the current byte.
/// </remarks>
internal Dictionary<int, PdfFileObject> LoadObjects(Stream s)
{
    s.Seek(0, SeekOrigin.Begin);
    Dictionary<int, PdfFileObject> objects = new Dictionary<int, PdfFileObject>();

    int c;
    int step = 0;
    int objIDCandidate = 0;
    int streamLengthCandidate = -1;
    long streamPositionCandidate = -2;
    long addressCandidate = -1;
    long position = 0;
    ObjectAddress oa = null;   // object completed by "endobj" but not yet registered

    while ((c = s.ReadByte()) != -1)
    {
        bool isBlank = c == ' ' || c == '\r' || c == '\n';

        switch (step)
        {
            case 0:
                if (oa != null)
                {
                    if (isBlank)
                    {
                        // Whitespace right after "endobj" still belongs to the object.
                        oa.Length++;
                    }
                    else
                    {
                        PdfFileObject pfo = oa.CreateFileObject(this);
                        if (pfo.OriginalObjectAbsolutePosition == this.startxref)
                        {
                            this.trailer = pfo;
                        }
                        objects[oa.ObjID] = pfo;
                        // Put inputStream just past the current byte
                        // (presumably because creating the object may have moved it).
                        this.inputStream.Seek(position + 1, SeekOrigin.Begin);
                        oa = null;
                        streamPositionCandidate = -2;
                        streamLengthCandidate = -1;
                    }
                }
                // An object ID starts with a digit 1-9.
                if (c > 48 && c < 58)
                {
                    addressCandidate = position;
                    objIDCandidate = c - 48;
                    step = 1;
                }
                break;

            // ---- header: "<id> 0 obj" ----
            case 1:
                if (c > 47 && c < 58)
                {
                    objIDCandidate = objIDCandidate * 10 + c - 48;
                }
                else
                {
                    step = (c == ' ') ? 2 : 0;
                }
                break;
            case 2:
                step = (c == '0') ? 3 : 0;
                break;
            case 3:
                step = (c == ' ') ? 4 : 0;
                break;
            case 4:
                step = (c == 'o') ? 5 : 0;
                break;
            case 5:
                step = (c == 'b') ? 6 : 0;
                break;
            case 6:
                step = (c == 'j') ? 7 : 0;
                break;

            // ---- object body ----
            case 7:
                if (objIDCandidate != lastObjID)
                {
                    lastObjID = objIDCandidate;
                }
                if (c == 'e')
                {
                    // possible beginning of "endobj"
                    step = 29;
                }
                else if (c == 's')
                {
                    // possible beginning of "stream"
                    step = 9;
                }
                break;

            // ---- "stream" keyword ----
            case 9:
                step = (c == 't') ? 10 : 7;
                break;
            case 10:
                step = (c == 'r') ? 11 : 7;
                break;
            case 11:
                step = (c == 'e') ? 12 : 7;
                break;
            case 12:
                step = (c == 'a') ? 13 : 7;
                break;
            case 13:
                step = (c == 'm') ? 14 : 7;
                break;
            case 14:
                step = isBlank ? 15 : 7;
                break;
            case 15:
                // Skip the whitespace after "stream"; the first other byte starts the data.
                if (!isBlank)
                {
                    step = 16;
                    streamPositionCandidate = position;
                }
                break;

            // ---- stream data ----
            case 16:
                if (c == 'e')
                {
                    // possible beginning of "endstream"; the length is left as it is
                    step = 17;
                    break;
                }
                streamLengthCandidate = (int)(position - streamPositionCandidate + 1);
                break;
            case 17:
                step = (c == 'n') ? 18 : 16;
                break;
            case 18:
                step = (c == 'd') ? 19 : 16;
                break;
            case 19:
                if (c == 'o')
                {
                    // "endo": continue as "endobj"
                    step = 32;
                }
                else if (c == 's')
                {
                    // "ends": continue as "endstream"
                    step = 20;
                }
                else
                {
                    step = 16;
                }
                break;
            case 20:
                step = (c == 't') ? 21 : 16;
                break;
            case 21:
                step = (c == 'r') ? 22 : 16;
                break;
            case 22:
                step = (c == 'e') ? 23 : 16;
                break;
            case 23:
                step = (c == 'a') ? 24 : 16;
                break;
            case 24:
                step = (c == 'm') ? 25 : 16;
                break;
            case 25:
                step = isBlank ? 28 : 16;
                break;

            // ---- "endobj" keyword ----
            case 28:
                step = (c == 'e') ? 29 : 7;
                break;
            case 29:
                step = (c == 'n') ? 30 : 7;
                break;
            case 30:
                step = (c == 'd') ? 31 : 7;
                break;
            case 31:
                step = (c == 'o') ? 32 : 7;
                break;
            case 32:
                step = (c == 'b') ? 33 : 7;
                break;
            case 33:
                if (c == 'j')
                {
                    step = 0;
                    // Registration is deferred to state 0 so that trailing whitespace
                    // can still be added to the object's length.
                    oa = new ObjectAddress(
                        objIDCandidate,
                        addressCandidate,
                        (int)(position - addressCandidate + 1),
                        streamPositionCandidate,
                        streamLengthCandidate);
                }
                else
                {
                    step = 7;
                }
                break;
        }
        position++;
    }

    // An object whose "endobj" is followed only by whitespace (or nothing) up to the end
    // of the stream never reaches the registration branch in state 0; register it here.
    if (oa != null)
    {
        PdfFileObject last = oa.CreateFileObject(this);
        if (last.OriginalObjectAbsolutePosition == this.startxref)
        {
            this.trailer = last;
        }
        objects[oa.ObjID] = last;
    }

    // Collect the object streams first: the dictionary must not be modified while its
    // Values collection is being enumerated.
    List<PdfFileObject> ObjStms = new List<PdfFileObject>();
    foreach (PdfFileObject pfo in objects.Values)
    {
        if (pfo.Type == PdfObjectTypes.ObjStm)
        {
            ObjStms.Add(pfo);
        }
    }
    foreach (PdfFileObject ObjStm in ObjStms)
    {
        foreach (PdfFileObject pfo2 in ObjStm.GetStreamObjects())
        {
            objects[pfo2.ObjID] = pfo2;
        }
    }

    return objects;
}
/// <summary>
/// Creates a <c>PdfFileObject</c> describing the object located by this address record.
/// </summary>
/// <param name="analyzer">Parser passed to the <c>PdfFileObject</c> constructor.</param>
/// <returns>
/// A new object with <c>ObjID</c>, <c>OriginalObjectAbsolutePosition</c>,
/// <c>OriginalObjectLength</c>, <c>StreamPosition</c> and <c>StreamLength</c> copied
/// from this instance's <c>ObjID</c>, <c>Address</c>, <c>Length</c>,
/// <c>StreamPosition</c> and <c>StreamLength</c>.
/// </returns>
internal PdfFileObject CreateFileObject(PdfParser analyzer)
{
    PdfFileObject result = new PdfFileObject(analyzer);

    // Where the object itself lives in the file.
    result.ObjID = ObjID;
    result.OriginalObjectAbsolutePosition = Address;
    result.OriginalObjectLength = Length;

    // Where its stream data lives.
    result.StreamPosition = StreamPosition;
    result.StreamLength = StreamLength;

    return result;
}
/// <summary>
/// Scans <paramref name="s"/> byte by byte for "&lt;id&gt; 0 obj ... endobj" sections,
/// creates a <c>PdfFileObject</c> for each, and then adds the objects packed inside
/// object streams.
/// </summary>
/// <param name="s">Stream to scan; it is rewound to offset 0 before scanning.</param>
/// <returns>
/// The objects keyed by object ID. When an ID occurs more than once, the definition
/// found last wins; objects unpacked from object streams overwrite scanned ones.
/// </returns>
/// <remarks>
/// Only generation-0 headers ("&lt;id&gt; 0 obj") are recognised. While scanning, the object
/// whose absolute position equals <c>startxref</c> is stored in <c>trailer</c>, and
/// <c>lastObjID</c> is updated with the ID of the object being scanned.
/// The scanner is a hand-written state machine (variable <c>step</c>):
/// 0 = between objects, 1-6 = matching the "&lt;id&gt; 0 obj" header, 7 = inside the object
/// body, 9-14 = matching "stream", 15 = whitespace before stream data, 16 = inside stream
/// data, 17-25 = matching "endstream" (or "endobj") inside stream data, 28-33 = matching
/// "endobj". A failed keyword match falls back to the enclosing state without
/// re-examining the current byte.
/// </remarks>
internal Dictionary<int, PdfFileObject> LoadObjects(Stream s)
{
    s.Seek(0, SeekOrigin.Begin);
    Dictionary<int, PdfFileObject> objects = new Dictionary<int, PdfFileObject>();

    int c;
    int step = 0;
    int objIDCandidate = 0;
    int streamLengthCandidate = -1;
    long streamPositionCandidate = -2;
    long addressCandidate = -1;
    long position = 0;
    ObjectAddress oa = null;   // object completed by "endobj" but not yet registered

    while ((c = s.ReadByte()) != -1)
    {
        bool isBlank = c == ' ' || c == '\r' || c == '\n';

        switch (step)
        {
            case 0:
                if (oa != null)
                {
                    if (isBlank)
                    {
                        // Whitespace right after "endobj" still belongs to the object.
                        oa.Length++;
                    }
                    else
                    {
                        PdfFileObject pfo = oa.CreateFileObject(this);
                        if (pfo.OriginalObjectAbsolutePosition == this.startxref)
                        {
                            this.trailer = pfo;
                        }
                        objects[oa.ObjID] = pfo;
                        // Put inputStream just past the current byte
                        // (presumably because creating the object may have moved it).
                        this.inputStream.Seek(position + 1, SeekOrigin.Begin);
                        oa = null;
                        streamPositionCandidate = -2;
                        streamLengthCandidate = -1;
                    }
                }
                // An object ID starts with a digit 1-9.
                if (c > 48 && c < 58)
                {
                    addressCandidate = position;
                    objIDCandidate = c - 48;
                    step = 1;
                }
                break;

            // ---- header: "<id> 0 obj" ----
            case 1:
                if (c > 47 && c < 58)
                {
                    objIDCandidate = objIDCandidate * 10 + c - 48;
                }
                else
                {
                    step = (c == ' ') ? 2 : 0;
                }
                break;
            case 2:
                step = (c == '0') ? 3 : 0;
                break;
            case 3:
                step = (c == ' ') ? 4 : 0;
                break;
            case 4:
                step = (c == 'o') ? 5 : 0;
                break;
            case 5:
                step = (c == 'b') ? 6 : 0;
                break;
            case 6:
                step = (c == 'j') ? 7 : 0;
                break;

            // ---- object body ----
            case 7:
                if (objIDCandidate != lastObjID)
                {
                    lastObjID = objIDCandidate;
                }
                if (c == 'e')
                {
                    // possible beginning of "endobj"
                    step = 29;
                }
                else if (c == 's')
                {
                    // possible beginning of "stream"
                    step = 9;
                }
                break;

            // ---- "stream" keyword ----
            case 9:
                step = (c == 't') ? 10 : 7;
                break;
            case 10:
                step = (c == 'r') ? 11 : 7;
                break;
            case 11:
                step = (c == 'e') ? 12 : 7;
                break;
            case 12:
                step = (c == 'a') ? 13 : 7;
                break;
            case 13:
                step = (c == 'm') ? 14 : 7;
                break;
            case 14:
                step = isBlank ? 15 : 7;
                break;
            case 15:
                // Skip the whitespace after "stream"; the first other byte starts the data.
                if (!isBlank)
                {
                    step = 16;
                    streamPositionCandidate = position;
                }
                break;

            // ---- stream data ----
            case 16:
                if (c == 'e')
                {
                    // possible beginning of "endstream"; the length is left as it is
                    step = 17;
                    break;
                }
                streamLengthCandidate = (int)(position - streamPositionCandidate + 1);
                break;
            case 17:
                step = (c == 'n') ? 18 : 16;
                break;
            case 18:
                step = (c == 'd') ? 19 : 16;
                break;
            case 19:
                if (c == 'o')
                {
                    // "endo": continue as "endobj"
                    step = 32;
                }
                else if (c == 's')
                {
                    // "ends": continue as "endstream"
                    step = 20;
                }
                else
                {
                    step = 16;
                }
                break;
            case 20:
                step = (c == 't') ? 21 : 16;
                break;
            case 21:
                step = (c == 'r') ? 22 : 16;
                break;
            case 22:
                step = (c == 'e') ? 23 : 16;
                break;
            case 23:
                step = (c == 'a') ? 24 : 16;
                break;
            case 24:
                step = (c == 'm') ? 25 : 16;
                break;
            case 25:
                step = isBlank ? 28 : 16;
                break;

            // ---- "endobj" keyword ----
            case 28:
                step = (c == 'e') ? 29 : 7;
                break;
            case 29:
                step = (c == 'n') ? 30 : 7;
                break;
            case 30:
                step = (c == 'd') ? 31 : 7;
                break;
            case 31:
                step = (c == 'o') ? 32 : 7;
                break;
            case 32:
                step = (c == 'b') ? 33 : 7;
                break;
            case 33:
                if (c == 'j')
                {
                    step = 0;
                    // Registration is deferred to state 0 so that trailing whitespace
                    // can still be added to the object's length.
                    oa = new ObjectAddress(
                        objIDCandidate,
                        addressCandidate,
                        (int)(position - addressCandidate + 1),
                        streamPositionCandidate,
                        streamLengthCandidate);
                }
                else
                {
                    step = 7;
                }
                break;
        }
        position++;
    }

    // An object whose "endobj" is followed only by whitespace (or nothing) up to the end
    // of the stream never reaches the registration branch in state 0; register it here.
    if (oa != null)
    {
        PdfFileObject last = oa.CreateFileObject(this);
        if (last.OriginalObjectAbsolutePosition == this.startxref)
        {
            this.trailer = last;
        }
        objects[oa.ObjID] = last;
    }

    // Collect the object streams first: the dictionary must not be modified while its
    // Values collection is being enumerated.
    List<PdfFileObject> ObjStms = new List<PdfFileObject>();
    foreach (PdfFileObject pfo in objects.Values)
    {
        if (pfo.Type == PdfObjectTypes.ObjStm)
        {
            ObjStms.Add(pfo);
        }
    }
    foreach (PdfFileObject ObjStm in ObjStms)
    {
        foreach (PdfFileObject pfo2 in ObjStm.GetStreamObjects())
        {
            objects[pfo2.ObjID] = pfo2;
        }
    }

    return objects;
}
/// <summary>
/// Collects the trailer text that starts at or after <paramref name="xref"/>.
/// </summary>
/// <param name="xref">Absolute offset in <c>inputStream</c> from which lines are read.</param>
/// <returns>
/// A <c>PdfFileObject</c> with <c>ObjID</c> 0 and type <c>PdfObjectTypes.Trailer</c>.
/// <c>OriginalText</c> contains the trimmed lines from the last line beginning with
/// "trailer" up to, but excluding, the "startxref" line, each followed by "\r". It is
/// left empty when "startxref" appears before any "trailer" line.
/// </returns>
/// <exception cref="Exception">No "startxref" line exists before the end of the stream.</exception>
private PdfFileObject ParseTrailer(long xref)
{
    PdfFileObject pfo = new PdfFileObject(this);
    pfo.OriginalText = "";
    pfo.ObjID = 0;
    pfo.Type = PdfObjectTypes.Trailer;

    this.inputStream.Seek(xref, SeekOrigin.Begin);

    // Not disposed on purpose: disposing a StreamReader closes the stream it wraps,
    // and inputStream is a field that remains in use after this method returns.
    StreamReader sr = new StreamReader(this.inputStream);
    System.Text.StringBuilder collected = new System.Text.StringBuilder();
    bool istrailer = false;
    string line;

    while ((line = sr.ReadLine()) != null)
    {
        line = line.Trim();
        if (line == "startxref")
        {
            if (istrailer)
            {
                pfo.OriginalText = collected.ToString();
            }
            return pfo;
        }

        // "trailer" is a format keyword, so compare ordinally rather than by culture.
        if (line.StartsWith("trailer", System.StringComparison.Ordinal))
        {
            // Restart collection: only the last trailer section is kept.
            collected.Length = 0;
            istrailer = true;
        }

        if (istrailer)
        {
            collected.Append(line).Append("\r");
        }
    }

    throw new Exception("Cannot find trailer");
}
/// <summary>
/// Appends pages of a parsed PDF to the merge target.
/// </summary>
/// <param name="pdfParser">Parser of the source document; supplies the page objects.</param>
/// <param name="PageNumbers">
/// Zero-based indexes into the parser's page array, in output order. When null, every
/// page is added in document order.
/// </param>
/// <remarks>
/// Works in three passes: (1) collect the selected pages and every object they reference,
/// (2) assign each collected object a new ID — objects whose hash was already written are
/// mapped to the existing ID when <c>OptimizeStreams</c> is set — and (3) transform and
/// write the objects, recording their positions in <c>xrefs</c>. Progress is reported
/// through <c>PdfMergerProgress</c>. The method returns early, without undoing work
/// already done, as soon as <c>CancelPending</c> is observed.
/// </remarks>
public void Add(PdfParser pdfParser, int[] PageNumbers)
{
    if (this.CancelPending)
    {
        return;
    }

    this.PdfMergerProgress(this, new PdfMergerProgressEventArgs(this.elementCount, 0, 0, "Analyzing PDF Structure"));

    PdfFileObject[] pages = pdfParser.GetAllPages();
    ArrayList selectedPages = new ArrayList();

    #region gets needed objects
    Hashtable relatedObjects = new Hashtable();

    if (PageNumbers == null)
    {
        // No explicit selection: take every page, in order.
        PageNumbers = new int[pages.Length];
        for (int i = 0; i < pages.Length; i++)
        {
            PageNumbers[i] = i;
        }
    }

    int pageCount = PageNumbers.Length;
    int pageStep = Math.Max(pageCount / 20, 10);   // report roughly every 5%, at least every 10 pages
    for (int i = 0; i < pageCount; i++)
    {
        if (this.CancelPending)
        {
            return;
        }

        int pageOrdinal = i + 1;
        if (pageOrdinal == pageCount || pageOrdinal % pageStep == 0)
        {
            this.PdfMergerProgress(this, new PdfMergerProgressEventArgs(this.elementCount, pageOrdinal, pageCount, "Analyzing Page {0} of {1}"));
        }

        PdfFileObject page = pages[PageNumbers[i]];
        selectedPages.Add(page);
        page.PopulateRelatedObjects(relatedObjects);
    }

    ArrayList neededObjects = new ArrayList(relatedObjects.Values);
    neededObjects.Sort(new PdfFileObjectNumberComparer());
    #endregion

    #region creates IDs transformation table
    int objectCount = neededObjects.Count;
    int objectStep = Math.Max(objectCount / 20, 50);   // also used by the writing pass below
    Hashtable transformationTable = new Hashtable();
    for (int i = 0; i < objectCount; i++)
    {
        PdfFileObject candidate = (PdfFileObject)neededObjects[i];
        if (this.CancelPending)
        {
            return;
        }

        int ordinal = i + 1;
        if (ordinal == objectCount || ordinal % objectStep == 0)
        {
            this.PdfMergerProgress(this, new PdfMergerProgressEventArgs(this.elementCount, ordinal, objectCount, "Rebuilding Indexes for object {0} of {1}"));
        }

        // The hash is only computed when duplicate elimination is enabled.
        string hash = null;
        if (OptimizeStreams)
        {
            hash = candidate.Hash;
        }

        bool alreadyWritten = hash != null && this.alreadyUsedObjects.ContainsKey(hash);
        if (alreadyWritten)
        {
            // Reuse the ID of the identical object written earlier.
            candidate.excludedByHashComparison = true;
            transformationTable.Add(candidate.ObjID, (int)this.alreadyUsedObjects[hash]);
        }
        else
        {
            number++;
            transformationTable.Add(candidate.ObjID, number);
            if (hash != null)
            {
                this.alreadyUsedObjects.Add(hash, number);
            }
        }
    }
    #endregion

    for (int i = 0; i < objectCount; i++)
    {
        PdfFileObject current = (PdfFileObject)neededObjects[i];
        if (this.CancelPending)
        {
            return;
        }

        int ordinal = i + 1;
        if (ordinal == objectCount || ordinal % objectStep == 0)
        {
            this.PdfMergerProgress(this, new PdfMergerProgressEventArgs(this.elementCount, ordinal, objectCount, "Writing object {0} of {1}"));
        }

        current.Transform(transformationTable);
        if (!current.excludedByHashComparison)
        {
            this.xrefs.Add(pos);
            this.pos += current.WriteToStream(this.target);
        }

        // Drop the transformed text once the object has been handled.
        current.TransformatedTextPart = null;
    }

    foreach (PdfFileObject page in selectedPages)
    {
        this.pageNumbers.Add(page.TransformatedObjID);
    }

    this.elementCount++;
}