/// <summary>
/// Splits the top-level structure elements of <paramref name="document"/> at a page boundary so that
/// new structure can be inserted between the two halves.
/// </summary>
/// <param name="document">document whose structure tree is separated.</param>
/// <param name="startPage">first page (1-based) of the range that forms the "first part".</param>
/// <param name="beforePage">page number at which the split happens; pages from
/// <paramref name="startPage"/> up to, but not including, this page form the first part.</param>
/// <param name="startPageStructTopIndex">index returned as-is when the page range is empty, and used
/// (minus one) as the starting value for the last first-part top-level element index.</param>
/// <returns>
/// <c>-1</c> when the document is not tagged or the page range is invalid; otherwise the index among the
/// structure tree root kids that directly follows the last top-level element of the first part.
/// </returns>
private static int SeparateStructure(PdfDocument document, int startPage, int beforePage, int startPageStructTopIndex) {
    bool rangeIsValid = document.IsTagged()
        && startPage >= 1
        && startPage <= beforePage
        && beforePage <= document.GetNumberOfPages() + 1;
    if (!rangeIsValid) {
        return -1;
    }
    if (beforePage == startPage) {
        // Empty page range: nothing to separate.
        return startPageStructTopIndex;
    }
    if (beforePage == document.GetNumberOfPages() + 1) {
        // The range reaches the end of the document: the split point is after all existing kids.
        return document.GetStructTreeRoot().GetKidsObject().Size();
    }

    // Collect every struct elem (and all of its ancestors) referenced from the pages of the first part.
    // Elems that also have kids on later pages are the ones that will have to be split below.
    ICollection<PdfObject> firstPartElems = new HashSet<PdfObject>();
    for (int pageNum = startPage; pageNum < beforePage; ++pageNum) {
        PdfPage firstPartPage = document.GetPage(pageNum);
        ICollection<PdfMcr> mcrsOnPage = document.GetStructTreeRoot().GetPageMarkedContentReferences(firstPartPage);
        if (mcrsOnPage == null) {
            continue;
        }
        foreach (PdfMcr mcr in mcrsOnPage) {
            firstPartElems.Add(mcr.GetPdfObject());
            PdfDictionary topAncestor = AddAllParentsToSet(mcr, firstPartElems);
            if (topAncestor != null && topAncestor.IsFlushed()) {
                throw new PdfException(PdfException.TagFromTheExistingTagStructureIsFlushedCannotAddCopiedPageTags);
            }
        }
    }

    // Walk the top-level elems: each one that belongs to the first part is cloned, and SeparateKids is
    // asked to move the kids of the second part over to the clone.
    IList<PdfDictionary> clonedTops = new List<PdfDictionary>();
    PdfArray tops = document.GetStructTreeRoot().GetKidsObject();
    int lastTopBefore = startPageStructTopIndex - 1;
    for (int topIndex = 0; topIndex < tops.Size(); ++topIndex) {
        PdfDictionary top = tops.GetAsDictionary(topIndex);
        if (!firstPartElems.Contains(top)) {
            continue;
        }
        lastTopBefore = topIndex;

        StructureTreeCopier.LastClonedAncestor lastCloned = new StructureTreeCopier.LastClonedAncestor();
        lastCloned.ancestor = top;
        PdfDictionary topClone = top.Clone(ignoreKeysForClone);
        topClone.Put(PdfName.P, document.GetStructTreeRoot().GetPdfObject());
        lastCloned.clone = topClone;

        SeparateKids(top, firstPartElems, lastCloned, document);

        // Only clones that actually received kids are kept.
        if (topClone.ContainsKey(PdfName.K)) {
            topClone.MakeIndirect(document);
            clonedTops.Add(topClone);
        }
    }

    // Insert the clones right after the last top-level elem of the first part, preserving their order.
    int splitIndex = lastTopBefore + 1;
    int insertAt = splitIndex;
    foreach (PdfDictionary clonedTop in clonedTops) {
        document.GetStructTreeRoot().AddKidObject(insertAt, clonedTop);
        ++insertAt;
    }
    return splitIndex;
}
/// <summary>
/// Reduces a multi-entry <c>/Filter</c> array to a single filter name by decoding every filter
/// except the last one, which is left in place on the returned dictionary.
/// </summary>
/// <param name="dictionary">stream dictionary whose <c>/Filter</c> entry is expected to be an array of names.</param>
/// <returns>
/// A new dictionary with a single <c>/Filter</c> name. When <c>/Filter</c> is missing, is not an array,
/// or contains no name entries there is nothing to process and a plain clone of
/// <paramref name="dictionary"/> is returned.
/// </returns>
/// <exception cref="NotSupportedException">
/// One of the leading filters (every filter but the last) has no decoder registered in this method.
/// </exception>
private static PdfDictionary ProcessFilters(PdfDictionary dictionary)
{
    // Decoders for the leading filters this method is able to strip.
    var decoders = new Dictionary<string, Func<byte[], byte[]>>
    {
        { "/FlateDecode", d => { var decoder = new FlateDecode(); return decoder.Decode(d); } }
    };

    // A missing or non-array /Filter previously crashed with a NullReferenceException or
    // InvalidCastException; there is simply nothing to unwrap in that case.
    var filterArray = dictionary.Elements["/Filter"] as PdfArray;
    if (filterArray == null)
    {
        return dictionary.Clone();
    }

    // Get all of the filter names, in declaration order.
    var filters = filterArray.Elements
        .Where(e => e.IsName())
        .Select(e => ((PdfName)e).Value)
        .ToList();

    // No usable names: previously this fell through to filters.Last() and threw
    // InvalidOperationException. Leave the dictionary content untouched instead.
    if (filters.Count == 0)
    {
        return dictionary.Clone();
    }

    PdfDictionary result;

    // If only one filter is in the array, just rewrite /Filter as a plain name.
    if (filters.Count == 1)
    {
        result = dictionary.Clone();
        result.Elements["/Filter"] = new PdfName(filters[0]);
        return result;
    }

    // Process each leading filter in order. The last filter is the actual image encoding and is kept.
    byte[] data = dictionary.Stream.Value;
    for (int index = 0; index < filters.Count - 1; index++)
    {
        Func<byte[], byte[]> decode;
        if (!decoders.TryGetValue(filters[index], out decode))
        {
            throw new NotSupportedException(String.Format("Encountered embedded image with multiple filters: \"{0}\". Unable to process the filter: \"{1}\".", String.Join(",", filters), filters[index]));
        }
        data = decode(data);
    }

    // Rebuild the dictionary: the remaining filter first, then every other entry of the source.
    result = new PdfDictionary();
    result.Elements.Add("/Filter", new PdfName(filters.Last()));
    foreach (var element in dictionary.Elements.Where(e => !String.Equals(e.Key, "/Filter", StringComparison.OrdinalIgnoreCase)))
    {
        result.Elements.Add(element.Key, element.Value);
    }
    result.CreateStream(data);
    return result;
}
/// <summary>
/// Clones <paramref name="structElem"/> together with the chain of its parents up to (but not including)
/// <c>lastCloned.ancestor</c>, attaches the top of the cloned chain to <c>lastCloned.clone</c> and then
/// makes <paramref name="structElem"/> and its clone the new "last cloned" pair.
/// Does nothing when <paramref name="structElem"/> already is <c>lastCloned.ancestor</c>.
/// </summary>
/// <param name="structElem">structure element to clone.</param>
/// <param name="lastCloned">holder of the most recently cloned ancestor and its clone; updated in place.</param>
/// <param name="document">document in which the clones are made indirect.</param>
private static void CloneParents(PdfDictionary structElem, StructureTreeCopier.LastClonedAncestor lastCloned, PdfDocument document) {
    if (lastCloned.ancestor == structElem) {
        // Already cloned on a previous call.
        return;
    }

    PdfDictionary structElemClone = (PdfDictionary)structElem.Clone(ignoreKeysForClone).MakeIndirect(document);

    // Climb from structElem towards the last cloned ancestor, cloning each parent on the way
    // and linking each clone to the clone below it.
    PdfDictionary chainTopClone = structElemClone;
    PdfDictionary walker = structElem;
    while (walker.Get(PdfName.P) != lastCloned.ancestor) {
        PdfDictionary parent = walker.GetAsDictionary(PdfName.P);
        PdfDictionary parentClone = (PdfDictionary)parent.Clone(ignoreKeysForClone).MakeIndirect(document);

        chainTopClone.Put(PdfName.P, parentClone);
        parentClone.Put(PdfName.K, chainTopClone);

        chainTopClone = parentClone;
        walker = parent;
    }

    // Hook the cloned chain under the previous clone, then advance the "last cloned" marker.
    PdfStructElem.AddKidObject(lastCloned.clone, -1, chainTopClone);
    lastCloned.clone = structElemClone;
    lastCloned.ancestor = structElem;
}
/// <summary>
/// Produces a copy of the structure dictionary <paramref name="source"/> for <paramref name="toDocument"/>,
/// re-creating its <c>/Obj</c>, <c>/Pg</c> and <c>/K</c> entries on the copy.
/// </summary>
/// <param name="source">dictionary to copy.</param>
/// <param name="objectsToCopy">set forwarded to <c>CopyObjectKid</c> for every kid.</param>
/// <param name="toDocument">document that receives the copy.</param>
/// <param name="page2page">mapping used to translate the <c>/Pg</c> entry of <paramref name="source"/>.</param>
/// <param name="copyFromDestDocument">when true the source is cloned (and made indirect if the source is
/// indirect) instead of being copied with <c>CopyTo</c>, and <c>/Obj</c> is not re-copied.</param>
/// <returns>the copied dictionary.</returns>
private static PdfDictionary CopyObject(PdfDictionary source, ICollection<PdfObject> objectsToCopy, PdfDocument toDocument, IDictionary<PdfDictionary, PdfDictionary> page2page, bool copyFromDestDocument) {
    PdfDictionary copied;
    if (!copyFromDestDocument) {
        copied = source.CopyTo(toDocument, ignoreKeysForCopy, true);
    }
    else {
        copied = source.Clone(ignoreKeysForCopy);
        if (source.IsIndirect()) {
            copied.MakeIndirect(toDocument);
        }
    }

    if (source.ContainsKey(PdfName.Obj)) {
        PdfDictionary referencedObj = source.GetAsDictionary(PdfName.Obj);
        bool mustCopyObj = !copyFromDestDocument && referencedObj != null;
        if (mustCopyObj) {
            // Link annotations could be not added to the toDocument, so we need to identify this case.
            // When obj.copyTo is called, and annotation was already copied, we would get this already created copy.
            // If it was already copied and added, /P key would be set. Otherwise /P won't be set.
            PdfDictionary copiedObj = referencedObj.CopyTo(toDocument, iText.IO.Util.JavaUtil.ArraysAsList(PdfName.P), false);
            copied.Put(PdfName.Obj, copiedObj);
        }
    }

    PdfDictionary sourcePage = source.GetAsDictionary(PdfName.Pg);
    if (sourcePage != null) {
        //TODO It is possible, that pg will not be present in the page2page map. Consider the situation,
        // that we want to copy structElem because it has marked content dictionary reference, which belongs to the page from page2page,
        // but the structElem itself has /Pg which value could be arbitrary page.
        copied.Put(PdfName.Pg, page2page.Get(sourcePage));
    }

    PdfObject kids = source.Get(PdfName.K);
    if (kids == null) {
        return copied;
    }

    if (!kids.IsArray()) {
        // Single kid: store it directly, if it was copied at all.
        PdfObject onlyKidCopy = CopyObjectKid(kids, copied, objectsToCopy, toDocument, page2page, copyFromDestDocument);
        if (onlyKidCopy != null) {
            copied.Put(PdfName.K, onlyKidCopy);
        }
        return copied;
    }

    PdfArray sourceKids = (PdfArray)kids;
    PdfArray copiedKids = new PdfArray();
    for (int kidIndex = 0; kidIndex < sourceKids.Size(); kidIndex++) {
        PdfObject kidCopy = CopyObjectKid(sourceKids.Get(kidIndex), copied, objectsToCopy, toDocument, page2page, copyFromDestDocument);
        if (kidCopy != null) {
            copiedKids.Add(kidCopy);
        }
    }
    // TODO new array may be empty or with single element
    copied.Put(PdfName.K, copiedKids);
    return copied;
}
/// <summary>
/// Copies structure to a
/// <paramref name="destDocument"/>
/// and inserts it in a specified position in the document.
/// <br/><br/>
/// NOTE: Works only for
/// <c>PdfStructTreeRoot</c>
/// that is read from the document opened in reading mode,
/// otherwise an exception is thrown.
/// <br/>
/// Also, to insert a tagged page into existing tag structure, existing tag structure shouldn't be flushed, otherwise
/// an exception may be raised.
/// <br/>
/// Does nothing when <paramref name="destDocument"/> is not tagged.
/// </summary>
/// <param name="destDocument">document to copy structure to.</param>
/// <param name="insertBeforePage">indicates where the structure is to be inserted.</param>
/// <param name="page2page">association between original page and copied page.</param>
/// <param name="callingDocument">forwarded unchanged to the overload that performs the actual copy.</param>
public static void CopyTo(PdfDocument destDocument, int insertBeforePage, IDictionary<PdfPage, PdfPage> page2page, PdfDocument callingDocument) {
    if (!destDocument.IsTagged()) {
        return;
    }

    // Here we separate the structure tree in two parts: struct elems that belong to the pages which indexes are
    // less than insertBeforePage and those struct elems that belong to other pages. Some elems might belong
    // to both parts and actually these are the ones that we are looking for.
    ICollection<PdfObject> firstPartElems = new HashSet<PdfObject>();
    PdfStructTreeRoot destStructTreeRoot = destDocument.GetStructTreeRoot();
    for (int i = 1; i < insertBeforePage; ++i) {
        PdfPage pageOfFirstHalf = destDocument.GetPage(i);
        ICollection<PdfMcr> pageMcrs = destStructTreeRoot.GetPageMarkedContentReferences(pageOfFirstHalf);
        if (pageMcrs == null) {
            continue;
        }
        foreach (PdfMcr mcr in pageMcrs) {
            firstPartElems.Add(mcr.GetPdfObject());
            PdfDictionary top = AddAllParentsToSet(mcr, firstPartElems);
            // AddAllParentsToSet may yield no top element (SeparateStructure guards the same call);
            // only a real, flushed top element makes the insertion impossible.
            if (top != null && top.IsFlushed()) {
                throw new PdfException(PdfException.TagFromTheExistingTagStructureIsFlushedCannotAddCopiedPageTags);
            }
        }
    }

    IList<PdfDictionary> clonedTops = new List<PdfDictionary>();
    PdfArray tops = destStructTreeRoot.GetKidsObject();

    // Now we "walk" through all the elems which belong to the first part, and look for the ones that contain both
    // kids from first and second part. We clone found elements and move kids from the second part to cloned elems.
    //
    // Start at -1 so that, when no top-level elem belongs to the preceding pages, the copied structure is
    // inserted at index 0 (before everything) instead of after the first existing top-level elem.
    int lastTopBefore = -1;
    for (int i = 0; i < tops.Size(); ++i) {
        PdfDictionary top = tops.GetAsDictionary(i);
        if (!firstPartElems.Contains(top)) {
            continue;
        }
        lastTopBefore = i;

        StructureTreeCopier.LastClonedAncestor lastCloned = new StructureTreeCopier.LastClonedAncestor();
        lastCloned.ancestor = top;
        PdfDictionary topClone = top.Clone(ignoreKeysForClone);
        topClone.Put(PdfName.P, destStructTreeRoot.GetPdfObject());
        lastCloned.clone = topClone;

        SeparateKids(top, firstPartElems, lastCloned);

        // Only clones that actually received kids are kept.
        if (topClone.ContainsKey(PdfName.K)) {
            topClone.MakeIndirect(destDocument);
            clonedTops.Add(topClone);
        }
    }

    // Put the clones right after the last top-level elem of the first part, preserving their order.
    int insertIndex = lastTopBefore + 1;
    for (int i = 0; i < clonedTops.Count; ++i) {
        destStructTreeRoot.AddKidObject(insertIndex + i, clonedTops[i]);
    }

    CopyTo(destDocument, page2page, callingDocument, false, insertIndex);
}
/// <summary>
/// Clones a dictionary entry by entry, letting the filter matched for <paramref name="obj"/> decide
/// which entries are cloned and notifying it after each cloned entry and after the whole dictionary.
/// </summary>
/// <param name="obj">dictionary to clone.</param>
/// <param name="data">not used by this method.</param>
/// <returns>the cloned dictionary.</returns>
public override PdfObject Visit(PdfDictionary obj, object data)
{
    Filter cloneFilter = MatchFilter(obj);
    PdfDictionary clone = (PdfDictionary)obj.Clone();

    // Start from an empty entry table and repopulate it through the filter.
    clone.entries = new Dictionary<PdfName,PdfDirectObject>();
    foreach(KeyValuePair<PdfName,PdfDirectObject> entry in obj.entries)
    {
        PdfDirectObject sourceValue = entry.Value;
        if(!cloneFilter.BeforeClone(this, clone, entry.Key, sourceValue))
        {
            // The filter rejected this entry; it is left out of the clone.
            continue;
        }

        PdfDirectObject cloneValue = null;
        if(sourceValue != null)
        {
            cloneValue = (PdfDirectObject)sourceValue.Accept(this, null);
        }
        clone[entry.Key] = cloneValue;
        cloneFilter.AfterClone(this, clone, entry.Key, cloneValue);
    }

    cloneFilter.AfterClone(this, clone, obj);
    return clone;
}
/// <summary>
/// Produces a copy of the structure dictionary <paramref name="source"/> according to
/// <paramref name="copyingParams"/>, re-creating its <c>/Pg</c> and <c>/K</c> entries (and, when copying
/// between documents, its <c>/Obj</c> and <c>/NS</c> entries) on the copy.
/// </summary>
/// <param name="source">dictionary to copy.</param>
/// <param name="destPage">page used for <c>/Pg</c> when the source page has to be replaced.</param>
/// <param name="parentChangePg">incoming "page was replaced" flag; recomputed when <paramref name="source"/>
/// has its own <c>/Pg</c>, then forwarded to <c>CopyObjectKid</c>.</param>
/// <param name="copyingParams">target document, page mapping and copy mode.</param>
/// <returns>the copied dictionary.</returns>
private static PdfDictionary CopyObject(PdfDictionary source, PdfDictionary destPage, bool parentChangePg, StructureTreeCopier.StructElemCopyingParams copyingParams) {
    PdfDictionary copied;
    if (copyingParams.IsCopyFromDestDocument()) {
        //TODO: detect wether object is needed to be cloned at all
        copied = source.Clone(ignoreKeysForClone);
        if (source.IsIndirect()) {
            copied.MakeIndirect(copyingParams.GetToDocument());
        }

        PdfDictionary sourcePage = source.GetAsDictionary(PdfName.Pg);
        if (sourcePage != null && copyingParams.IsCopyFromDestDocument()) {
            // The page is replaced only when it differs from the destination page.
            parentChangePg = sourcePage != destPage;
            if (parentChangePg) {
                copied.Put(PdfName.Pg, destPage);
            }
        }
    }
    else {
        copied = source.CopyTo(copyingParams.GetToDocument(), ignoreKeysForCopy, true);

        PdfDictionary referencedObj = source.GetAsDictionary(PdfName.Obj);
        if (referencedObj != null) {
            // Link annotations could be not added to the toDocument, so we need to identify this case.
            // When obj.copyTo is called, and annotation was already copied, we would get this already created copy.
            // If it was already copied and added, /P key would be set. Otherwise /P won't be set.
            PdfDictionary copiedObj = referencedObj.CopyTo(copyingParams.GetToDocument(), JavaUtil.ArraysAsList(PdfName.P), false);
            copied.Put(PdfName.Obj, copiedObj);
        }

        PdfDictionary sourceNamespace = source.GetAsDictionary(PdfName.NS);
        if (sourceNamespace != null) {
            copied.Put(PdfName.NS, CopyNamespaceDict(sourceNamespace, copyingParams));
        }

        PdfDictionary sourcePage = source.GetAsDictionary(PdfName.Pg);
        if (sourcePage != null) {
            // Fall back to the destination page when the source page has no mapped counterpart.
            PdfDictionary mappedPage = copyingParams.GetPage2page().Get(sourcePage);
            parentChangePg = mappedPage == null;
            copied.Put(PdfName.Pg, parentChangePg ? destPage : mappedPage);
        }
    }

    PdfObject kids = source.Get(PdfName.K);
    if (kids == null) {
        return copied;
    }

    if (!kids.IsArray()) {
        // Single kid: store it directly, if it was copied at all.
        PdfObject onlyKidCopy = CopyObjectKid(kids, copied, destPage, parentChangePg, copyingParams);
        if (onlyKidCopy != null) {
            copied.Put(PdfName.K, onlyKidCopy);
        }
        return copied;
    }

    PdfArray sourceKids = (PdfArray)kids;
    PdfArray copiedKids = new PdfArray();
    for (int kidIndex = 0; kidIndex < sourceKids.Size(); kidIndex++) {
        PdfObject kidCopy = CopyObjectKid(sourceKids.Get(kidIndex), copied, destPage, parentChangePg, copyingParams);
        if (kidCopy != null) {
            copiedKids.Add(kidCopy);
        }
    }

    // No /K for an empty result; a lone kid is stored directly rather than wrapped in an array.
    if (!copiedKids.IsEmpty()) {
        if (copiedKids.Size() != 1) {
            copied.Put(PdfName.K, copiedKids);
        }
        else {
            copied.Put(PdfName.K, copiedKids.Get(0));
        }
    }
    return copied;
}