/**
 * When an MCID is encountered, the parser checks the stack of
 * structure items and turns an annotation into an XObject if
 * necessary.
 *
 * The head of the stack is compared with the MCID via CheckMCID:
 * a result of 0 converts the head item to an XObject, pops it and
 * retries with the next item; a result of 1 pops the head item;
 * any other result triggers a scan of the rest of the stack.
 *
 * @param mcid the MCID that was encountered in the content stream;
 *             a null value is ignored
 * @throws DocumentException if the stack of structure items is empty,
 *                           or if no item on the stack matches the MCID
 */
protected virtual void DealWithMcid(PdfNumber mcid) {
    if (mcid == null) {
        return;
    }
    // An MCID in the content with nothing left on the stack means the
    // structure tree and the content are out of sync. Report it the same
    // way as an unmatched MCID instead of failing on items[0] with an
    // ArgumentOutOfRangeException (this also covers the recursive call
    // below after the last item was popped).
    if (items.Count == 0) {
        throw new DocumentException(MessageLocalization.GetComposedMessage("can.t.read.document.structure"));
    }
    StructureItem item = items[0];
    if (LOGGER.IsLogging(Level.INFO)) {
        LOGGER.Info(String.Format("Encountered MCID {0} in content, comparing with {1}", mcid, item));
    }
    switch (item.CheckMCID(pageref.Number, mcid.IntValue)) {
        case 0:
            // NOTE(review): presumably 0 means the head item is an annotation
            // object that has to be flattened before the MCID can be matched;
            // confirm against StructureItem.CheckMCID.
            StructureObject obj = (StructureObject)item;
            ConvertToXObject(obj);
            LOGGER.Info("Removed structure item from stack.");
            items.RemoveAt(0);
            // Retry the same MCID against the new head of the stack.
            DealWithMcid(mcid);
            return;
        case 1:
            LOGGER.Info("Removed structure item from stack.");
            items.RemoveAt(0);
            return;
        default:
            LOGGER.Warn("MCID not found! There's probably an error in your form!");
            // hack to deal with MCIDs that are added in the wrong order:
            // look further down the stack for an item that reports 1.
            for (int i = 1; i < items.Count; i++) {
                StructureItem candidate = items[i];
                if (candidate.CheckMCID(pageref.Number, mcid.IntValue) == 1) {
                    LOGGER.Info("Removed structure item from stack.");
                    items.RemoveAt(i);
                    return;
                }
            }
            throw new DocumentException(MessageLocalization.GetComposedMessage("can.t.read.document.structure"));
    }
}
/**
 * When an XObject with a StructParent is encountered,
 * we want to remove it from the stack.
 *
 * Nothing happens when the name does not resolve to a stream in the
 * page's XObject resources, when the stream has no StructParent entry,
 * when the stack is empty, or when the head of the stack does not
 * report a match (CheckStructParent result other than 1).
 *
 * @param xobj the name of an XObject
 */
protected void DealWithXObj(PdfName xobj) {
    PdfDictionary dict = xobjects.GetAsStream(xobj);
    if (dict == null) {
        // The content refers to an XObject that is not a stream in the
        // resources; there is no StructParent to look at.
        return;
    }
    PdfNumber structParent = dict.GetAsNumber(PdfName.STRUCTPARENT);
    if (structParent == null) {
        return;
    }
    // Only log once we know there really is a StructParent entry.
    LOGGER.Info(String.Format("Encountered StructParent {0} in content", structParent));
    if (items.Count == 0) {
        // Nothing left to compare with, so nothing to remove.
        return;
    }
    StructureItem item = items[0];
    if (item.CheckStructParent(pageref.Number, structParent.IntValue) == 1) {
        items.RemoveAt(0);
    }
}
/**
 * Parses the content of a page, replacing appearances of annotations
 * with Form XObjects.
 *
 * The rewritten content is collected in a fresh MemoryStream and, once
 * the whole content stream has been processed, written back into the
 * page's content stream.
 *
 * @param page    a page dictionary
 * @param pageref the indirect reference of that page; its number is used
 *                to match structure items against the page
 * @throws DocumentException if the page has no StructParents entry, or if
 *                           an annotation without a StructParent entry is
 *                           still present after parsing
 */
public void Parse(PdfDictionary page, PdfIndirectReference pageref) {
    LOGGER.Info("Parsing page with reference " + pageref);
    // initializing member variables
    baos = new MemoryStream();
    this.page = page;
    this.pageref = pageref;
    structParents = page.GetAsNumber(PdfName.STRUCTPARENTS);
    if (structParents == null) {
        throw new DocumentException(MessageLocalization.GetComposedMessage("can.t.read.document.structure"));
    }
    annots = page.GetAsArray(PdfName.ANNOTS);
    if (annots == null) {
        annots = new PdfArray();
    }
    // NOTE(review): resources is dereferenced without a null check; a page
    // without its own Resources entry would fail here. Confirm whether
    // callers guarantee the entry is present.
    PdfDictionary resources = page.GetAsDict(PdfName.RESOURCES);
    xobjects = resources.GetAsDict(PdfName.XOBJECT);
    if (xobjects == null) {
        xobjects = new PdfDictionary();
        resources.Put(PdfName.XOBJECT, xobjects);
    }
    // parsing the content stream of the page
    PRStream stream = (PRStream)page.GetAsStream(PdfName.CONTENTS);
    byte[] contentBytes = PdfReader.GetStreamBytes(stream);
    PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(RASFACTORY.CreateSource(contentBytes)));
    PdfContentParser ps = new PdfContentParser(tokeniser);
    List<PdfObject> operands = new List<PdfObject>();
    while (ps.Parse(operands).Count > 0) {
        // The operator is the last token of every parsed operand list.
        PdfLiteral opr = (PdfLiteral)operands[operands.Count - 1];
        ProcessOperator(opr, operands);
    }
    // dealing with orphans: items of this page that were never matched
    // while processing the content stream
    while (items.Count > 0 && items[0].GetPageref() == pageref.Number) {
        StructureObject orphan = items[0] as StructureObject;
        if (orphan != null) {
            ConvertToXObject(orphan);
        }
        // Always pop the head item. Popping only StructureObject instances
        // left any other item of this page at the head forever, so the loop
        // never terminated.
        items.RemoveAt(0);
    }
    // Size is the number of elements (the loop below relies on it); the
    // earlier check used Length, which presumably is not the element count,
    // so an empty Annots array was never removed from the page.
    if (annots.Size == 0) {
        page.Remove(PdfName.ANNOTS);
    }
    else {
        for (int i = 0; i < annots.Size; i++) {
            PdfDictionary annot = annots.GetAsDict(i);
            if (annot.GetAsNumber(PdfName.STRUCTPARENT) == null) {
                throw new DocumentException(MessageLocalization.GetComposedMessage("could.not.flatten.file.untagged.annotations.found"));
            }
        }
    }
    // replacing the content stream
    baos.Flush();
    baos.Close();
    stream.SetData(baos.ToArray());
    // showing how many items are left
    LOGGER.Info(String.Format("There are {0} items left for processing", items.Count));
}