示例#1
0
        /**
         * Matches an MCID found in the content stream against the list of
         * pending structure items. Items at the head of the list that
         * CheckMCID reports as 0 are treated as structure objects: they are
         * converted to XObjects and removed, after which the MCID is checked
         * against the next item. An item reported as 1 is the match for the
         * MCID and is removed.
         * @param mcid the MCID that was encountered in the content stream;
         *             a null value is ignored
         * @throws DocumentException if no structure item matches the MCID,
         *             including when the list of structure items is empty
         */
        protected virtual void DealWithMcid(PdfNumber mcid)
        {
            if (mcid == null)
            {
                return;
            }
            if (items.Count == 0)
            {
                // Nothing left to match against. This can also be reached through the
                // recursive call below once the last item has been removed. Report it
                // the same way as any other unmatched MCID instead of letting items[0]
                // fail with an ArgumentOutOfRangeException.
                LOGGER.Warn("MCID not found! There's probably an error in your form!");
                throw new DocumentException(MessageLocalization.GetComposedMessage("can.t.read.document.structure"));
            }
            StructureItem item = items[0];

            if (LOGGER.IsLogging(Level.INFO))
            {
                LOGGER.Info(String.Format("Encountered MCID {0} in content, comparing with {1}", mcid, item));
            }
            switch (item.CheckMCID(pageref.Number, mcid.IntValue))
            {
            case 0:
                // The head item is a structure object: convert it, drop it,
                // and retry the same MCID against the new head of the list.
                StructureObject obj = (StructureObject)item;
                ConvertToXObject(obj);
                LOGGER.Info("Removed structure item from stack.");
                items.RemoveAt(0);
                DealWithMcid(mcid);
                return;

            case 1:
                // The head item corresponds to this MCID: it has been dealt with.
                LOGGER.Info("Removed structure item from stack.");
                items.RemoveAt(0);
                return;

            default:
                LOGGER.Warn("MCID not found! There's probably an error in your form!");
                // hack to deal with MCIDs that are added in the wrong order:
                // look further down the list for the item matching this MCID
                for (int i = 1; i < items.Count; i++)
                {
                    if (items[i].CheckMCID(pageref.Number, mcid.IntValue) == 1)
                    {
                        LOGGER.Info("Removed structure item from stack.");
                        items.RemoveAt(i);
                        return;
                    }
                }
                throw new DocumentException(MessageLocalization.GetComposedMessage("can.t.read.document.structure"));
            }
        }
示例#2
0
        /**
         * When an XObject with a StructParent is encountered, the structure
         * item at the head of the list is removed if CheckStructParent
         * reports it as 1 for that StructParent value.
         * Nothing happens when the XObject cannot be found in the page's
         * XObject dictionary, when it has no StructParent entry, or when
         * there are no structure items left.
         * @param xobj the name of an XObject
         */
        protected void DealWithXObj(PdfName xobj)
        {
            PdfDictionary dict = xobjects.GetAsStream(xobj);

            if (dict == null)
            {
                // The name does not resolve to a stream in the XObject dictionary;
                // there is no StructParent to look at.
                return;
            }
            PdfNumber structParent = dict.GetAsNumber(PdfName.STRUCTPARENT);

            LOGGER.Info(String.Format("Encountered StructParent {0} in content", structParent));
            if (structParent == null)
            {
                return;
            }
            if (items.Count == 0)
            {
                // No pending structure item to compare with; a non-matching item is
                // left alone below, so an empty list is likewise left alone.
                return;
            }
            StructureItem item = items[0];

            if (item.CheckStructParent(pageref.Number, structParent.IntValue) == 1)
            {
                items.RemoveAt(0);
            }
        }
示例#3
0
        /**
         * Parses the content of a page, replacing appearances of annotations
         * with Form XObjects, and stores the resulting bytes back into the
         * page's content stream.
         * @param page a page dictionary
         * @param pageref the indirect reference of that page; its number is
         *                used to decide which structure items belong to the page
         * @throws DocumentException if the page has no StructParents entry, or
         *                if an annotation without a StructParent entry is left
         *                on the page after parsing
         * @throws IOException
         */
        public void Parse(PdfDictionary page, PdfIndirectReference pageref)
        {
            LOGGER.Info("Parsing page with reference " + pageref);
            // initializing member variables
            baos         = new MemoryStream();
            this.page    = page;
            this.pageref = pageref;

            structParents = page.GetAsNumber(PdfName.STRUCTPARENTS);
            if (structParents == null)
            {
                throw new DocumentException(MessageLocalization.GetComposedMessage("can.t.read.document.structure"));
            }
            annots = page.GetAsArray(PdfName.ANNOTS);
            if (annots == null)
            {
                annots = new PdfArray();
            }
            PdfDictionary resources = page.GetAsDict(PdfName.RESOURCES);

            // make sure the page resources have an XObject dictionary to work with
            xobjects = resources.GetAsDict(PdfName.XOBJECT);
            if (xobjects == null)
            {
                xobjects = new PdfDictionary();
                resources.Put(PdfName.XOBJECT, xobjects);
            }
            // parsing the content stream of the page
            PRStream stream = (PRStream)page.GetAsStream(PdfName.CONTENTS);

            byte[]           contentBytes = PdfReader.GetStreamBytes(stream);
            PRTokeniser      tokeniser    = new PRTokeniser(new RandomAccessFileOrArray(RASFACTORY.CreateSource(contentBytes)));
            PdfContentParser ps           = new PdfContentParser(tokeniser);
            List <PdfObject> operands     = new List <PdfObject>();

            // the last entry of every parsed operand list is the operator itself
            while (ps.Parse(operands).Count > 0)
            {
                PdfLiteral opr = (PdfLiteral)operands[operands.Count - 1];
                ProcessOperator(opr, operands);
            }
            // dealing with orphans: structure objects of this page that are still
            // at the head of the list after the content stream has been parsed
            while (items.Count > 0 && items[0].GetPageref() == pageref.Number)
            {
                StructureObject orphan = items[0] as StructureObject;
                if (orphan == null)
                {
                    // Only structure objects can be converted. Without this exit the
                    // loop would never end, because the head item is not removed and
                    // the loop condition stays true.
                    break;
                }
                ConvertToXObject(orphan);
                items.RemoveAt(0);
            }
            // Size is the element count that the loop below also relies on.
            // NOTE(review): the original tested annots.Length here; confirm that
            // Length was not meant to be anything other than the element count.
            if (annots.Size == 0)
            {
                page.Remove(PdfName.ANNOTS);
            }
            else
            {
                // every annotation that is still present must be tagged
                PdfDictionary annot;
                for (int i = 0; i < annots.Size; i++)
                {
                    annot = annots.GetAsDict(i);
                    if (annot.GetAsNumber(PdfName.STRUCTPARENT) == null)
                    {
                        throw new DocumentException(MessageLocalization.GetComposedMessage("could.not.flatten.file.untagged.annotations.found"));
                    }
                }
            }
            // replacing the content stream with the bytes collected in baos
            // (presumably written by ProcessOperator and ConvertToXObject - confirm)
            baos.Flush();
            baos.Close();
            stream.SetData(baos.ToArray());
            // showing how many items are left
            LOGGER.Info(String.Format("There are {0} items left for processing", items.Count));
        }