//////////////////////////////////////////////////////////////////////////////////////////////////
        // GetStructElementBBox
        // Accumulates into bbox the union of the bounding boxes of all page objects whose
        // content mark carries an mcid referenced by struct_elem, descending recursively
        // into child structure elements.
        //
        // struct_elem - structure element whose page content is measured
        // bbox        - in/out rectangle; a rectangle with zero width or zero height is treated
        //               as "not set yet" and is replaced by the next matching object's bbox
        // returns     - true when at least one matching page object was found for struct_elem
        //               or for any of its descendants
        //////////////////////////////////////////////////////////////////////////////////////////////////
        private static bool GetStructElementBBox(PdsStructElement struct_elem, ref PdfRect bbox)
        {
            bool result = false;

            // the structure tree does not change while iterating, fetch it once
            var struct_tree  = struct_elem.GetStructTree();
            int num_children = struct_elem.GetNumChildren();

            for (int i = 0; i < num_children; i++)
            {
                var child_type = struct_elem.GetChildType(i);

                if (child_type == PdfStructElementType.kPdsStructChildPageContent)
                {
                    // acquire page on which the element is present
                    PdfDoc  doc  = struct_tree.GetDoc();
                    PdfPage page = doc.AcquirePage(struct_elem.GetChildPageNumber(i));
                    if (page == null)
                    {
                        // page could not be acquired, nothing to measure for this child
                        continue;
                    }

                    try
                    {
                        // find page objects with this mcid on the page
                        int mcid    = struct_elem.GetChildMcid(i);
                        var content = page.GetContent();
                        for (int j = 0; j < content.GetNumObjects(); j++)
                        {
                            PdsPageObject page_object = content.GetObject(j);

                            // check if this page object has the same mcid
                            PdsContentMark content_mark = page_object.GetContentMark();
                            if (content_mark == null || content_mark.GetTagMcid() != mcid)
                            {
                                continue;
                            }

                            PdfRect elem_bbox = page_object.GetBBox();
                            if ((bbox.left - bbox.right == 0) || (bbox.top - bbox.bottom == 0))
                            {
                                // bbox is still degenerate: start from this object's bbox
                                bbox = elem_bbox;
                            }
                            else
                            {
                                // grow bbox so it also covers this object
                                bbox.left   = Math.Min(bbox.left, elem_bbox.left);
                                bbox.right  = Math.Max(bbox.right, elem_bbox.right);
                                bbox.top    = Math.Max(bbox.top, elem_bbox.top);
                                bbox.bottom = Math.Min(bbox.bottom, elem_bbox.bottom);
                            }
                            result = true;
                        }
                    }
                    finally
                    {
                        // every AcquirePage must be paired with Release, otherwise the page leaks
                        page.Release();
                    }
                }
                else if (child_type == PdfStructElementType.kPdsStructChildElement)
                {
                    PdsObject        kid_obj  = struct_elem.GetChildObject(i);
                    PdsStructElement kid_elem = struct_tree.GetStructElementFromObject(kid_obj);

                    // propagate the child's result: content found only in descendants
                    // still updates bbox and must be reported to the caller
                    if (kid_elem != null && GetStructElementBBox(kid_elem, ref bbox))
                    {
                        result = true;
                    }
                }
            }
            return result;
        }
Example #2
0
        // Gathers the bounding boxes of page objects tagged with the requested mcid.
        // When obj itself carries the mcid, its own bbox is the only entry returned;
        // otherwise, if obj is a form, the objects nested inside it are searched recursively.
        // An empty list is returned when nothing matches.
        static List <PdfRect> GetMcidBBoxes(PdsPageObject obj, int mcid)
        {
            var found = new List <PdfRect>();

            // direct hit: the object itself is marked with the mcid
            var mark = obj.GetContentMark();
            bool is_tagged = mark != null && mark.GetTagMcid() == mcid;
            if (is_tagged)
            {
                found.Add(obj.GetBBox());
                return found;
            }

            // only forms can contain further page objects
            if (obj.GetObjectType() != PdfPageObjectType.kPdsPageForm)
            {
                return found;
            }

            // descend into the form and merge whatever its members report
            var form  = (PdsForm)obj;
            int index = 0;
            while (index < form.GetNumPageObjects())
            {
                var nested = GetMcidBBoxes(form.GetPageObject(index), mcid);
                found.AddRange(nested);
                index++;
            }
            return found;
        }