//////////////////////////////////////////////////////////////////////////////////////////////////
// GetPageObjectTextState
// Returns the text state of the text object tagged with the specified mcid.
//
// page_object - object to inspect; form XObjects are searched recursively
// mcid        - marked-content id the text object's content mark must carry
//
// Returns the first text state whose font is not null, or a newly
// constructed PdfTextState when nothing matches.
//////////////////////////////////////////////////////////////////////////////////////////////////
static PdfTextState GetPageObjectTextState(PdsPageObject page_object, int mcid)
{
    switch (page_object.GetObjectType())
    {
        case PdfPageObjectType.kPdsPageText:
        {
            var textObject = (PdsText)page_object;

            // only a text object whose content mark carries the requested mcid qualifies
            PdsContentMark mark = page_object.GetContentMark();
            if (mark == null || mark.GetTagMcid() != mcid)
            {
                break;
            }

            PdfTextState state = textObject.GetTextState();
            if (state.font != null)
            {
                return state;
            }
            break;
        }

        case PdfPageObjectType.kPdsPageForm:
        {
            // descend into the form XObject and look for the text object there
            var formContent = ((PdsForm)page_object).GetContent();
            for (int index = 0; index < formContent.GetNumObjects(); index++)
            {
                PdfTextState nested = GetPageObjectTextState(formContent.GetObject(index), mcid);
                if (nested.font != null)
                {
                    return nested;
                }
            }
            break;
        }
    }

    // nothing matched: hand back an empty text state
    return new PdfTextState();
}
// Processes a single page object: images are passed to ExtractImage, and
// form XObjects are walked recursively so that nested objects are processed too.
// Other object types are ignored.
//
// page     - page the object belongs to (forwarded to ExtractImage)
// obj      - page object to process; a null value triggers PdfixEngine.ThrowException()
// savePath - forwarded unchanged to ExtractImage
private static void ProcessPageObject(PdfPage page, PdsPageObject obj, string savePath)
{
    if (obj == null)
    {
        PdfixEngine.ThrowException();
    }

    var kind = obj.GetObjectType();
    if (kind == PdfPageObjectType.kPdsPageImage)
    {
        ExtractImage(page, (PdsImage)obj, savePath);
    }
    else if (kind == PdfPageObjectType.kPdsPageForm)
    {
        // recurse into every object held by the form's content
        var nested = ((PdsForm)obj).GetContent();
        for (int index = 0; index < nested.GetNumObjects(); index++)
        {
            ProcessPageObject(page, nested.GetObject(index), savePath);
        }
    }
}
// Writes the text of a text page object to the console when one of the
// entries returned by GetPageObjectLayers(page_object) has the same key and
// value as the requested layer. Objects that are not text are ignored.
//
// page_object - page object to check
// layer       - key/value pair that an entry of the object's layers must equal
internal static void CheckPageObject(PdsPageObject page_object, KeyValuePair<string, int> layer)
{
    if (page_object.GetObjectType() != PdfPageObjectType.kPdsPageText)
    {
        return;
    }

    var textObject = (PdsText)page_object;
    string content = textObject.GetText();
    var objectLayers = GetPageObjectLayers(page_object);

    foreach (KeyValuePair<string, int> candidate in objectLayers)
    {
        if (layer.Key != candidate.Key || layer.Value != candidate.Value)
        {
            continue;
        }

        // print the text once, on the first matching entry
        Console.WriteLine(content);
        return;
    }
}
// Collects the bounding boxes of all page objects tagged with the specified mcid.
//
// obj  - page object to inspect
// mcid - marked-content id to look for
//
// When obj's own content mark carries the mcid, the result holds only obj's
// bounding box and its children are not visited. Otherwise, if obj is a form
// XObject, the boxes found in its page objects are gathered recursively.
// The returned list is empty when nothing matches.
static List<PdfRect> GetMcidBBoxes(PdsPageObject obj, int mcid)
{
    var result = new List<PdfRect>();

    // check the object's own mcid first
    var mark = obj.GetContentMark();
    bool isTagged = mark != null && mark.GetTagMcid() == mcid;
    if (isTagged)
    {
        result.Add(obj.GetBBox());
        return result;
    }

    if (obj.GetObjectType() != PdfPageObjectType.kPdsPageForm)
    {
        return result;
    }

    // untagged form: gather matches from each of its page objects
    var container = (PdsForm)obj;
    for (var childIndex = 0; childIndex < container.GetNumPageObjects(); childIndex++)
    {
        List<PdfRect> nested = GetMcidBBoxes(container.GetPageObject(childIndex), mcid);
        result.AddRange(nested);
    }

    return result;
}
// Processes a single page object: images are passed to ExtractImage, and
// form XObjects are walked recursively through their page objects.
// Other object types are ignored.
//
// page     - page the object belongs to (forwarded to ExtractImage)
// obj      - page object to process
// savePath - forwarded unchanged to ExtractImage
//
// Throws Exception carrying _pdfix.GetError() as its message when obj is null.
private static void ProcessPageObject(PdfPage page, PdsPageObject obj, string savePath)
{
    if (obj == null)
    {
        throw new Exception(_pdfix.GetError());
    }

    var kind = obj.GetObjectType();
    if (kind == PdfPageObjectType.kPdsPageImage)
    {
        ExtractImage(page, (PdsImage)obj, savePath);
        return;
    }

    if (kind != PdfPageObjectType.kPdsPageForm)
    {
        return;
    }

    // visit every page object nested in the form
    var container = (PdsForm)obj;
    int childIndex = 0;
    while (childIndex < container.GetNumPageObjects())
    {
        ProcessPageObject(page, container.GetPageObject(childIndex), savePath);
        childIndex++;
    }
}