// Opens the PDF at openPath, passes every page together with savePath to
// ParsePage, and finally prints the value of tableIndex as the number of
// detected tables. configPath is accepted but not read here.
// Throws Exception with the engine's error text when the document or a page
// cannot be obtained.
public static void Run(
    String openPath,    // source PDF document
    String savePath,    // output TXT document
    String configPath   // configuration file
    ) {
    Pdfix engine = PdfixEngine.Instance;

    PdfDoc document = engine.OpenDoc(openPath, "");
    if (document == null) {
        throw new Exception(engine.GetError());
    }

    // visit the pages one at a time; each page is released right after parsing
    int pageIndex = 0;
    while (pageIndex < document.GetNumPages()) {
        PdfPage current = document.AcquirePage(pageIndex);
        if (current == null) {
            throw new Exception(engine.GetError());
        }
        ParsePage(engine, current, savePath);
        current.Release();
        pageIndex++;
    }

    Console.WriteLine(tableIndex + " tables detected");
    document.Close();
}
// For every layer returned by ReadOCGLayers.ReadLayerNames, prints a heading
// and hands each content object of the first page to CheckPageObject together
// with that layer.
public static void Run(
    String openPath   // source PDF document
    ) {
    Pdfix engine = PdfixEngine.Instance;
    PdfDoc document = engine.OpenDoc(openPath, "");
    if (document == null) {
        throw new Exception(engine.GetError());
    }

    var catalog = document.GetRootObject();
    var layerMap = ReadOCGLayers.ReadLayerNames(catalog);
    var firstPage = document.AcquirePage(0);

    foreach (KeyValuePair<string, int> layer in layerMap) {
        Console.WriteLine("Text in layer " + layer.Key + "(" + layer.Value.ToString() + ")");

        // the page content is re-read for each layer, then scanned front to back
        var pageContent = firstPage.GetContent();
        int objectIndex = 0;
        while (objectIndex < pageContent.GetNumObjects()) {
            CheckPageObject(pageContent.GetObject(objectIndex), layer);
            objectIndex++;
        }
    }

    firstPage.Release();
    document.Close();
}
//////////////////////////////////////////////////////////////////////////////////////////////////
// GetParagraphTextState
// Looks up the text state of a paragraph by walking the page-content kids of
// struct_elem and querying the page objects of the page each kid lives on.
// Returns the first text state whose font_size is non-zero, or a default
// PdfTextState when no kid yields one.
//////////////////////////////////////////////////////////////////////////////////////////////////
static PdfTextState GetParagraphTextState(PdsStructElement struct_elem) {
    for (int i = 0; i < struct_elem.GetNumKids(); i++) {
        if (struct_elem.GetKidType(i) != PdfStructElementType.kPdsStructKidPageContent) {
            continue;
        }

        // acquire the page on which this kid is present
        PdfDoc doc = struct_elem.GetStructTree().GetDoc();
        PdfPage page = doc.AcquirePage(struct_elem.GetKidPageNumber(i));
        if (page == null) {
            // nothing to inspect for this kid; try the remaining ones
            continue;
        }

        try {
            int mcid = struct_elem.GetKidMcid(i);
            for (int j = 0; j < page.GetNumPageObjects(); j++) {
                var ts = GetPageObjectTextState(page.GetPageObject(j), mcid);
                // a zero font size is treated as "no usable text state" for this object
                if (ts.font_size == 0) {
                    continue;
                }
                return(ts);
            }
        }
        finally {
            // release exactly once on every path, including the early return
            page.Release();
        }
    }
    return(new PdfTextState());
}
// Builds a JSON array describing the requested pages and attaches it to node
// under the key "pages".
//   doc        - document whose pages are read
//   node       - JSON object that receives the "pages" array (only when non-empty)
//   data_types - page_num == -1 selects every page, otherwise only that page index;
//                the object is also forwarded to ExtractPageData
// Throws Exception when a page cannot be acquired.
private static void ExtractDocumentPages(PdfDoc doc, JObject node, DataType data_types) {
    var pages_node = new JArray(); // node holding the page array

    bool all_pages = data_types.page_num == -1;
    var from_page = all_pages ? 0 : data_types.page_num;
    var to_page = all_pages ? doc.GetNumPages() - 1 : data_types.page_num;

    for (var i = from_page; i <= to_page; i++) {
        var page = doc.AcquirePage(i);
        if (page == null) {
            // NOTE(review): the error text is read from a freshly created Pdfix
            // object, as before - confirm this reports the failure of `doc`.
            throw new Exception(new Pdfix().GetError());
        }

        try {
            var page_node = new JObject(); // node holding the page
            page_node.Add("page_number", i + 1); // reported page numbers are 1-based
            ExtractPageData(page, page_node, data_types);
            if (page_node.Count > 0) {
                pages_node.Add(page_node);
            }
        }
        finally {
            // every acquired page is handed back, also when extraction throws
            page.Release();
        }
    }

    if (pages_node.Count > 0) {
        node.Add("pages", pages_node);
    }
}
// Opens the PDF at openPath and passes every content object of every page,
// along with its page and savePath, to ProcessPageObject.
// Throws Exception with the engine's error text when the document or a page
// cannot be obtained.
public static void Run(
    String openPath,   // source PDF document
    String savePath
    ) {
    Pdfix engine = PdfixEngine.Instance;
    PdfDoc document = engine.OpenDoc(openPath, "");
    if (document == null) {
        throw new Exception(engine.GetError());
    }

    int pageIndex = 0;
    while (pageIndex < document.GetNumPages()) {
        var currentPage = document.AcquirePage(pageIndex);
        if (currentPage == null) {
            throw new Exception(engine.GetError());
        }

        // hand each object of the page content to the processor, in order
        var pageContent = currentPage.GetContent();
        int objectIndex = 0;
        while (objectIndex < pageContent.GetNumObjects()) {
            ProcessPageObject(currentPage, pageContent.GetObject(objectIndex), savePath);
            objectIndex++;
        }

        currentPage.Release();
        pageIndex++;
    }

    document.Close();
}
// Opens the PDF at openPath and passes each page, in order, to ParsePage.
// Throws Exception with the engine's error text when the document or a page
// cannot be obtained.
public static void Run(
    String openPath   // source PDF document
    ) {
    Pdfix engine = PdfixEngine.Instance;

    PdfDoc document = engine.OpenDoc(openPath, "");
    if (document == null) {
        throw new Exception(engine.GetError());
    }

    // one page at a time: acquire, parse, release
    int pageIndex = 0;
    while (pageIndex < document.GetNumPages()) {
        PdfPage current = document.AcquirePage(pageIndex);
        if (current == null) {
            throw new Exception(engine.GetError());
        }
        ParsePage(engine, current);
        current.Release();
        pageIndex++;
    }

    document.Close();
}
// Re-tags the document from scratch, runs RemoveParagraph on every top-level
// child of the structure tree, runs MarkUntaggedObjectsAsArtifact on every
// page and saves the result to savePath.
// Throws Exception when opening, tagging, reading the structure tree or saving fails.
public static void Run(
    String openPath,   // source PDF document
    String savePath    // dest PDF document
    ) {
    Pdfix engine = PdfixEngine.Instance;
    PdfDoc document = engine.OpenDoc(openPath, "");
    if (document == null) {
        throw new Exception(engine.GetError());
    }

    // drop whatever structure tree exists, then auto-tag
    bool tagsRemoved = document.RemoveTags(null, null);
    if (!tagsRemoved) {
        throw new Exception(engine.GetErrorType().ToString());
    }
    bool tagsAdded = document.AddTags(null, null);
    if (!tagsAdded) {
        throw new Exception(engine.GetErrorType().ToString());
    }

    PdsStructTree tree = document.GetStructTree();
    if (tree == null) {
        throw new Exception(engine.GetErrorType().ToString());
    }

    // process each top-level structure element
    int childIndex = 0;
    while (childIndex < tree.GetNumChildren()) {
        PdsObject childObject = tree.GetChildObject(childIndex);
        RemoveParagraph(tree.GetStructElementFromObject(childObject));
        childIndex++;
    }

    // the structure tree changed; revisit every page so the page content follows
    int pageIndex = 0;
    while (pageIndex < document.GetNumPages()) {
        PdfPage current = document.AcquirePage(pageIndex);
        MarkUntaggedObjectsAsArtifact(current);
        current.Release();
        pageIndex++;
    }

    bool saved = document.Save(savePath, Pdfix.kSaveFull);
    if (!saved) {
        throw new Exception(engine.GetError());
    }
    document.Close();
}
// Gathers, for the page with index page_num, the rectangles that the
// per-object GetMcidBBoxes overload returns for mcid, in page-object order.
static List<PdfRect> GetMcidBBoxes(PdfDoc doc, int page_num, int mcid) {
    var collected = new List<PdfRect>();
    var targetPage = doc.AcquirePage(page_num);

    int objectIndex = 0;
    while (objectIndex < targetPage.GetNumPageObjects()) {
        var pageObject = targetPage.GetPageObject(objectIndex);
        collected.AddRange(GetMcidBBoxes(pageObject, mcid));
        objectIndex++;
    }

    targetPage.Release();
    return collected;
}
// Adds a text annotation with author, contents and one reply to the middle of
// the first page's crop box and saves the document to savePath.
// Throws Exception with the engine's error text when the document, the page or
// the annotation cannot be obtained, or when saving fails.
public static void Run(
    String openPath,   // source PDF document
    String savePath    // output PDF document
    ) {
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null) {
        throw new Exception(pdfix.GetError());
    }

    PdfPage page = doc.AcquirePage(0);
    if (page == null) {
        throw new Exception(pdfix.GetError());
    }

    try {
        PdfRect cropBox = page.GetCropBox();

        // 20x20 rectangle centred on the crop box
        float centerX = (float)((cropBox.right + cropBox.left) / 2.0);
        float centerY = (float)((cropBox.top + cropBox.bottom) / 2.0);
        PdfRect annotRect = new PdfRect();
        annotRect.left = centerX - 10;
        annotRect.bottom = centerY - 10;
        annotRect.right = centerX + 10;
        annotRect.top = centerY + 10;

        PdfTextAnnot annot = (PdfTextAnnot)page.CreateAnnot(PdfAnnotSubtype.kAnnotText, annotRect);
        // validate the new annotation before it is attached to the page
        if (annot == null) {
            throw new Exception(pdfix.GetError());
        }
        page.AddAnnot(-1, annot);

        annot.SetAuthor(@"Peter Brown");
        annot.SetContents(@"This is my comment.");
        annot.AddReply(@"Mark Fish", @"This is some reply.");
    }
    finally {
        // hand the page back even when annotation setup fails
        page.Release();
    }

    if (!doc.Save(savePath, Pdfix.kSaveFull)) {
        throw new Exception(pdfix.GetError());
    }
    doc.Close();
}
//////////////////////////////////////////////////////////////////////////////////////////////////
// GetStructElementBBox
// Computes the union of the bounding boxes of all page objects whose content
// mark carries an MCID referenced by struct_elem or by any of its descendant
// structure elements.
//   struct_elem - structure element to measure
//   bbox        - in/out; a rectangle with zero width or zero height is treated
//                 as empty and replaced by the first box found, otherwise it is
//                 extended to include every box found
// Returns true when at least one matching page object contributed to bbox.
//////////////////////////////////////////////////////////////////////////////////////////////////
private static bool GetStructElementBBox(PdsStructElement struct_elem, ref PdfRect bbox) {
    bool result = false;
    for (int i = 0; i < struct_elem.GetNumChildren(); i++) {
        var child_type = struct_elem.GetChildType(i);
        if (child_type == PdfStructElementType.kPdsStructChildPageContent) {
            // acquire page on which the element is present
            PdfDoc doc = struct_elem.GetStructTree().GetDoc();
            PdfPage page = doc.AcquirePage(struct_elem.GetChildPageNumber(i));
            if (page == null) {
                // page unavailable: skip this child rather than dereference null
                continue;
            }
            try {
                // find page objects marked with this child's mcid
                int mcid = struct_elem.GetChildMcid(i);
                var content = page.GetContent();
                for (int j = 0; j < content.GetNumObjects(); j++) {
                    PdsPageObject page_object = content.GetObject(j);
                    PdsContentMark content_mark = page_object.GetContentMark();
                    if (content_mark == null || content_mark.GetTagMcid() != mcid) {
                        continue;
                    }
                    PdfRect elem_bbox = page_object.GetBBox();
                    if ((bbox.left - bbox.right == 0) || (bbox.top - bbox.bottom == 0)) {
                        // accumulator is still empty: start from this box
                        bbox = elem_bbox;
                    }
                    else {
                        bbox.left = Math.Min(bbox.left, elem_bbox.left);
                        bbox.right = Math.Max(bbox.right, elem_bbox.right);
                        bbox.top = Math.Max(bbox.top, elem_bbox.top);
                        bbox.bottom = Math.Min(bbox.bottom, elem_bbox.bottom);
                    }
                    result = true;
                }
            }
            finally {
                page.Release();
            }
        }
        else if (child_type == PdfStructElementType.kPdsStructChildElement) {
            PdsObject kid_obj = struct_elem.GetChildObject(i);
            PdsStructElement kid_elem = struct_elem.GetStructTree().GetStructElementFromObject(kid_obj);
            // a box found deeper in the tree counts as a hit for this element too
            if (GetStructElementBBox(kid_elem, ref bbox)) {
                result = true;
            }
        }
    }
    return(result);
}
// Worker body: waits on the shared semaphore, acquires page 0 of the shared
// document, sleeps for one second, releases the page and then the semaphore,
// logging each step with the current thread's name.
// Throws Exception with the engine's error text when the page cannot be acquired.
static void DoSomething() {
    Console.WriteLine("{0} = waiting", Thread.CurrentThread.Name);
    semaphore.WaitOne();
    try {
        Console.WriteLine("{0} begins!", Thread.CurrentThread.Name);

        PdfPage page = doc.AcquirePage(0);
        if (page == null) {
            throw new Exception(pdfix.GetError());
        }
        Thread.Sleep(1000);
        page.Release();

        Console.WriteLine("{0} releasing...", Thread.CurrentThread.Name);
    }
    finally {
        // always give the slot back, otherwise a failure here would leave the
        // other waiting threads blocked on WaitOne
        semaphore.Release();
    }
}
// Authorizes the engine, opens the PDF at openPath and lets ParsePage write
// every page into a text file created at savePath. configPath is accepted but
// not read here.
// Throws Exception when authorization fails or when the document or a page
// cannot be obtained. The writer, the document and the engine are released on
// every path.
public static void Run(
    String email,        // authorization email
    String licenseKey,   // authorization license key
    String openPath,     // source PDF document
    String savePath,     // output TXT document
    String configPath    // configuration file
    ) {
    Pdfix pdfix = new Pdfix();
    if (pdfix == null) {
        throw new Exception("Pdfix initialization fail");
    }

    try {
        if (!pdfix.Authorize(email, licenseKey)) {
            throw new Exception(pdfix.GetError());
        }

        PdfDoc doc = pdfix.OpenDoc(openPath, "");
        if (doc == null) {
            throw new Exception(pdfix.GetError());
        }

        try {
            // the using block flushes and closes the output file before the document is closed
            using (StreamWriter file = new System.IO.StreamWriter(savePath)) {
                // iterate through pages and parse each page individually
                for (int i = 0; i < doc.GetNumPages(); i++) {
                    PdfPage page = doc.AcquirePage(i);
                    if (page == null) {
                        throw new Exception(pdfix.GetError());
                    }
                    try {
                        ParsePage(pdfix, page, file);
                    }
                    finally {
                        page.Release();
                    }
                }
            }
        }
        finally {
            doc.Close();
        }
    }
    finally {
        pdfix.Destroy();
    }
}
// Authorizes the engine, opens the PDF at openPath and passes every page
// object of every page, along with its page and savePath, to ProcessPageObject.
// The engine is also stored in the _pdfix field.
// Throws Exception when authorization fails or when the document or a page
// cannot be obtained.
public static void Run(
    String email,        // authorization email
    String licenseKey,   // authorization license key
    String openPath,     // source PDF document
    String savePath
    ) {
    Pdfix pdfix = new Pdfix();
    if (pdfix == null) {
        throw new Exception("Pdfix initialization fail");
    }
    _pdfix = pdfix;

    if (!pdfix.Authorize(email, licenseKey)) {
        throw new Exception("Authorization fail. " + pdfix.GetError());
    }

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null) {
        throw new Exception(pdfix.GetError());
    }

    for (int i = 0; i < doc.GetNumPages(); i++) {
        var page = doc.AcquirePage(i);
        if (page == null) {
            throw new Exception(pdfix.GetError());
        }

        for (int j = 0; j < page.GetNumPageObjects(); j++) {
            // index by the object counter j; the page counter i would fetch the
            // same object on every pass and can exceed the object count
            ProcessPageObject(page, page.GetPageObject(j), savePath);
        }

        page.Release();
    }

    doc.Close();
    pdfix.Destroy();
}
// Resets _tabOrder to 0 and then, page by page and annotation by annotation,
// passes every widget annotation with its page to GetWidgetAnnotProperties.
private static void ProcessFormFieldsViaPages(PdfDoc doc) {
    _tabOrder = 0;

    // form fields are visited in page order, then in annotation order on the page
    int pageIndex = 0;
    while (pageIndex < doc.GetNumPages()) {
        var currentPage = doc.AcquirePage(pageIndex);

        int annotIndex = 0;
        while (annotIndex < currentPage.GetNumAnnots()) {
            var candidate = currentPage.GetAnnot(annotIndex);
            bool isWidget = candidate.GetSubtype() == PdfAnnotSubtype.kAnnotWidget;
            if (isWidget) {
                GetWidgetAnnotProperties(currentPage, (PdfWidgetAnnot)candidate);
            }
            annotIndex++;
        }

        currentPage.Release();
        pageIndex++;
    }
}
// Authorizes the engine, opens the PDF at openPath and, for every layer
// returned by ReadOCGLayers.ReadLayerNames, prints a heading and hands each
// page object of the first page to CheckPageObject together with that layer.
public static void Run(
    String email,        // authorization email
    String licenseKey,   // authorization license key
    String openPath      // source PDF document
    ) {
    Pdfix engine = new Pdfix();
    if (engine == null) {
        throw new Exception("Pdfix initialization fail");
    }

    bool authorized = engine.Authorize(email, licenseKey);
    if (!authorized) {
        throw new Exception(engine.GetError());
    }

    PdfDoc document = engine.OpenDoc(openPath, "");
    if (document == null) {
        throw new Exception(engine.GetError());
    }

    var catalog = document.GetRootObject();
    var layerMap = ReadOCGLayers.ReadLayerNames(catalog);
    var firstPage = document.AcquirePage(0);

    foreach (KeyValuePair<string, int> layer in layerMap) {
        Console.WriteLine("Text in layer " + layer.Key + "(" + layer.Value.ToString() + ")");

        // scan the page objects front to back for this layer
        int objectIndex = 0;
        while (objectIndex < firstPage.GetNumPageObjects()) {
            CheckPageObject(firstPage.GetPageObject(objectIndex), layer);
            objectIndex++;
        }
    }

    firstPage.Release();
    document.Close();
    engine.Destroy();
}
// Opens the PDF at openPath, writes an XML prolog plus an opening <Document>
// tag to a file created at savePath, and lets ParsePage append the output for
// every page. configPath is accepted but not read here.
// Throws Exception with the engine's error text when the document or a page
// cannot be obtained. The writer and the document are released on every path.
public static void Run(
    String openPath,    // source PDF document
    String savePath,    // output XML document
    String configPath   // configuration file
    ) {
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null) {
        throw new Exception(pdfix.GetError());
    }

    try {
        // the using block flushes and closes the output file before the document is closed
        using (StreamWriter file = new System.IO.StreamWriter(savePath)) {
            // XML headers
            file.Write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
            file.Write("<!--Created from PDF via PDFix-->\n");
            file.Write("<Document>\n");

            // iterate through pages and parse each page individually
            for (int i = 0; i < doc.GetNumPages(); i++) {
                PdfPage page = doc.AcquirePage(i);
                if (page == null) {
                    throw new Exception(pdfix.GetError());
                }
                try {
                    ParsePage(pdfix, page, file);
                }
                finally {
                    page.Release();
                }
            }
            // NOTE(review): no closing </Document> tag is written in this method -
            // confirm whether ParsePage emits it or whether it is missing.
        }
    }
    finally {
        doc.Close();
    }
}
// Flattens every annotation except link annotations on every page, rewrites
// each page's content and saves the document to savePath.
// Throws Exception with the engine's error text when the document cannot be
// opened, an annotation cannot be flattened, or saving fails.
public static void Run(
    String openPath,   // source PDF document
    String savePath    // output PDF document
    ) {
    Pdfix engine = PdfixEngine.Instance;
    PdfDoc document = engine.OpenDoc(openPath, "");
    if (document == null) {
        throw new Exception(engine.GetError());
    }

    int pageIndex = 0;
    while (pageIndex < document.GetNumPages()) {
        var currentPage = document.AcquirePage(pageIndex);

        // walk the annotations from the last one down to the first
        int annotIndex = currentPage.GetNumAnnots() - 1;
        while (annotIndex >= 0) {
            var candidate = currentPage.GetAnnot(annotIndex);
            bool shouldFlatten = candidate != null
                && candidate.GetSubtype() != PdfAnnotSubtype.kAnnotLink;
            if (shouldFlatten && !currentPage.FlattenAnnot(candidate)) {
                throw new Exception(engine.GetError());
            }
            annotIndex--;
        }

        currentPage.SetContent();
        currentPage.Release();
        pageIndex++;
    }

    bool saved = document.Save(savePath, Pdfix.kSaveFull);
    if (!saved) {
        throw new Exception(engine.GetError());
    }
    document.Close();
}
//////////////////////////////////////////////////////////////////////////////////////////////////
// GetParagraphTextState
// Looks up the text state of a paragraph by walking the page-content children
// of struct_elem and querying the content objects of the page each child lives on.
// Returns the first text state whose font_size is non-zero, or a default
// PdfTextState when no child yields one.
//////////////////////////////////////////////////////////////////////////////////////////////////
static PdfTextState GetParagraphTextState(PdsStructElement struct_elem) {
    for (int i = 0; i < struct_elem.GetNumChildren(); i++) {
        if (struct_elem.GetChildType(i) != PdfStructElementType.kPdsStructChildPageContent) {
            continue;
        }

        // acquire the page on which this child is present
        PdfDoc doc = struct_elem.GetStructTree().GetDoc();
        PdfPage page = doc.AcquirePage(struct_elem.GetChildPageNumber(i));
        if (page == null) {
            // nothing to inspect for this child; try the remaining ones
            continue;
        }

        try {
            int mcid = struct_elem.GetChildMcid(i);
            var content = page.GetContent();
            // examine every content object, not only the first one
            for (int j = 0; j < content.GetNumObjects(); j++) {
                var ts = GetPageObjectTextState(content.GetObject(j), mcid);
                // NOTE(review): assumes GetPageObjectTextState reports font_size 0
                // for an object that does not carry this mcid - confirm.
                if (ts.font_size == 0) {
                    continue;
                }
                return(ts);
            }
        }
        finally {
            // release exactly once on every path, including the early return
            page.Release();
        }
    }
    return(new PdfTextState());
}
// Authorizes the engine, auto-tags the document, measures the element returned
// by GetFirstParagraph, adds a link annotation with that rectangle to the
// first page, re-tags the document and saves it to savePath.
// Throws Exception when authorization, opening, tagging, paragraph lookup,
// page acquisition, annotation creation or saving fails.
public static void Run(
    String email,        // authorization email
    String licenseKey,   // authorization license key
    String openPath,     // source PDF document
    String savePath      // dest PDF document
    ) {
    pdfix = new Pdfix();
    if (pdfix == null) {
        throw new Exception("Pdfix initialization fail");
    }
    if (!pdfix.Authorize(email, licenseKey)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null) {
        throw new Exception(pdfix.GetError());
    }

    // cleanup any previous structure tree
    if (!doc.RemoveTags(null, IntPtr.Zero)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }
    // autotag document first
    if (!doc.AddTags(null, IntPtr.Zero)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    // get the struct tree
    PdsStructTree struct_tree = doc.GetStructTree();
    if (struct_tree == null) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    PdsStructElement paragraph = GetFirstParagraph(struct_tree);
    if (paragraph == null) {
        throw new Exception("No paragraph found.");
    }

    PdfRect annot_bbox = new PdfRect();
    GetStructElementBBox(paragraph, ref annot_bbox);

    // add new link annotation to the first page
    PdfPage page = doc.AcquirePage(0);
    if (page == null) {
        throw new Exception(pdfix.GetError());
    }
    try {
        PdfLinkAnnot annot = page.AddLinkAnnot(0, annot_bbox);
        if (annot == null) {
            throw new Exception(pdfix.GetErrorType().ToString());
        }
    }
    finally {
        // the page is not needed for re-tagging or saving; hand it back here
        page.Release();
    }

    // re-tag the document so the tags include the link annotation
    if (!doc.RemoveTags(null, IntPtr.Zero)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }
    if (!doc.AddTags(null, IntPtr.Zero)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    if (!doc.Save(savePath, PdfSaveFlags.kSaveFull)) {
        throw new Exception(pdfix.GetError());
    }

    doc.Close();
    pdfix.Destroy();
}
// Authorizes the engine, adds a text annotation with author, contents and one
// reply to the middle of the first page's crop box, and saves the document to
// savePath.
// Throws Exception when authorization fails, when the document, the page or
// the annotation cannot be obtained, or when saving fails.
public static void Run(
    String email,        // authorization email
    String licenseKey,   // authorization license key
    String openPath,     // source PDF document
    String savePath      // output PDF document
    ) {
    Pdfix engine = new Pdfix();
    if (engine == null) {
        throw new Exception("Pdfix initialization fail");
    }

    bool authorized = engine.Authorize(email, licenseKey);
    if (!authorized) {
        throw new Exception(engine.GetError());
    }

    PdfDoc document = engine.OpenDoc(openPath, "");
    if (document == null) {
        throw new Exception(engine.GetError());
    }

    PdfPage firstPage = document.AcquirePage(0);
    if (firstPage == null) {
        throw new Exception(engine.GetError());
    }

    // 20x20 rectangle centred on the crop box
    PdfRect crop = firstPage.GetCropBox();
    var centerX = (crop.right + crop.left) / 2.0;
    var centerY = (crop.top + crop.bottom) / 2.0;
    PdfRect target = new PdfRect();
    target.left = centerX - 10;
    target.bottom = centerY - 10;
    target.right = centerX + 10;
    target.top = centerY + 10;

    PdfTextAnnot note = firstPage.AddTextAnnot(-1, target);
    if (note == null) {
        throw new Exception(engine.GetError());
    }
    note.SetAuthor(@"Peter Brown");
    note.SetContents(@"This is my comment.");
    note.AddReply(@"Mark Fish", @"This is some reply.");

    firstPage.Release();

    bool saved = document.Save(savePath, PdfSaveFlags.kSaveFull);
    if (!saved) {
        throw new Exception(engine.GetError());
    }

    document.Close();
    engine.Destroy();
}
// Auto-tags the document, measures the element returned by GetFirstParagraph,
// adds a link annotation with that rectangle to the first page, re-tags the
// document and saves it to savePath.
// Throws Exception when opening, tagging, paragraph lookup, page acquisition,
// annotation creation or saving fails.
public static void Run(
    String openPath,   // source PDF document
    String savePath    // dest PDF document
    ) {
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null) {
        throw new Exception(pdfix.GetError());
    }

    // cleanup any previous structure tree
    if (!doc.RemoveTags(null, null)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }
    // autotag document first
    if (!doc.AddTags(null, null)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    // get the struct tree
    PdsStructTree struct_tree = doc.GetStructTree();
    if (struct_tree == null) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    PdsStructElement paragraph = GetFirstParagraph(struct_tree);
    if (paragraph == null) {
        throw new Exception("No paragraph found.");
    }

    PdfRect annot_bbox = new PdfRect();
    GetStructElementBBox(paragraph, ref annot_bbox);

    // add new link annotation to the first page
    PdfPage page = doc.AcquirePage(0);
    if (page == null) {
        throw new Exception(pdfix.GetError());
    }
    try {
        PdfLinkAnnot annot = (PdfLinkAnnot)page.CreateAnnot(PdfAnnotSubtype.kAnnotLink, annot_bbox);
        // validate the new annotation before it is attached to the page
        if (annot == null) {
            throw new Exception(pdfix.GetErrorType().ToString());
        }
        page.AddAnnot(0, annot);
    }
    finally {
        // the page is not needed for re-tagging or saving; hand it back here
        page.Release();
    }

    // re-tag the document so the tags include the link annotation
    if (!doc.RemoveTags(null, null)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }
    if (!doc.AddTags(null, null)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    if (!doc.Save(savePath, Pdfix.kSaveFull)) {
        throw new Exception(pdfix.GetError());
    }
    doc.Close();
}
// Authorizes the engine, re-tags the document from scratch, runs
// RemoveParagraph on every top-level kid of the structure tree, runs
// MarkUntaggedObjectsAsArtifact on every page and saves the result to savePath.
// Throws Exception when authorization, opening, tagging, reading the structure
// tree or saving fails.
public static void Run(
    String email,        // authorization email
    String licenseKey,   // authorization license key
    String openPath,     // source PDF document
    String savePath      // dest PDF document
    ) {
    pdfix = new Pdfix();
    if (pdfix == null) {
        throw new Exception("Pdfix initialization fail");
    }

    bool authorized = pdfix.Authorize(email, licenseKey);
    if (!authorized) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    PdfDoc document = pdfix.OpenDoc(openPath, "");
    if (document == null) {
        throw new Exception(pdfix.GetError());
    }

    // drop whatever structure tree exists, then auto-tag
    bool tagsRemoved = document.RemoveTags(null, IntPtr.Zero);
    if (!tagsRemoved) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }
    bool tagsAdded = document.AddTags(null, IntPtr.Zero);
    if (!tagsAdded) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    PdsStructTree tree = document.GetStructTree();
    if (tree == null) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    // process each top-level structure element, releasing it afterwards
    int kidIndex = 0;
    while (kidIndex < tree.GetNumKids()) {
        PdsObject kidObject = tree.GetKidObject(kidIndex);
        PdsStructElement kidElement = tree.AcquireStructElement(kidObject);
        RemoveParagraph(kidElement);
        kidElement.Release();
        kidIndex++;
    }

    // the structure tree changed; revisit every page so the page content follows
    int pageIndex = 0;
    while (pageIndex < document.GetNumPages()) {
        PdfPage current = document.AcquirePage(pageIndex);
        MarkUntaggedObjectsAsArtifact(current);
        current.Release();
        pageIndex++;
    }

    bool saved = document.Save(savePath, PdfSaveFlags.kSaveFull);
    if (!saved) {
        throw new Exception(pdfix.GetError());
    }

    document.Close();
    pdfix.Destroy();
}
// Adds a stamp annotation to the first page whose normal appearance is built
// from the image at imgPath plus a line of text, then saves the document to
// savePath.
//   openPath - source PDF document
//   savePath - output PDF document
//   imgPath  - image placed in the appearance; a ".png" extension selects PNG,
//              anything else is loaded as JPEG
// Throws Exception with the engine's error text when any engine object cannot
// be created or when saving fails.
public static void Run(
    String openPath,   // source PDF document
    String savePath,   // output PDF document
    String imgPath     // watermark to apply
    ) {
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null) {
        throw new Exception(pdfix.GetError());
    }

    // identify image format from file path (case-insensitive, culture-independent)
    PdfImageFormat format = PdfImageFormat.kImageFormatJpg;
    if (string.Equals(Path.GetExtension(imgPath), ".png", StringComparison.OrdinalIgnoreCase)) {
        format = PdfImageFormat.kImageFormatPng;
    }

    // load image file data into memory stream
    byte[] bytes = File.ReadAllBytes(imgPath);
    var memStm = pdfix.CreateMemStream();
    if (memStm == null) {
        throw new Exception(pdfix.GetError());
    }
    memStm.Write(0, bytes);

    // create XObject from the image
    var image_obj = doc.CreateXObjectFromImage(memStm, format);
    if (image_obj == null) {
        throw new Exception(pdfix.GetError());
    }

    // add annotation on the first page
    var page = doc.AcquirePage(0);
    if (page == null) {
        throw new Exception(pdfix.GetError());
    }

    try {
        // the page view is acquired and validated but not used afterwards, so
        // it is handed back immediately
        var page_view = page.AcquirePageView(1, PdfRotate.kRotate0);
        if (page_view == null) {
            throw new Exception(pdfix.GetError());
        }
        page_view.Release();

        // rect for the new annotation
        PdfRect annot_rect = new PdfRect() {
            left = 100,
            right = 300,
            bottom = 100,
            top = 200
        };
        PdfAnnot annot = page.CreateAnnot(PdfAnnotSubtype.kAnnotStamp, annot_rect);
        // validate the new annotation before it is attached to the page
        if (annot == null) {
            throw new Exception(pdfix.GetError());
        }
        page.AddAnnot(0, annot);

        // create content
        var content = doc.CreateContent();

        // add image to content: scaled to the annotation width, keeping the
        // image's aspect ratio, and centred vertically
        var xobjdict = image_obj.GetStreamDict();
        var width = xobjdict.GetNumber("Width");
        var height = xobjdict.GetNumber("Height");
        var ratio = height / width;
        var res_width = annot_rect.right - annot_rect.left;
        var res_height = res_width * ratio;
        var center_adj = ((annot_rect.top - annot_rect.bottom) - res_height) / 2;
        var imageobject = content.AddNewImage(-1, image_obj, new PdfMatrix(res_width, 0, 0, res_height, 0, center_adj));
        PdfGraphicState imageGs = new PdfGraphicState();
        imageGs.color_state.fill_opacity = 255;
        imageobject.SetGState(imageGs);

        // create text state
        PdfTextState textState = new PdfTextState();
        var colorSpace = doc.CreateColorSpace(PdfColorSpaceFamily.kColorSpaceDeviceRGB);
        var fontName = "Segoe UI";
        var fn = fontName.Trim();
        var sysFont = pdfix.FindSysFont(fn, 0, PdfFontCodepage.kFontDefANSICodepage);
        if (sysFont == null) {
            throw new Exception(pdfix.GetError());
        }
        textState.font = doc.CreateFont(sysFont, PdfFontCharset.kFontAnsiCharset, 0);
        sysFont.Destroy();
        if (textState.font == null) {
            throw new Exception(pdfix.GetError());
        }
        textState.font_size = 11;

        // grey fill
        textState.color_state.fill_type = PdfFillType.kFillTypeSolid;
        var fill_color = colorSpace.CreateColor();
        fill_color.SetValue(0, 0.5f);
        fill_color.SetValue(1, 0.5f);
        fill_color.SetValue(2, 0.5f);
        textState.color_state.fill_color = fill_color;
        textState.color_state.fill_opacity = 255;

        // black stroke
        textState.color_state.stroke_type = PdfFillType.kFillTypeSolid;
        var stroke_color = colorSpace.CreateColor();
        stroke_color.SetValue(0, 0);
        stroke_color.SetValue(1, 0);
        stroke_color.SetValue(2, 0);
        textState.color_state.stroke_color = stroke_color;
        textState.color_state.stroke_opacity = 255;
        textState.char_spacing = 2;

        // add text to content
        var line = "test text in appearance";
        var textObject = content.AddNewText(-1, textState.font, new PdfMatrix(1, 0, 0, 1, 10, 10));
        if (textObject == null) {
            throw new Exception(pdfix.GetError());
        }
        textObject.SetTextState(textState);
        textObject.SetText(line);

        // set annotation appearance
        PdsContentParams contentParams = new PdsContentParams();
        // NOTE(review): the argument order of this PdfRect constructor is not
        // visible here - confirm that (width, 0, 0, height) yields the intended box.
        contentParams.bbox = new PdfRect(annot_rect.right - annot_rect.left, 0, 0, annot_rect.top - annot_rect.bottom);
        contentParams.matrix = new PdfMatrix(1, 0, 0, 1, 0, 0);
        contentParams.form_type = 1;
        contentParams.flags = 2;
        var appearance_stream = content.ToObject(doc, contentParams);
        annot.SetAppearanceFromXObject(appearance_stream, PdfAnnotAppearanceMode.kAppearanceNormal);
    }
    finally {
        page.Release();
    }

    // save document
    if (!doc.Save(savePath, Pdfix.kSaveFull)) {
        throw new Exception(pdfix.GetError());
    }
    doc.Close();
}
// Auto-tags the document, empties the element returned by GetFirstTable,
// creates an image element with alternate text over the table's bounding box
// on the first page, re-tags the page content under the former table element,
// changes that element's type to "Sect" and saves the document to savePath.
// Throws Exception when opening, tagging, table lookup, page or page-map
// acquisition, element creation or saving fails.
public static void Run(
    String openPath,   // source PDF document
    String savePath    // dest PDF document
    ) {
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null) {
        throw new Exception(pdfix.GetError());
    }

    // cleanup any previous structure tree
    if (!doc.RemoveTags(null, null)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }
    // autotag document first
    if (!doc.AddTags(null, null)) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    // get the struct tree
    PdsStructTree struct_tree = doc.GetStructTree();
    if (struct_tree == null) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    PdsStructElement table = GetFirstTable(struct_tree);
    if (table == null) {
        throw new Exception("No table found.");
    }

    PdfRect bbox = new PdfRect();
    GetStructElementBBox(table, ref bbox);

    // remove all items from the table to make it untagged content;
    // iterate backwards so the remaining indices stay valid
    for (int i = table.GetNumChildren() - 1; i >= 0; i--) {
        table.RemoveChild(i);
    }

    // tag page
    PdfPage page = doc.AcquirePage(0);
    if (page == null) {
        throw new Exception(pdfix.GetError());
    }
    try {
        PdePageMap page_map = page.AcquirePageMap();
        // validate the page map before its first use
        if (page_map == null) {
            throw new Exception(pdfix.GetError());
        }
        PdeElement elem = page_map.CreateElement(PdfElementType.kPdeImage, null);
        elem.SetBBox(bbox);
        elem.SetAlt("This is image caption");

        // prepare document template to ignore already tagged content
        var doc_prelight = doc.GetTemplate();
        doc_prelight.SetProperty("ignore_tags", 1);

        // re-tag non-tagged page content
        // NOTE(review): the page map is acquired a second time here and neither
        // acquisition is released in this method - confirm the intended ownership.
        PdePageMap pageMap = page.AcquirePageMap();
        if (pageMap == null) {
            throw new Exception(pdfix.GetError());
        }
        if (!pageMap.CreateElements(null, null)) {
            throw new Exception(pdfix.GetError());
        }
        if (!page_map.AddTags(table, null, null)) {
            throw new Exception(pdfix.GetErrorType().ToString());
        }
    }
    finally {
        page.Release();
    }

    // update the table element type
    if (!table.SetType("Sect")) {
        throw new Exception(pdfix.GetErrorType().ToString());
    }

    if (!doc.Save(savePath, Pdfix.kSaveFull)) {
        throw new Exception(pdfix.GetError());
    }
    doc.Close();
}
// Colours blue every text object whose bounding box lies inside the bounding
// box of a link annotation on the same page (as decided by Utils.contains),
// then saves the document to savePath.
// Throws Exception with the engine's error text when the document or a page
// cannot be obtained, or when saving fails.
public static void Run(
    String openPath,   // source PDF document
    String savePath    // output PDF document
    ) {
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null) {
        throw new Exception(pdfix.GetError());
    }

    // create color space
    var rgb_color_space = doc.CreateColorSpace(PdfColorSpaceFamily.kColorSpaceDeviceRGB);

    var page_num = doc.GetNumPages();
    for (int i = 0; i < page_num; i++) {
        var page = doc.AcquirePage(i);
        if (page == null) {
            throw new Exception(pdfix.GetError());
        }

        try {
            // collect the rectangles of all link annotations on this page
            List<PdfRect> link_annots = new List<PdfRect>();
            var annot_num = page.GetNumAnnots();
            for (int x = 0; x < annot_num; x++) {
                var annot = page.GetAnnot(x);
                if (annot.GetSubtype().Equals(PdfAnnotSubtype.kAnnotLink)) {
                    link_annots.Add(annot.GetBBox());
                }
            }

            // get page content
            var content = page.GetContent();
            var objects_num = content.GetNumObjects();
            for (int z = 0; z < objects_num; z++) {
                var content_obj = content.GetObject(z);
                if (!content_obj.GetObjectType().Equals(PdfPageObjectType.kPdsPageText)) {
                    continue;
                }
                foreach (var link_rect in link_annots) {
                    if (!Utils.contains(link_rect, content_obj.GetBBox())) {
                        continue;
                    }
                    var text_obj = (PdsText)content_obj;

                    // create blue color and set to text
                    var blue = rgb_color_space.CreateColor();
                    blue.SetValue(0, 0);
                    blue.SetValue(1, 0);
                    blue.SetValue(2, 1);
                    var text_state = text_obj.GetTextState();
                    text_state.color_state.fill_color = blue;
                    text_obj.SetTextState(text_state);
                }
            }

            // apply changes on content
            page.SetContent();
        }
        finally {
            // each acquired page is handed back before moving to the next one
            page.Release();
        }
    }

    // save document
    if (!doc.Save(savePath, Pdfix.kSaveFull)) {
        throw new Exception(pdfix.GetError());
    }
    doc.Close();
}
// Reads the form-field related annotations of every page of the PDF at
// filePath and returns them, grouped per page, in a ResponseObject.
//   email, licenseKey - engine authorization
//   filePath          - PDF document to read
//   imageList         - per-page image paths; entry i is passed to GetBase64String
//                       for page i, so it must hold at least one entry per page
// The position and size of each annotation are reported as percentages of the
// page view's device width and height.
// The method contains no await, so it runs synchronously and any exception is
// delivered through the returned task.
// On failure the collected field names are stored in responseObject.errorList
// and the original exception is rethrown.
public async Task<ResponseObject> ExtractJsonFromPDF(string email, string licenseKey, string filePath, List<string> imageList) {
    ResponseObject responseObject = new ResponseObject();
    List<string> errorList = new List<string>();
    Pdfix pdfix = null;
    PdfDoc doc = null;
    try {
        pdfix = new Pdfix();
        if (pdfix == null) {
            throw new Exception("Pdfix initialization fail");
        }
        if (!pdfix.Authorize(email, licenseKey)) {
            throw new Exception(pdfix.GetError());
        }

        doc = pdfix.OpenDoc(filePath, "");
        if (doc == null) {
            // report the engine's reason instead of an empty exception
            throw new Exception(pdfix.GetError());
        }

        List<PDF> pdfList = new List<PDF>();
        for (int i = 0; i < doc.GetNumPages(); i++) {
            List<PDFObject> pdfObjectList = new List<PDFObject>();
            PdfPage page = doc.AcquirePage(i);
            if (page == null) {
                throw new Exception(pdfix.GetError());
            }

            try {
                PDF pdfObj = new PDF();
                pdfObj.page = i.ToString();
                pdfObj.imageUrl = GetBase64String(imageList[i]);
                _tabOrder = 0; // tab order restarts on every page

                for (int j = 0; j < page.GetNumAnnots(); j++) {
                    PdfAnnot pdfAnnot = page.GetAnnot(j);
                    PdfAnnotSubtype pdfAnnotSubtype = pdfAnnot.GetSubtype();
                    PdfFormField field = null;
                    bool isChecked = false;

                    // NOTE(review): doc.GetFormField is called with the annotation
                    // index j in the branches below - confirm that the document's
                    // field index and the page's annotation index really coincide.
                    if (pdfAnnotSubtype == PdfAnnotSubtype.kAnnotLink) {
                        var widget = (PdfLinkAnnot)pdfAnnot;
                        field = doc.GetFormField(j);
                        isChecked = field.GetValue() == field.GetWidgetExportValue(widget);
                    }
                    if (pdfAnnotSubtype == PdfAnnotSubtype.kAnnotWidget) {
                        var widget = (PdfWidgetAnnot)pdfAnnot;
                        field = widget.GetFormField();
                        if (field == null) {
                            field = doc.GetFormField(j);
                        }
                        isChecked = field.GetValue() == field.GetWidgetExportValue(widget);
                    }
                    if (pdfAnnotSubtype == PdfAnnotSubtype.kAnnotHighlight) {
                        var widget = (PdfTextMarkupAnnot)pdfAnnot;
                        field = doc.GetFormField(j);
                        isChecked = field.GetValue() == field.GetWidgetExportValue(widget);
                    }

                    if (field == null) {
                        // unsupported annotation subtype: record the field name and abort
                        field = doc.GetFormField(j);
                        string fieldName = field.GetFullName();
                        errorList.Add(fieldName);
                        throw new Exception();
                    }

                    PDFObject pdfObject = new PDFObject();
                    pdfObject.fieldName = field.GetFullName();
                    pdfObject.fieldValue = field.GetValue();
                    pdfObject.maxLength = field.GetMaxLength();
                    pdfObject.tooltip = field.GetTooltip();
                    pdfObject.displayName = field.GetDefaultValue();
                    pdfObject.multiLine = (field.GetFlags() & Pdfix.kFieldFlagMultiline) != 0;
                    pdfObject.isFormatted = field.GetAAction(PdfActionEventType.kActionEventFieldFormat) != null;
                    pdfObject.required = (field.GetFlags() & Pdfix.kFieldFlagRequired) != 0;
                    pdfObject.readOnly = (field.GetFlags() & Pdfix.kFieldFlagReadOnly) != 0;
                    pdfObject.tabOrder = _tabOrder++;
                    pdfObject.isChecked = isChecked;
                    pdfObject.fieldType = GetFieldType(field);

                    List<string> dropdownList = new List<string>();
                    for (int k = 0; k < field.GetOptionCount(); k++) {
                        dropdownList.Add(field.GetOptionValue(k));
                    }
                    pdfObject.optionList = dropdownList;

                    PdfRect bbox = pdfAnnot.GetBBox();
                    PdfPageView pageView = page.AcquirePageView(1.0, PdfRotate.kRotate0);
                    if (pageView == null) {
                        throw new Exception(pdfix.GetError());
                    }
                    try {
                        // express position and size relative to the rendered page, in percent
                        var devRect = pageView.RectToDevice(bbox);
                        var deviceWidth = pageView.GetDeviceWidth();
                        var deviceHeight = pageView.GetDeviceHeight();
                        pdfObject.x = ((double)devRect.left / deviceWidth) * 100;
                        pdfObject.y = ((double)devRect.top / deviceHeight) * 100;
                        pdfObject.width = ((double)(devRect.right - devRect.left) / deviceWidth) * 100;
                        pdfObject.height = ((double)(devRect.bottom - devRect.top) / deviceHeight) * 100;
                    }
                    finally {
                        pageView.Release();
                    }

                    pdfObjectList.Add(pdfObject);
                }

                pdfObj.pdfObjList = pdfObjectList;
                pdfObj.width = 927;
                pdfObj.height = 1200;
                pdfList.Add(pdfObj);
            }
            finally {
                // each acquired page is handed back, also when reading it fails
                page.Release();
            }
        }

        responseObject.flag = true;
        responseObject.data = pdfList;
        responseObject.message = "Document Import Successfully";
    }
    catch (Exception) {
        responseObject.errorList = errorList;
        // plain rethrow keeps the original stack trace
        throw;
    }
    finally {
        if (doc != null) {
            doc.Close();
        }
        if (pdfix != null) {
            pdfix.Destroy();
        }
    }
    return(responseObject);
}
/// <summary>
/// Adds a stamp annotation to the first page of a PDF document, sets an image file
/// as the annotation's normal appearance and saves the result.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path the modified PDF document is saved to.</param>
/// <param name="imgPath">
/// Path of the image to apply. A ".png" extension (any casing) selects the PNG
/// decoder; every other extension is treated as JPEG.
/// </param>
/// <exception cref="Exception">
/// Thrown with the text of <c>pdfix.GetError()</c> when an SDK call reports failure.
/// </exception>
public static void Run(
    String openPath,                            // source PDF document
    String savePath,                            // output PDF document
    String imgPath                              // watermark to apply
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    PdfPage page = null;
    try
    {
        // identify image format from file path; ordinal comparison keeps the
        // check independent of the current culture
        PdfImageFormat format = PdfImageFormat.kImageFormatJpg;
        if (string.Equals(Path.GetExtension(imgPath), ".png", StringComparison.OrdinalIgnoreCase))
        {
            format = PdfImageFormat.kImageFormatPng;
        }

        // load image file data into memory stream
        byte[] bytes = File.ReadAllBytes(imgPath);
        var memStm = pdfix.CreateMemStream();
        if (memStm == null)
        {
            throw new Exception(pdfix.GetError());
        }
        memStm.Write(0, bytes);

        // create XObject from the image
        var image_obj = doc.CreateXObjectFromImage(memStm, format);
        if (image_obj == null)
        {
            throw new Exception(pdfix.GetError());
        }

        // add a new annotation on the first page
        page = doc.AcquirePage(0);
        if (page == null)
        {
            throw new Exception(pdfix.GetError());
        }

        // rect for the new annotation
        PdfRect annot_rect = new PdfRect()
        {
            left   = 100,
            right  = 300,
            bottom = 100,
            top    = 200
        };

        // The previous version also acquired a page view here that was never
        // used or released; it has been dropped to stop the leak.
        PdfAnnot annot = page.CreateAnnot(PdfAnnotSubtype.kAnnotStamp, annot_rect);
        if (annot == null)
        {
            throw new Exception(pdfix.GetError());
        }
        page.AddAnnot(0, annot);

        // set annotation appearance
        if (!annot.SetAppearanceFromXObject(image_obj, PdfAnnotAppearanceMode.kAppearanceNormal))
        {
            throw new Exception(pdfix.GetError());
        }

        // release the page before saving, as the original flow did
        page.Release();
        page = null;

        // save document
        if (!doc.Save(savePath, Pdfix.kSaveFull))
        {
            throw new Exception(pdfix.GetError());
        }
    }
    finally
    {
        // make sure nothing stays acquired when a step above throws
        if (page != null)
        {
            page.Release();
        }
        doc.Close();
    }
}
/// <summary>
/// Renders every page of a PDF document to a JPEG file (quality 75) named
/// <c>imgPath + pageIndex + ".jpg"</c> and returns the list of generated names.
/// </summary>
/// <param name="email">Authorization email passed to <c>Pdfix.Authorize</c>.</param>
/// <param name="licenseKey">Authorization license key passed to <c>Pdfix.Authorize</c>.</param>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="imgPath">Path prefix of the output images; the page index and ".jpg" are appended.</param>
/// <param name="zoom">Zoom factor used when acquiring the page view.</param>
/// <returns>
/// One entry per page, each being <c>imgPath + pageIndex</c>.
/// NOTE(review): the entries do not carry the ".jpg" suffix of the written files;
/// kept as-is because callers may rely on it - confirm.
/// </returns>
/// <exception cref="Exception">
/// Thrown with the text of <c>pdfix.GetError()</c> when an SDK call reports failure.
/// </exception>
/// <remarks>
/// The method contains no awaits and therefore runs synchronously; the async
/// signature is kept for callers. NOTE(review): the Pdfix instance created here
/// is not destroyed, matching the previous behavior - confirm who owns it.
/// </remarks>
public async Task <List <string> > ExtractImage(
    String email,
    String licenseKey,
    String openPath,
    String imgPath,
    Double zoom
    )
{
    List <string> imageList = new List <string>();

    Pdfix pdfix = new Pdfix();
    if (!pdfix.Authorize(email, licenseKey))
    {
        throw new Exception(pdfix.GetError());
    }

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        for (int i = 0; i < doc.GetNumPages(); i++)
        {
            PdfPage     page     = null;
            PdfPageView pageView = null;
            PsImage     image    = null;
            PsStream    stream   = null;
            try
            {
                page = doc.AcquirePage(i);
                if (page == null)
                {
                    throw new Exception(pdfix.GetError());
                }

                pageView = page.AcquirePageView(zoom, PdfRotate.kRotate0);
                if (pageView == null)
                {
                    throw new Exception(pdfix.GetError());
                }

                int width  = pageView.GetDeviceWidth();
                int height = pageView.GetDeviceHeight();

                image = pdfix.CreateImage(width, height, PsImageDIBFormat.kImageDIBFormatArgb);
                if (image == null)
                {
                    throw new Exception(pdfix.GetError());
                }

                PdfPageRenderParams pdfPageRenderParams = new PdfPageRenderParams();
                pdfPageRenderParams.image        = image;
                pdfPageRenderParams.matrix       = pageView.GetDeviceMatrix();
                pdfPageRenderParams.render_flags = Pdfix.kRenderAnnot;

                if (!page.DrawContent(pdfPageRenderParams, null, IntPtr.Zero))
                {
                    throw new Exception(pdfix.GetError());
                }

                stream = pdfix.CreateFileStream(imgPath + i.ToString() + ".jpg", PsFileMode.kPsWrite);
                if (stream == null)
                {
                    // previously a failed open surfaced later as a null dereference
                    throw new Exception(pdfix.GetError());
                }

                PdfImageParams imgParams = new PdfImageParams();
                imgParams.format  = PdfImageFormat.kImageFormatJpg;
                imgParams.quality = 75;

                if (!image.SaveToStream(stream, imgParams))
                {
                    throw new Exception(pdfix.GetError());
                }

                imageList.Add(imgPath + i.ToString());
            }
            finally
            {
                // release per-page resources on success and on failure alike
                if (stream != null)
                {
                    stream.Destroy();
                }
                if (image != null)
                {
                    image.Destroy();
                }
                if (pageView != null)
                {
                    pageView.Release();
                }
                if (page != null)
                {
                    page.Release();
                }
            }
        }
    }
    finally
    {
        // replaces the former catch/"throw ex", which reset the stack trace
        doc.Close();
    }

    return(imageList);
}
/// <summary>
/// Renders the first page of a PDF document at 100% zoom, without rotation,
/// clipped to the page crop box, and writes it as a JPEG (quality 80).
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path of the output image; the file is opened in truncate mode.</param>
/// <exception cref="Exception">
/// Thrown with the text of <c>pdfix.GetError()</c> when an SDK call reports failure.
/// </exception>
public static void Run(
    String openPath,                            // source PDF document
    String savePath                             // output image document
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    PdfPage     page     = null;
    PdfPageView pageView = null;
    PsImage     image    = null;
    PsStream    stm      = null;
    try
    {
        // choose page to render
        page = doc.AcquirePage(0);
        if (page == null)
        {
            throw new Exception(pdfix.GetError());
        }

        // prepare page view with zoom and rotation
        pageView = page.AcquirePageView((float)1.0, PdfRotate.kRotate0);
        if (pageView == null)
        {
            throw new Exception(pdfix.GetError());
        }

        // prepare an image to write data into
        image = pdfix.CreateImage(pageView.GetDeviceWidth(),
                                  pageView.GetDeviceHeight(),
                                  PsImageDIBFormat.kImageDIBFormatArgb);
        if (image == null)
        {
            throw new Exception(pdfix.GetError());
        }

        // draw content into the image
        if (!page.DrawContent(new PdfPageRenderParams()
        {
            clip_box = page.GetCropBox(),
            matrix = pageView.GetDeviceMatrix(),
            image = image
        }, null, null))
        {
            throw new Exception(pdfix.GetError());
        }

        // prepare file stream to write into
        stm = pdfix.CreateFileStream(savePath, PsFileMode.kPsTruncate);
        if (stm == null)
        {
            throw new Exception(pdfix.GetError());
        }

        // save image to file
        if (!image.SaveToStream(stm, new PdfImageParams()
        {
            format = PdfImageFormat.kImageFormatJpg,
            quality = 80
        }))
        {
            throw new Exception(pdfix.GetError());
        }
    }
    finally
    {
        // cleanup - also runs on failure; the page view and the image were
        // previously never released
        if (stm != null)
        {
            stm.Destroy();
        }
        if (image != null)
        {
            image.Destroy();
        }
        if (pageView != null)
        {
            pageView.Release();
        }
        if (page != null)
        {
            page.Release();
        }
        doc.Close();
    }
}
/// <summary>
/// Tags a PDF document with <c>AddTags</c> and saves it, while demonstrating a
/// cancel callback and a "page contents did change" event callback that
/// releases a previously acquired page map.
/// </summary>
/// <param name="email">Authorization email passed to <c>Pdfix.Authorize</c>.</param>
/// <param name="licenseKey">Authorization license key passed to <c>Pdfix.Authorize</c>.</param>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path the tagged PDF document is saved to.</param>
/// <param name="configPath">
/// Path of a JSON configuration file loaded into the document template.
/// When the file cannot be opened the template is left untouched.
/// </param>
/// <exception cref="Exception">
/// Thrown with the text of <c>pdfix.GetError()</c> when an SDK call reports failure.
/// </exception>
public static void Run(
    String email,                               // authorization email
    String licenseKey,                          // authorization license key
    String openPath,                            // source PDF document
    String savePath,                            // output PDF document
    String configPath                           // configuration file
    )
{
    Pdfix pdfix = new Pdfix();

    PdfDoc     doc     = null;
    PdfPage    page    = null;
    PdePageMap pageMap = null;
    try
    {
        if (!pdfix.Authorize(email, licenseKey))
        {
            throw new Exception(pdfix.GetError());
        }

        doc = pdfix.OpenDoc(openPath, "");
        if (doc == null)
        {
            throw new Exception(pdfix.GetError());
        }

        PsFileStream stm = pdfix.CreateFileStream(configPath, PsFileMode.kPsReadOnly);
        if (stm != null)
        {
            try
            {
                PdfDocTemplate docTmpl = doc.GetDocTemplate();
                if (docTmpl == null)
                {
                    throw new Exception(pdfix.GetError());
                }
                docTmpl.LoadFromStream(stm, PsDataFormat.kDataFormatJson);
            }
            finally
            {
                // destroy the config stream even when the template is unavailable
                stm.Destroy();
            }
        }

        // define a cancel progress callback
        PdfCancelProc cancel_callback = (data) =>
        {
            // to cancel the process return 1
            Console.WriteLine("PdfCancelProc callback was called");
            return(0);
        };

        page = doc.AcquirePage(0);
        if (page == null)
        {
            // previously a failed acquire surfaced as a null dereference
            throw new Exception(pdfix.GetError());
        }
        pageMap = page.AcquirePageMap(null, IntPtr.Zero);

        // define an event callback
        PdfEventProc event_callback = (data) =>
        {
            Console.WriteLine("Page contents did change. Releasing pageMap...");
            if (pageMap != null)
            {
                pageMap.Release();
                pageMap = null;
            }
        };

        if (!pdfix.RegisterEvent(PdfEventType.kEventPageContentsDidChange, event_callback, IntPtr.Zero))
        {
            throw new Exception(pdfix.GetError());
        }

        if (!doc.AddTags(cancel_callback, IntPtr.Zero))
        {
            throw new Exception(pdfix.GetError());
        }

        // the event may not have fired: release whatever is still acquired and
        // null the references so neither the callback nor the finally block
        // releases them a second time
        if (pageMap != null)
        {
            pageMap.Release();
            pageMap = null;
        }
        page.Release();
        page = null;

        if (!doc.Save(savePath, PdfSaveFlags.kSaveFull))
        {
            throw new Exception(pdfix.GetError());
        }
    }
    finally
    {
        // cleanup also runs when any step above throws
        if (pageMap != null)
        {
            pageMap.Release();
            pageMap = null;
        }
        if (page != null)
        {
            page.Release();
        }
        if (doc != null)
        {
            doc.Close();
        }
        pdfix.Destroy();
    }
}