/// <summary>
/// Opens the PDF at <paramref name="openPath"/> into the shared <c>doc</c> field, starts one
/// thread per operation name (each running <c>DoSomething</c>), waits for all of them, then
/// closes the document and destroys the Pdfix instance.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <exception cref="Exception">Thrown with the Pdfix error text when the document cannot be opened.</exception>
public static void Run(
    String openPath                             // source PDF document
    )
{
    Pdfix engine = PdfixEngine.Instance;

    doc = engine.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(engine.GetError());
    }

    // The operation name is carried as the thread name.
    // NOTE(review): presumably DoSomething dispatches on Thread.CurrentThread.Name - confirm.
    String[] operations =
    {
        "count pages",
        "remove annotations",
        "place watermark",
        "extract table"
    };

    // launch the workers
    for (int index = 0; index < operations.Length; index++)
    {
        Thread worker = new Thread(DoSomething);
        t[index] = worker;
        worker.Name = operations[index];
        worker.Start();
    }

    // wait until every worker has finished before touching the document again
    for (int index = 0; index < operations.Length; index++)
    {
        t[index].Join();
    }

    doc.Close();
    doc = null;

    engine.Destroy();
    engine = null;
}
//////////////////////////////////////////////////////////////////////////////////////////////////
// GetParagraphTextState
// get the text state of the text objects inside paragraph by iterating content kid objects
//////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Walks the page-content kids of <paramref name="struct_elem"/> and returns the first text
/// state reported by <c>GetPageObjectTextState</c> whose <c>font_size</c> is not zero.
/// </summary>
/// <param name="struct_elem">Structure element whose kids are inspected.</param>
/// <returns>
/// The first text state with a non-zero font size, or a default-constructed
/// <c>PdfTextState</c> when none is found.
/// </returns>
static PdfTextState GetParagraphTextState(PdsStructElement struct_elem)
{
    for (int i = 0; i < struct_elem.GetNumKids(); i++)
    {
        if (struct_elem.GetKidType(i) != PdfStructElementType.kPdsStructKidPageContent)
        {
            continue;
        }

        // acquire page on which the element is present
        PdfDoc  doc  = struct_elem.GetStructTree().GetDoc();
        PdfPage page = doc.AcquirePage(struct_elem.GetKidPageNumber(i));
        if (page == null)
        {
            // page could not be acquired; try the next kid instead of dereferencing null
            continue;
        }

        try
        {
            // find text object with mcid on the page to get the text state
            int mcid        = struct_elem.GetKidMcid(i);
            int num_objects = page.GetNumPageObjects();
            for (int j = 0; j < num_objects; j++)
            {
                var ts = GetPageObjectTextState(page.GetPageObject(j), mcid);
                // Handled by MIkhaylov KS
                // a zero font size marks "no text state for this object" - keep looking
                if (ts.font_size == 0)
                {
                    continue;
                }
                return ts;
            }
        }
        finally
        {
            // the page is released on every path, including the early return above
            page.Release();
        }
    }
    return new PdfTextState();
}
/// <summary>
/// Opens a PDF, passes every page to <c>ParsePage</c> together with
/// <paramref name="savePath"/>, and prints the value of <c>tableIndex</c> as the number of
/// detected tables.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Output path forwarded to <c>ParsePage</c>.</param>
/// <param name="configPath">Configuration file; not used by this method.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when the document or a page cannot be opened.
/// </exception>
public static void Run(
    String openPath,                            // source PDF document
    String savePath,                            // output TXT document
    String configPath                           // configuration file
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        // iterate through pages and parse each page individually
        for (int i = 0; i < doc.GetNumPages(); i++)
        {
            PdfPage page = doc.AcquirePage(i);
            if (page == null)
            {
                throw new Exception(pdfix.GetError());
            }

            try
            {
                ParsePage(pdfix, page, savePath);
            }
            finally
            {
                // release the page even when parsing throws
                page.Release();
            }
        }

        Console.WriteLine(tableIndex + " tables detected");
    }
    finally
    {
        // close the document on every path so a failed page does not leak it
        doc.Close();
    }
}
/// <summary>
/// Opens a PDF and runs two form-field passes over it:
/// <c>ProcessFormFieldsViaPages</c> followed by <c>ProcessDocumentFormFields</c>,
/// separated by a banner written to the console.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <exception cref="Exception">Thrown with the Pdfix error text when the document cannot be opened.</exception>
public static void Run(
    String openPath                             // source PDF document
    )
{
    var pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        Console.WriteLine("detect form field tab order");
        ProcessFormFieldsViaPages(doc);

        Console.WriteLine("");
        Console.WriteLine("********************************************************");
        Console.WriteLine("");

        // NOTE(review): the message below contains typos; it is runtime output and is left
        // unchanged here in case anything matches on it.
        Console.WriteLine("map all document fields, some spccific proerties may ne inaccesscible");
        ProcessDocumentFormFields(doc);
    }
    finally
    {
        // close the document even when one of the passes throws
        doc.Close();
    }
}
/// <summary>
/// Opens a PDF and hands every top-level content object of every page to
/// <c>ProcessPageObject</c>, together with <paramref name="savePath"/>.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Output location forwarded to <c>ProcessPageObject</c>.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when the document or a page cannot be opened.
/// </exception>
public static void Run(
    String openPath,                            // source PDF document
    String savePath
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        for (int i = 0; i < doc.GetNumPages(); i++)
        {
            var page = doc.AcquirePage(i);
            if (page == null)
            {
                throw new Exception(pdfix.GetError());
            }

            try
            {
                var content = page.GetContent();
                for (int j = 0; j < content.GetNumObjects(); j++)
                {
                    ProcessPageObject(page, content.GetObject(j), savePath);
                }
            }
            finally
            {
                // release the page even when processing an object throws
                page.Release();
            }
        }
    }
    finally
    {
        // close the document on every path
        doc.Close();
    }
}
/// <summary>
/// Creates and authorizes a Pdfix instance, opens a PDF and passes its root object to
/// <c>ParseObject</c> with the value 1 as second argument.
/// </summary>
/// <param name="email">Authorization email.</param>
/// <param name="licenseKey">Authorization license key.</param>
/// <param name="openPath">Path of the source PDF document.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when authorization fails or the document cannot be opened.
/// </exception>
public static void Run(
    String email,                               // authorization email
    String licenseKey,                          // authorization license key
    String openPath                             // source PDF document
    )
{
    // A constructor call never yields null, so the former "initialization fail" check was
    // unreachable and has been dropped.
    Pdfix pdfix = new Pdfix();

    try
    {
        if (!pdfix.Authorize(email, licenseKey))
        {
            throw new Exception(pdfix.GetError());
        }

        PdfDoc doc = pdfix.OpenDoc(openPath, "");
        if (doc == null)
        {
            throw new Exception(pdfix.GetError());
        }

        try
        {
            PdsObject rootObj = doc.GetRootObject();
            ParseObject(rootObj, 1);
        }
        finally
        {
            // close the document even when parsing throws
            doc.Close();
        }
    }
    finally
    {
        // the Pdfix instance is destroyed on every path, including failed authorization
        pdfix.Destroy();
    }
}
/// <summary>
/// Opens a PDF, calls <c>MakeAccessible</c> on it with default
/// <c>PdfAccessibleParams</c> and saves the result with <c>Pdfix.kSaveFull</c>.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path of the output PDF document.</param>
/// <param name="preflight">Not used by this method.</param>
/// <param name="language">Not used by this method.</param>
/// <param name="title">Not used by this method.</param>
/// <param name="configPath">Not used by this method.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when opening, converting or saving fails.
/// </exception>
public static void Run(
    String openPath,                            // source PDF document
    String savePath,                            // output PDF document
    bool preflight,                             // preflight page before tagging
    String language,                            // document reading language
    String title,                               // document title
    String configPath                           // configuration file
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        // NOTE(review): the template is fetched but never used; the call is kept in case
        // GetTemplate() has a side effect - confirm and remove if it does not.
        var doc_template = doc.GetTemplate();

        // convert to PDF/UA
        PdfAccessibleParams accParams = new PdfAccessibleParams();
        if (!doc.MakeAccessible(accParams, null, null))
        {
            throw new Exception(pdfix.GetError());
        }

        if (!doc.Save(savePath, Pdfix.kSaveFull))
        {
            throw new Exception(pdfix.GetError());
        }
    }
    finally
    {
        // close the document even when conversion or saving fails
        doc.Close();
    }
}
/// <summary>
/// Opens a PDF, removes its tags, adds tags again and saves the result with
/// <c>Pdfix.kSaveFull</c>.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path of the output PDF document.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when any of the steps fails.
/// </exception>
public static void Run(
    String openPath,                            // source PDF document
    String savePath                             // output PDF document
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        // drop the existing tags before tagging again
        if (!doc.RemoveTags(null, null))
        {
            throw new Exception(pdfix.GetError());
        }

        if (!doc.AddTags(null, null))
        {
            throw new Exception(pdfix.GetError());
        }

        if (!doc.Save(savePath, Pdfix.kSaveFull))
        {
            throw new Exception(pdfix.GetError());
        }
    }
    finally
    {
        // close the document even when one of the steps fails
        doc.Close();
    }
}
/// <summary>
/// Opens a PDF, reads its layer names via <c>ReadOCGLayers.ReadLayerNames</c> and, for each
/// layer, passes every top-level content object of the first page to
/// <c>CheckPageObject</c>.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when the document or its first page cannot be opened.
/// </exception>
public static void Run(
    String openPath                             // source PDF document
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        var rootObj = doc.GetRootObject();
        var layers  = ReadOCGLayers.ReadLayerNames(rootObj);

        // only the first page is inspected
        var page = doc.AcquirePage(0);
        if (page == null)
        {
            throw new Exception(pdfix.GetError());
        }

        try
        {
            // the page content does not depend on the layer, so fetch it once
            var content = page.GetContent();

            foreach (KeyValuePair<string, int> layer in layers)
            {
                Console.WriteLine("Text in layer " + layer.Key + "(" + layer.Value.ToString() + ")");

                for (var i = 0; i < content.GetNumObjects(); i++)
                {
                    var page_obj = content.GetObject(i);
                    CheckPageObject(page_obj, layer);
                }
            }
        }
        finally
        {
            // release the page even when a check throws
            page.Release();
        }
    }
    finally
    {
        // close the document on every path
        doc.Close();
    }
}
/// <summary>
/// Extracts per-page data into a "pages" array on <paramref name="node"/>.
/// When <c>data_types.page_num</c> is -1 every page of the document is visited; otherwise
/// only the page with that index. Pages whose node stays empty are skipped, and the "pages"
/// array is only added when it contains at least one page.
/// </summary>
/// <param name="doc">Document whose pages are extracted.</param>
/// <param name="node">JSON object that receives the "pages" array.</param>
/// <param name="data_types">Extraction settings; forwarded to <c>ExtractPageData</c>.</param>
/// <exception cref="Exception">Thrown with the Pdfix error text when a page cannot be acquired.</exception>
private static void ExtractDocumentPages(PdfDoc doc, JObject node, DataType data_types)
{
    var pages_node = new JArray();   // node holding the page array

    var from_page = data_types.page_num == -1 ? 0 : data_types.page_num;
    var to_page   = data_types.page_num == -1 ? doc.GetNumPages() - 1 : data_types.page_num;

    for (var i = from_page; i <= to_page; i++)
    {
        var page = doc.AcquirePage(i);
        if (page == null)
        {
            // NOTE(review): this builds a fresh Pdfix wrapper just to read the error text -
            // confirm that it reports the error of the engine that opened the document.
            throw new Exception(new Pdfix().GetError());
        }

        try
        {
            var page_node = new JObject();   // node holding the page

            // page numbers in the output are 1-based
            page_node.Add("page_number", i + 1);
            ExtractPageData(page, page_node, data_types);

            if (page_node.Count > 0)
            {
                pages_node.Add(page_node);
            }
        }
        finally
        {
            // every acquired page is released; the original never released it
            page.Release();
        }
    }

    if (pages_node.Count > 0)
    {
        node.Add("pages", pages_node);
    }
}
/// <summary>
/// Opens a PDF and passes every page to <c>ParsePage</c>.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when the document or a page cannot be opened.
/// </exception>
public static void Run(
    String openPath                             // source PDF document
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        // iterate through pages and parse each page individually
        for (int i = 0; i < doc.GetNumPages(); i++)
        {
            PdfPage page = doc.AcquirePage(i);
            if (page == null)
            {
                throw new Exception(pdfix.GetError());
            }

            try
            {
                ParsePage(pdfix, page);
            }
            finally
            {
                // release the page even when parsing throws
                page.Release();
            }
        }
    }
    finally
    {
        // close the document on every path
        doc.Close();
    }
}
/// <summary>
/// Creates and authorizes a Pdfix instance (stored in the <c>pdfix</c> field), opens a PDF,
/// re-tags it and passes every top-level structure element to
/// <c>ProcessStructElement</c>. Prints "No Tags available" when the document has no
/// structure tree.
/// </summary>
/// <param name="email">Authorization email.</param>
/// <param name="licenseKey">Authorization license key.</param>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Destination PDF path; not used by this method.</param>
/// <exception cref="Exception">
/// Thrown when authorization, opening, tag removal or tagging fails.
/// </exception>
public static void Run(
    String email,                               // authorization email
    String licenseKey,                          // authorization license key
    String openPath,                            // source PDF document
    String savePath                             // dest PDF document
    )
{
    // A constructor call never yields null, so the former "initialization fail" check was
    // unreachable and has been dropped.
    pdfix = new Pdfix();

    try
    {
        if (!pdfix.Authorize(email, licenseKey))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        PdfDoc doc = pdfix.OpenDoc(openPath, "");
        if (doc == null)
        {
            throw new Exception(pdfix.GetError());
        }

        try
        {
            // cleanup any previous structure tree
            if (!doc.RemoveTags(null, IntPtr.Zero))
            {
                throw new Exception(pdfix.GetErrorType().ToString());
            }

            // autotag document first
            if (!doc.AddTags(null, IntPtr.Zero))
            {
                throw new Exception(pdfix.GetErrorType().ToString());
            }

            // read document structure tree
            PdsStructTree struct_tree = doc.GetStructTree();
            if (struct_tree == null)
            {
                Console.WriteLine("No Tags available");
            }
            else
            {
                for (var i = 0; i < struct_tree.GetNumKids(); i++)
                {
                    PdsObject        kid_object  = struct_tree.GetKidObject(i);
                    PdsStructElement struct_elem = struct_tree.AcquireStructElement(kid_object);
                    if (struct_elem == null)
                    {
                        // nothing was acquired for this kid, so there is nothing to process or release
                        continue;
                    }

                    try
                    {
                        ProcessStructElement(doc, struct_elem, "");
                    }
                    finally
                    {
                        // release the element even when processing throws
                        struct_elem.Release();
                    }
                }
            }
        }
        finally
        {
            // close the document on every path
            doc.Close();
        }
    }
    finally
    {
        // the Pdfix instance is destroyed on every path, including failed authorization
        pdfix.Destroy();
    }
}
/// <summary>
/// Visits every form field of <paramref name="doc"/> by index and passes it to
/// <c>GetFormFieldProperties</c>, with <c>null</c> for the first two arguments.
/// </summary>
/// <param name="doc">Document whose form fields are enumerated.</param>
private static void ProcessDocumentFormFields(PdfDoc doc)
{
    int index = 0;

    // the field count is re-read on every pass, exactly like a for-loop condition
    while (index < doc.GetNumFormFields())
    {
        var formField = doc.GetFormField(index);
        GetFormFieldProperties(null, null, formField);
        index++;
    }
}
/// <summary>
/// Writes general document information into <paramref name="node"/>: the "Title",
/// "Author" and "Creator" info entries, the page count, and whether the document has a
/// structure tree.
/// </summary>
/// <param name="doc">Document to describe.</param>
/// <param name="node">JSON object that receives the properties.</param>
/// <param name="data_types">Not used by this method.</param>
public static void ExtractDocumentInfo(PdfDoc doc, JObject node, DataType data_types)
{
    // document info dictionary entries
    var title = doc.GetInfo("Title");
    node.Add("title", title);

    var author = doc.GetInfo("Author");
    node.Add("author", author);

    var creator = doc.GetInfo("Creator");
    node.Add("creator", creator);

    // page count
    var pageCount = doc.GetNumPages();
    node.Add("num_pages", pageCount);

    // "tagged" here means: a structure tree object is available
    bool hasStructTree = doc.GetStructTree() != null;
    node.Add("tagged", hasStructTree);
}
/// <summary>
/// Creates and authorizes a Pdfix instance, initializes an <c>OcrTesseract</c> with it,
/// opens a PDF, sets the OCR language and data path, and opens an OCR document on top of
/// the PDF. Saving the OCR result is currently disabled (see the note in the body).
/// </summary>
/// <param name="email">Authorization email.</param>
/// <param name="licenseKey">Authorization license key.</param>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Output PDF path; not used while saving is disabled.</param>
/// <param name="dataPath">Path to the OCR data.</param>
/// <param name="language">Default OCR language.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when authorization, OCR initialization, opening the
/// PDF or opening the OCR document fails.
/// </exception>
public static void Run(
    String email,                               // authorization email
    String licenseKey,                          // authorization license key
    String openPath,                            // source PDF document
    String savePath,                            // output PDF document
    String dataPath,                            // path to OCR data
    String language                             // default OCR language
    )
{
    // Constructor calls never yield null, so the former "initialization fail" checks for
    // Pdfix and OcrTesseract were unreachable and have been dropped.
    Pdfix pdfix = new Pdfix();

    try
    {
        if (!pdfix.Authorize(email, licenseKey))
        {
            throw new Exception(pdfix.GetError());
        }

        OcrTesseract ocr = new OcrTesseract();
        if (!ocr.Initialize(pdfix))
        {
            throw new Exception(pdfix.GetError());
        }

        PdfDoc doc = pdfix.OpenDoc(openPath, "");
        if (doc == null)
        {
            throw new Exception(pdfix.GetError());
        }

        try
        {
            ocr.SetLanguage(language);
            ocr.SetDataPath(dataPath);

            TesseractDoc ocrDoc = ocr.OpenOcrDoc(doc);
            if (ocrDoc == null)
            {
                throw new Exception(pdfix.GetError());
            }

            try
            {
                // NOTE(review): saving was already commented out in the original, so
                // savePath is unused; kept here for reference.
                //if (!ocrDoc.Save(savePath, ocrParams, null, IntPtr.Zero))
                //  throw new Exception(pdfix.GetError());
            }
            finally
            {
                ocrDoc.Close();
            }
        }
        finally
        {
            // close the document even when the OCR steps fail
            doc.Close();
        }
    }
    finally
    {
        // the Pdfix instance is destroyed on every path
        pdfix.Destroy();
    }
}
/// <summary>
/// Opens a PDF, re-tags it, calls <c>RemoveParagraph</c> on every top-level structure
/// element, calls <c>MarkUntaggedObjectsAsArtifact</c> on every page and saves the result
/// with <c>Pdfix.kSaveFull</c>.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path of the destination PDF document.</param>
/// <exception cref="Exception">
/// Thrown when opening, tag removal, tagging, reading the structure tree, acquiring a page
/// or saving fails.
/// </exception>
public static void Run(
    String openPath,                            // source PDF document
    String savePath                             // dest PDF document
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        // cleanup any previous structure tree
        if (!doc.RemoveTags(null, null))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // autotag document first
        if (!doc.AddTags(null, null))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // get the struct tree
        PdsStructTree struct_tree = doc.GetStructTree();
        if (struct_tree == null)
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // tag text on the bottom of the page as artifact
        for (int i = 0; i < struct_tree.GetNumChildren(); i++)
        {
            PdsObject        kid_obj  = struct_tree.GetChildObject(i);
            PdsStructElement kid_elem = struct_tree.GetStructElementFromObject(kid_obj);
            RemoveParagraph(kid_elem);
        }

        // the struct tree was updated, save page content on each page to apply changes
        for (int i = 0; i < doc.GetNumPages(); i++)
        {
            PdfPage page = doc.AcquirePage(i);
            if (page == null)
            {
                throw new Exception(pdfix.GetError());
            }

            try
            {
                MarkUntaggedObjectsAsArtifact(page);
            }
            finally
            {
                // release the page even when marking throws
                page.Release();
            }
        }

        if (!doc.Save(savePath, Pdfix.kSaveFull))
        {
            throw new Exception(pdfix.GetError());
        }
    }
    finally
    {
        // close the document on every path
        doc.Close();
    }
}
/// <summary>
/// Calls the native <c>OcrTesseractOpenOcrDoc</c> with this object's handle and the handle
/// of <paramref name="_pdDoc"/>, and wraps a non-zero result in a <c>TesseractDoc</c>.
/// </summary>
/// <param name="_pdDoc">PDF document wrapper; <c>null</c> is passed on as a zero handle.</param>
/// <returns>A new <c>TesseractDoc</c>, or <c>null</c> when the native call returns zero.</returns>
public TesseractDoc OpenOcrDoc(PdfDoc _pdDoc)
{
    CheckBaseObj();

    // translate the managed wrapper into the handle expected by the native call
    IntPtr docHandle = IntPtr.Zero;
    if (_pdDoc != null)
    {
        docHandle = _pdDoc.m_obj;
    }

    IntPtr ocrDocHandle = OcrTesseractOpenOcrDoc(m_obj, docHandle);
    if (ocrDocHandle == IntPtr.Zero)
    {
        return null;
    }
    return new TesseractDoc(ocrDocHandle);
}
/// <summary>
/// When <c>FlushInBrowser</c> is false, closes the PDF document while leaving the
/// underlying stream open. Does nothing when <c>FlushInBrowser</c> is true.
/// </summary>
private void flushFileInBrowser02()
{
    if (FlushInBrowser)
    {
        return;
    }

    // close the document without closing the underlying stream
    PdfWriter.CloseStream = false;
    PdfDoc.Close();

    // the output stream position is left untouched here (the rewind is disabled):
    //_pdfRptData.PdfStreamOutput.Position = 0;
}
/// <summary>
/// Creates and authorizes a Pdfix instance (stored in the <c>pdfix</c> field), opens a PDF,
/// looks up a figure via <c>GetFirstFigure</c>, sets its alternate text and saves the
/// document with <c>PdfSaveFlags.kSaveFull</c>.
/// </summary>
/// <param name="email">Authorization email.</param>
/// <param name="licenseKey">Authorization license key.</param>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path of the destination PDF document.</param>
/// <exception cref="Exception">
/// Thrown when authorization, opening, reading the structure tree, setting the alternate
/// text or saving fails, or with "No figure found." when no figure is returned.
/// </exception>
public static void Run(
    String email,                               // authorization email
    String licenseKey,                          // authorization license key
    String openPath,                            // source PDF document
    String savePath                             // dest PDF document
    )
{
    // A constructor call never yields null, so the former "initialization fail" check was
    // unreachable and has been dropped.
    pdfix = new Pdfix();

    try
    {
        if (!pdfix.Authorize(email, licenseKey))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        PdfDoc doc = pdfix.OpenDoc(openPath, "");
        if (doc == null)
        {
            throw new Exception(pdfix.GetError());
        }

        try
        {
            // get the struct tree
            PdsStructTree struct_tree = doc.GetStructTree();
            if (struct_tree == null)
            {
                throw new Exception(pdfix.GetErrorType().ToString());
            }

            PdsStructElement figure = GetFirstFigure(struct_tree);
            if (figure == null)
            {
                throw new Exception("No figure found.");
            }

            if (!figure.SetAlt("This is a new alternate text"))
            {
                throw new Exception(pdfix.GetError());
            }

            if (!doc.Save(savePath, PdfSaveFlags.kSaveFull))
            {
                throw new Exception(pdfix.GetError());
            }
        }
        finally
        {
            // close the document on every path
            doc.Close();
        }
    }
    finally
    {
        // the Pdfix instance is destroyed on every path, including failed authorization
        pdfix.Destroy();
    }
}
/// <summary>
/// Calls the native <c>PdfToHtmlOpenHtmlDoc</c> with this object's handle and the handle of
/// <paramref name="_doc"/>, and wraps a non-zero result in a <c>PdfHtmlDoc</c>.
/// </summary>
/// <param name="_doc">PDF document wrapper; <c>null</c> is passed on as a zero handle.</param>
/// <returns>A new <c>PdfHtmlDoc</c>, or <c>null</c> when the native call returns zero.</returns>
public PdfHtmlDoc OpenHtmlDoc(PdfDoc _doc)
{
    CheckBaseObj();

    // translate the managed wrapper into the handle expected by the native call
    IntPtr docHandle = IntPtr.Zero;
    if (_doc != null)
    {
        docHandle = _doc.m_obj;
    }

    IntPtr htmlDocHandle = PdfToHtmlOpenHtmlDoc(m_obj, docHandle);
    if (htmlDocHandle == IntPtr.Zero)
    {
        return null;
    }
    return new PdfHtmlDoc(htmlDocHandle);
}
/// <summary>
/// Collects bounding boxes of elements on a page with the specified mcid by calling the
/// page-object overload of <c>GetMcidBBoxes</c> for every page object.
/// </summary>
/// <param name="doc">Document that owns the page.</param>
/// <param name="page_num">Number of the page passed to <c>AcquirePage</c>.</param>
/// <param name="mcid">Marked-content id to look for.</param>
/// <returns>
/// The collected rectangles; an empty list when nothing matches or the page cannot be
/// acquired.
/// </returns>
static List<PdfRect> GetMcidBBoxes(PdfDoc doc, int page_num, int mcid)
{
    var bboxes = new List<PdfRect>();

    var page = doc.AcquirePage(page_num);
    if (page == null)
    {
        // no page, no boxes - avoids dereferencing null below
        return bboxes;
    }

    try
    {
        for (var i = 0; i < page.GetNumPageObjects(); i++)
        {
            bboxes.AddRange(GetMcidBBoxes(page.GetPageObject(i), mcid));
        }
    }
    finally
    {
        // release the page even when collecting throws
        page.Release();
    }

    return bboxes;
}
/// <summary>
/// Opens a PDF, creates a text annotation in a 20x20 rectangle centred on the crop box of
/// the first page, sets its author and contents, adds a reply, and saves the document with
/// <c>Pdfix.kSaveFull</c>.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path of the output PDF document.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when opening the document, acquiring the page, creating
/// the annotation or saving fails.
/// </exception>
public static void Run(
    String openPath,                            // source PDF document
    String savePath                             // output PDF document
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        PdfPage page = doc.AcquirePage(0);
        if (page == null)
        {
            throw new Exception(pdfix.GetError());
        }

        try
        {
            PdfRect cropBox = page.GetCropBox();

            // place annotation to the middle of the page
            float centerX = (float)((cropBox.right + cropBox.left) / 2.0);
            float centerY = (float)((cropBox.top + cropBox.bottom) / 2.0);

            PdfRect annotRect = new PdfRect();
            annotRect.left   = centerX - 10;
            annotRect.bottom = centerY - 10;
            annotRect.right  = centerX + 10;
            annotRect.top    = centerY + 10;

            PdfTextAnnot annot = (PdfTextAnnot)page.CreateAnnot(PdfAnnotSubtype.kAnnotText, annotRect);

            // verify the annotation BEFORE attaching it; the original attached first and
            // only then checked for null
            if (annot == null)
            {
                throw new Exception(pdfix.GetError());
            }

            page.AddAnnot(-1, annot);

            annot.SetAuthor(@"Peter Brown");
            annot.SetContents(@"This is my comment.");
            annot.AddReply(@"Mark Fish", @"This is some reply.");
        }
        finally
        {
            // release the page even when annotation setup throws
            page.Release();
        }

        if (!doc.Save(savePath, Pdfix.kSaveFull))
        {
            throw new Exception(pdfix.GetError());
        }
    }
    finally
    {
        // close the document on every path
        doc.Close();
    }
}
/// <summary>
/// Creates and authorizes a Pdfix instance, opens a PDF, configures a digital signature
/// (reason, location, contact info, PFX file) and signs the document into
/// <paramref name="savePath"/>.
/// </summary>
/// <param name="email">Authorization email.</param>
/// <param name="licenseKey">Authorization license key.</param>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path of the signed output PDF document.</param>
/// <param name="pfxPath">Path of the PKCS#12 (.pfx) certificate file.</param>
/// <param name="pfxPassword">Password of the PFX file.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when authorization, opening the document, creating the
/// signature object, loading the PFX file or signing fails.
/// </exception>
public static void Run(
    String email,                               // authorization email
    String licenseKey,                          // authorization license key
    String openPath,                            // source PDF document
    String savePath,                            // output PDF document
    String pfxPath,                             // pfx file PKCS 12 certificate
    String pfxPassword                          // pfx password
    )
{
    // A constructor call never yields null, so the former "initialization fail" check was
    // unreachable and has been dropped.
    Pdfix pdfix = new Pdfix();

    try
    {
        if (!pdfix.Authorize(email, licenseKey))
        {
            throw new Exception(pdfix.GetError());
        }

        PdfDoc doc = pdfix.OpenDoc(openPath, "");
        if (doc == null)
        {
            throw new Exception(pdfix.GetError());
        }

        try
        {
            PdfDigSig digSig = pdfix.CreateDigSig();
            if (digSig == null)
            {
                throw new Exception(pdfix.GetError());
            }

            try
            {
                digSig.SetReason("Testing PDFix API");
                digSig.SetLocation("Location");
                digSig.SetContactInfo("*****@*****.**");

                if (!digSig.SetPfxFile(pfxPath, pfxPassword))
                {
                    throw new Exception(pdfix.GetError());
                }

                if (!digSig.SignDoc(doc, savePath))
                {
                    throw new Exception(pdfix.GetError());
                }
            }
            finally
            {
                // destroy the signature object even when signing fails
                digSig.Destroy();
            }
        }
        finally
        {
            // close the document on every path
            doc.Close();
        }
    }
    finally
    {
        // the Pdfix instance is destroyed on every path
        pdfix.Destroy();
    }
}
/// <summary>
/// Generates the PDF document of a book task (mission statement), loading each page
/// individually.
/// </summary>
/// <param name="bookTaskId">Id of the book task whose pages are merged.</param>
/// <param name="path">
/// Receives the mapped save path when the merge succeeds; <c>null</c> otherwise.
/// </param>
/// <param name="customerName">Customer name forwarded to <c>HtmlHelper.MergePdf2</c>.</param>
/// <returns><c>true</c> when <c>HtmlHelper.MergePdf2</c> reports success; otherwise <c>false</c>.</returns>
public bool GeneratePdf(int bookTaskId, out string path, string customerName)
{
    var pageRecords = this.FindAllByBookId(bookTaskId).ToList();
    var bookTask    = BookTaskBll._().FindById(bookTaskId);

    // build one PdfDoc descriptor per stored page
    var pdfPages = new List<PdfDoc>();
    foreach (BsonDocument pageRecord in pageRecords)
    {
        PdfDoc pdfPage = new PdfDoc();
        pdfPage.Id = pageRecord.Int("pageId");

        if (pageRecord.ChildBsonList("PageBodies").FirstOrDefault() != null)
        {
            // used to generate the pdf directly from the url
            pdfPage.Url = string.Format(
                "http://{0}/account/PageVersionDetail/?pagId={1}",
                System.Web.HttpContext.Current.Request.ServerVariables["HTTP_HOST"],
                pageRecord.Int("pageId"));
        }

        pdfPage.Name      = pageRecord.Text("name");
        pdfPage.NodeKey   = pageRecord.Text("nodeKey");
        pdfPage.NodeLevel = pageRecord.Int("nodeLevel");
        pdfPage.NodeOrder = pageRecord.Int("nodeOrder");
        pdfPage.NodePid   = pageRecord.Int("nodePid");
        pdfPage.Expended  = true;

        pdfPages.Add(pdfPage);
    }

    // resolve the output directory / file and make sure the directory exists
    string outputDir;
    string savePath = GetBookPagePdf(bookTaskId, out outputDir);

    var server = System.Web.HttpContext.Current.Server;
    outputDir = server.MapPath(outputDir);
    savePath  = System.Web.HttpContext.Current.Server.MapPath(savePath);

    if (!System.IO.Directory.Exists(outputDir))
    {
        System.IO.Directory.CreateDirectory(outputDir);
    }

    // html link back to the book task, passed along to the merge helper
    string bookTaskLink = "http://"
                          + System.Web.HttpContext.Current.Request.ServerVariables["HTTP_HOST"]
                          + "/MissionStatement/Home/BsonDocument/"
                          + bookTaskId;
    string bookTaskUrl = string.Format("<a href=\"{0}\">{1}</a>", bookTaskLink, bookTask.Text("name"));

    var merged = HtmlHelper.MergePdf2(pdfPages, savePath, customerName, bookTaskUrl);

    path = merged ? savePath : null;
    return merged;
}
/// <summary>
/// Creates and authorizes a Pdfix instance, opens a PDF, overwrites several fields of
/// <paramref name="watermarkParams"/> (page range 1-3, even pages only, offsets 10/10,
/// scale 0.5, opacity 0.5), adds an image watermark and saves the document with
/// <c>PdfSaveFlags.kSaveFull</c>.
/// </summary>
/// <param name="email">Authorization email.</param>
/// <param name="licenseKey">Authorization license key.</param>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path of the output PDF document.</param>
/// <param name="imgPath">Path of the watermark image.</param>
/// <param name="watermarkParams">Watermark parameters; modified by this method.</param>
/// <exception cref="Exception">
/// Thrown with the Pdfix error text when authorization, opening, watermarking or saving
/// fails.
/// </exception>
public static void Run(
    String email,                               // authorization email
    String licenseKey,                          // authorization license key
    String openPath,                            // source PDF document
    String savePath,                            // output PDF document
    String imgPath,                             // watermark to apply
    PdfWatermarkParams watermarkParams          // watermark params
    )
{
    // A constructor call never yields null, so the former "initialization fail" check was
    // unreachable and has been dropped.
    Pdfix pdfix = new Pdfix();

    try
    {
        if (!pdfix.Authorize(email, licenseKey))
        {
            throw new Exception(pdfix.GetError());
        }

        PdfDoc doc = pdfix.OpenDoc(openPath, "");
        if (doc == null)
        {
            throw new Exception(pdfix.GetError());
        }

        try
        {
            // set watermark params (these overwrite whatever the caller passed in)
            watermarkParams.page_range.start_page      = 1;
            watermarkParams.page_range.end_page        = 3;
            watermarkParams.page_range.page_range_spec = PdfPageRangeType.kEvenPagesOnly;
            watermarkParams.h_value = 10;
            watermarkParams.v_value = 10;
            watermarkParams.scale   = 0.5;
            watermarkParams.opacity = 0.5;

            if (!doc.AddWatermarkFromImage(watermarkParams, imgPath))
            {
                throw new Exception(pdfix.GetError());
            }

            if (!doc.Save(savePath, PdfSaveFlags.kSaveFull))
            {
                throw new Exception(pdfix.GetError());
            }
        }
        finally
        {
            // close the document on every path
            doc.Close();
        }
    }
    finally
    {
        // the Pdfix instance is destroyed on every path
        pdfix.Destroy();
    }
}
/// <summary>
/// Adds a page to the PDF document and a section to the MigraDoc document, then fills the
/// section with the home's address, price, brief features and description (styles
/// "Heading1", "Heading2", "Heading3" and "Normal"), and finally calls
/// <c>PopulateImages</c>.
/// </summary>
private void BuildPages()
{
    // the returned page / section / paragraph objects are not needed afterwards
    PdfDoc.AddPage();

    Migradoc.AddSection().AddParagraph(home.Address, "Heading1");

    Migradoc.LastSection.AddParagraph(home.Price, "Heading2");

    string briefFeatures = home.BriefFeatures.ToString();
    Migradoc.LastSection.AddParagraph(briefFeatures, "Heading3");

    //AddImage();

    Migradoc.LastSection.AddParagraph(home.Description, "Normal");

    PopulateImages();
}
//////////////////////////////////////////////////////////////////////////////////////////////////
// GetStructElementBBox
// get the bounding box of a struct element by merging the bounding boxes of the page objects
// that carry the mcid of its page-content children, recursing into child struct elements
//////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Merges into <paramref name="bbox"/> the bounding boxes of all page objects whose content
/// mark mcid matches a page-content child of <paramref name="struct_elem"/>, including
/// those found in nested structure elements.
/// </summary>
/// <param name="struct_elem">Structure element to measure.</param>
/// <param name="bbox">
/// Accumulated rectangle. While it has zero width or zero height it is replaced by the
/// first matching box; afterwards it is grown to the union with each further box.
/// </param>
/// <returns>
/// <c>true</c> when at least one matching page object was found in this element or in any
/// nested element; otherwise <c>false</c>.
/// </returns>
private static bool GetStructElementBBox(PdsStructElement struct_elem, ref PdfRect bbox)
{
    bool result = false;

    for (int i = 0; i < struct_elem.GetNumChildren(); i++)
    {
        var child_type = struct_elem.GetChildType(i);

        if (child_type == PdfStructElementType.kPdsStructChildPageContent)
        {
            // acquire page on which the element is present
            PdfDoc  doc  = struct_elem.GetStructTree().GetDoc();
            PdfPage page = doc.AcquirePage(struct_elem.GetChildPageNumber(i));
            if (page == null)
            {
                // page could not be acquired; skip this child instead of dereferencing null
                continue;
            }

            try
            {
                // find page objects with this mcid on the page
                int mcid    = struct_elem.GetChildMcid(i);
                var content = page.GetContent();
                for (int j = 0; j < content.GetNumObjects(); j++)
                {
                    PdsPageObject page_object = content.GetObject(j);

                    // check if this page object has the same mcid
                    PdsContentMark content_mark = page_object.GetContentMark();
                    if (content_mark == null || content_mark.GetTagMcid() != mcid)
                    {
                        continue;
                    }

                    PdfRect elem_bbox = page_object.GetBBox();
                    if ((bbox.left - bbox.right == 0) || (bbox.top - bbox.bottom == 0))
                    {
                        // accumulator is still empty: start from this object's box
                        bbox = elem_bbox;
                    }
                    else
                    {
                        // grow the accumulator to the union of both boxes
                        bbox.left   = Math.Min(bbox.left, elem_bbox.left);
                        bbox.right  = Math.Max(bbox.right, elem_bbox.right);
                        bbox.top    = Math.Max(bbox.top, elem_bbox.top);
                        bbox.bottom = Math.Min(bbox.bottom, elem_bbox.bottom);
                    }
                    result = true;
                }
            }
            finally
            {
                // the original never released the acquired page
                page.Release();
            }
        }
        else if (child_type == PdfStructElementType.kPdsStructChildElement)
        {
            PdsObject        kid_obj  = struct_elem.GetChildObject(i);
            PdsStructElement kid_elem = struct_elem.GetStructTree().GetStructElementFromObject(kid_obj);

            // propagate a hit from the nested element; the original discarded this result,
            // so a bbox found only in descendants was reported as "not found"
            if (GetStructElementBBox(kid_elem, ref bbox))
            {
                result = true;
            }
        }
    }

    return result;
}
/// <summary>
/// Opens a PDF, re-tags it, calls <c>MoveParagraphToParent</c> on its structure tree and
/// saves the result with <c>Pdfix.kSaveFull</c>.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path of the destination PDF document.</param>
/// <exception cref="Exception">
/// Thrown when opening, tag removal, tagging, reading the structure tree or saving fails,
/// or with "No table found." when <c>MoveParagraphToParent</c> returns false.
/// </exception>
public static void Run(
    String openPath,                            // source PDF document
    String savePath                             // dest PDF document
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    try
    {
        // cleanup any previous structure tree
        if (!doc.RemoveTags(null, null))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // autotag document first
        if (!doc.AddTags(null, null))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // get the struct tree
        PdsStructTree struct_tree = doc.GetStructTree();
        if (struct_tree == null)
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // move paragraph to the back of its parent
        // NOTE(review): the message mentions a table although this step moves a paragraph -
        // looks copied from another sample; confirm what a false return means before rewording.
        if (!MoveParagraphToParent(struct_tree))
        {
            throw new Exception("No table found.");
        }

        if (!doc.Save(savePath, Pdfix.kSaveFull))
        {
            throw new Exception(pdfix.GetError());
        }
    }
    finally
    {
        // close the document on every path
        doc.Close();
    }
}
/// <summary>
/// When <c>OutputAsByteArray</c> is set, closes the PDF document (keeping the underlying
/// stream open), rewinds the output stream and returns its content as a byte array.
/// </summary>
/// <returns>The PDF bytes, or <c>null</c> when <c>OutputAsByteArray</c> is false.</returns>
private byte[] flushFileInBrowser()
{
    if (OutputAsByteArray)
    {
        // close the document without closing the underlying stream
        PdfWriter.CloseStream = false;
        PdfDoc.Close();
        _pdfRptData.PdfStreamOutput.Position = 0;

        // copy the buffered pdf out of the memory stream
        MemoryStream buffer = (MemoryStream)_pdfRptData.PdfStreamOutput;
        return buffer.ToArray();
    }

    return null;
}
/// <summary>
/// When <c>FlushInBrowser</c> is set, closes the PDF document (keeping the underlying
/// stream open), rewinds the output stream and hands its bytes to
/// <c>SoftHttpContext.FlushInBrowser</c> together with the file name and flush type.
/// Does nothing when <c>FlushInBrowser</c> is false.
/// </summary>
private void flushFileInBrowser()
{
    if (FlushInBrowser)
    {
        // close the document without closing the underlying stream
        PdfWriter.CloseStream = false;
        PdfDoc.Close();
        _pdfRptData.PdfStreamOutput.Position = 0;

        // copy the buffered pdf out of the memory stream and send it
        MemoryStream buffer = (MemoryStream)_pdfRptData.PdfStreamOutput;
        byte[] pdfBytes = buffer.ToArray();
        SoftHttpContext.FlushInBrowser(_pdfRptData.FileName, pdfBytes, FlushType);
    }
}