/// <summary>
/// Re-tags a PDF document and saves the result. Existing tags are removed, the
/// document is auto-tagged, <c>RemoveParagraph</c> is applied to every top-level
/// child of the structure tree, <c>MarkUntaggedObjectsAsArtifact</c> is applied to
/// every page, and the document is then saved with <c>Pdfix.kSaveFull</c>.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path of the destination PDF document.</param>
/// <exception cref="Exception">
/// Thrown when the document cannot be opened, tags cannot be removed or added,
/// the structure tree is unavailable, a page cannot be acquired, or saving fails.
/// </exception>
public static void Run(
    String openPath,    // source PDF document
    String savePath     // dest PDF document
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    // Everything after a successful open runs inside try/finally so the
    // document is closed on every exit path, including the throws below.
    try
    {
        // cleanup any previous structure tree
        if (!doc.RemoveTags(null, null))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // autotag document first
        if (!doc.AddTags(null, null))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // get the struct tree
        PdsStructTree struct_tree = doc.GetStructTree();
        if (struct_tree == null)
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // tag text on the bottom of the page as artifact
        // NOTE(review): the actual selection logic lives in RemoveParagraph,
        // which is defined elsewhere - confirm it matches this description.
        for (int i = 0; i < struct_tree.GetNumChildren(); i++)
        {
            PdsObject kid_obj = struct_tree.GetChildObject(i);
            PdsStructElement kid_elem = struct_tree.GetStructElementFromObject(kid_obj);
            RemoveParagraph(kid_elem);
        }

        // the struct tree was updated, save page content on each page to apply changes
        for (int i = 0; i < doc.GetNumPages(); i++)
        {
            PdfPage page = doc.AcquirePage(i);
            if (page == null)
            {
                throw new Exception(pdfix.GetError());
            }

            // Release the page even if marking the artifacts throws.
            try
            {
                MarkUntaggedObjectsAsArtifact(page);
            }
            finally
            {
                page.Release();
            }
        }

        if (!doc.Save(savePath, Pdfix.kSaveFull))
        {
            throw new Exception(pdfix.GetError());
        }
    }
    finally
    {
        doc.Close();
    }
}
/// <summary>
/// Walks the top-level children of the structure tree and calls the
/// element-level <c>MoveParagraphToParent</c> overload on each one, stopping at
/// the first child for which that call returns <c>true</c>.
/// </summary>
/// <param name="struct_tree">Structure tree whose top-level children are scanned.</param>
/// <returns>
/// <c>true</c> as soon as one child call returns <c>true</c>; <c>false</c> when the
/// tree has no children or no child call succeeds.
/// </returns>
private static bool MoveParagraphToParent(PdsStructTree struct_tree)
{
    // Advance through every child. The loop previously had no increment and
    // returned after the first child, so later children were never examined.
    // NOTE(review): assumes the element-level overload leaves the tree's
    // top-level children untouched when it returns false - confirm.
    for (int i = 0; i < struct_tree.GetNumChildren(); i++)
    {
        PdsObject kid_obj = struct_tree.GetChildObject(i);
        PdsStructElement kid_elem = struct_tree.GetStructElementFromObject(kid_obj);
        if (kid_elem == null)
        {
            // Child object did not resolve to a structure element; nothing to search.
            continue;
        }

        if (MoveParagraphToParent(kid_elem))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Scans the top-level children of the structure tree in order and returns the
/// first non-null result produced by the element-level <c>GetFirstTable</c>
/// overload.
/// </summary>
/// <param name="struct_tree">Structure tree whose top-level children are scanned.</param>
/// <returns>
/// The first non-null element returned for a child (presumably a table element;
/// the matching rule is in the element-level overload), or <c>null</c> when no
/// child yields one.
/// </returns>
private static PdsStructElement GetFirstTable(PdsStructTree struct_tree)
{
    int index = 0;
    while (index < struct_tree.GetNumChildren())
    {
        PdsObject childObject = struct_tree.GetChildObject(index);
        PdsStructElement childElement = struct_tree.GetStructElementFromObject(childObject);

        // Delegate the search within this child to the element-level overload.
        var table = GetFirstTable(childElement);
        if (table != null)
        {
            return table;
        }

        index++;
    }

    return null;
}
/// <summary>
/// Opens a PDF document, removes any existing tags, auto-tags it, and passes
/// every top-level element of the resulting structure tree to
/// <c>ProcessStructElement</c>. Writes "No Tags available" to the console when
/// the document has no structure tree. The document is not saved.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <exception cref="Exception">
/// Thrown when the document cannot be opened or tags cannot be removed or added.
/// </exception>
public static void Run(
    String openPath     // source PDF document
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");
    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    // Close the document on every exit path, including the throws below and
    // any exception raised while processing the structure elements.
    try
    {
        // cleanup any previous structure tree
        if (!doc.RemoveTags(null, null))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // autotag document first
        if (!doc.AddTags(null, null))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // read document structure tree
        PdsStructTree struct_tree = doc.GetStructTree();
        if (struct_tree == null)
        {
            Console.WriteLine("No Tags available");
            return;
        }

        for (var i = 0; i < struct_tree.GetNumChildren(); i++)
        {
            PdsObject kid_object = struct_tree.GetChildObject(i);
            PdsStructElement struct_elem = struct_tree.GetStructElementFromObject(kid_object);
            // The third argument is passed as an empty string for top-level elements.
            ProcessStructElement(doc, struct_elem, "");
        }
    }
    finally
    {
        doc.Close();
    }
}