/// <summary>
/// Opens a PDF document, rebuilds its tag structure, runs <c>RemoveParagraph</c> on
/// every top-level structure element, passes every page to
/// <c>MarkUntaggedObjectsAsArtifact</c> and saves the result.
/// </summary>
/// <param name="openPath">Path of the source PDF document.</param>
/// <param name="savePath">Path the modified PDF document is saved to.</param>
/// <exception cref="Exception">
/// Thrown when the document cannot be opened, re-tagged, read or saved. The message
/// carries the PDFix error text or error type reported by the engine.
/// </exception>
public static void Run(
    String openPath,                            // source PDF document
    String savePath                             // dest PDF document
    )
{
    Pdfix pdfix = PdfixEngine.Instance;

    PdfDoc doc = pdfix.OpenDoc(openPath, "");

    if (doc == null)
    {
        throw new Exception(pdfix.GetError());
    }

    // From here on the document is open; the finally block guarantees it is closed
    // even when one of the steps below throws.
    try
    {
        // cleanup any previous structure tree
        if (!doc.RemoveTags(null, null))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // autotag document first
        if (!doc.AddTags(null, null))
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // get the struct tree
        PdsStructTree struct_tree = doc.GetStructTree();

        if (struct_tree == null)
        {
            throw new Exception(pdfix.GetErrorType().ToString());
        }

        // run RemoveParagraph on each top-level structure element
        // NOTE(review): RemoveParagraph is defined elsewhere; presumably it detaches
        // paragraph elements so their content becomes untagged - confirm.
        for (int i = 0; i < struct_tree.GetNumChildren(); i++)
        {
            PdsObject        kid_obj  = struct_tree.GetChildObject(i);
            PdsStructElement kid_elem = struct_tree.GetStructElementFromObject(kid_obj);
            RemoveParagraph(kid_elem);
        }

        // the struct tree was updated, process the content of each page to apply changes
        for (int i = 0; i < doc.GetNumPages(); i++)
        {
            PdfPage page = doc.AcquirePage(i);

            if (page == null)
            {
                // fail with the engine's error text instead of a NullReferenceException
                throw new Exception(pdfix.GetError());
            }

            try
            {
                MarkUntaggedObjectsAsArtifact(page);
            }
            finally
            {
                // every acquired page is released, even if processing failed
                page.Release();
            }
        }

        if (!doc.Save(savePath, Pdfix.kSaveFull))
        {
            throw new Exception(pdfix.GetError());
        }
    }
    finally
    {
        doc.Close();
    }
}
 /// <summary>
 /// Applies the element-level <c>MoveParagraphToParent</c> overload to the first child
 /// of the structure tree root and returns its result.
 /// </summary>
 /// <param name="struct_tree">Structure tree whose first child is processed.</param>
 /// <returns>
 /// The result of the element-level overload for the first child, or <c>false</c> when
 /// the tree has no children.
 /// </returns>
 /// <remarks>
 /// NOTE(review): only the first child is ever visited. Confirm whether the remaining
 /// children were meant to be processed as well.
 /// </remarks>
 private static bool MoveParagraphToParent(PdsStructTree struct_tree)
 {
     // an empty tree has nothing to process
     if (struct_tree.GetNumChildren() <= 0)
     {
         return false;
     }

     PdsObject        first_obj  = struct_tree.GetChildObject(0);
     PdsStructElement first_elem = struct_tree.GetStructElementFromObject(first_obj);

     return MoveParagraphToParent(first_elem);
 }
 /// <summary>
 /// Walks the direct children of the structure tree root in order and returns the first
 /// non-null result produced by the element-level <c>GetFirstTable</c> overload.
 /// </summary>
 /// <param name="struct_tree">Structure tree whose direct children are searched.</param>
 /// <returns>
 /// The first non-null result of the element-level overload, or <c>null</c> when no
 /// child yields one (including when the tree has no children).
 /// </returns>
 private static PdsStructElement GetFirstTable(PdsStructTree struct_tree)
 {
     int index = 0;

     // the child count is re-read on every pass, exactly like a for-loop condition
     while (index < struct_tree.GetNumChildren())
     {
         PdsObject        child_obj  = struct_tree.GetChildObject(index);
         PdsStructElement child_elem = struct_tree.GetStructElementFromObject(child_obj);

         var found = GetFirstTable(child_elem);
         if (found != null)
         {
             // stop at the first hit; later children are not examined
             return found;
         }

         index++;
     }

     return null;
 }
Example #4
0
        /// <summary>
        /// Opens a PDF document, rebuilds its tag structure and passes every top-level
        /// structure element to <c>ProcessStructElement</c>. Writes
        /// "No Tags available" to the console when the document has no structure tree.
        /// </summary>
        /// <param name="openPath">Path of the source PDF document.</param>
        /// <exception cref="Exception">
        /// Thrown when the document cannot be opened or re-tagged. The message carries
        /// the PDFix error text or error type reported by the engine.
        /// </exception>
        public static void Run(
            String openPath                             // source PDF document
            )
        {
            Pdfix pdfix = PdfixEngine.Instance;

            PdfDoc doc = pdfix.OpenDoc(openPath, "");

            if (doc == null)
            {
                throw new Exception(pdfix.GetError());
            }

            // From here on the document is open; the finally block guarantees it is
            // closed even when one of the steps below throws.
            try
            {
                // cleanup any previous structure tree
                if (!doc.RemoveTags(null, null))
                {
                    throw new Exception(pdfix.GetErrorType().ToString());
                }

                // autotag document first
                if (!doc.AddTags(null, null))
                {
                    throw new Exception(pdfix.GetErrorType().ToString());
                }

                // read document structure tree
                PdsStructTree struct_tree = doc.GetStructTree();

                if (struct_tree == null)
                {
                    Console.WriteLine("No Tags available");
                    return; // the finally block still closes the document
                }

                // hand each top-level element to ProcessStructElement with an empty
                // string as the third argument
                for (var i = 0; i < struct_tree.GetNumChildren(); i++)
                {
                    PdsObject        kid_object  = struct_tree.GetChildObject(i);
                    PdsStructElement struct_elem = struct_tree.GetStructElementFromObject(kid_object);
                    ProcessStructElement(doc, struct_elem, "");
                }
            }
            finally
            {
                doc.Close();
            }
        }