Пример #1
0
        /// <summary>
        /// Starts one thread per PDF found in the sample directory, each running
        /// <c>ConvertToText</c> on page 1 of its file, waits for every thread to finish,
        /// prints "Done." and then blocks on <see cref="Console.ReadLine"/>.
        /// </summary>
        public static void ConvertPdfToTextInThread()
        {
            string sourceDir = @"..\..\..\..\..\";

            string[] pdfPaths = Directory.GetFiles(sourceDir, "*.pdf");

            List <Thread> workers = new List <Thread>();

            foreach (string pdfPath in pdfPaths)
            {
                // Each worker gets its own argument object describing the file to convert.
                TArgument job = new TArgument()
                {
                    PdfFile    = pdfPath,
                    PageNumber = 1
                };

                Thread worker = new Thread(new ParameterizedThreadStart(ConvertToText));
                worker.Start(job);
                workers.Add(worker);
            }

            // Block until every conversion thread has completed.
            workers.ForEach(worker => worker.Join());

            Console.WriteLine("Done.");
            Console.ReadLine();
        }
Пример #2
0
        /// <summary>
        /// Converts a single page of a PDF file to an Excel workbook (.xls) and,
        /// on success, opens the workbook with the shell's default application.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> passed as <see cref="object"/>; its <c>PdfFile</c> and
        /// <c>PageNumber</c> members select the input file and the page to convert.
        /// </param>
        /// <remarks>
        /// The output name is the PDF's base name plus ".xls" with no directory part,
        /// so it resolves against the current working directory.
        /// </remarks>
        /// <exception cref="InvalidCastException"><paramref name="targ"/> is not a <c>TArgument</c>.</exception>
        public static void ConvertToExcel(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            string excelFile = Path.GetFileNameWithoutExtension(pdfFile) + ".xls";

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // 'true' = Convert all data to spreadsheet (tabular and even textual).
            // 'false' = Skip textual data and convert only tabular (tables) data.
            f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;

            // 'true'  = Preserve original page layout.
            // 'false' = Place tables before text.
            f.ExcelOptions.PreservePageLayout = true;

            // Start from the en-US culture but swap the separators:
            // ',' for decimals and '.' for digit groups.
            System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo("en-US");
            ci.NumberFormat.NumberDecimalSeparator = ",";
            ci.NumberFormat.NumberGroupSeparator   = ".";
            f.ExcelOptions.CultureInfo             = ci;

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                try
                {
                    // Page numbers are 1-based, so PageCount itself is the valid last page;
                    // fall back to the first page only when the request is past the end.
                    if (page > f.PageCount)
                    {
                        page = 1;
                    }

                    // A return value of 0 is treated as success.
                    done = f.ToExcel(excelFile, page, page) == 0;
                }
                finally
                {
                    // Release the document even if the conversion throws.
                    f.ClosePdf();
                }
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(excelFile)
                {
                    UseShellExecute = true
                });
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
Пример #3
0
        /// <summary>
        /// Converts a single page of a PDF file to an Excel workbook (.xls) written
        /// next to the source PDF, and reports the outcome on the console.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> passed as <see cref="object"/>; its <c>PdfFile</c> and
        /// <c>PageNumber</c> members select the input file and the page to convert.
        /// </param>
        /// <exception cref="InvalidCastException"><paramref name="targ"/> is not a <c>TArgument</c>.</exception>
        public static void ConvertToExcel(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            // Same path as the PDF, with the extension swapped to .xls.
            string excelFile = Path.ChangeExtension(pdfFile, ".xls");

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // 'true' = Convert all data to spreadsheet (tabular and even textual).
            // 'false' = Skip textual data and convert only tabular (tables) data.
            f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;

            // 'true'  = Preserve original page layout.
            // 'false' = Place tables before text.
            f.ExcelOptions.PreservePageLayout = true;

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                try
                {
                    // Page numbers are 1-based, so PageCount itself is the valid last page;
                    // fall back to the first page only when the request is past the end.
                    if (page > f.PageCount)
                    {
                        page = 1;
                    }

                    // A return value of 0 is treated as success.
                    done = f.ToExcel(excelFile, page, page) == 0;
                }
                finally
                {
                    // Release the document even if the conversion throws.
                    f.ClosePdf();
                }
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
Пример #4
0
            /// <summary>
            /// Creates a handler over a private array copy of <paramref name="tasks"/>.
            /// </summary>
            /// <param name="tasks">Tasks to copy; later changes to the collection are not reflected.</param>
            /// <param name="arg">Argument stored on the handler.</param>
            /// <param name="doneCallback">Callback stored on the handler.</param>
            /// <param name="overrideSignal">
            /// Signal to use; when <c>null</c>, a new <c>CancellableSignal</c> is created instead.
            /// </param>
            public Handler(ICollection <Task> tasks,
                           TArgument arg,
                           Action <Handler> doneCallback,
                           CancellableSignal overrideSignal)
            {
                // Copy the incoming collection so the handler owns its own array.
                Task[] snapshot = new Task[tasks.Count];
                tasks.CopyTo(snapshot, 0);

                this.tasks        = snapshot;
                this.arg          = arg;
                this.doneCallback = doneCallback;
                this.signal       = overrideSignal ?? new CancellableSignal();
            }
Пример #5
0
        /// <summary>
        /// Converts a single page of a PDF file to a Word document (.docx, flowing
        /// render mode) and, on success, opens it with the shell's default application.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> passed as <see cref="object"/>; its <c>PdfFile</c> and
        /// <c>PageNumber</c> members select the input file and the page to convert.
        /// </param>
        /// <remarks>
        /// The output name is the PDF's base name plus ".docx" with no directory part,
        /// so it resolves against the current working directory.
        /// </remarks>
        /// <exception cref="InvalidCastException"><paramref name="targ"/> is not a <c>TArgument</c>.</exception>
        public static void ConvertToWord(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            string docxFile = Path.GetFileNameWithoutExtension(pdfFile) + ".docx";

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            f.WordOptions.Format     = PdfFocus.CWordOptions.eWordDocument.Docx;
            f.WordOptions.RenderMode = PdfFocus.CWordOptions.eRenderMode.Flowing;

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                try
                {
                    // Page numbers are 1-based, so PageCount itself is the valid last page;
                    // fall back to the first page only when the request is past the end.
                    if (page > f.PageCount)
                    {
                        page = 1;
                    }

                    // A return value of 0 is treated as success.
                    done = f.ToWord(docxFile, page, page) == 0;
                }
                finally
                {
                    // Release the document even if the conversion throws.
                    f.ClosePdf();
                }
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(docxFile)
                {
                    UseShellExecute = true
                });
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
Пример #6
0
        /// <summary>
        /// Renders a single page of a PDF file to a PNG image at 300 DPI and,
        /// on success, opens the image with the shell's default application.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> passed as <see cref="object"/>; its <c>PdfFile</c> and
        /// <c>PageNumber</c> members select the input file and the page to render.
        /// </param>
        /// <remarks>
        /// The output name is the PDF's base name plus ".png" with no directory part,
        /// so it resolves against the current working directory.
        /// </remarks>
        /// <exception cref="InvalidCastException"><paramref name="targ"/> is not a <c>TArgument</c>.</exception>
        public static void ConvertToPng(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            string pngFile = Path.GetFileNameWithoutExtension(pdfFile) + ".png";

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            f.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Png;
            f.ImageOptions.Dpi         = 300;

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                try
                {
                    // Page numbers are 1-based, so PageCount itself is the valid last page;
                    // fall back to the first page only when the request is past the end.
                    if (page > f.PageCount)
                    {
                        page = 1;
                    }

                    // A return value of 0 is treated as success.
                    done = f.ToImage(pngFile, page) == 0;
                }
                finally
                {
                    // Release the document even if the rendering throws.
                    f.ClosePdf();
                }
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(pngFile)
                {
                    UseShellExecute = true
                });
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
Пример #7
0
        /// <summary>
        /// Converts a single page of a PDF file to XML and, on success, opens the
        /// result with the shell's default application.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> passed as <see cref="object"/>; its <c>PdfFile</c> and
        /// <c>PageNumber</c> members select the input file and the page to convert.
        /// </param>
        /// <remarks>
        /// The output name is the PDF's base name plus ".xml" with no directory part,
        /// so it resolves against the current working directory.
        /// </remarks>
        /// <exception cref="InvalidCastException"><paramref name="targ"/> is not a <c>TArgument</c>.</exception>
        public static void ConvertToXml(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            string xmlFile = Path.GetFileNameWithoutExtension(pdfFile) + ".xml";

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // Let's convert all data (textual and tabular) to XML.
            f.XmlOptions.ConvertNonTabularDataToSpreadsheet = true;

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                try
                {
                    // Page numbers are 1-based, so PageCount itself is the valid last page;
                    // fall back to the first page only when the request is past the end.
                    if (page > f.PageCount)
                    {
                        page = 1;
                    }

                    // A return value of 0 is treated as success.
                    done = f.ToXml(xmlFile, page, page) == 0;
                }
                finally
                {
                    // Release the document even if the conversion throws.
                    f.ClosePdf();
                }
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(xmlFile)
                {
                    UseShellExecute = true
                });
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
Пример #8
0
        /// <summary>
        /// Converts a single page of a PDF file to HTML at the path supplied by the
        /// caller and reports the outcome on the console.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> passed as <see cref="object"/>; its <c>PdfFile</c>,
        /// <c>PageNumber</c> and <c>HtmlFile</c> members select the input file, the page
        /// to convert and the output path.
        /// </param>
        /// <exception cref="InvalidCastException"><paramref name="targ"/> is not a <c>TArgument</c>.</exception>
        public static void ConvertToHtml(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            string htmlFile = targum.HtmlFile;

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // Images are not included in the HTML itself; the image sub-folder is
            // named "<pdf base name>_images" and the image file name is "picture".
            f.EmbeddedImagesFormat           = PdfFocus.eImageFormat.Auto;
            f.HtmlOptions.IncludeImageInHtml = false;
            f.HtmlOptions.ImageSubFolder     = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));
            f.HtmlOptions.Title         = String.Format("This document was produced from {0}.", Path.GetFileName(pdfFile));
            f.HtmlOptions.ImageFileName = "picture";

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                try
                {
                    // Page numbers are 1-based, so PageCount itself is the valid last page;
                    // fall back to the first page only when the request is past the end.
                    if (page > f.PageCount)
                    {
                        page = 1;
                    }

                    // A return value of 0 is treated as success.
                    done = f.ToHtml(htmlFile, page, page) == 0;
                }
                finally
                {
                    // Release the document even if the conversion throws.
                    f.ClosePdf();
                }
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
Пример #9
0
        /// <summary>
        /// Extracts the text of a single page of a PDF file into a .txt file and,
        /// on success, opens it with the shell's default application.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> passed as <see cref="object"/>; its <c>PdfFile</c> and
        /// <c>PageNumber</c> members select the input file and the page to convert.
        /// </param>
        /// <remarks>
        /// The output name is the PDF's base name plus ".txt" with no directory part,
        /// so it resolves against the current working directory.
        /// </remarks>
        /// <exception cref="InvalidCastException"><paramref name="targ"/> is not a <c>TArgument</c>.</exception>
        public static void ConvertToText(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            string textFile = Path.GetFileNameWithoutExtension(pdfFile) + ".txt";

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            f.WordOptions.ShowInvisibleText = true;

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                try
                {
                    // Page numbers are 1-based, so PageCount itself is the valid last page;
                    // fall back to the first page only when the request is past the end.
                    if (page > f.PageCount)
                    {
                        page = 1;
                    }

                    // A return value of 0 is treated as success.
                    done = f.ToText(textFile, page, page) == 0;
                }
                finally
                {
                    // Release the document even if the conversion throws.
                    f.ClosePdf();
                }
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(textFile)
                {
                    UseShellExecute = true
                });
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
Пример #10
0
        /// <summary>
        /// Converts a single page of a PDF file to an .html file written next to the
        /// source PDF and reports the outcome on the console.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> passed as <see cref="object"/>; its <c>PdfFile</c> and
        /// <c>PageNumber</c> members select the input file and the page to convert.
        /// </param>
        /// <exception cref="InvalidCastException"><paramref name="targ"/> is not a <c>TArgument</c>.</exception>
        public static void ConvertToHtml(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            // Same path as the PDF, with the extension swapped to .html.
            string htmlFile = Path.ChangeExtension(pdfFile, ".html");

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // PNG images, not included in the HTML itself; the image sub-folder is
            // named "<pdf base name>_images" and the image file name is "pict".
            f.HtmlOptions.ImageType          = PdfFocus.CHtmlOptions.eHtmlImageType.Png;
            f.HtmlOptions.IncludeImageInHtml = false;
            f.HtmlOptions.ImageSubFolder     = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));
            f.HtmlOptions.ImageFileName      = "pict";

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                try
                {
                    // Page numbers are 1-based, so PageCount itself is the valid last page;
                    // fall back to the first page only when the request is past the end.
                    if (page > f.PageCount)
                    {
                        page = 1;
                    }

                    // A return value of 0 is treated as success.
                    done = f.ToHtml(htmlFile, page, page) == 0;
                }
                finally
                {
                    // Release the document even if the conversion throws.
                    f.ClosePdf();
                }
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
Пример #11
0
        /// <summary>
        /// Extracts the text of a single page of a PDF file into a .txt file written
        /// next to the source PDF and reports the outcome on the console.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> passed as <see cref="object"/>; its <c>PdfFile</c> and
        /// <c>PageNumber</c> members select the input file and the page to convert.
        /// </param>
        /// <exception cref="InvalidCastException"><paramref name="targ"/> is not a <c>TArgument</c>.</exception>
        public static void ConvertToText(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            // Same path as the PDF, with the extension swapped to .txt.
            string textFile = Path.ChangeExtension(pdfFile, ".txt");

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            f.WordOptions.ShowInvisibleText = true;

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                try
                {
                    // Page numbers are 1-based, so PageCount itself is the valid last page;
                    // fall back to the first page only when the request is past the end.
                    if (page > f.PageCount)
                    {
                        page = 1;
                    }

                    // A return value of 0 is treated as success.
                    done = f.ToText(textFile, page, page) == 0;
                }
                finally
                {
                    // Release the document even if the conversion throws.
                    f.ClosePdf();
                }
            }

            if (done)
            {
                Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
            }
            else
            {
                Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
            }
        }
Пример #12
0
        /// <summary>
        /// Starts one thread per PDF found in the sample directory, each running
        /// <c>ConvertToHtml</c> on page 1 with its output directed into the
        /// "HTML results" folder, waits for every thread to finish, prints "Done!"
        /// and then opens the results folder through the shell.
        /// </summary>
        public static void ConvertPdfToHtmlInThread()
        {
            string sourceDir = @"..\..\";

            string[]      inputs    = Directory.GetFiles(sourceDir, "*.pdf");
            DirectoryInfo outputDir = new DirectoryInfo("HTML results");

            // Create the output folder on first use.
            if (!outputDir.Exists)
            {
                outputDir.Create();
            }

            List <Thread> workers = new List <Thread>();

            for (int i = 0; i < inputs.Length; i++)
            {
                string input      = inputs[i];
                string outputName = Path.GetFileNameWithoutExtension(input) + ".html";

                TArgument job = new TArgument()
                {
                    PdfFile    = input,
                    HtmlFile   = Path.Combine(outputDir.FullName, outputName),
                    PageNumber = 1
                };

                Thread worker = new Thread(new ParameterizedThreadStart(ConvertToHtml));
                worker.Start(job);
                workers.Add(worker);
            }

            // Block until every conversion thread has completed.
            workers.ForEach(worker => worker.Join());

            Console.WriteLine("Done!");

            // Open the result for demonstration purposes.
            System.Diagnostics.ProcessStartInfo openFolder = new System.Diagnostics.ProcessStartInfo(outputDir.FullName)
            {
                UseShellExecute = true
            };
            System.Diagnostics.Process.Start(openFolder);
        }