Пример #1
0
        static void Main(string[] args)
        {
            // Demonstrates the two PDF-to-HTML modes offered by PDF Focus .Net: Fixed and Flowing.
            //
            // HTML-Fixed (default) is meant for rendering: it repeats the PDF layout,
            // including the page structure. The markup is complex, with many tags
            // positioned by (x, y) coordinates.
            //
            // HTML-Flowing is meant for further processing by a person (editing, combining).
            // The markup is much simpler and has a flowing structure, but the result
            // does not look exactly like the input PDF pixel for pixel.

            string sourcePdf   = @"..\..\License.pdf";
            string fixedHtml   = "Fixed.html";
            string flowingHtml = "Flowing.html";

            // One converter instance produces both outputs.
            SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();

            // After purchasing the license, insert your serial number here to activate the component:
            //converter.Serial = "XXXXXXXXXXX";

            // Image storage: inside the HTML document as base64 images (true)
            // or as linked separate image files (false).
            converter.HtmlOptions.IncludeImageInHtml = true;

            converter.OpenPdf(sourcePdf);

            // Nothing to convert when no pages were loaded.
            if (converter.PageCount <= 0)
            {
                return;
            }

            // --- HTML-Fixed mode ---
            converter.HtmlOptions.Title      = "Fixed";
            converter.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Fixed;

            int fixedStatus = converter.ToHtml(fixedHtml);
            if (fixedStatus == 0)
            {
                // Hand the file to the OS shell so it opens in the associated application.
                System.Diagnostics.ProcessStartInfo fixedLaunch = new System.Diagnostics.ProcessStartInfo(fixedHtml);
                fixedLaunch.UseShellExecute = true;
                System.Diagnostics.Process.Start(fixedLaunch);
            }

            // --- HTML-Flowing mode ---
            converter.HtmlOptions.Title      = "Flowing";
            converter.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Flowing;

            // Character scaling and spacing are switched off to prevent extra tags
            // that would split the text into parts.
            converter.HtmlOptions.KeepCharScaleAndSpacing = false;

            int flowingStatus = converter.ToHtml(flowingHtml);
            if (flowingStatus == 0)
            {
                System.Diagnostics.ProcessStartInfo flowingLaunch = new System.Diagnostics.ProcessStartInfo(flowingHtml);
                flowingLaunch.UseShellExecute = true;
                System.Diagnostics.Process.Start(flowingLaunch);
            }
        }
Пример #2
0
        static void Main(string[] args)
        {
            // Converts up to the first three pages of a PDF to HTML, forcing a single font,
            // and opens the result in the application associated with .htm files.
            string pdfFile  = @"..\..\..\..\..\Text.pdf";
            string htmlFile = Path.ChangeExtension(pdfFile, ".htm");

            // Convert PDF file to HTML file
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // Let's change all text to Verdana 8pt.
            f.HtmlOptions.SingleFontFamily = "Verdana";
            f.HtmlOptions.SingleFontSize   = 8;

            // After purchasing the license, please insert your serial number here to activate the component:
            //f.Serial = "123456789";

            f.OpenPdf(pdfFile);

            if (f.PageCount > 0)
            {
                // Page range passed to ToHtml: page 1 through page 3, or through the
                // last page when the document has fewer than three pages.
                int from = 1;
                int to   = System.Math.Min(3, f.PageCount);

                int result = f.ToHtml(htmlFile, from, to);

                // Show resulted HTML document in a browser.
                if (result == 0)
                {
                    // UseShellExecute must be set explicitly: it defaults to false on
                    // .NET Core / .NET 5+, where starting a document path directly fails.
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlFile)
                    {
                        UseShellExecute = true
                    });
                }
            }
        }
        static void Main(string[] args)
        {
            // Converts a whole PDF to a single HTML file with embedded images and
            // opens the result in the application associated with .htm files.
            string pathToPdf  = @"d:\Tempos\table.pdf";
            string pathToHtml = Path.ChangeExtension(pathToPdf, ".htm");

            // Convert PDF file to HTML file
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            // You may download the latest version of SDK here:
            // www.sautinsoft.com/products/pdf-focus/download.php

            // Let's force the component to store images inside HTML document
            // using base-64 encoding
            f.HtmlOptions.IncludeImageInHtml = true;
            f.HtmlOptions.Title = "Simple text";

            // After purchasing the license, please insert your serial number here to activate the component:
            //f.Serial = "123456789";

            f.OpenPdf(pathToPdf);

            if (f.PageCount > 0)
            {
                int result = f.ToHtml(pathToHtml);

                // Show HTML document in browser
                if (result == 0)
                {
                    // UseShellExecute must be set explicitly: it defaults to false on
                    // .NET Core / .NET 5+, where starting a document path directly fails.
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(pathToHtml)
                    {
                        UseShellExecute = true
                    });
                }
            }
        }
Пример #4
0
        static void Main(string[] args)
        {
            // Converts a PDF to HTML with images stored as separate PNG files in a
            // "<pdf name>_images" sub-folder next to the PDF, then opens the result.
            string pdfFile  = @"..\..\..\..\..\simple text.pdf";
            string htmlFile = Path.ChangeExtension(pdfFile, ".htm");

            // Convert PDF file to HTML file
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // Path (must exist) to a directory to store images after converting. See also the property "ImageSubFolder".
            f.HtmlOptions.ImageFolder = Path.GetDirectoryName(pdfFile);
            // A folder (will be created by the component) without any drive letters, only the folder as "myfolder".
            f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));
            // We recommend using the PNG type for storing images.
            f.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Png;
            // How to store images: Inside HTML document as base64 images or as linked separate image files.
            f.HtmlOptions.IncludeImageInHtml = false;
            // Set <title>...</title>
            f.HtmlOptions.Title = "Simple text";

            // After purchasing the license, please insert your serial number here to activate the component:
            //f.Serial = "123456789";

            f.OpenPdf(pdfFile);

            if (f.PageCount > 0)
            {
                int result = f.ToHtml(htmlFile);

                // Show resulted HTML document in a browser.
                if (result == 0)
                {
                    // UseShellExecute must be set explicitly: it defaults to false on
                    // .NET Core / .NET 5+, where starting a document path directly fails.
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlFile)
                    {
                        UseShellExecute = true
                    });
                }
            }
        }
Пример #5
0
        static void Main(string[] args)
        {
            // Here you will find various ways to store images.
            // Way 3 is active; the other ways are kept as commented-out alternatives.
            string pdfFile  = @"..\..\..\..\..\simple text.pdf";
            string htmlFile = Path.ChangeExtension(pdfFile, ".htm");

            // Convert PDF file to HTML file
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // Way 1: Images will be stored as physical PNG files in: ImageFolder + ImageSubFolder.
            // For example: "d:\" + "special folder" = "d:\special folder\"

            /*
             * f.HtmlOptions.ImageFolder = @"d:\";
             * f.HtmlOptions.ImageSubFolder = "special folder";
             * f.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Png;
             */

            // Way 2: Images will be stored as PNG files in the same directory with HTML file.
            // All images on each page will be combined in a single image.

            /*
             * f.HtmlOptions.ImageFolder = Path.GetDirectoryName(pdfFile);
             * f.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Png;
             * f.HtmlOptions.ImageSubFolder = "";
             * f.HtmlOptions.CombineImages = true;
             */

            // Way 3: Images will be stored as Jpeg files in a special folder "my images".
            // Images will have name "picture100.jpg", "picture101.jpg" .. "pictureN.jpg".

            // Let's set the quality for jpeg images to 95 percent.
            f.HtmlOptions.ImageFolder    = Path.GetDirectoryName(pdfFile);
            f.HtmlOptions.ImageType      = PdfFocus.CHtmlOptions.eHtmlImageType.Jpeg;
            f.HtmlOptions.JpegQuality    = 95;
            f.HtmlOptions.ImageSubFolder = "my images";
            f.HtmlOptions.ImageFileName  = "picture";
            f.HtmlOptions.ImageNumStart  = 100;
            f.HtmlOptions.CombineImages  = false;

            // Way 4: Images will be stored inside HTML document as base64 images.

            /*
             * f.HtmlOptions.IncludeImageInHtml = true;
             */

            f.OpenPdf(pdfFile);

            if (f.PageCount > 0)
            {
                int result = f.ToHtml(htmlFile);

                // Show resulted HTML document in a browser.
                if (result == 0)
                {
                    // UseShellExecute must be set explicitly: it defaults to false on
                    // .NET Core / .NET 5+, where starting a document path directly fails.
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlFile)
                    {
                        UseShellExecute = true
                    });
                }
            }
        }
        private void btnPDF2Html_Click(object sender, EventArgs e) // Click handler: converts a PDF file to HTML
        {
            // Create the SautinSoft converter used for the PDF-to-HTML conversion.
            SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();
            converter.OpenPdf(@"C:\Users\施宗佑\OneDrive\桌面\期末報告 - 物件導向程式設計\測試用文檔\Graph.pdf");

            // Nothing to convert when no pages were loaded.
            if (converter.PageCount <= 0)
            {
                return;
            }

            // NOTE(review): the status code returned by ToHtml is not inspected,
            // so a failed conversion is silent — confirm whether that is intended.
            converter.ToHtml(@"C:\Users\施宗佑\OneDrive\桌面\期末報告 - 物件導向程式設計\NewGraph.html");
        }
Пример #7
0
        /// <summary>
        /// Converts a PDF to DOCX ("Result.docx") and HTML ("Result.html") with an OCR
        /// callback attached, opening each successfully produced file.
        /// </summary>
        /// <param name="pdfPath">Path of the PDF file to convert.</param>
        public void ConvertPdfToAllWithOCR(string pdfPath)
        {
            // To perform OCR we'll use free OCR library by Nicomsoft.
            // https://www.nicomsoft.com/products/ocr/download/
            // The library is freeware and can be used in commercial application.
            // Also you have to insert this key:  AB2A4DD5FF2A.
            NsOCR = new NSOCRLib.NSOCRClass();
            NsOCR.Engine_SetLicenseKey("AB2A4DD5FF2A"); //required for licensed version only
            NsOCR.Engine_InitializeAdvanced(out CfgObj, out OcrObj, out ImgObj);

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            // Register the OCR callback and request OCR in AllImages mode.
            f.OCROptions.Method += PerformOCRNicomsoft;
            f.OCROptions.Mode    = PdfFocus.COCROptions.eOCRMode.AllImages;
            f.WordOptions.KeepCharScaleAndSpacing = false;

            string pdfFile = pdfPath;
            string outFile = String.Empty;

            f.OpenPdf(pdfFile);
            if (f.PageCount > 0)
            {
                // To Docx.
                outFile = "Result.docx";
                f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
                if (f.ToWord(outFile) == 0)
                {
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile)
                    {
                        UseShellExecute = true
                    });
                }

                // To HTML.
                outFile = "Result.html";
                f.HtmlOptions.KeepCharScaleAndSpacing = false;
                if (f.ToHtml(outFile) == 0)
                {
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile)
                    {
                        UseShellExecute = true
                    });
                }
            }
            else
            {
                // Guard against a missing exception object: a page count of zero does not
                // by itself show that an exception was recorded, and dereferencing
                // a null Exception here would replace the error report with a crash.
                string reason = (f.Exception != null)
                    ? f.Exception.Message
                    : "the PDF could not be opened or contains no pages";
                Console.WriteLine("Error: {0}!", reason);
                Console.ReadLine();
            }
        }
Пример #8
0
        static void Main(string[] args)
        {
            // Converts a PDF to "Result.html" with images stored as linked files in a
            // "<pdf name>_images" sub-folder, then opens the result.
            string sourcePdf  = @"..\..\simple text.pdf";
            string targetHtml = "Result.html";

            SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();

            // After purchasing the license, insert your serial number here to activate the component:
            //converter.Serial = "XXXXXXXXXXX";

            // Directory (must exist) that receives the images after converting; see also "ImageSubFolder".
            // NOTE(review): targetHtml has no directory part, so GetDirectoryName returns an
            // empty string here — confirm the component treats that as the current directory.
            string imageRoot = Path.GetDirectoryName(targetHtml);
            converter.HtmlOptions.ImageFolder = imageRoot;

            // Sub-folder (created by the component); a bare folder name without drive letters, e.g. "myfolder".
            string pdfBaseName = Path.GetFileNameWithoutExtension(sourcePdf);
            converter.HtmlOptions.ImageSubFolder = String.Format("{0}_images", pdfBaseName);

            // Auto - the same image format as in the source PDF;
            // 'Jpeg' makes the document smaller;
            // 'PNG' keeps the highest quality, but also the largest size.
            converter.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;

            // Image storage: inside the HTML document as base64 images (true)
            // or as linked separate image files (false).
            converter.HtmlOptions.IncludeImageInHtml = false;

            // Text for <title>...</title>.
            string pdfName = Path.GetFileName(sourcePdf);
            converter.HtmlOptions.Title = String.Format("This HTML was converted from {0}.", pdfName);

            converter.OpenPdf(sourcePdf);

            // Nothing to convert when no pages were loaded.
            if (converter.PageCount <= 0)
            {
                return;
            }

            // Open the result for demonstration purposes, but only on success (status 0).
            if (converter.ToHtml(targetHtml) == 0)
            {
                System.Diagnostics.ProcessStartInfo launch = new System.Diagnostics.ProcessStartInfo(targetHtml);
                launch.UseShellExecute = true;
                System.Diagnostics.Process.Start(launch);
            }
        }
Пример #9
0
        /// <summary>
        /// Converts a single page of a PDF to HTML and reports the outcome on the console.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> (passed as <see cref="object"/>) supplying the PDF path,
        /// the HTML output path and the page number to convert.
        /// </param>
        public static void ConvertToHtml(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            string htmlFile = targum.HtmlFile;

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            f.EmbeddedImagesFormat           = PdfFocus.eImageFormat.Auto;
            f.HtmlOptions.IncludeImageInHtml = false;
            f.HtmlOptions.ImageSubFolder     = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));
            f.HtmlOptions.Title         = String.Format("This document was produced from {0}.", Path.GetFileName(pdfFile));
            f.HtmlOptions.ImageFileName = "picture";

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                // Pages are 1-based and the last page (page == PageCount) is valid,
                // so only numbers outside [1, PageCount] fall back to the first page.
                if (page < 1 || page > f.PageCount)
                {
                    page = 1;
                }

                // Same page as both start and end of the range: convert exactly one page.
                done = f.ToHtml(htmlFile, page, page) == 0;

                f.ClosePdf();
            }

            // Runtime messages are unchanged; only the format string is chosen by outcome.
            Console.WriteLine(done ? "{0}\t - Done!" : "{0}\t - Error!", Path.GetFileName(pdfFile));
        }
Пример #10
0
        /// <summary>
        /// Converts a single page of a PDF to an HTML file placed next to the PDF
        /// (same name, ".html" extension) and reports the outcome on the console.
        /// </summary>
        /// <param name="targ">
        /// A <c>TArgument</c> (passed as <see cref="object"/>) supplying the PDF path
        /// and the page number to convert.
        /// </param>
        public static void ConvertToHtml(object targ)
        {
            TArgument targum  = (TArgument)targ;
            string    pdfFile = targum.PdfFile;
            int       page    = targum.PageNumber;

            string htmlFile = Path.ChangeExtension(pdfFile, ".html");

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            f.HtmlOptions.ImageType          = PdfFocus.CHtmlOptions.eHtmlImageType.Png;
            f.HtmlOptions.IncludeImageInHtml = false;
            f.HtmlOptions.ImageSubFolder     = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));
            f.HtmlOptions.ImageFileName      = "pict";

            f.OpenPdf(pdfFile);

            bool done = false;

            if (f.PageCount > 0)
            {
                // Pages are 1-based and the last page (page == PageCount) is valid,
                // so only numbers outside [1, PageCount] fall back to the first page.
                if (page < 1 || page > f.PageCount)
                {
                    page = 1;
                }

                // Same page as both start and end of the range: convert exactly one page.
                done = f.ToHtml(htmlFile, page, page) == 0;

                f.ClosePdf();
            }

            // Runtime messages are unchanged; only the format string is chosen by outcome.
            Console.WriteLine(done ? "{0}\t - Done!" : "{0}\t - Error!", Path.GetFileName(pdfFile));
        }
Пример #11
0
        /// <summary>
        /// Converts a sample PDF to DOCX, RTF, Excel (XLS), HTML, XML, PNG, multipage TIFF and text.
        /// Each output is written next to the PDF (same name, different extension) and,
        /// when its conversion returns 0, opened through the OS shell.
        /// </summary>
        public static void ConvertPdfToAll()
        {
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            string pdfFile = @"..\..\..\..\simple text.pdf";
            string outFile = String.Empty;

            f.OpenPdf(pdfFile);
            if (f.PageCount > 0)
            {
                // To Docx.
                outFile = Path.ChangeExtension(pdfFile, ".docx");
                f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
                if (f.ToWord(outFile) == 0)
                {
                    OpenConvertedFile(outFile);
                }

                // To Rtf.
                outFile = Path.ChangeExtension(pdfFile, ".rtf");
                f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Rtf;
                if (f.ToWord(outFile) == 0)
                {
                    OpenConvertedFile(outFile);
                }

                // To Excel.
                outFile = Path.ChangeExtension(pdfFile, ".xls");
                f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;
                if (f.ToExcel(outFile) == 0)
                {
                    OpenConvertedFile(outFile);
                }

                // To HTML.
                // (The Excel option that used to be re-assigned here was a copy-paste
                // leftover; it is already set above, so dropping it changes nothing.)
                outFile = Path.ChangeExtension(pdfFile, ".html");
                if (f.ToHtml(outFile) == 0)
                {
                    OpenConvertedFile(outFile);
                }

                // To XML.
                outFile = Path.ChangeExtension(pdfFile, ".xml");
                f.XmlOptions.ConvertNonTabularDataToSpreadsheet = true;
                if (f.ToXml(outFile) == 0)
                {
                    OpenConvertedFile(outFile);
                }

                // To Image (PNG at 300 dpi).
                // NOTE(review): the second ToImage argument is presumably the page number — confirm.
                outFile                    = Path.ChangeExtension(pdfFile, ".png");
                f.ImageOptions.Dpi         = 300;
                f.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Png;
                if (f.ToImage(outFile, 1) == 0)
                {
                    OpenConvertedFile(outFile);
                }

                // To Multipage Tiff (Black & White).
                outFile = Path.ChangeExtension(pdfFile, ".tiff");
                f.ImageOptions.ColorDepth = PdfFocus.CImageOptions.eColorDepth.BlackWhite1bpp;
                if (f.ToMultipageTiff(outFile, System.Drawing.Imaging.EncoderValue.CompressionCCITT4) == 0)
                {
                    OpenConvertedFile(outFile);
                }

                // To Text.
                outFile = Path.ChangeExtension(pdfFile, ".txt");
                if (f.ToText(outFile) == 0)
                {
                    OpenConvertedFile(outFile);
                }
            }
            else
            {
                // Guard against a missing exception object: a page count of zero does not
                // by itself show that an exception was recorded, and dereferencing
                // a null Exception here would replace the error report with a crash.
                string reason = (f.Exception != null)
                    ? f.Exception.Message
                    : "the PDF could not be opened or contains no pages";
                Console.WriteLine("Error: {0}!", reason);
                Console.ReadLine();
            }
        }

        /// <summary>
        /// Starts a process for <paramref name="path"/> with <c>UseShellExecute = true</c>,
        /// i.e. hands the file to the OS shell instead of treating it as an executable.
        /// </summary>
        /// <param name="path">Path of the converted file to open.</param>
        private static void OpenConvertedFile(string path)
        {
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(path)
            {
                UseShellExecute = true
            });
        }
Пример #12
0
        static void Main(string[] args)
        {
            // Shows how to keep images in the resulting HTML document.
            // Way 2 is active; ways 1 and 3 are kept as commented-out alternatives.
            string sourcePdf  = @"..\..\simple text.pdf";
            string targetHtml = "Result.html";

            SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();

            // After purchasing the license, insert your serial number here to activate the component:
            //converter.Serial = "XXXXXXXXXXX";

            // Way 1 (default): images are stored inside the HTML document as base64 jpeg images.

            /*
             * converter.HtmlOptions.IncludeImageInHtml = true;
             * // Auto - the same image format as in the source PDF;
             * // 'Jpeg' makes the document smaller;
             * // 'PNG' keeps the highest quality, but also the largest size.
             * converter.EmbeddedImagesFormat = PdfFocus.eImageFormat.Jpeg;
             */

            // Way 2: images are stored as JPG files in a special folder "{pdf name}_images".
            // Images are named "picture100.jpg", "picture101.jpg" .. "pictureN.jpg",
            // with the jpeg quality set to 95 percent.
            string imageRoot = Path.GetDirectoryName(targetHtml);
            converter.HtmlOptions.ImageFolder = imageRoot;

            // Auto - the same image format as in the source PDF;
            // 'Jpeg' makes the document smaller;
            // 'PNG' keeps the highest quality, but also the largest size.
            converter.EmbeddedImagesFormat = PdfFocus.eImageFormat.Jpeg;

            converter.EmbeddedJpegQuality = 95;

            string pdfBaseName = Path.GetFileNameWithoutExtension(sourcePdf);
            converter.HtmlOptions.ImageSubFolder     = String.Format("{0}_images", pdfBaseName);
            converter.HtmlOptions.ImageFileName      = "picture";
            converter.HtmlOptions.ImageNumStart      = 100;
            converter.HtmlOptions.IncludeImageInHtml = false;

            // Way 3: images are stored as PNG files in the same directory as the HTML file.
            // All images on each page are combined into a single image.

            /*
             * converter.HtmlOptions.ImageFolder = Path.GetDirectoryName(targetHtml);
             * // 'Jpeg' makes the document smaller; 'PNG' keeps the highest quality.
             * converter.EmbeddedImagesFormat = PdfFocus.eImageFormat.Png;
             * converter.HtmlOptions.ImageSubFolder = "";
             * converter.HtmlOptions.IncludeImageInHtml = false;
             */

            converter.OpenPdf(sourcePdf);

            // Nothing to convert when no pages were loaded.
            if (converter.PageCount <= 0)
            {
                return;
            }

            // Open the result for demonstration purposes, but only on success (status 0).
            if (converter.ToHtml(targetHtml) == 0)
            {
                System.Diagnostics.ProcessStartInfo launch = new System.Diagnostics.ProcessStartInfo(targetHtml);
                launch.UseShellExecute = true;
                System.Diagnostics.Process.Start(launch);
            }
        }