/// <summary>
/// Demonstrates the two PDF-to-HTML render modes of PDF Focus .Net (Fixed and Flowing)
/// by converting the same PDF once in each mode.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // HTML-Fixed (default) suits rendering: it repeats the PDF layout page by page,
    // at the price of complex markup with many tags positioned by (x, y) coordinates.
    // HTML-Flowing suits further processing by a human (editing, combining): the markup
    // is much simpler and flows naturally, but the result is not a pixel-by-pixel copy
    // of the input PDF.
    string sourcePdf = @"..\..\License.pdf";
    string fixedOutput = "Fixed.html";
    string flowingOutput = "Flowing.html";

    var converter = new SautinSoft.PdfFocus();

    // After purchasing a license, assign the serial number to converter.Serial here.

    // Keep images inside the HTML document as base64 instead of separate linked files.
    converter.HtmlOptions.IncludeImageInHtml = true;

    converter.OpenPdf(sourcePdf);
    if (converter.PageCount <= 0)
    {
        // Nothing was loaded, so there is nothing to convert.
        return;
    }

    // Pass 1: the HTML-Fixed mode.
    converter.HtmlOptions.Title = "Fixed";
    converter.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Fixed;
    int fixedStatus = converter.ToHtml(fixedOutput);
    if (fixedStatus == 0)
    {
        var showFixed = new System.Diagnostics.ProcessStartInfo(fixedOutput);
        showFixed.UseShellExecute = true;
        System.Diagnostics.Process.Start(showFixed);
    }

    // Pass 2: the HTML-Flowing mode.
    converter.HtmlOptions.Title = "Flowing";
    converter.HtmlOptions.RenderMode = PdfFocus.CHtmlOptions.eHtmlRenderMode.Flowing;

    // Character scaling and spacing are switched off so the text is not split
    // into parts by extra tags.
    converter.HtmlOptions.KeepCharScaleAndSpacing = false;

    int flowingStatus = converter.ToHtml(flowingOutput);
    if (flowingStatus == 0)
    {
        var showFlowing = new System.Diagnostics.ProcessStartInfo(flowingOutput);
        showFlowing.UseShellExecute = true;
        System.Diagnostics.Process.Start(showFlowing);
    }
}
/// <summary>
/// Converts at most the first three pages of a PDF to HTML, forcing all text to
/// Verdana 8pt, and opens the resulting file through the operating-system shell.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string pdfFile = @"..\..\..\..\..\Text.pdf";
    string htmlFile = Path.ChangeExtension(pdfFile, ".htm");

    // Convert PDF file to HTML file.
    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

    // Change all text to Verdana 8pt.
    f.HtmlOptions.SingleFontFamily = "Verdana";
    f.HtmlOptions.SingleFontSize = 8;

    // After purchasing a license, assign the serial number to f.Serial here.

    f.OpenPdf(pdfFile);

    if (f.PageCount > 0)
    {
        // Pages are addressed 1-based and inclusive; stop at page 3 or at the
        // last page, whichever comes first.
        int from = 1;
        int to = System.Math.Min(3, f.PageCount);

        int result = f.ToHtml(htmlFile, from, to);

        if (result == 0)
        {
            // Launch through the shell: on .NET Core / .NET 5+ UseShellExecute defaults
            // to false, and Process.Start on a bare document path throws instead of
            // opening the file with its associated application.
            System.Diagnostics.Process.Start(
                new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true });
        }
    }
}
/// <summary>
/// Converts a PDF to an HTML file with images embedded as base64 and opens the
/// result through the operating-system shell.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string pathToPdf = @"d:\Tempos\table.pdf";
    string pathToHtml = Path.ChangeExtension(pathToPdf, ".htm");

    // Convert PDF file to HTML file.
    // The latest version of the SDK can be downloaded from:
    // www.sautinsoft.com/products/pdf-focus/download.php
    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

    // Force the component to store images inside the HTML document
    // using base-64 encoding.
    f.HtmlOptions.IncludeImageInHtml = true;
    f.HtmlOptions.Title = "Simple text";

    // NOTE(review): the original sample carried a note here about a property needed
    // only for the registered version, but no such assignment was present.
    // Presumably it referred to the serial number (f.Serial) — confirm.

    f.OpenPdf(pathToPdf);

    if (f.PageCount > 0)
    {
        int result = f.ToHtml(pathToHtml);

        if (result == 0)
        {
            // Launch through the shell: on .NET Core / .NET 5+ UseShellExecute defaults
            // to false, and Process.Start on a bare document path throws instead of
            // opening the file with its associated application.
            System.Diagnostics.Process.Start(
                new System.Diagnostics.ProcessStartInfo(pathToHtml) { UseShellExecute = true });
        }
    }
}
/// <summary>
/// Converts a PDF to HTML, storing the images as separate PNG files in a
/// "{pdf name}_images" sub-folder, and opens the result through the
/// operating-system shell.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string pdfFile = @"..\..\..\..\..\simple text.pdf";
    string htmlFile = Path.ChangeExtension(pdfFile, ".htm");

    // Convert PDF file to HTML file.
    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

    // Directory (must exist) that receives the images after converting.
    // It works together with the "ImageSubFolder" property.
    f.HtmlOptions.ImageFolder = Path.GetDirectoryName(pdfFile);

    // Folder name only, without drive letters (e.g. "myfolder");
    // it will be created by the component.
    f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));

    // PNG is the recommended type for storing images.
    f.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Png;

    // false: images are linked separate files rather than base64 inside the HTML.
    f.HtmlOptions.IncludeImageInHtml = false;

    // Set <title>...</title>
    f.HtmlOptions.Title = "Simple text";

    // After purchasing a license, assign the serial number to f.Serial here.

    f.OpenPdf(pdfFile);

    if (f.PageCount > 0)
    {
        int result = f.ToHtml(htmlFile);

        if (result == 0)
        {
            // Launch through the shell: on .NET Core / .NET 5+ UseShellExecute defaults
            // to false, and Process.Start on a bare document path throws instead of
            // opening the file with its associated application.
            System.Diagnostics.Process.Start(
                new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true });
        }
    }
}
/// <summary>
/// Converts a PDF to HTML and shows the available ways of storing images;
/// the active configuration writes numbered JPEG files into a "my images" folder.
/// The result is opened through the operating-system shell.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string pdfFile = @"..\..\..\..\..\simple text.pdf";
    string htmlFile = Path.ChangeExtension(pdfFile, ".htm");

    // Convert PDF file to HTML file.
    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

    // Alternatives to the active configuration (Way 3) below:
    //   Way 1: physical PNG files in ImageFolder + ImageSubFolder.
    //          E.g. ImageFolder = @"d:\", ImageSubFolder = "special folder",
    //          ImageType = Png stores the images in "d:\special folder\".
    //   Way 2: PNG files in the same directory as the HTML file, with all images
    //          of each page combined into a single image:
    //          ImageFolder = Path.GetDirectoryName(pdfFile), ImageType = Png,
    //          ImageSubFolder = "", CombineImages = true.
    //   Way 4: images stored inside the HTML document as base64:
    //          IncludeImageInHtml = true.

    // Way 3: JPEG files in a special folder "my images", named
    // "picture100.jpg", "picture101.jpg" .. "pictureN.jpg", with JPEG quality 95 percent.
    f.HtmlOptions.ImageFolder = Path.GetDirectoryName(pdfFile);
    f.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Jpeg;
    f.HtmlOptions.JpegQuality = 95;
    f.HtmlOptions.ImageSubFolder = "my images";
    f.HtmlOptions.ImageFileName = "picture";
    f.HtmlOptions.ImageNumStart = 100;
    f.HtmlOptions.CombineImages = false;

    f.OpenPdf(pdfFile);

    if (f.PageCount > 0)
    {
        int result = f.ToHtml(htmlFile);

        if (result == 0)
        {
            // Launch through the shell: on .NET Core / .NET 5+ UseShellExecute defaults
            // to false, and Process.Start on a bare document path throws instead of
            // opening the file with its associated application.
            System.Diagnostics.Process.Start(
                new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true });
        }
    }
}
/// <summary>
/// Click handler for the PDF-to-HTML button: converts a fixed input PDF to a
/// fixed output HTML file using SautinSoft PdfFocus.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnPDF2Html_Click(object sender, EventArgs e)
{
    // Use the HTML conversion function provided by SautinSoft.
    SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();

    converter.OpenPdf(@"C:\Users\施宗佑\OneDrive\桌面\期末報告 - 物件導向程式設計\測試用文檔\Graph.pdf");

    if (converter.PageCount > 0)
    {
        try
        {
            // The status code returned by ToHtml is not inspected by this handler.
            converter.ToHtml(@"C:\Users\施宗佑\OneDrive\桌面\期末報告 - 物件導向程式設計\NewGraph.html");
        }
        finally
        {
            // Close the opened document once the conversion has been attempted,
            // even if ToHtml throws, so the handler does not leave it open.
            converter.ClosePdf();
        }
    }
}
/// <summary>
/// Converts a PDF with the help of an OCR engine and writes two outputs,
/// "Result.docx" and "Result.html"; each successfully written file is opened
/// through the operating-system shell.
/// </summary>
/// <param name="pdfPath">Path of the PDF document to convert.</param>
public void ConvertPdfToAllWithOCR(string pdfPath)
{
    // OCR is performed with the Nicomsoft OCR library:
    // https://www.nicomsoft.com/products/ocr/download/
    // According to the sample's notes the library is freeware, may be used in
    // commercial applications, and expects the key passed below.
    NsOCR = new NSOCRLib.NSOCRClass();
    NsOCR.Engine_SetLicenseKey("AB2A4DD5FF2A");
    NsOCR.Engine_InitializeAdvanced(out CfgObj, out OcrObj, out ImgObj);

    var converter = new SautinSoft.PdfFocus();

    // Route OCR requests to PerformOCRNicomsoft and apply OCR to all images.
    converter.OCROptions.Method += PerformOCRNicomsoft;
    converter.OCROptions.Mode = PdfFocus.COCROptions.eOCRMode.AllImages;
    converter.WordOptions.KeepCharScaleAndSpacing = false;

    converter.OpenPdf(pdfPath);

    if (converter.PageCount <= 0)
    {
        // No pages were loaded: report the component's exception and wait for a key.
        Console.WriteLine("Error: {0}!", converter.Exception.Message);
        Console.ReadLine();
        return;
    }

    // To Docx.
    string docxPath = "Result.docx";
    converter.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
    int docxStatus = converter.ToWord(docxPath);
    if (docxStatus == 0)
    {
        var showDocx = new System.Diagnostics.ProcessStartInfo(docxPath);
        showDocx.UseShellExecute = true;
        System.Diagnostics.Process.Start(showDocx);
    }

    // To HTML.
    string htmlPath = "Result.html";
    converter.HtmlOptions.KeepCharScaleAndSpacing = false;
    int htmlStatus = converter.ToHtml(htmlPath);
    if (htmlStatus == 0)
    {
        var showHtml = new System.Diagnostics.ProcessStartInfo(htmlPath);
        showHtml.UseShellExecute = true;
        System.Diagnostics.Process.Start(showHtml);
    }
}
/// <summary>
/// Converts a PDF to "Result.html", keeping images as separate linked files in a
/// "{pdf name}_images" sub-folder, and opens the result through the
/// operating-system shell.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string sourcePdf = @"..\..\simple text.pdf";
    string targetHtml = "Result.html";

    // Values derived from the two paths above.
    string imageDirectory = Path.GetDirectoryName(targetHtml);
    string imageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(sourcePdf));
    string pageTitle = String.Format("This HTML was converted from {0}.", Path.GetFileName(sourcePdf));

    var converter = new SautinSoft.PdfFocus();

    // After purchasing a license, assign the serial number to converter.Serial here.

    // Directory (must exist) that receives the images; used together with ImageSubFolder.
    converter.HtmlOptions.ImageFolder = imageDirectory;

    // Folder name only, without drive letters (e.g. "myfolder");
    // the component creates it.
    converter.HtmlOptions.ImageSubFolder = imageSubFolder;

    // Auto - the same image format as in the source PDF;
    // Jpeg - smaller document size;
    // PNG  - highest quality, but also the largest size.
    converter.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;

    // false: images are linked separate files rather than base64 inside the HTML.
    converter.HtmlOptions.IncludeImageInHtml = false;

    // Set <title>...</title>
    converter.HtmlOptions.Title = pageTitle;

    converter.OpenPdf(sourcePdf);
    if (converter.PageCount <= 0)
    {
        return;
    }

    int status = converter.ToHtml(targetHtml);
    if (status != 0)
    {
        return;
    }

    // Open the result for demonstration purposes.
    var viewer = new System.Diagnostics.ProcessStartInfo(targetHtml);
    viewer.UseShellExecute = true;
    System.Diagnostics.Process.Start(viewer);
}
/// <summary>
/// Converts a single page of a PDF to HTML and reports success or failure on the console.
/// </summary>
/// <param name="targ">
/// Must be a <c>TArgument</c>; its PdfFile, PageNumber and HtmlFile members supply
/// the input file, the page to convert and the output file.
/// </param>
public static void ConvertToHtml(object targ)
{
    TArgument targum = (TArgument)targ;
    string pdfFile = targum.PdfFile;
    int page = targum.PageNumber;
    string htmlFile = targum.HtmlFile;

    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

    f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;
    f.HtmlOptions.IncludeImageInHtml = false;
    f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));
    f.HtmlOptions.Title = String.Format("This document was produced from {0}.", Path.GetFileName(pdfFile));
    f.HtmlOptions.ImageFileName = "picture";

    f.OpenPdf(pdfFile);

    bool done = false;

    if (f.PageCount > 0)
    {
        // Page numbers are 1-based and inclusive, so PageCount itself is the valid
        // last page. Only values outside [1, PageCount] fall back to the first page.
        if (page < 1 || page > f.PageCount)
        {
            page = 1;
        }

        if (f.ToHtml(htmlFile, page, page) == 0)
        {
            done = true;
        }

        f.ClosePdf();
    }

    if (done)
    {
        Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
    }
    else
    {
        Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
    }
}
/// <summary>
/// Converts a single page of a PDF to an ".html" file next to the source PDF and
/// reports success or failure on the console.
/// </summary>
/// <param name="targ">
/// Must be a <c>TArgument</c>; its PdfFile and PageNumber members supply the input
/// file and the page to convert.
/// </param>
public static void ConvertToHtml(object targ)
{
    TArgument targum = (TArgument)targ;
    string pdfFile = targum.PdfFile;
    int page = targum.PageNumber;

    // The output path is the PDF path with its extension replaced by ".html".
    string htmlFile = Path.ChangeExtension(pdfFile, ".html");

    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

    f.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Png;
    f.HtmlOptions.IncludeImageInHtml = false;
    f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));
    f.HtmlOptions.ImageFileName = "pict";

    f.OpenPdf(pdfFile);

    bool done = false;

    if (f.PageCount > 0)
    {
        // Page numbers are 1-based and inclusive, so PageCount itself is the valid
        // last page. Only values outside [1, PageCount] fall back to the first page.
        if (page < 1 || page > f.PageCount)
        {
            page = 1;
        }

        if (f.ToHtml(htmlFile, page, page) == 0)
        {
            done = true;
        }

        f.ClosePdf();
    }

    if (done)
    {
        Console.WriteLine("{0}\t - Done!", Path.GetFileName(pdfFile));
    }
    else
    {
        Console.WriteLine("{0}\t - Error!", Path.GetFileName(pdfFile));
    }
}
/// <summary>
/// Converts PDF to DOCX, RTF, Excel (XLS), HTML, XML, PNG, Multipage TIFF and Text.
/// Every output is written next to the source PDF (same name, different extension)
/// and opened through the operating-system shell when its conversion returns 0.
/// If no pages could be loaded, the component's error message is printed instead.
/// </summary>
public static void ConvertPdfToAll()
{
    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

    string pdfFile = @"..\..\..\..\simple text.pdf";
    string outFile;

    f.OpenPdf(pdfFile);

    if (f.PageCount <= 0)
    {
        Console.WriteLine("Error: {0}!", f.Exception.Message);
        Console.ReadLine();
        return;
    }

    // To Docx.
    outFile = Path.ChangeExtension(pdfFile, ".docx");
    f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
    OpenIfConverted(f.ToWord(outFile), outFile);

    // To Rtf.
    outFile = Path.ChangeExtension(pdfFile, ".rtf");
    f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Rtf;
    OpenIfConverted(f.ToWord(outFile), outFile);

    // To Excel.
    outFile = Path.ChangeExtension(pdfFile, ".xls");
    f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;
    OpenIfConverted(f.ToExcel(outFile), outFile);

    // To HTML.
    // (The original repeated the ExcelOptions assignment here; it is already set
    // in the Excel step above, so the copy-pasted line is dropped.)
    outFile = Path.ChangeExtension(pdfFile, ".html");
    OpenIfConverted(f.ToHtml(outFile), outFile);

    // To XML.
    outFile = Path.ChangeExtension(pdfFile, ".xml");
    f.XmlOptions.ConvertNonTabularDataToSpreadsheet = true;
    OpenIfConverted(f.ToXml(outFile), outFile);

    // To Image: page 1 as a 300 dpi PNG.
    outFile = Path.ChangeExtension(pdfFile, ".png");
    f.ImageOptions.Dpi = 300;
    f.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Png;
    OpenIfConverted(f.ToImage(outFile, 1), outFile);

    // To Multipage Tiff (Black & White).
    outFile = Path.ChangeExtension(pdfFile, ".tiff");
    f.ImageOptions.ColorDepth = PdfFocus.CImageOptions.eColorDepth.BlackWhite1bpp;
    OpenIfConverted(f.ToMultipageTiff(outFile, System.Drawing.Imaging.EncoderValue.CompressionCCITT4), outFile);

    // To Text.
    outFile = Path.ChangeExtension(pdfFile, ".txt");
    OpenIfConverted(f.ToText(outFile), outFile);
}

/// <summary>
/// Opens <paramref name="outFile"/> through the operating-system shell when
/// <paramref name="resultCode"/> is 0; does nothing otherwise.
/// </summary>
/// <param name="resultCode">Status returned by the conversion call.</param>
/// <param name="outFile">Path of the file produced by that conversion.</param>
private static void OpenIfConverted(int resultCode, string outFile)
{
    if (resultCode == 0)
    {
        System.Diagnostics.Process.Start(
            new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true });
    }
}
/// <summary>
/// Shows how images can be kept in the HTML produced from a PDF. The active
/// configuration writes numbered JPEG files into a "{pdf name}_images" folder;
/// the result is opened through the operating-system shell.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string sourcePdf = @"..\..\simple text.pdf";
    string targetHtml = "Result.html";

    var converter = new SautinSoft.PdfFocus();

    // After purchasing a license, assign the serial number to converter.Serial here.

    // Alternatives to the active configuration (Way 2) below:
    //   Way 1 (default): images stored inside the HTML document as base64 JPEG images:
    //          HtmlOptions.IncludeImageInHtml = true,
    //          EmbeddedImagesFormat = PdfFocus.eImageFormat.Jpeg.
    //   Way 3: PNG files in the same directory as the HTML file, with all images
    //          of each page combined into a single image:
    //          HtmlOptions.ImageFolder = Path.GetDirectoryName(htmlFile),
    //          EmbeddedImagesFormat = PdfFocus.eImageFormat.Png,
    //          HtmlOptions.ImageSubFolder = "",
    //          HtmlOptions.IncludeImageInHtml = false.
    // EmbeddedImagesFormat choices: Auto - same format as in the source PDF;
    // Jpeg - smaller document size; PNG - highest quality, but also the largest size.

    // Way 2: JPG files in a special folder "{pdf name}_images", named
    // "picture100.jpg", "picture101.jpg" .. "pictureN.jpg", with JPEG quality 95 percent.
    string imageDirectory = Path.GetDirectoryName(targetHtml);
    string imageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(sourcePdf));

    converter.HtmlOptions.ImageFolder = imageDirectory;
    converter.EmbeddedImagesFormat = PdfFocus.eImageFormat.Jpeg;
    converter.EmbeddedJpegQuality = 95;
    converter.HtmlOptions.ImageSubFolder = imageSubFolder;
    converter.HtmlOptions.ImageFileName = "picture";
    converter.HtmlOptions.ImageNumStart = 100;
    converter.HtmlOptions.IncludeImageInHtml = false;

    converter.OpenPdf(sourcePdf);
    if (converter.PageCount <= 0)
    {
        return;
    }

    int status = converter.ToHtml(targetHtml);
    if (status != 0)
    {
        return;
    }

    // Open the result for demonstration purposes.
    var viewer = new System.Diagnostics.ProcessStartInfo(targetHtml);
    viewer.UseShellExecute = true;
    System.Diagnostics.Process.Start(viewer);
}