/// <summary>
/// Converts the sample PDF to HTML (CSS mode, images embedded) and writes the
/// result straight into the HTTP response.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // This test file will be copied to the project directory on the pre-build event (see the project properties).
    String inputFile = Server.MapPath(@".\bin\sample2.pdf");

    // The extractor is scoped with "using" so it is always released.
    // Response.End() stops processing of the request, so a Dispose() call
    // placed after it (as in the previous version) was never reached.
    using (HTMLExtractor extractor = new HTMLExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Set HTML with CSS extraction mode
        extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

        // Embed images into HTML file
        extractor.SaveImages = ImageHandling.Embed;

        // Load sample PDF document
        extractor.LoadDocumentFromFile(inputFile);

        Response.Clear();
        Response.ContentType = "text/html";

        // Save extracted HTML to the response output stream
        extractor.SaveHtmlToStream(Response.OutputStream);
    }

    // Must be the last statement: nothing after it is executed.
    Response.End();
}
/// <summary>
/// Converts a single page of "sample2.pdf" to plain HTML, saves it as
/// "output.html" and opens the result in the default associated application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // "using" guarantees cleanup even when loading or saving throws.
    using (HTMLExtractor extractor = new HTMLExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Set plain HTML extraction mode
        extractor.ExtractionMode = HTMLExtractionMode.PlainHTML;

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample2.pdf");

        // Convert 2-nd page (index 1) to HTML and save it to file
        extractor.SaveHtmlPageToFile(1, "output.html");
    }

    // Open result document in default associated application (for demo purpose)
    ProcessStartInfo processStartInfo = new ProcessStartInfo("output.html");
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Converts "sample2.pdf" to "output.html" with page data caching disabled,
/// then opens the result in the default associated application.
/// </summary>
/// <remarks>
/// When processing huge PDF documents you may run into OutOfMemoryException.
/// This example demonstrates a way to spare the memory by disabling page data caching.
/// </remarks>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Create Bytescout.PDF2HTML.HTMLExtractor instance
    using (HTMLExtractor extractor = new HTMLExtractor("demo", "demo"))
    {
        try
        {
            // Load sample PDF document
            extractor.LoadDocumentFromFile("sample2.pdf");

            // Disable page data caching, so processed pages will be disposed automatically
            extractor.PageDataCaching = PageDataCaching.None;

            // Save result to file
            extractor.SaveHtmlToFile("output.html");
        }
        catch (PDF2HTMLException exception)
        {
            Console.Write(exception.ToString());

            // The conversion failed, so "output.html" is missing or stale:
            // do not try to open it. The using block still disposes the extractor.
            return;
        }
    }

    // Open result document in default associated application (for demo purpose)
    ProcessStartInfo processStartInfo = new ProcessStartInfo("output.html");
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Downloads a PDF from a URL, converts one page to HTML with CSS, saves it as
/// "output.html" and opens the result in the default associated application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Input file Url
    var inputUrl = @"https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";

    // Both the input stream and the extractor are scoped with "using" so they
    // are released even if loading or saving throws. The stream is the outer
    // scope so that it outlives the extractor reading from it.
    // NOTE(review): assumes GetStreamFromUrl returns a disposable Stream - confirm.
    using (var inpStream = GetStreamFromUrl(inputUrl))
    using (HTMLExtractor extractor = new HTMLExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Set HTML with CSS extraction mode
        extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

        // Load sample PDF document
        extractor.LoadDocumentFromStream(inpStream);

        // Convert 1-st page (index 0) to HTML and save it to file
        extractor.SaveHtmlPageToFile(0, "output.html");
    }

    // Open result document in default associated application (for demo purpose)
    ProcessStartInfo processStartInfo = new ProcessStartInfo("output.html");
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Converts the sample PDF to an HTML file under "~/output/", rewrites the image
/// paths in the generated markup, and writes the adjusted HTML to the HTTP response.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // This test PDF file is automatically copied to the output build directory.
    String inputFile = Server.MapPath("bin\\sample2.pdf");

    // Prepare output file path
    String outputFolder = Server.MapPath("~/output/");
    String outputFile = Path.Combine(outputFolder, "result.html");

    // The extractor is scoped with "using" so it is released as soon as the
    // file is written. Previously Dispose() was placed after Response.End(),
    // which stops processing of the request, so it was never reached.
    using (HTMLExtractor extractor = new HTMLExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Set HTML with CSS extraction mode
        extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

        // Load sample PDF document
        extractor.LoadDocumentFromFile(inputFile);

        // Save extracted HTML to output file. Document images will be created in a subfolder.
        extractor.SaveHtmlToFile(outputFile);
    }

    // The converter knows nothing about server's home directory, so it places images to a subfolder
    // named "<filename>.images" near to the output file and generates image paths relative to the output folder.
    // We need to change those paths relative to server home directory.
    // NOTE(review): the replaced folder name is hard-coded as "sample2.pdf.images" while the
    // output file is "result.html" - verify this matches the folder the converter creates.
    string[] lines = File.ReadAllLines(outputFile);
    StringBuilder builder = new StringBuilder();

    foreach (string line in lines)
    {
        // replace image source paths
        builder.AppendLine(line.Replace("sample2.pdf.images", "output/sample2.pdf.images"));
    }

    // Write the modified content to Response:
    Response.Clear();
    Response.ContentType = "text/html";
    Response.Write(builder.ToString());

    // Must be the last statement: nothing after it is executed.
    Response.End();
}
/// <summary>
/// Converts "sample2.pdf" to plain HTML, saves it as "output.html" and opens
/// the result in the default associated application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The extractor was never disposed before; "using" releases it
    // even when loading or saving throws.
    using (HTMLExtractor extractor = new HTMLExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Set plain HTML extraction mode
        extractor.ExtractionMode = HTMLExtractionMode.PlainHTML;

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample2.pdf");

        // Save extracted HTML to file
        extractor.SaveHtmlToFile("output.html");
    }

    // Open output file in default associated application.
    // UseShellExecute is set explicitly because opening a document (rather than
    // an executable) requires the OS shell, and its default differs between runtimes.
    System.Diagnostics.ProcessStartInfo processStartInfo = new System.Diagnostics.ProcessStartInfo("output.html");
    processStartInfo.UseShellExecute = true;
    System.Diagnostics.Process.Start(processStartInfo);
}
/// <summary>
/// Converts a single page of "sample2.pdf" to HTML with CSS, saves it as
/// "output.html" and opens the result in the default associated application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The extractor was never disposed before; "using" releases it
    // even when loading or saving throws.
    using (HTMLExtractor extractor = new HTMLExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Set HTML with CSS extraction mode
        extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample2.pdf");

        // Convert 2-nd page (index 1) to HTML and save it to file
        extractor.SaveHtmlPageToFile(1, "output.html");
    }

    // Open output file in default associated application.
    // UseShellExecute is set explicitly because opening a document (rather than
    // an executable) requires the OS shell, and its default differs between runtimes.
    System.Diagnostics.ProcessStartInfo processStartInfo = new System.Diagnostics.ProcessStartInfo("output.html");
    processStartInfo.UseShellExecute = true;
    System.Diagnostics.Process.Start(processStartInfo);
}
/// <summary>
/// Converts "C:\SiteAnalytics.pdf" to HTML with CSS, saves it as "out.html",
/// passes that file to RemoveWaterMarks, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The extractor was never disposed before; "using" releases it
    // (even on failure) before the generated file is post-processed.
    using (HTMLExtractor extractor = new HTMLExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Set HTML with CSS extraction mode
        extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

        // Load sample PDF document
        extractor.LoadDocumentFromFile(@"C:\SiteAnalytics.pdf");

        // Save extracted HTML to file
        extractor.SaveHtmlToFile("out.html");
    }

    // Post-process the generated HTML file
    RemoveWaterMarks("out.html");

    // Keep the console window open until a key is pressed
    Console.ReadKey();
}