/// <summary>
/// Converts the sample PDF to HTML (CSS mode, images embedded) and writes the result
/// directly to the HTTP response stream.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath(@".\bin\sample2.pdf");

            // The extractor must be released BEFORE Response.End(): End() terminates the request
            // by raising ThreadAbortException, so any statement placed after it never runs.
            // The using block also releases the extractor if loading or saving throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Embed images into HTML file
                extractor.SaveImages = ImageHandling.Embed;

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                Response.Clear();
                Response.ContentType = "text/html";

                // Save extracted HTML to the response output stream
                extractor.SaveHtmlToStream(Response.OutputStream);
            }

            // Flush the buffered output and stop processing the page.
            Response.End();
        }
        // Example 2
        /// <summary>
        /// Converts one page of "sample2.pdf" to plain HTML, saves it as "output.html"
        /// and opens the result in the default associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The using block releases the extractor even when loading or saving throws;
            // a trailing Dispose() call would be skipped in that case.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set plain HTML extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.PlainHTML;

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                // Convert the second page (page index 1) to HTML and save it to file
                extractor.SaveHtmlPageToFile(1, "output.html");
            }

            // Open result document in default associated application (for demo purpose)
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.html");

            // Shell execution lets the OS pick the program registered for .html files.
            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
        // Example 3
        /// <summary>
        /// Downloads a sample PDF from a URL, converts one page to HTML with CSS,
        /// saves it as "output.html" and opens the result in the default associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Input file Url
            var inputUrl = @"https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";

            // Get the input stream and make sure it is released when conversion is done.
            // NOTE(review): assumes GetStreamFromUrl returns a System.IO.Stream (IDisposable) - confirm.
            using (var inpStream = GetStreamFromUrl(inputUrl))
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Load sample PDF document
                extractor.LoadDocumentFromStream(inpStream);

                // Convert the first page (page index 0) to HTML and save it to file
                extractor.SaveHtmlPageToFile(0, "output.html");
            }
            // Both resources are disposed here (extractor first, then the stream),
            // including when loading or saving throws.

            // Open result document in default associated application (for demo purpose)
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.html");

            // Shell execution lets the OS pick the program registered for .html files.
            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
        /// <summary>
        /// Converts the sample PDF to an HTML file under ~/output/, rewrites the image paths in
        /// the generated markup and writes the adjusted HTML to the HTTP response.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test PDF file is automatically copied to the output build directory.
            String inputFile = Server.MapPath("bin\\sample2.pdf");

            // Prepare output file path
            String outputFolder = Server.MapPath("~/output/");
            String outputFile   = Path.Combine(outputFolder, "result.html");

            // Make sure the target folder exists; this is a no-op when it is already there.
            Directory.CreateDirectory(outputFolder);

            // The extractor is only needed for the conversion itself, so it is scoped to this block.
            // It must not be disposed after Response.End(): End() terminates the request by raising
            // ThreadAbortException, so statements placed after it never run.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // Save extracted HTML to output file. Document images will be created in a subfolder.
                extractor.SaveHtmlToFile(outputFile);
            }

            // The converter knows nothing about server's home directory, so it places images to a subfolder
            // named "<filename>.images" near to the output file and generates image paths relative to the output folder.
            // We need to change those paths relative to server home directory.
            // NOTE(review): the replaced folder name is based on the input file name ("sample2.pdf"),
            // while the output file is "result.html" - confirm which name the converter actually uses.

            string[]      lines   = File.ReadAllLines(outputFile);
            StringBuilder builder = new StringBuilder();

            foreach (string line in lines)
            {
                // replace image source paths
                builder.AppendLine(line.Replace("sample2.pdf.images", "output/sample2.pdf.images"));
            }

            // Write the modified content to Response:

            Response.Clear();
            Response.ContentType = "text/html";

            Response.Write(builder.ToString());

            // Flush the buffered output and stop processing the page.
            Response.End();
        }