Esempi di codice in C# (CSharp) per HTMLExtractor

Esempio n. 1

0

Mostra file

File: Default.aspx.cs Progetto: jboddiford/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts the sample PDF to HTML (CSS mode, images embedded) and writes the
        /// result directly to the HTTP response.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath(@".\bin\sample2.pdf");

            // The extractor is scoped with `using` so it is released even when the
            // conversion throws, and always before the response is ended.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Embed images into HTML file
                extractor.SaveImages = ImageHandling.Embed;

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                Response.Clear();
                Response.ContentType = "text/html";

                // Save extracted HTML to output stream
                extractor.SaveHtmlToStream(Response.OutputStream);
            }

            // Response.End() aborts the current request thread, so no statement placed
            // after it is executed; it must therefore be the last call in the handler.
            Response.End();
        }

Esempio n. 2

0

Mostra file

        /// <summary>
        /// Converts the second page of "sample2.pdf" to plain HTML, saves it as
        /// "output.html" and opens the result with the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // `using` guarantees the extractor is released even if loading or saving throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set plain HTML extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.PlainHTML;

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                // Convert 2-nd page to HTML and save it to file (page index 1)
                extractor.SaveHtmlPageToFile(1, "output.html");
            }

            // Open result document in default associated application (for demo purpose)
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.html");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }

Esempio n. 3

0

Mostra file

File: Program.cs Progetto: repohoarder/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts "sample2.pdf" to "output.html" with page data caching disabled and,
        /// when the conversion succeeds, opens the result with the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // When processing huge PDF documents you may run into OutOfMemoryException.
            // This example demonstrates a way to spare the memory by disabling page data caching.

            // Create Bytescout.PDF2HTML.HTMLExtractor instance
            using (HTMLExtractor extractor = new HTMLExtractor("demo", "demo"))
            {
                try
                {
                    // Load sample PDF document
                    extractor.LoadDocumentFromFile("sample2.pdf");

                    // Disable page data caching, so processed pages will be disposed automatically
                    extractor.PageDataCaching = PageDataCaching.None;

                    // Save result to file
                    extractor.SaveHtmlToFile("output.html");
                }
                catch (PDF2HTMLException exception)
                {
                    Console.Write(exception.ToString());

                    // The conversion failed, so "output.html" is missing or stale:
                    // do not try to open it. The `using` block still disposes the extractor.
                    return;
                }
            }

            // Open result document in default associated application (for demo purpose)
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.html");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }

Esempio n. 4

0

Mostra file

        /// <summary>
        /// Downloads a sample PDF from a URL, converts its first page to HTML with CSS,
        /// saves it as "output.html" and opens the result with the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Input file Url
            var inputUrl = @"https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";

            // Both the input stream and the extractor are scoped with `using` so they are
            // released even if the conversion throws. The extractor is declared last so it
            // is disposed first, while the stream it was loaded from is still open.
            // NOTE(review): assumes GetStreamFromUrl returns a disposable stream — confirm.
            using (var inpStream = GetStreamFromUrl(inputUrl))
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Load sample PDF document
                extractor.LoadDocumentFromStream(inpStream);

                // Convert 1-st page to HTML and save it to file (page index 0)
                extractor.SaveHtmlPageToFile(0, "output.html");
            }

            // Open result document in default associated application (for demo purpose)
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.html");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }

Esempio n. 5

0

Mostra file

File: Default.aspx.cs Progetto: remlex/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts the sample PDF to an HTML file in the site's output folder, rewrites
        /// the image paths so they resolve from the site root, and writes the adjusted
        /// HTML to the HTTP response.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test PDF file is automatically copied to the output build directory.
            String inputFile = Server.MapPath("bin\\sample2.pdf");

            // Prepare output file path
            String outputFolder = Server.MapPath("~/output/");
            String outputFile   = Path.Combine(outputFolder, "result.html");

            // The extractor is only needed until the file is written; `using` releases it
            // right away, even if the conversion throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // Save extracted HTML to output file. Document images will be created in a subfolder.
                extractor.SaveHtmlToFile(outputFile);
            }

            // The converter knows nothing about server's home directory, so it places images to a subfolder
            // named "<filename>.images" near to the output file and generates image paths relative to the output folder.
            // We need to change those paths relative to server home directory.

            string[]      lines   = File.ReadAllLines(outputFile);
            StringBuilder builder = new StringBuilder();

            foreach (string line in lines)
            {
                // replace image source paths
                builder.AppendLine(line.Replace("sample2.pdf.images", "output/sample2.pdf.images"));
            }

            // Write the modified content to Response:

            Response.Clear();
            Response.ContentType = "text/html";

            Response.Write(builder.ToString());

            // Response.End() aborts the current request thread, so no statement placed
            // after it is executed; it must therefore be the last call in the handler.
            Response.End();
        }

Esempio n. 6

0

Mostra file

File: Program.cs Progetto: babylon3389/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts "sample2.pdf" to plain HTML, saves it as "output.html" and opens the
        /// result with the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // `using` releases the extractor once the file has been written,
            // and also when loading or saving throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set plain HTML extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.PlainHTML;

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                // Save extracted HTML to file
                extractor.SaveHtmlToFile("output.html");
            }

            // Open output file in default associated application
            System.Diagnostics.Process.Start("output.html");
        }

Esempio n. 7

0

Mostra file

File: Program.cs Progetto: babylon3389/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts the second page of "sample2.pdf" to HTML with CSS, saves it as
        /// "output.html" and opens the result with the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // `using` releases the extractor once the file has been written,
            // and also when loading or saving throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                // Convert 2-nd page to HTML and save it to file (page index 1)
                extractor.SaveHtmlPageToFile(1, "output.html");
            }

            // Open output file in default associated application
            System.Diagnostics.Process.Start("output.html");
        }

Esempio n. 8

0

Mostra file

File: Program.cs Progetto: sergeydzyubka/Converter

        /// <summary>
        /// Converts a PDF to HTML with CSS, saves it as "out.html", post-processes the
        /// file with RemoveWaterMarks and then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // `using` releases the extractor once the file has been written,
            // and also when loading or saving throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@"C:\SiteAnalytics.pdf");

                // Save extracted HTML to file
                extractor.SaveHtmlToFile("out.html");
            }

            // Post-process the saved file.
            // NOTE(review): RemoveWaterMarks is defined outside this snippet; presumably it
            // edits the file in place and does not need the extractor — confirm.
            RemoveWaterMarks("out.html");

            // Keep the console window open until a key is pressed
            Console.ReadKey();
        }

Esempi in C# (CSharp) per HTMLExtractor