C# (CSharp) HTMLExtractor примеры использования

Язык программирования: C# (CSharp)

Класс/Тип: HTMLExtractor

Примеров на hotexamples.com: 8

C# (CSharp) HTMLExtractor — найдено 8 примеров. Это лучшие примеры кода на C# (CSharp) для HTMLExtractor, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить их качество.

Основные методы

Показать Скрыть

LoadDocumentFromFile(6)

Dispose(4)

SaveHtmlToFile(4)

SaveHtmlPageToFile(3)

LoadDocumentFromStream(1)

SaveHtmlToStream(1)

Пример #1

Показать файл

Файл: Default.aspx.cs Проект: jboddiford/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts the sample PDF to HTML (CSS mode, images embedded) and writes the
        /// result directly to the HTTP response.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath(@".\bin\sample2.pdf");

            // The extractor is released by the "using" block BEFORE Response.End() is called:
            // Response.End() terminates the request by raising ThreadAbortException, so a
            // Dispose() call placed after it would never be reached.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Embed images into HTML file
                extractor.SaveImages = ImageHandling.Embed;

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                Response.Clear();
                Response.ContentType = "text/html";

                // Save extracted HTML to the response output stream
                extractor.SaveHtmlToStream(Response.OutputStream);
            }

            Response.End();
        }

Пример #2

Показать файл

        /// <summary>
        /// Converts the page with index 1 of "sample2.pdf" to plain HTML, saves it as
        /// "output.html" and opens the result in the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // "using" guarantees the extractor is disposed even if loading or saving throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set plain HTML extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.PlainHTML;

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                // Convert the second page (index 1) to HTML and save it to file
                extractor.SaveHtmlPageToFile(1, "output.html");
            }

            // Open result document in default associated application (for demo purpose)
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.html");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }

Пример #3

Показать файл

Файл: Program.cs Проект: repohoarder/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts "sample2.pdf" to "output.html" with page data caching switched off,
        /// then opens the result in the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Huge PDF documents may cause an OutOfMemoryException while being processed.
            // Switching off page data caching is the memory-saving approach shown here.

            // Bytescout.PDF2HTML.HTMLExtractor instance, registered with the demo credentials
            using (HTMLExtractor pdfToHtml = new HTMLExtractor("demo", "demo"))
            {
                try
                {
                    // Open the sample PDF
                    pdfToHtml.LoadDocumentFromFile("sample2.pdf");

                    // No page data caching, so processed pages will be disposed automatically
                    pdfToHtml.PageDataCaching = PageDataCaching.None;

                    // Write the converted document to disk
                    pdfToHtml.SaveHtmlToFile("output.html");
                }
                catch (PDF2HTMLException conversionError)
                {
                    // Conversion failures are reported on the console and not rethrown
                    Console.Write(conversionError.ToString());
                }
            }

            // Launch the result in the default associated application (for demo purpose)
            Process.Start(new ProcessStartInfo("output.html") { UseShellExecute = true });
        }

Пример #4

Показать файл

        /// <summary>
        /// Downloads a sample PDF, converts its first page (index 0) to HTML with CSS,
        /// saves it as "output.html" and opens the result in the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Input file Url
            var inputUrl = @"https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf";

            // Get Input Stream and make sure it is released when conversion is finished.
            // NOTE(review): assumes GetStreamFromUrl returns a disposable stream - confirm against the helper.
            using (var inpStream = GetStreamFromUrl(inputUrl))
            // "using" guarantees the extractor is disposed (before the stream) even if conversion throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Load sample PDF document
                extractor.LoadDocumentFromStream(inpStream);

                // Convert the first page (index 0) to HTML and save it to file
                extractor.SaveHtmlPageToFile(0, "output.html");
            }

            // Open result document in default associated application (for demo purpose)
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.html");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }

Пример #5

Показать файл

Файл: Default.aspx.cs Проект: remlex/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts the sample PDF to an HTML file in the site's output folder, rewrites the
        /// image paths inside it and writes the modified HTML to the HTTP response.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test PDF file is automatically copied to the output build directory.
            String inputFile = Server.MapPath("bin\\sample2.pdf");

            // Prepare output file path
            String outputFolder = Server.MapPath("~/output/");
            String outputFile   = Path.Combine(outputFolder, "result.html");

            // The extractor is only needed for the conversion itself, so it is scoped to a
            // "using" block. The original Dispose() call sat after Response.End(), which
            // terminates the request by raising ThreadAbortException, so it never ran.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // Save extracted HTML to output file. Document images will be created in a subfolder.
                extractor.SaveHtmlToFile(outputFile);
            }

            // The converter knows nothing about server's home directory, so it places images to a subfolder
            // named "<filename>.images" near to the output file and generates image paths relative to the output folder.
            // We need to change those paths relative to server home directory.

            string[]      lines   = File.ReadAllLines(outputFile);
            StringBuilder builder = new StringBuilder();

            foreach (string line in lines)
            {
                // replace image source paths
                builder.AppendLine(line.Replace("sample2.pdf.images", "output/sample2.pdf.images"));
            }

            // Write the modified content to Response:

            Response.Clear();
            Response.ContentType = "text/html";

            Response.Write(builder.ToString());

            Response.End();
        }

Пример #6

Показать файл

Файл: Program.cs Проект: babylon3389/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts "sample2.pdf" to plain HTML, saves it as "output.html" and opens the
        /// result in the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The original never disposed the extractor; "using" releases it even if conversion throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set plain HTML extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.PlainHTML;

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                // Save extracted HTML to file
                extractor.SaveHtmlToFile("output.html");
            }

            // Open output file in default associated application.
            // UseShellExecute is set explicitly because opening a document (rather than an
            // executable) requires it, and its default value differs between .NET runtimes.
            System.Diagnostics.ProcessStartInfo processStartInfo = new System.Diagnostics.ProcessStartInfo("output.html");

            processStartInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(processStartInfo);
        }

Пример #7

Показать файл

Файл: Program.cs Проект: babylon3389/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts the page with index 1 of "sample2.pdf" to HTML with CSS, saves it as
        /// "output.html" and opens the result in the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The original never disposed the extractor; "using" releases it even if conversion throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                // Convert the second page (index 1) to HTML and save it to file
                extractor.SaveHtmlPageToFile(1, "output.html");
            }

            // Open output file in default associated application.
            // UseShellExecute is set explicitly because opening a document (rather than an
            // executable) requires it, and its default value differs between .NET runtimes.
            System.Diagnostics.ProcessStartInfo processStartInfo = new System.Diagnostics.ProcessStartInfo("output.html");

            processStartInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(processStartInfo);
        }

Пример #8

Показать файл

Файл: Program.cs Проект: sergeydzyubka/Converter

        /// <summary>
        /// Converts "C:\SiteAnalytics.pdf" to HTML with CSS, saves it as "out.html", passes
        /// that file to RemoveWaterMarks and then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The original never disposed the extractor; "using" releases it even if conversion throws.
            using (HTMLExtractor extractor = new HTMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Set HTML with CSS extraction mode
                extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@"C:\SiteAnalytics.pdf");

                // Save extracted HTML to file
                extractor.SaveHtmlToFile("out.html");
            }

            // Post-process the saved file once the extractor has been released
            RemoveWaterMarks("out.html");

            // Keep the console window open until a key is pressed
            Console.ReadKey();
        }