Ejemplos de código de DocumentSplitter en C# (CSharp)

Ejemplo n.º 1

0

Mostrar archivo

Archivo: Program.cs Proyecto: bytescout/pdf-extractor-sdk-samples-c-sharp

        /// <summary>
        /// Splits "sample.pdf" using a range list that names every page number
        /// individually ("1,2,...,n") and prints the full path of each resulting file.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string sourcePdf = "sample.pdf";

            using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
            {
                splitter.OptimizeSplittedDocuments = true;

                // Ask the splitter how many pages the document has, then build
                // the comma-separated list "1,2,...,n" from that count.
                int totalPages = splitter.GetPageCount(sourcePdf);
                string everyPage = string.Join(",", Enumerable.Range(1, totalPages));

                // Split using the list built above
                string[] parts = splitter.Split(sourcePdf, everyPage);

                Console.WriteLine(@"Splitted by parts: ");
                for (int i = 0; i < parts.Length; i++)
                {
                    // Report each produced file by absolute path
                    Console.WriteLine("    " + Path.GetFullPath(parts[i]));
                }
            }

            // Keep the console window open until the user presses a key
            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

Ejemplo n.º 2

0

Mostrar archivo

        /// <summary>
        /// Test helper: splits the given source document into a fresh destination
        /// directory and checks the produced files.
        /// </summary>
        /// <remarks>
        /// Before splitting, the source is copied into the destination as
        /// "{name}-01{ext}". After saving, the method asserts that the destination
        /// holds one more file than the splitter reported and that
        /// "{name}-01 (1){ext}" exists. NOTE(review): presumably this verifies that
        /// the splitter renames instead of overwriting an existing file - confirm.
        /// </remarks>
        /// <param name="filename">Name of the source file, resolved through GetSource.</param>
        /// <param name="password">Password passed to the DocumentReader.</param>
        /// <returns>Number of results reported by the splitter before it was reset.</returns>
        public int Split(string filename, string password)
        {
            var source    = GetSource(filename);
            var entry     = IO.Get(source);
            var baseName  = entry.BaseName;
            var extension = entry.Extension;
            var directory = Path(Args(baseName));

            // Pre-populate the destination with a file named like the first split page.
            var preexisting = IO.Combine(directory, $"{baseName}-01{extension}");
            IO.Copy(source, preexisting, true);

            using (var splitter = new DocumentSplitter(IO))
            {
                var options = new OpenOption { SaveMemory = false };
                var reader  = new DocumentReader(source, password, options);
                splitter.Add(reader);
                splitter.Save(directory);

                var produced = splitter.Results.Count;
                var onDisk   = IO.GetFiles(directory).Length;

                // The pre-copied file plus every split result must be present.
                Assert.That(onDisk, Is.EqualTo(produced + 1));

                var renamed = IO.Combine(directory, $"{baseName}-01 (1){extension}");
                Assert.That(IO.Exists(renamed));

                // Reset must clear the result list.
                splitter.Reset();
                Assert.That(splitter.Results.Count, Is.EqualTo(0));

                return produced;
            }
        }

Ejemplo n.º 3

0

Mostrar archivo

Archivo: Program.cs Proyecto: wushian/ByteScout-SDK-SourceCode

        /// <summary>
        /// Removes pages without extractable text from <c>InputFile</c>: collects the
        /// numbers of pages whose extracted text is non-empty, splits the document by
        /// those pages into <c>TempFolder</c>, merges the parts into <c>OutputFile</c>,
        /// deletes the temp folder and opens the result.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // List to keep non-empty page numbers (page index + 1, as strings)
            List<string> nonEmptyPages = new List<string>();

            // "using" guarantees the extractor is released even if loading or
            // text extraction throws.
            using (TextExtractor extractor = new TextExtractor("demo", "demo"))
            {
                // Load PDF document
                extractor.LoadDocumentFromFile(InputFile);

                // Query the page count once instead of on every loop iteration
                int pageCount = extractor.GetPageCount();

                // Iterate through pages
                for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
                {
                    // Extract page text
                    string pageText = extractor.GetTextFromPage(pageIndex);

                    // If extracted text is not empty keep the page number
                    if (pageText.Length > 0)
                    {
                        nonEmptyPages.Add((pageIndex + 1).ToString());
                    }
                }
            }

            // Form comma-separated list of page numbers to split ("1,3,5")
            string ranges = string.Join(",", nonEmptyPages);

            // Split document by non-empty pages in temp folder
            string[] parts;
            using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
            {
                splitter.OptimizeSplittedDocuments = true;
                parts = splitter.Split(InputFile, ranges, TempFolder);
            }

            // Merge parts
            using (DocumentMerger merger = new DocumentMerger("demo", "demo"))
            {
                merger.Merge(parts, OutputFile);
            }

            // Delete temp folder
            Directory.Delete(TempFolder, true);

            // Open the result file in default PDF viewer (for demo purposes)
            Process.Start(OutputFile);
        }

Ejemplo n.º 4

0

Mostrar archivo

Archivo: Program.cs Proyecto: jboddiford/ByteScout-SDK-SourceCode

        /// <summary>
        /// Worker routine: extracts a page range of the input document into a temporary
        /// chunk file, runs OCR on that chunk to produce a searchable PDF, and removes
        /// the chunk afterwards.
        /// </summary>
        /// <param name="stateInfo">
        /// An <c>object[]</c> holding, in order: thread index (int), completion event
        /// (ManualResetEvent), input file path (string), output file path (string),
        /// start page (int) and end page (int).
        /// </param>
        private static void ThreadProc(object stateInfo)
        {
            // Unpack the state array once instead of re-casting it for every field
            object[] state = (object[])stateInfo;

            int              threadIndex = (int)state[0];
            ManualResetEvent doneEvent   = (ManualResetEvent)state[1];
            string           inputFile   = (string)state[2];
            string           outputFile  = (string)state[3];
            int              startPage   = (int)state[4];
            int              endPage     = (int)state[5];

            try
            {
                Console.WriteLine("Thread #{0} started with the page range from {1} to {2}.", threadIndex, startPage, endPage);

                Stopwatch stopwatch = Stopwatch.StartNew();

                string chunk = string.Format("temp-{0}-{1}", startPage, endPage);

                try
                {
                    // Extract a piece of document.
                    // NOTE(review): the "+ 1" suggests startPage/endPage are 0-based while
                    // ExtractPageRange expects 1-based numbers - confirm against the caller.
                    using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
                    {
                        splitter.ExtractPageRange(inputFile, chunk, startPage + 1, endPage + 1);
                    }

                    // Process the piece
                    using (SearchablePDFMaker searchablePdfMaker = new SearchablePDFMaker("demo", "demo"))
                    {
                        searchablePdfMaker.OCRDetectPageRotation = true;
                        searchablePdfMaker.OCRLanguageDataFolder = @"C:\Program Files\Bytescout PDF Extractor SDK\net4.00\tessdata";
                        searchablePdfMaker.LoadDocumentFromFile(chunk);

                        // 300 DPI resolution is recommended.
                        // Using of higher values will slow down the processing but does not guarantee the higher quality.
                        searchablePdfMaker.OCRResolution = 300;

                        searchablePdfMaker.MakePDFSearchable(outputFile);
                    }
                }
                finally
                {
                    // Remove the temporary chunk even when extraction or OCR fails, so
                    // failed runs do not leave temp files behind. File.Delete does not
                    // throw when the file was never created.
                    File.Delete(chunk);
                }

                Console.WriteLine("Thread #{0} finished in {1}.", threadIndex, stopwatch.Elapsed);
            }
            finally
            {
                // Signal the thread is finished
                doneEvent.Set();

                // Release semaphore
                ThreadLimiter.Release();
            }
        }

Ejemplo n.º 5

0

Mostrar archivo

Archivo: Program.cs Proyecto: bytescout/data-extraction-suite-samples-c-sharp

        /// <summary>
        /// Splits a password-protected PDF into "part1.pdf" and "part2.pdf" at page 3,
        /// supplying the password through the splitter's PasswordRequired event.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string encryptedPdf = @".\encrypted (password is 'password').pdf";

            using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
            {
                // Subscribe the handler that is invoked when the document asks for a password
                splitter.PasswordRequired += splitter_PasswordRequired;

                // Do not let document permissions block the operation
                splitter.CheckPermissions = false;

                // Perform the two-way split
                splitter.Split(encryptedPdf, "part1.pdf", @"part2.pdf", 3);
            }

            // Keep the console window open until the user presses a key
            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

Ejemplo n.º 6

0

Mostrar archivo

Archivo: Program.cs Proyecto: jboddiford/ByteScout-SDK-SourceCode

        /// <summary>
        /// Searches every page of "sample2.pdf" for the keyword "bombardment" and
        /// extracts each matching page into its own file named "page{N}.pdf".
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string inputFile = @".\sample2.pdf";

            // "using" guarantees the extractor is released even if loading,
            // searching or page extraction throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                int pageCount = extractor.GetPageCount();

                // Search each page for a keyword
                for (int i = 0; i < pageCount; i++)
                {
                    // Skip pages that do not contain the keyword
                    if (!extractor.Find(i, "bombardment", false))
                    {
                        continue;
                    }

                    // Extract page
                    using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
                    {
                        splitter.OptimizeSplittedDocuments = true;

                        int    pageNumber = i + 1; // (!) page number in ExtractPage() is 1-based
                        string outputFile = @".\page" + pageNumber + ".pdf";
                        splitter.ExtractPage(inputFile, outputFile, pageNumber);

                        Console.WriteLine("Extracted page " + pageNumber + " to file \"" + outputFile + "\"");
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

Ejemplo n.º 7

0

Mostrar archivo

        /// <summary>
        /// Demonstrates three DocumentSplitter operations on "sample.pdf": extracting a
        /// single page, splitting into two parts at a page, and splitting by a list of
        /// page ranges. Progress is reported on the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string sourcePdf = @".\sample.pdf";

            using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
            {
                splitter.OptimizeSplittedDocuments = true;

                // --- 1. Single page -------------------------------------------
                // Page numbers passed to the splitter are 1-based.
                splitter.ExtractPage(sourcePdf, "page3.pdf", 3);

                Console.WriteLine(@"Extracted page 3 to file ""page3.pdf""");
                Console.WriteLine();

                // --- 2. Two parts ---------------------------------------------
                // The split position is also a 1-based page number.
                splitter.Split(sourcePdf, "part1.pdf", "part2.pdf", 3);

                Console.WriteLine(@"Splitted at page 3 to files ""part1.pdf"" and ""part2.pdf""");
                Console.WriteLine();

                // --- 3. Page ranges -------------------------------------------
                // Ranges are 1-based; a trailing "-" means "to the end".
                string[] rangeParts = splitter.Split(sourcePdf, "1-3,4-6,7,8-");

                Console.WriteLine(@"Splitted by ranges: ");
                for (int i = 0; i < rangeParts.Length; i++)
                {
                    // Print only the file name of each produced part
                    Console.WriteLine("    " + Path.GetFileName(rangeParts[i]));
                }
            }

            // Keep the console window open until the user presses a key
            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

Ejemplo n.º 8

0

Mostrar archivo

        /// <summary>
        /// Worker routine: extracts a page range of the input document into a temporary
        /// chunk file, runs OCR on that chunk to produce a searchable PDF, and removes
        /// the chunk afterwards.
        /// </summary>
        /// <param name="stateInfo">
        /// An <c>object[]</c> holding, in order: thread index (int), completion event
        /// (ManualResetEvent), input file path (string), output file path (string),
        /// start page (int) and end page (int).
        /// </param>
        private static void ThreadProc(object stateInfo)
        {
            // Unpack the state array once instead of re-casting it for every field
            object[] state = (object[])stateInfo;

            int              threadIndex = (int)state[0];
            ManualResetEvent doneEvent   = (ManualResetEvent)state[1];
            string           inputFile   = (string)state[2];
            string           outputFile  = (string)state[3];
            int              startPage   = (int)state[4];
            int              endPage     = (int)state[5];

            try
            {
                Console.WriteLine("Thread #{0} started with the page range from {1} to {2}.", threadIndex, startPage, endPage);

                Stopwatch stopwatch = Stopwatch.StartNew();

                string chunk = string.Format("temp-{0}-{1}", startPage, endPage);

                try
                {
                    // Extract a piece of document.
                    // NOTE(review): the "+ 1" suggests startPage/endPage are 0-based while
                    // ExtractPageRange expects 1-based numbers - confirm against the caller.
                    using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
                    {
                        splitter.ExtractPageRange(inputFile, chunk, startPage + 1, endPage + 1);
                    }

                    /*
                     * By default, "SearchablePDFMaker" uses one of the standard PDF fonts to apply
                     * recognized text over the scanned document. Such fonts contain only basic characters
                     * from ISO-8859-1 charset.
                     * If you run OCR for one of the languages with characters that are not present in the default
                     * encoding, you should explicitly specify the font that contains the required characters
                     * using ".LabelingFont" property.
                     * If you run the application in Windows with a selected locale that matches OCR language,
                     * it will be enough to specify the usual font "Arial". But if your app will run in an unknown
                     * environment (for example, in some virtual machine) you will need to install some full Unicode
                     * font (e.g. "Arial Unicode MS") and then use it with SearchablePDFMaker:
                     *
                     * //searchablePDFMaker.LabelingFont = "Arial Unicode MS";
                     */
                    // Process the piece
                    using (SearchablePDFMaker searchablePdfMaker = new SearchablePDFMaker("demo", "demo"))
                    {
                        searchablePdfMaker.OCRDetectPageRotation = true;
                        searchablePdfMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";
                        searchablePdfMaker.LoadDocumentFromFile(chunk);

                        // 300 DPI resolution is recommended.
                        // Using of higher values will slow down the processing but does not guarantee the higher quality.
                        searchablePdfMaker.OCRResolution = 300;

                        searchablePdfMaker.MakePDFSearchable(outputFile);
                    }
                }
                finally
                {
                    // Remove the temporary chunk even when extraction or OCR fails, so
                    // failed runs do not leave temp files behind. File.Delete does not
                    // throw when the file was never created.
                    File.Delete(chunk);
                }

                Console.WriteLine("Thread #{0} finished in {1}.", threadIndex, stopwatch.Elapsed);
            }
            finally
            {
                // Signal the thread is finished
                doneEvent.Set();

                // Release semaphore
                ThreadLimiter.Release();
            }
        }

Ejemplos de DocumentSplitter en C# (CSharp)