Esempio n. 1
0
        /// <summary>
        /// Splits "sample.pdf" into pieces of up to 10 pages, queues one thread-pool work
        /// item (<c>ThreadProc</c>) per piece, waits for all of them, merges the piece files
        /// into "result.pdf", deletes the piece files and prints the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string inputFile     = "sample.pdf";
            const string resultFile    = "result.pdf";
            const int    pagesPerPiece = 10;

            int pageCount;

            // Get document page count
            using (var infoExtractor = new InfoExtractor("demo", "demo"))
            {
                infoExtractor.LoadDocumentFromFile(inputFile);
                pageCount = infoExtractor.GetPageCount();
            }

            // Nothing to split: avoid queuing zero work items and merging an empty list.
            if (pageCount <= 0)
            {
                Console.WriteLine("The document has no pages; nothing to process.");
                Console.WriteLine();

                Console.WriteLine("Press any key to exit...");
                Console.ReadKey();
                return;
            }

            // One work item per piece, rounding up so a trailing partial piece is included.
            int numberOfThreads = (pageCount + pagesPerPiece - 1) / pagesPerPiece;

            ManualResetEvent[] doneEvents = new ManualResetEvent[numberOfThreads];
            Stopwatch          stopwatch  = Stopwatch.StartNew();

            string[] pieces = new string[numberOfThreads];

            for (int i = 0; i < numberOfThreads; i++)
            {
                // Zero-based, inclusive page range of this piece.
                int startPage = i * pagesPerPiece;
                int endPage   = Math.Min(pageCount - 1, startPage + pagesPerPiece - 1);

                doneEvents[i] = new ManualResetEvent(false);
                pieces[i]     = string.Format("temp-{0}-{1}.pdf", startPage, endPage);

                // ThreadProc is defined elsewhere; presumably it writes pieces[i] and
                // signals doneEvents[i] when finished - confirm against ThreadProc.
                ThreadPool.QueueUserWorkItem(new WaitCallback(ThreadProc),
                                             new object[] { i, doneEvents[i], inputFile, pieces[i], startPage, endPage });
            }

            // Wait for all work items. The handles are waited on one at a time because
            // WaitHandle.WaitAll rejects more than 64 handles (documents over 640 pages).
            foreach (ManualResetEvent doneEvent in doneEvents)
            {
                doneEvent.WaitOne();
                doneEvent.Close();
            }

            try
            {
                // Merge pieces
                using (DocumentMerger merger = new DocumentMerger("demo", "demo"))
                {
                    merger.Merge(pieces, resultFile);
                }
            }
            finally
            {
                // Delete temp files even when merging fails
                foreach (string tempFile in pieces)
                {
                    File.Delete(tempFile);
                }
            }

            Console.WriteLine("All done in {0}.", stopwatch.Elapsed);
            Console.WriteLine();

            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }
Esempio n. 2
0
        /// <summary>
        /// For every "*.pdf" file in the directory four levels above the current
        /// directory, prints the file name, page count and document properties, followed
        /// by the first two lines of text extracted from page index 0.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The using blocks release both extractors even when a document fails to
            // load or extract part-way through the listing.
            using (InfoExtractor infoExtractor = new InfoExtractor())
            using (TextExtractor textExtractor = new TextExtractor())
            {
                infoExtractor.RegistrationName = "demo";
                infoExtractor.RegistrationKey  = "demo";

                textExtractor.RegistrationName = "demo";
                textExtractor.RegistrationKey  = "demo";

                // List all PDF files in directory
                foreach (string file in Directory.GetFiles(@"..\..\..\..", "*.pdf"))
                {
                    infoExtractor.LoadDocumentFromFile(file);

                    Console.WriteLine("File Name:      " + Path.GetFileName(file));
                    Console.WriteLine("Page Count:     " + infoExtractor.GetPageCount());
                    Console.WriteLine("Author:         " + infoExtractor.Author);
                    Console.WriteLine("Title:          " + infoExtractor.Title);
                    Console.WriteLine("Producer:       " + infoExtractor.Producer);
                    Console.WriteLine("Subject:        " + infoExtractor.Subject);
                    Console.WriteLine("CreationDate:   " + infoExtractor.CreationDate);
                    Console.WriteLine("Text (first 2 lines): ");

                    // Load a couple of lines from each document
                    textExtractor.LoadDocumentFromFile(file);
                    using (StringReader stringReader = new StringReader(textExtractor.GetTextFromPage(0)))
                    {
                        // ReadLine returns null once the text is exhausted, which
                        // Console.WriteLine prints as an empty line.
                        Console.WriteLine(stringReader.ReadLine());
                        Console.WriteLine(stringReader.ReadLine());
                    }
                    Console.WriteLine();
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
Esempio n. 3
0
        /// <summary>
        /// Splits "sample.pdf" into chunks of up to 10 pages, queues one thread-pool work
        /// item (<c>ThreadProc</c>) per chunk while throttling on <c>_threadLimiter</c>,
        /// waits for completion, concatenates the chunk files into "result.txt", deletes
        /// the chunk files and prints the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string inputFileName  = "sample.pdf";
            const string resultFileName = "result.txt";
            const int    chunkSize      = 10;

            int pageCount;

            // Get document page count
            using (var infoExtractor = new InfoExtractor("demo", "demo"))
            {
                infoExtractor.LoadDocumentFromFile(inputFileName);
                pageCount = infoExtractor.GetPageCount();
            }

            Stopwatch stopwatch = Stopwatch.StartNew();

            // One work item per chunk, rounding up so a trailing partial chunk is included.
            int numberOfThreads = pageCount > 0 ? (pageCount + chunkSize - 1) / chunkSize : 0;

            ManualResetEvent allFinishedEvent = new ManualResetEvent(false);

            // Publish the total number of pending work items before any of them is queued.
            // Counting up one item at a time would let the counter reach zero while later
            // chunks are still waiting to be queued.
            // NOTE(review): assumes ThreadProc decrements _runningThreadsCounter and sets
            // the event when it reaches zero - confirm against ThreadProc.
            _runningThreadsCounter = numberOfThreads;
            string[] chunks = new string[numberOfThreads];

            for (int i = 0; i < numberOfThreads; i++)
            {
                // Wait for the queue
                _threadLimiter.WaitOne();

                // Zero-based, inclusive page range of this chunk.
                int startPage = i * chunkSize;
                int endPage   = Math.Min(pageCount - 1, startPage + chunkSize - 1);

                // Prepare temp file name for the chunk
                chunks[i] = string.Format("temp-{0}-{1}.txt", startPage, endPage);

                ThreadPool.QueueUserWorkItem(new WaitCallback(ThreadProc),
                                             new object[] { i, allFinishedEvent, inputFileName, chunks[i], startPage, endPage });
            }

            // Wait for all threads. With no work queued nobody would ever signal the
            // event, so the wait is skipped instead of blocking forever.
            if (numberOfThreads > 0)
            {
                allFinishedEvent.WaitOne();
            }

            try
            {
                // Merge pieces into a single text file
                using (Stream resultFileStream = File.Create(resultFileName))
                {
                    foreach (string tempFile in chunks)
                    {
                        using (Stream srcStream = File.OpenRead(tempFile))
                        {
                            srcStream.CopyTo(resultFileStream);
                        }
                    }
                }
            }
            finally
            {
                // Delete temp files even when merging fails
                foreach (string tempFile in chunks)
                {
                    File.Delete(tempFile);
                }
            }

            Console.WriteLine("All done in {0}.", stopwatch.Elapsed);
            Console.WriteLine();

            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }