/// <summary>
/// Loads the sample PDF that sits in the site's bin folder and writes its document
/// information (author, title, dates, ...) to the response as a plain HTML listing,
/// then ends the response.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // This test file will be copied to the project directory on the pre-build event (see the project properties).
    String inputFile = Server.MapPath(@".\bin\sample1.pdf");

    // Create Bytescout.PDFExtractor.InfoExtractor instance.
    // The using block releases the extractor even when loading or writing fails,
    // and always before Response.End() is called.
    using (InfoExtractor extractor = new InfoExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(inputFile);

        // Display document information
        Response.Clear();
        Response.ContentType = "text/html";

        // The values come from the PDF file itself, so they are HTML-encoded before
        // being written: any markup stored in the metadata is shown as text instead
        // of being interpreted by the browser. The labels contain no special
        // characters, so encoding the whole "label: value" text leaves them intact.
        Response.Write(Server.HtmlEncode("Author: " + extractor.Author) + "<br/>");
        Response.Write(Server.HtmlEncode("Creator: " + extractor.Creator) + "<br/>");
        Response.Write(Server.HtmlEncode("Producer: " + extractor.Producer) + "<br/>");
        Response.Write(Server.HtmlEncode("Subject: " + extractor.Subject) + "<br/>");
        Response.Write(Server.HtmlEncode("Title: " + extractor.Title) + "<br/>");
        Response.Write(Server.HtmlEncode("CreationDate: " + extractor.CreationDate) + "<br/>");
        Response.Write(Server.HtmlEncode("Keywords: " + extractor.Keywords) + "<br/>");
        Response.Write(Server.HtmlEncode("Bookmarks: " + extractor.Bookmarks) + "<br/>");
        Response.Write(Server.HtmlEncode("Encrypted: " + extractor.Encrypted) + "<br/>");
    }

    Response.End();
}
/// <summary>
/// Console sample: loads <c>.\sample1.pdf</c>, prints its document information
/// to the console and waits for the user to press Enter.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Create Bytescout.PDFExtractor.InfoExtractor instance.
    // The using block disposes the extractor on every path, including when the
    // document fails to load.
    using (InfoExtractor extractor = new InfoExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(@".\sample1.pdf");

        Console.WriteLine("Author:       " + extractor.Author);
        Console.WriteLine("Creator:      " + extractor.Creator);
        Console.WriteLine("Producer:     " + extractor.Producer);
        Console.WriteLine("Subject:      " + extractor.Subject);
        Console.WriteLine("Title:        " + extractor.Title);
        Console.WriteLine("CreationDate: " + extractor.CreationDate);
        Console.WriteLine("Keywords:     " + extractor.Keywords);
        Console.WriteLine("Bookmarks:    " + extractor.Bookmarks);
        Console.WriteLine("Encrypted:    " + extractor.Encrypted);
    }

    Console.WriteLine();
    Console.WriteLine("Press any key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Console sample: reads the page count of <c>sample.pdf</c>, queues one
/// <c>ThreadProc</c> work item per 10-page range on the thread pool, waits for all
/// of them, merges the per-range piece files into <c>result.pdf</c> and deletes
/// the pieces.
/// </summary>
/// <remarks>
/// <c>ThreadProc</c> is defined elsewhere. It receives
/// <c>{ index, doneEvent, inputFile, pieceFile, startPage, endPage }</c> and is
/// expected to signal <c>doneEvent</c> when its range is finished.
/// </remarks>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string inputFile = "sample.pdf";
    const string resultFile = "result.pdf";
    const int pagesPerPiece = 10;

    int pageCount;

    // Get document page count
    using (var infoExtractor = new InfoExtractor("demo", "demo"))
    {
        infoExtractor.LoadDocumentFromFile(inputFile);
        pageCount = infoExtractor.GetPageCount();
    }

    if (pageCount <= 0)
    {
        // Nothing to split or merge; avoids handing an empty list to the merger.
        Console.WriteLine("The document has no pages, nothing to process.");
    }
    else
    {
        // Process the document by 10-page pieces (ceiling division covers the
        // last, possibly shorter, piece).
        int numberOfThreads = (pageCount + pagesPerPiece - 1) / pagesPerPiece;

        ManualResetEvent[] doneEvents = new ManualResetEvent[numberOfThreads];
        string[] pieces = new string[numberOfThreads];

        Stopwatch stopwatch = Stopwatch.StartNew();

        for (int i = 0; i < numberOfThreads; i++)
        {
            int startPage = i * pagesPerPiece;
            int endPage = Math.Min(pageCount - 1, startPage + pagesPerPiece - 1);

            doneEvents[i] = new ManualResetEvent(false);
            pieces[i] = string.Format("temp-{0}-{1}.pdf", startPage, endPage);

            ThreadPool.QueueUserWorkItem(
                new WaitCallback(ThreadProc),
                new object[] { i, doneEvents[i], inputFile, pieces[i], startPage, endPage });
        }

        // Wait for all threads. Each handle is awaited in turn instead of calling
        // WaitHandle.WaitAll, which rejects more than 64 handles (documents longer
        // than 640 pages). The events are manual-reset, so one that was signaled
        // before we reach it stays signaled.
        foreach (ManualResetEvent doneEvent in doneEvents)
        {
            doneEvent.WaitOne();
        }

        // Every worker has signaled, so the handles can be released now.
        foreach (ManualResetEvent doneEvent in doneEvents)
        {
            doneEvent.Close();
        }

        try
        {
            // Merge pieces
            using (DocumentMerger merger = new DocumentMerger("demo", "demo"))
            {
                merger.Merge(pieces, resultFile);
            }
        }
        finally
        {
            // Delete temp files, also when merging failed
            foreach (string tempFile in pieces)
            {
                File.Delete(tempFile);
            }
        }

        Console.WriteLine("All done in {0}.", stopwatch.Elapsed);
    }

    Console.WriteLine();
    Console.WriteLine("Press any key to exit...");
    Console.ReadKey();
}
/// <summary>
/// Console sample: for every PDF file found in <c>..\..\..\..</c>, prints the file
/// name, page count, selected document information and the first two text lines
/// of the first page, then waits for the user to press Enter.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Create the extractors once and reuse them for every file. The using blocks
    // dispose both of them even when one of the documents fails to load.
    using (InfoExtractor infoExtractor = new InfoExtractor())
    using (TextExtractor textExtractor = new TextExtractor())
    {
        infoExtractor.RegistrationName = "demo";
        infoExtractor.RegistrationKey = "demo";

        textExtractor.RegistrationName = "demo";
        textExtractor.RegistrationKey = "demo";

        // List all PDF files in directory
        foreach (string file in Directory.GetFiles(@"..\..\..\..", "*.pdf"))
        {
            infoExtractor.LoadDocumentFromFile(file);

            Console.WriteLine("File Name:    " + Path.GetFileName(file));
            Console.WriteLine("Page Count:   " + infoExtractor.GetPageCount());
            Console.WriteLine("Author:       " + infoExtractor.Author);
            Console.WriteLine("Title:        " + infoExtractor.Title);
            Console.WriteLine("Producer:     " + infoExtractor.Producer);
            Console.WriteLine("Subject:      " + infoExtractor.Subject);
            Console.WriteLine("CreationDate: " + infoExtractor.CreationDate);
            Console.WriteLine("Text (first 2 lines): ");

            // Load a couple of lines from each document
            textExtractor.LoadDocumentFromFile(file);

            // StringReader rejects null, so fall back to an empty text if the
            // extractor returns none for the page.
            string firstPageText = textExtractor.GetTextFromPage(0) ?? string.Empty;

            using (StringReader stringReader = new StringReader(firstPageText))
            {
                // ReadLine returns null past the end of the text, which
                // Console.WriteLine prints as an empty line.
                Console.WriteLine(stringReader.ReadLine());
                Console.WriteLine(stringReader.ReadLine());
            }

            Console.WriteLine();
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Console sample: reads the page count of <c>sample.pdf</c>, queues one
/// <c>ThreadProc</c> work item per 10-page chunk (throttled by
/// <c>_threadLimiter</c>), waits until all chunks are reported finished,
/// concatenates the per-chunk text files into <c>result.txt</c> and deletes the
/// chunk files.
/// </summary>
/// <remarks>
/// <c>ThreadProc</c>, <c>_threadLimiter</c> and <c>_runningThreadsCounter</c> are
/// defined elsewhere. <c>ThreadProc</c> receives
/// <c>{ index, allFinishedEvent, inputFileName, chunkFile, startPage, endPage }</c>.
/// </remarks>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string inputFileName = "sample.pdf";
    const string resultFileName = "result.txt";
    const int chunkSize = 10;

    int pageCount;

    // Get document page count
    using (var infoExtractor = new InfoExtractor("demo", "demo"))
    {
        infoExtractor.LoadDocumentFromFile(inputFileName);
        pageCount = infoExtractor.GetPageCount();
    }

    Stopwatch stopwatch = Stopwatch.StartNew();

    // Ceiling division covers the last, possibly shorter, chunk; an empty
    // document yields no chunks at all.
    int numberOfThreads = pageCount > 0 ? (pageCount + chunkSize - 1) / chunkSize : 0;

    string[] chunks = new string[numberOfThreads];

    // With no chunks nobody would ever signal the event, so only wait when at
    // least one work item is queued.
    if (numberOfThreads > 0)
    {
        ManualResetEvent allFinishedEvent = new ManualResetEvent(false);

        // NOTE(review): assumes ThreadProc decrements _runningThreadsCounter and
        // signals allFinishedEvent when it reaches zero - confirm against
        // ThreadProc. Under that assumption the full count is published before
        // anything is queued, so the counter cannot touch zero while later chunks
        // are still waiting for the limiter.
        _runningThreadsCounter = numberOfThreads;

        for (int i = 0; i < numberOfThreads; i++)
        {
            // Wait for the queue
            _threadLimiter.WaitOne();

            int startPage = i * chunkSize;
            int endPage = Math.Min(pageCount - 1, startPage + chunkSize - 1);

            // Prepare temp file name for the chunk
            chunks[i] = string.Format("temp-{0}-{1}.txt", startPage, endPage);

            ThreadPool.QueueUserWorkItem(
                new WaitCallback(ThreadProc),
                new object[] { i, allFinishedEvent, inputFileName, chunks[i], startPage, endPage });
        }

        // Wait for all threads
        allFinishedEvent.WaitOne();
        allFinishedEvent.Close();
    }

    try
    {
        // Merge pieces into a single text file
        using (Stream resultFileStream = File.Create(resultFileName))
        {
            foreach (string tempFile in chunks)
            {
                using (Stream srcStream = File.OpenRead(tempFile))
                {
                    srcStream.CopyTo(resultFileStream);
                }
            }
        }
    }
    finally
    {
        // Delete temp files, also when merging failed
        foreach (string tempFile in chunks)
        {
            File.Delete(tempFile);
        }
    }

    Console.WriteLine("All done in {0}.", stopwatch.Elapsed);
    Console.WriteLine();
    Console.WriteLine("Press any key to exit...");
    Console.ReadKey();
}