/// <summary>
/// Extracts the contents of "sample3.pdf" to "output.csv" and, after a key press,
/// opens the result in the application associated with CSV files.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Create Bytescout.PDFExtractor.CSVExtractor instance
    CSVExtractor extractor = new CSVExtractor();

    try
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample3.pdf");

        // You can change the CSV separator symbol from "," to another one if needed for non-US locales
        //extractor.CSVSeparatorSymbol = ",";

        // Save extracted CSV data
        extractor.SaveCSVToFile("output.csv");
    }
    finally
    {
        // Cleanup: release the extractor even if loading or saving throws
        extractor.Dispose();
    }

    Console.WriteLine();
    Console.WriteLine("Data has been extracted to 'output.csv' file.");
    Console.WriteLine();
    Console.WriteLine("Press any key to continue and open CSV in default CSV viewer (or Excel)...");
    Console.ReadKey();

    // Open result document in default associated application (for demo purpose).
    // UseShellExecute is set explicitly because a document (not an executable) is started;
    // its default is false on .NET Core / .NET 5+, where a bare Process.Start("output.csv") throws.
    ProcessStartInfo processStartInfo = new ProcessStartInfo("output.csv");
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Extracts the contents of "sample3.pdf" to "output.csv" and, after a key press,
/// opens the result in the default associated application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Create Bytescout.PDFExtractor.CSVExtractor instance
    CSVExtractor extractor = new CSVExtractor();

    try
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample3.pdf");

        // You can change the CSV separator symbol from "," to another one if needed for non-US locales
        //extractor.CSVSeparatorSymbol = ",";

        // Save extracted CSV data
        extractor.SaveCSVToFile("output.csv");
    }
    finally
    {
        // Cleanup: release the extractor even if loading or saving throws
        extractor.Dispose();
    }

    Console.WriteLine();
    Console.WriteLine("Data has been extracted to 'output.csv' file.");
    Console.WriteLine();
    Console.WriteLine("Press any key to continue and open CSV in default CSV viewer (or Excel)...");
    Console.ReadKey();

    // Open result document in default associated application (for demo purpose)
    ProcessStartInfo processStartInfo = new ProcessStartInfo("output.csv");
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Extracts a borderless table from "borderless_table.pdf" to "output.csv" using
/// explicitly specified column positions, then opens the result after a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Create Bytescout.PDFExtractor.CSVExtractor instance
    CSVExtractor extractor = new CSVExtractor();

    try
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("borderless_table.pdf");

        // Set extraction columns explicitly.
        // Coordinates in CustomExtractionColumns must match the left edges of the columns.
        // To get coordinates in PDF points you can use the PDF Multitool application
        // installed with the SDK. It shows mouse cursor coordinates in PDF points in the toolbar.
        extractor.CustomExtractionColumns = new double[] { 0, 124.5, 185, 241 };

        // Save extracted CSV data
        extractor.SaveCSVToFile("output.csv");
    }
    finally
    {
        // Cleanup: release the extractor even if loading or saving throws
        extractor.Dispose();
    }

    Console.WriteLine();
    Console.WriteLine("Data has been extracted to 'output.csv' file.");
    Console.WriteLine();
    Console.WriteLine("Press any key to continue and open CSV in default CSV viewer (or Excel)...");
    Console.ReadKey();

    // Open result document in default associated application (for demo purpose)
    ProcessStartInfo processStartInfo = new ProcessStartInfo("output.csv");
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Extracts every page of "sample3.pdf" to its own CSV file
/// ("page0.csv", "page1.csv", ...) and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Create Bytescout.PDFExtractor.CSVExtractor instance
    CSVExtractor extractor = new CSVExtractor();

    try
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample3.pdf");

        // You can change the CSV separator symbol from "," to another one if needed for non-US locales
        //extractor.CSVSeparatorSymbol = ",";

        // Get page count
        int pageCount = extractor.GetPageCount();

        for (int i = 0; i < pageCount; i++)
        {
            // Output files are numbered by page index, starting from 0
            string fileName = "page" + i + ".csv";

            // Save extracted page CSV data to file
            extractor.SavePageCSVToFile(i, fileName);
        }
    }
    finally
    {
        // Cleanup: release the extractor even if loading or saving throws
        extractor.Dispose();
    }

    Console.WriteLine();
    Console.WriteLine("Data has been extracted to separate files for pages.");
    Console.WriteLine();
    Console.WriteLine("Press any key to continue...");
    Console.ReadKey();
}
/// <summary>
/// Extracts a rectangular region of "SampleGroupDisabilityForm.pdf" to "result.csv"
/// with row-based line grouping, then opens the result in the default associated program.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var outputFile = "result.csv";

    // Create Bytescout.PDFExtractor.CSVExtractor instance
    CSVExtractor extractor = new CSVExtractor();

    try
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(@".\SampleGroupDisabilityForm.pdf");

        // Set extraction area
        extractor.SetExtractionArea(new System.Drawing.RectangleF(27F, 324.8F, 554.3F, 358.5F));

        // Set the line grouping mode to GroupByRows
        extractor.LineGroupingMode = LineGroupingMode.GroupByRows;

        // Extract results
        extractor.SaveCSVToFile(outputFile);
    }
    finally
    {
        // Cleanup: release the extractor even if loading or saving throws
        extractor.Dispose();
    }

    // Open with default associated program
    ProcessStartInfo processStartInfo = new ProcessStartInfo(outputFile);
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Detects bordered tables (at least 3 columns and 3 rows) on every page of "sample3.pdf",
/// exports each one to "page-{page}-table-{n}.csv", and opens the first exported file.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Name of the first CSV file actually written; stays null when no table is detected
    string firstOutputFile = null;

    // Create Bytescout.PDFExtractor.CSVExtractor instance
    CSVExtractor csvExtractor = new CSVExtractor();

    try
    {
        csvExtractor.RegistrationName = "demo";
        csvExtractor.RegistrationKey = "demo";

        // Create Bytescout.PDFExtractor.TableDetector instance
        TableDetector tableDetector = new TableDetector();

        try
        {
            tableDetector.RegistrationKey = "demo";
            tableDetector.RegistrationName = "demo";

            // Set table detection mode to "bordered tables" - best for tables with closed solid borders.
            tableDetector.ColumnDetectionMode = ColumnDetectionMode.BorderedTables;

            // We should define what kind of tables we should detect.
            // So we set min required number of columns to 3 ...
            tableDetector.DetectionMinNumberOfColumns = 3;
            // ... and we set min required number of rows to 3
            tableDetector.DetectionMinNumberOfRows = 3;

            // Load sample PDF document
            csvExtractor.LoadDocumentFromFile(@".\sample3.pdf");
            tableDetector.LoadDocumentFromFile(@".\sample3.pdf");

            // Get page count
            int pageCount = tableDetector.GetPageCount();

            for (int i = 0; i < pageCount; i++)
            {
                // Table numbering restarts at 1 on every page
                int t = 1;

                // Find first table and continue if found
                if (tableDetector.FindTable(i))
                {
                    do
                    {
                        // Set extraction area for CSV extractor to rectangle received from the table detector
                        csvExtractor.SetExtractionArea(tableDetector.FoundTableLocation);

                        // Export the table to CSV file
                        string outputFile = "page-" + i + "-table-" + t + ".csv";
                        csvExtractor.SavePageCSVToFile(i, outputFile);

                        if (firstOutputFile == null)
                        {
                            firstOutputFile = outputFile;
                        }

                        t++;
                    }
                    while (tableDetector.FindNextTable()); // search next table
                }
            }
        }
        finally
        {
            // Cleanup: release the detector even if detection or export throws
            tableDetector.Dispose();
        }
    }
    finally
    {
        // Cleanup: release the extractor even if detection or export throws
        csvExtractor.Dispose();
    }

    if (firstOutputFile == null)
    {
        // Nothing was exported, so there is no file to open
        System.Console.WriteLine("No tables were found in the document.");
        return;
    }

    // Open first output file in default associated application (for demo purposes).
    // The file actually produced is used, because the first table is not necessarily on page 0.
    ProcessStartInfo processStartInfo = new ProcessStartInfo(firstOutputFile);
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Reads "sample3.pdf" from a stream, converts it to CSV into an in-memory stream,
/// writes that stream to "output.csv", and opens the result after a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    /*
     * Please note: Streams can be read from / written to Azure blobs, so in this example
     * we're demonstrating how to read a PDF from a stream, convert it to CSV,
     * and write the CSV to a stream.
     */

    // Get input stream and create the output stream; both are disposed when the block exits
    using (var inputStream = GetMemoryStream("sample3.pdf"))
    using (var outputStream = new MemoryStream())
    {
        // Create Bytescout.PDFExtractor.CSVExtractor instance
        CSVExtractor extractor = new CSVExtractor();

        try
        {
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Load sample PDF document from stream
            extractor.LoadDocumentFromStream(inputStream);

            // You can change the CSV separator symbol from "," to another one if needed for non-US locales
            //extractor.CSVSeparatorSymbol = ",";

            // Save extracted CSV data to output stream
            extractor.SaveCSVToStream(outputStream);

            // Save output stream to file, so we can take a look
            WriteStreamToFile(outputStream, "output.csv");
        }
        finally
        {
            // Cleanup: release the extractor (before the streams) even if conversion throws
            extractor.Dispose();
        }
    }

    Console.WriteLine();
    Console.WriteLine("Data has been extracted to 'output.csv' file.");
    Console.WriteLine();
    Console.WriteLine("Press any key to continue and open CSV in default CSV viewer (or Excel)...");
    Console.ReadKey();

    // Open result document in default associated application (for demo purpose)
    ProcessStartInfo processStartInfo = new ProcessStartInfo("output.csv");
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Extracts "sample_ocr.pdf" to "output.csv" with OCR enabled in automatic mode,
/// then opens the result in the default associated application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Create Bytescout.PDFExtractor.CSVExtractor instance
    CSVExtractor extractor = new CSVExtractor();

    try
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample_ocr.pdf");

        // Enable Optical Character Recognition (OCR)
        // in .Auto mode (SDK automatically checks if needs to use OCR or not)
        extractor.OCRMode = OCRMode.Auto;

        // Set the location of OCR language data files
        extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

        // Set OCR language:
        // "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish etc. -
        // according to files in "ocrdata" folder.
        // Find more language files at https://github.com/bytescout/ocrdata
        extractor.OCRLanguage = "eng";

        // Set PDF document rendering resolution
        extractor.OCRResolution = 300;

        // You can also apply various preprocessing filters
        // to improve the recognition on low-quality scans.

        // Automatically deskew skewed scans
        //extractor.OCRImagePreprocessingFilters.AddDeskew();

        // Remove vertical or horizontal lines (sometimes helps to avoid OCR engine's page segmentation errors)
        //extractor.OCRImagePreprocessingFilters.AddVerticalLinesRemover();
        //extractor.OCRImagePreprocessingFilters.AddHorizontalLinesRemover();

        // Repair broken letters
        //extractor.OCRImagePreprocessingFilters.AddDilate();

        // Remove noise
        //extractor.OCRImagePreprocessingFilters.AddMedian();

        // Apply Gamma Correction
        //extractor.OCRImagePreprocessingFilters.AddGammaCorrection();

        // Add Contrast
        //extractor.OCRImagePreprocessingFilters.AddContrast(20);

        // (!) You can use new OCRAnalyser class to find an optimal set of image preprocessing
        // filters for your specific document.
        // See "OCR Analyser" example.

        // Save extracted CSV data to file
        extractor.SaveCSVToFile("output.csv");
    }
    finally
    {
        // Cleanup: release the extractor even if loading, OCR or saving throws
        extractor.Dispose();
    }

    // Open result document in default associated application (for demo purpose)
    ProcessStartInfo processStartInfo = new ProcessStartInfo("output.csv");
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Detects tables on every page of "sample_borderless.pdf" with TableDetector2,
/// exports each one to "page-{page}-table-{n}.csv" (both numbers 1-based),
/// and prints a summary of the files written.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Names of all CSV files written, in the order they were produced
    var extractedCsvFiles = new List<string>();

    // Create Bytescout.PDFExtractor.CSVExtractor instance
    CSVExtractor csvExtractor = new CSVExtractor();

    try
    {
        csvExtractor.RegistrationName = "demo";
        csvExtractor.RegistrationKey = "demo";

        // Create Bytescout.PDFExtractor.TableDetector2 instance
        TableDetector2 tableDetector = new TableDetector2();

        try
        {
            tableDetector.RegistrationKey = "demo";
            tableDetector.RegistrationName = "demo";

            // Load sample PDF document
            csvExtractor.LoadDocumentFromFile(@".\sample_borderless.pdf");
            tableDetector.LoadDocumentFromFile(@".\sample_borderless.pdf");

            // Get page count
            int pageCount = tableDetector.GetPageCount();

            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
            {
                // Find all tables on the page; an empty array simply skips the inner loop
                var foundTables = tableDetector.FindTables(pageIndex).ToArray();

                for (int indexTable = 0; indexTable < foundTables.Length; indexTable++)
                {
                    // Set extraction area for CSV extractor to rectangle received from the table detector
                    csvExtractor.SetExtractionArea(foundTables[indexTable].Bounds);

                    // Result CSV file name
                    var outputCsvName = $"page-{pageIndex + 1}-table-{indexTable + 1}.csv";

                    // Export the table to CSV file
                    csvExtractor.SavePageCSVToFile(pageIndex, outputCsvName);

                    extractedCsvFiles.Add(outputCsvName);
                }
            }
        }
        finally
        {
            // Cleanup: release the detector even if detection or export throws
            tableDetector.Dispose();
        }
    }
    finally
    {
        // Cleanup: release the extractor even if detection or export throws
        csvExtractor.Dispose();
    }

    // Show Summary
    Console.Clear();

    if (extractedCsvFiles.Count > 0)
    {
        Console.WriteLine($"Total {extractedCsvFiles.Count} tables found!");
        Console.WriteLine("--------------------------");
        Console.WriteLine(string.Join("\n", extractedCsvFiles));
    }
    else
    {
        Console.WriteLine("No Table Found!");
    }

    Console.ReadLine();
}