CSVExtractor.SetExtractionArea C# (CSharp)のコード例

コード例 #1

0

ファイルを表示

ファイル: Program.cs プロジェクト: bytescout/pdf-extractor-sdk-samples-c-sharp

        /// <summary>
        /// Extracts a fixed rectangular region of the sample PDF form into "result.csv"
        /// and then opens the result with the default associated program.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var outputFile = "result.csv";

            // The using statement releases the extractor even when loading or saving throws;
            // a trailing Dispose() call would be skipped on an exception.
            using (CSVExtractor extractor = new CSVExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@".\SampleGroupDisabilityForm.pdf");

                // Restrict extraction to a rectangle given as (x, y, width, height)
                extractor.SetExtractionArea(new System.Drawing.RectangleF(27F, 324.8F, 554.3F, 358.5F));

                // Select the "group by rows" line grouping mode
                extractor.LineGroupingMode = LineGroupingMode.GroupByRows;

                // Write the extracted data to the CSV file
                extractor.SaveCSVToFile(outputFile);
            }

            // Open with default associated program
            ProcessStartInfo processStartInfo = new ProcessStartInfo(outputFile);

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }

コード例 #2

0

ファイルを表示

        /// <summary>
        /// Detects tables on every page of "sample3.pdf", exports each detected table to its own
        /// CSV file ("page-{page}-table-{n}.csv") and opens the first exported file, if any.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Name of the first CSV file written; stays null when no table is detected at all
            string firstOutputFile = null;

            // using statements release both SDK objects even when extraction throws
            using (CSVExtractor csvExtractor = new CSVExtractor())
            using (TableDetector tableDetector = new TableDetector())
            {
                csvExtractor.RegistrationName = "demo";
                csvExtractor.RegistrationKey  = "demo";

                tableDetector.RegistrationKey  = "demo";
                tableDetector.RegistrationName = "demo";

                // Set table detection mode to "bordered tables" - best for tables with closed solid borders.
                tableDetector.ColumnDetectionMode = ColumnDetectionMode.BorderedTables;

                // Only report tables that have at least 3 columns and at least 3 rows
                tableDetector.DetectionMinNumberOfColumns = 3;
                tableDetector.DetectionMinNumberOfRows = 3;

                // Load sample PDF document into both the extractor and the detector
                csvExtractor.LoadDocumentFromFile(@".\sample3.pdf");
                tableDetector.LoadDocumentFromFile(@".\sample3.pdf");

                int pageCount = tableDetector.GetPageCount();

                for (int i = 0; i < pageCount; i++)
                {
                    // 1-based table counter, restarted on every page
                    int t = 1;

                    // Find first table and continue if found
                    if (tableDetector.FindTable(i))
                    {
                        do
                        {
                            // Set extraction area for CSV extractor to rectangle received from the table detector
                            csvExtractor.SetExtractionArea(tableDetector.FoundTableLocation);

                            // Export the table to CSV file
                            string outputFile = "page-" + i + "-table-" + t + ".csv";
                            csvExtractor.SavePageCSVToFile(i, outputFile);

                            if (firstOutputFile == null)
                            {
                                firstOutputFile = outputFile;
                            }

                            t++;
                        } while (tableDetector.FindNextTable()); // search next table
                    }
                }
            }

            // Open first output file in default associated application (for demo purposes).
            // Skipped when nothing was exported, so a missing file is never passed to Process.Start.
            if (firstOutputFile != null)
            {
                ProcessStartInfo processStartInfo = new ProcessStartInfo(firstOutputFile);

                processStartInfo.UseShellExecute = true;
                Process.Start(processStartInfo);
            }
        }

コード例 #3

0

ファイルを表示

ファイル: Program.cs プロジェクト: remlex/ByteScout-SDK-SourceCode

        /// <summary>
        /// Detects tables on every page of "sample3.pdf", exports each detected table to its own
        /// CSV file ("page-{page}-table-{n}.csv") and opens the first exported file, if any.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Name of the first CSV file written; stays null when no table is detected at all
            string firstOutputFile = null;

            // using statements make sure both SDK objects are disposed, including on exceptions
            using (CSVExtractor extractor = new CSVExtractor())
            using (TableDetector tdetector = new TableDetector())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                tdetector.RegistrationKey  = "demo";
                tdetector.RegistrationName = "demo";

                // we should define what kind of tables we should detect
                // so we set min required number of columns to 3
                tdetector.DetectionMinNumberOfColumns = 3;

                // and we set min required number of rows to 3
                tdetector.DetectionMinNumberOfRows = 3;

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");
                tdetector.LoadDocumentFromFile("sample3.pdf");

                // Get page count
                int pageCount = tdetector.GetPageCount();

                for (int i = 0; i < pageCount; i++)
                {
                    // 1-based table counter, restarted on every page
                    int j = 1;

                    // find first table and continue if found
                    if (tdetector.FindTable(i))
                    {
                        do
                        {
                            // set extraction area for CSV extractor to rectangle given by table detector
                            extractor.SetExtractionArea(tdetector.GetFoundTableRectangle_Left(),
                                                        tdetector.GetFoundTableRectangle_Top(),
                                                        tdetector.GetFoundTableRectangle_Width(),
                                                        tdetector.GetFoundTableRectangle_Height()
                                                        );

                            // and finally save the table into CSV file
                            string outputFile = "page-" + i + "-table-" + j + ".csv";
                            extractor.SavePageCSVToFile(i, outputFile);

                            if (firstOutputFile == null)
                            {
                                firstOutputFile = outputFile;
                            }

                            j++;
                        } while (tdetector.FindNextTable()); // search next table
                    }
                }
            }

            // Open first output file in default associated application.
            // Skipped when nothing was exported, so a missing file is never passed to Process.Start.
            if (firstOutputFile != null)
            {
                System.Diagnostics.Process.Start(firstOutputFile);
            }
        }

コード例 #4

0

ファイルを表示

        //------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        /// <summary>
        /// Looks for a bordered table on the last page of the given PDF and converts the CSV
        /// extracted from the detected table rectangle into field dictionaries.
        /// </summary>
        /// <param name="pdfPath">Path of the PDF document to load.</param>
        /// <returns>
        /// The result of <c>GetFieldsFromCSV</c> for the extracted CSV text,
        /// or <c>null</c> when no table is detected.
        /// </returns>
        internal static List <Dictionary <string, string> > ExtractInvoiceLineFields(string pdfPath)
        {
            using (TableDetector detector = new TableDetector("demo", "demo"))
            using (CSVExtractor csv = new CSVExtractor("demo", "demo"))
            {
                // Detection setup: "bordered tables" mode, at least 2 columns and at least 1 row
                detector.ColumnDetectionMode = ColumnDetectionMode.BorderedTables;
                detector.DetectionMinNumberOfColumns = 2;
                detector.DetectionMinNumberOfRows = 1;

                // Both objects work on the same document
                detector.LoadDocumentFromFile(pdfPath);
                csv.LoadDocumentFromFile(pdfPath);

                // Only the last page (index pageCount - 1) is searched
                int lastPageIndex = detector.GetPageCount() - 1;

                if (!detector.FindTable(lastPageIndex))
                {
                    return null;
                }

                // Limit CSV extraction to the rectangle reported by the detector
                csv.SetExtractionArea(detector.FoundTableLocation);

                // Turn the CSV text into field dictionaries
                return GetFieldsFromCSV(csv.GetCSV());
            }
        }

コード例 #5

0

ファイルを表示

ファイル: Program.cs プロジェクト: repohoarder/ByteScout-SDK-SourceCode

        /// <summary>
        /// Looks for a bordered table on the first page of the given PDF and converts the CSV
        /// extracted from the detected table rectangle into a <see cref="DataTable"/>.
        /// </summary>
        /// <param name="fileName">Path of the PDF document to load.</param>
        /// <returns>
        /// The result of <c>GetDataTableFromCSV</c> for the extracted CSV text,
        /// or <c>null</c> when no table is detected on the first page.
        /// </returns>
        private static DataTable GetDataTableFromDocument(string fileName)
        {
            // Initialise table detector and CSV extractor; both are disposed when the block exits
            using (TableDetector tableDetector = new TableDetector("demo", "demo"))
            using (CSVExtractor csvExtractor = new CSVExtractor("demo", "demo"))
            {
                // Set table detection mode to "bordered tables" - best for tables with closed solid borders.
                tableDetector.ColumnDetectionMode = ColumnDetectionMode.BorderedTables;

                // Only report tables that have at least 2 columns and at least 2 rows
                tableDetector.DetectionMinNumberOfColumns = 2;
                tableDetector.DetectionMinNumberOfRows = 2;

                // Load PDF document
                tableDetector.LoadDocumentFromFile(fileName);
                csvExtractor.LoadDocumentFromFile(fileName);

                // Only the first page (index 0) is searched for a table
                if (!tableDetector.FindTable(0))
                {
                    return null;
                }

                // Set extraction area for CSV extractor to rectangle received from the table detector
                csvExtractor.SetExtractionArea(tableDetector.FoundTableLocation);

                // Generate CSV data
                var allCsvData = csvExtractor.GetCSV();

                // Generate DataTable
                return GetDataTableFromCSV(allCsvData);
            }
        }

コード例 #6

0

ファイルを表示

ファイル: Form1.cs プロジェクト: bytescout/bytescout-showcases

        /// <summary>
        /// Click handler: exports the page currently shown in the viewer to ".\result.csv",
        /// limited to the first viewer selection when one exists, then opens the file.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void tsbExportToCSV_Click(object sender, EventArgs e)
        {
            // Selections currently made in the viewer
            RectangleF[] selectedAreas = pdfViewerControl1.SelectionInPoints;

            const string resultPath = @".\result.csv";

            using (CSVExtractor extractor = new CSVExtractor("demo", "demo"))
            {
                // The extractor reads the same file the viewer is showing
                extractor.LoadDocumentFromFile(pdfViewerControl1.InputFile);

                // OCR settings, so that text contained in images can be recognized
                extractor.OCRMode               = OCRMode.Auto;
                extractor.OCRResolution         = 300;
                extractor.OCRLanguage           = "eng";
                extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

                // A spaces ratio of 2 keeps double spaces between words
                // from breaking the column structure.
                extractor.DetectNewColumnBySpacesRatio = 2;

                // Removing horizontal lines may improve recognition quality in some cases.
                // A gamma-correction filter (AddGammaCorrection) is another candidate; it is left disabled here.
                extractor.OCRImagePreprocessingFilters.AddHorizontalLinesRemover();

                // Without a selection no extraction area is set and the whole page is extracted.
                bool hasSelection = selectedAreas.Length > 0;

                if (hasSelection)
                {
                    extractor.SetExtractionArea(selectedAreas[0]);
                }

                // Write the current page to the CSV file
                extractor.SavePageCSVToFile(pdfViewerControl1.CurrentPageIndex, resultPath);
            }

            Process.Start(resultPath);
        }

コード例 #7

0

ファイルを表示

ファイル: Program.cs プロジェクト: bytescout/pdf-extractor-sdk-samples-c-sharp

        /// <summary>
        /// Detects tables on every page of "sample_borderless.pdf" with TableDetector2, exports each
        /// one to "page-{page}-table-{n}.csv" (both numbers 1-based) and prints a summary to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var extractedCsvFiles = new List <string>();

            // The using statement releases the extractor even when extraction throws
            using (CSVExtractor csvExtractor = new CSVExtractor())
            {
                csvExtractor.RegistrationName = "demo";
                csvExtractor.RegistrationKey  = "demo";

                // Create Bytescout.PDFExtractor.TableDetector2 instance
                TableDetector2 tableDetector = new TableDetector2();

                try
                {
                    tableDetector.RegistrationKey  = "demo";
                    tableDetector.RegistrationName = "demo";

                    // Load sample PDF document
                    csvExtractor.LoadDocumentFromFile(@".\sample_borderless.pdf");
                    tableDetector.LoadDocumentFromFile(@".\sample_borderless.pdf");

                    // Get page count
                    int pageCount = tableDetector.GetPageCount();

                    for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
                    {
                        var foundTables = tableDetector.FindTables(pageIndex).ToArray();

                        // An empty array simply skips the loop, so no separate length check is needed
                        for (int indexTable = 0; indexTable < foundTables.Length; indexTable++)
                        {
                            // Set extraction area for CSV extractor to rectangle received from the table detector
                            csvExtractor.SetExtractionArea(foundTables[indexTable].Bounds);

                            // Result CSV file name
                            var outputCsvName = $"page-{pageIndex + 1}-table-{indexTable + 1}.csv";

                            // Export the table to CSV file
                            csvExtractor.SavePageCSVToFile(pageIndex, outputCsvName);
                            extractedCsvFiles.Add(outputCsvName);
                        }
                    }
                }
                finally
                {
                    // Dispose() in finally releases the detector on every path, including exceptions
                    tableDetector.Dispose();
                }
            }

            // Show Summary
            Console.Clear();
            if (extractedCsvFiles.Count > 0)
            {
                Console.WriteLine($"Total {extractedCsvFiles.Count} tables found!");
                Console.WriteLine("--------------------------");
                Console.WriteLine(string.Join("\n", extractedCsvFiles));
            }
            else
            {
                Console.WriteLine("No Table Found!");
            }

            Console.ReadLine();
        }

C# (CSharp) CSVExtractor.SetExtractionAreaの例