/// <summary>
/// Converts each page of "ProductsMonthWise.pdf" into its own XLSX file
/// (page_1.xlsx, page_2.xlsx, ...), reports every created file on the console,
/// and then waits for Enter before returning.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
        {
            // The using block guarantees the extractor is disposed even when
            // loading the document or saving a page throws.
            using (XLSExtractor extractor = new XLSExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("ProductsMonthWise.pdf");

                // Uncomment this line if you need all pages converted into a single worksheet:
                //extractor.PageToWorksheet = false;

                // Set the output format to XLSX
                extractor.OutputFormat = SpreadseetOutputFormat.XLSX;

                // Number of pages in the loaded document
                var pageCount = extractor.GetPageCount();

                // Page indexes passed to the extractor are zero-based;
                // the output file names are numbered from 1.
                for (int i = 0; i < pageCount; i++)
                {
                    string outputName = $"page_{i + 1}.xlsx";

                    // Save page to spreadsheet file
                    extractor.SavePageToXLSFile(i, outputName);

                    Console.WriteLine("'{0}' Created", outputName);
                }
            }

            // Keep the console window open until the user presses Enter
            Console.ReadLine();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Exports the viewer's current page to ".\result.xlsx", restricted to the first
        /// selection when one exists, and then opens the resulting file.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void tsbExportToXLSX_Click(object sender, EventArgs e)
        {
            // Get selections from viewer
            RectangleF[] selections = pdfViewerControl1.SelectionInPoints;

            string outputFile = @".\result.xlsx";

            using (XLSExtractor xlsExtractor = new XLSExtractor("demo", "demo"))
            {
                // Load document into extractor
                xlsExtractor.LoadDocumentFromFile(pdfViewerControl1.InputFile);

                // OCR settings
                xlsExtractor.OCRMode               = OCRMode.Auto;
                xlsExtractor.OCRResolution         = 300;
                xlsExtractor.OCRLanguage           = "eng";
                xlsExtractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

                xlsExtractor.OutputFormat       = SpreadseetOutputFormat.XLSX;
                xlsExtractor.RichTextFormatting = false;

                // There are double spaces between some words in the document.
                // To keep such words from breaking the column structure, increase the space ratio to 2.
                xlsExtractor.DetectNewColumnBySpacesRatio = 2;

                // FYI, removing horizontal lines may increase the text recognition quality in some cases
                //xlsExtractor.OCRImagePreprocessingFilters.AddHorizontalLinesRemover();
                // Another filter able to improve the recognition
                //xlsExtractor.OCRImagePreprocessingFilters.AddGammaCorrection();

                // If a selection exists, set the extraction area to the first one.
                // Otherwise the whole page is extracted.
                if (selections.Length > 0)
                {
                    xlsExtractor.SetExtractionArea(selections[0]);
                }

                // Save extraction results to the XLSX file
                xlsExtractor.SavePageToXLSFile(pdfViewerControl1.CurrentPageIndex, outputFile);
            }

            // Open the result with its associated application. Launching a document
            // (rather than an executable) requires the OS shell, and UseShellExecute
            // defaults to false on .NET Core / .NET 5+, so request it explicitly.
            ProcessStartInfo startInfo = new ProcessStartInfo(outputFile)
            {
                UseShellExecute = true
            };
            Process.Start(startInfo);
        }