/// <summary>
/// Loads "ProductsMonthWise.pdf" and saves every page to its own XLSX file
/// named "page_N.xlsx" (N is the 1-based page number), printing each file name
/// as it is written. Waits for Enter before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The using block guarantees the extractor is disposed even when loading
    // the document or saving a page throws.
    using (XLSExtractor extractor = new XLSExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("ProductsMonthWise.pdf");

        // Uncomment this line if you need all pages converted into a single worksheet:
        //extractor.PageToWorksheet = false;

        // Set the output format to XLSX
        extractor.OutputFormat = SpreadseetOutputFormat.XLSX;

        // Page indexes passed to the extractor are 0-based; file names are 1-based.
        var pageCount = extractor.GetPageCount();
        for (int i = 0; i < pageCount; i++)
        {
            string outputName = $"page_{i + 1}.xlsx";

            // Save page to spreadsheet file
            extractor.SavePageToXLSFile(i, outputName);

            Console.WriteLine("'{0}' Created", outputName);
        }
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Click handler that exports the viewer's current page to ".\result.xlsx"
/// and then opens the resulting file. If the viewer has a selection, only the
/// first selected rectangle is extracted; otherwise the whole page is used.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void tsbExportToXLSX_Click(object sender, EventArgs e)
{
    // Get selections from viewer
    RectangleF[] selections = pdfViewerControl1.SelectionInPoints;

    string outputFile = @".\result.xlsx";

    using (XLSExtractor xlsExtractor = new XLSExtractor("demo", "demo"))
    {
        // Load document into extractor
        xlsExtractor.LoadDocumentFromFile(pdfViewerControl1.InputFile);

        // OCR configuration
        xlsExtractor.OCRMode = OCRMode.Auto;
        xlsExtractor.OCRResolution = 300;
        xlsExtractor.OCRLanguage = "eng";
        xlsExtractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

        xlsExtractor.OutputFormat = SpreadseetOutputFormat.XLSX;
        xlsExtractor.RichTextFormatting = false;

        // There are double spaces between some words in your document.
        // To keep such words from breaking the column structure, increase the space ratio to 2.
        xlsExtractor.DetectNewColumnBySpacesRatio = 2;

        // FYI, removing horizontal lines may increase the text recognition quality in some cases
        //xlsExtractor.OCRImagePreprocessingFilters.AddHorizontalLinesRemover();
        // Another filter able to improve the recognition
        //xlsExtractor.OCRImagePreprocessingFilters.AddGammaCorrection();

        // If a selection exists, set the extraction area.
        // Otherwise the whole page is extracted.
        if (selections.Length > 0)
        {
            xlsExtractor.SetExtractionArea(selections[0]);
        }

        // Save extraction results to the XLSX file
        xlsExtractor.SavePageToXLSFile(pdfViewerControl1.CurrentPageIndex, outputFile);
    }

    // Open the result with its associated application. UseShellExecute is set
    // explicitly because it defaults to false on .NET Core/.NET 5+, where starting
    // a document path directly throws; on .NET Framework true is already the default.
    ProcessStartInfo startInfo = new ProcessStartInfo(outputFile) { UseShellExecute = true };

    // Process.Start may return null when no new process is started; release the
    // handle otherwise (this does not close the launched application).
    Process.Start(startInfo)?.Dispose();
}