XLSExtractor C# (CSharp)代码示例

示例#1

0

显示文件

文件： Program.cs 项目： bytescout/pdf-extractor-sdk-samples-c-sharp

        /// <summary>
        /// Converts every page of "ProductsMonthWise.pdf" into its own XLSX file
        /// ("page_1.xlsx", "page_2.xlsx", ...) and prints the name of each file
        /// after it has been saved. Waits for a line of console input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The using block disposes the extractor even when loading or saving
            // throws, which a trailing Dispose() call would not do.
            using (XLSExtractor extractor = new XLSExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("ProductsMonthWise.pdf");

                // Uncomment this line if you need all pages converted into a single worksheet:
                //extractor.PageToWorksheet = false;

                // Set the output format to XLSX
                extractor.OutputFormat = SpreadseetOutputFormat.XLSX;

                // Number of pages in the loaded document
                var pageCount = extractor.GetPageCount();

                // The loop index is passed to the extractor as-is (starting at 0);
                // the file names are numbered starting at 1.
                for (int i = 0; i < pageCount; i++)
                {
                    string outputName = $"page_{i + 1}.xlsx";

                    // Save page to spreadsheet file
                    extractor.SavePageToXLSFile(i, outputName);

                    Console.WriteLine("'{0}' Created", outputName);
                }
            }

            // Keep the console window open until the user presses Enter
            Console.ReadLine();
        }

示例#2

0

显示文件

文件： Program.cs 项目： bytescout/data-extraction-suite-samples-c-sharp

        /// <summary>
        /// Converts "sample3.pdf" to the XLS spreadsheet "output.xls" and then opens
        /// the result through the operating-system shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The using block disposes the extractor even when loading or saving
            // throws, which a trailing Dispose() call would not do.
            using (XLSExtractor extractor = new XLSExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Remove the output of a previous run
                File.Delete("output.xls");

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Uncomment this line if you need all pages converted into a single worksheet:
                //extractor.PageToWorksheet = false;

                // Set the output format to XLS
                extractor.OutputFormat = SpreadseetOutputFormat.XLS;

                // Save the spreadsheet to file
                extractor.SaveToXLSFile("output.xls");
            }

            // Open result document in default associated application (for demo purpose).
            // The extractor is already disposed at this point.
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.xls")
            {
                UseShellExecute = true
            };
            Process.Start(processStartInfo);
        }

示例#3

0

显示文件

文件： Program.cs 项目： bytescout/pdf-extractor-sdk-samples-c-sharp

        /// <summary>
        /// Converts "sample3.pdf" to the XLS spreadsheet "output.xls" with a low
        /// column-detection space ratio and formatting preservation turned off,
        /// then opens the result through the operating-system shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The using block disposes the extractor even when loading or saving
            // throws, which a trailing Dispose() call would not do.
            using (XLSExtractor extractor = new XLSExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Remove the output of a previous run
                File.Delete("output.xls");

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Uncomment this line if you need all pages converted into a single worksheet:
                //extractor.PageToWorksheet = false;

                // Splits all text into words
                extractor.DetectNewColumnBySpacesRatio = 0.1f;

                // Add the following params to get clean data with word nodes only:
                extractor.PreserveFormattingOnTextExtraction = false; // Get rid of empty nodes
                extractor.OutputFormat = SpreadseetOutputFormat.XLS;  // Set the output format to XLS

                // Save the spreadsheet to file
                extractor.SaveToXLSFile("output.xls");
            }

            // Open result document in default associated application (for demo purpose).
            // The extractor is already disposed at this point.
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.xls")
            {
                UseShellExecute = true
            };
            Process.Start(processStartInfo);
        }

示例#4

0

显示文件

文件： Program.cs 项目： aharon13/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts "sample3.pdf" to the spreadsheet "output.xls" using the
        /// extractor's default output settings and then opens the result through
        /// the operating-system shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The using block disposes the extractor even when loading or saving
            // throws, which a trailing Dispose() call would not do.
            using (XLSExtractor extractor = new XLSExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Remove the output of a previous run
                File.Delete("output.xls");

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Uncomment this line if you need all pages converted into a single worksheet:
                //extractor.PageToWorksheet = false;

                // Save the spreadsheet to file
                extractor.SaveToXLSFile("output.xls");
            }

            // Open the spreadsheet in default associated application.
            // UseShellExecute is set explicitly because its default is not the same
            // on every .NET runtime, and opening a document (rather than an
            // executable) requires the shell.
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.xls")
            {
                UseShellExecute = true
            };
            Process.Start(processStartInfo);
        }

示例#5

0

显示文件

文件： Program.cs 项目： remlex/ByteScout-SDK-SourceCode

        /// <summary>
        /// Converts "sample3.pdf" to the spreadsheet "output.xls" using the
        /// extractor's default output settings and then opens the result through
        /// the operating-system shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The extractor was previously never disposed. The using block releases
            // it before the output file is opened, and also when loading or saving
            // throws.
            using (XLSExtractor extractor = new XLSExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Remove the output of a previous run
                File.Delete("output.xls");

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Save the spreadsheet to file
                extractor.SaveToXLSFile("output.xls");
            }

            // Open the spreadsheet in default associated application.
            // UseShellExecute is set explicitly because its default is not the same
            // on every .NET runtime, and opening a document (rather than an
            // executable) requires the shell.
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.xls")
            {
                UseShellExecute = true
            };
            Process.Start(processStartInfo);
        }

示例#6

0

显示文件

文件： Form1.cs 项目： bytescout/bytescout-showcases

        /// <summary>
        /// Click handler that exports the viewer's current page to ".\result.xlsx"
        /// and then opens that file. When the viewer has at least one selection,
        /// only the first selected rectangle is used as the extraction area.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void tsbExportToXLSX_Click(object sender, EventArgs e)
        {
            // Selections currently made in the viewer
            RectangleF[] selectedAreas = pdfViewerControl1.SelectionInPoints;

            string resultPath = @".\result.xlsx";

            using (var exporter = new XLSExtractor("demo", "demo"))
            {
                // Feed the extractor the same document the viewer is showing
                exporter.LoadDocumentFromFile(pdfViewerControl1.InputFile);

                // OCR configuration
                exporter.OCRMode = OCRMode.Auto;
                exporter.OCRResolution = 300;
                exporter.OCRLanguage = "eng";
                exporter.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

                // Spreadsheet output configuration
                exporter.OutputFormat = SpreadseetOutputFormat.XLSX;
                exporter.RichTextFormatting = false;

                // Some words in the document are separated by double spaces.
                // A space ratio of 2 keeps such gaps from being treated as column breaks.
                exporter.DetectNewColumnBySpacesRatio = 2;

                // Optional OCR preprocessing filters that may improve recognition quality:
                //exporter.OCRImagePreprocessingFilters.AddHorizontalLinesRemover();
                //exporter.OCRImagePreprocessingFilters.AddGammaCorrection();

                // Restrict extraction to the first selection when there is one;
                // otherwise the whole page is extracted.
                if (selectedAreas.Length != 0)
                {
                    exporter.SetExtractionArea(selectedAreas[0]);
                }

                // Write the current page to the XLSX file
                exporter.SavePageToXLSFile(pdfViewerControl1.CurrentPageIndex, resultPath);
            }

            // Open the exported file once the extractor has been disposed
            Process.Start(resultPath);
        }

示例#7

0

显示文件

        /// <summary>
        /// Converts a password-protected PDF to the XLS spreadsheet "output.xls" and
        /// then opens the result through the operating-system shell. The
        /// <c>extractor_PasswordRequired</c> handler is subscribed to the
        /// <c>PasswordRequired</c> event before the document is loaded.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The using block disposes the extractor even when loading or saving
            // throws, which a trailing Dispose() call would not do.
            using (XLSExtractor extractor = new XLSExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Remove the output of a previous run
                File.Delete("output.xls");

                // Document Password Can be set in two ways
                // 1. Using Property
                // 2. Using Events

                // 1. Handle document password using Property
                // extractor.Password = "******"

                // 2. Handle document password using Event
                extractor.PasswordRequired += new System.EventHandler(extractor_PasswordRequired);

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@".\encrypted (password is 'password').pdf");

                // Set the output format to XLS
                extractor.OutputFormat = SpreadseetOutputFormat.XLS;

                // Save the spreadsheet to file
                extractor.SaveToXLSFile("output.xls");
            }

            // Open result document in default associated application (for demo purpose).
            // The extractor is already disposed at this point.
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.xls")
            {
                UseShellExecute = true
            };
            Process.Start(processStartInfo);
        }

示例#8

0

显示文件

文件： AutoTrackPdf.cs 项目： jayakumargithub/Auto_track_pdf_extractor

        /// <summary>
        /// Converts the first ".pdf" file found in the configured PDF folder to an
        /// XLS file, hands that XLS file to <c>XlsReaderService.ReadXls</c>, and then
        /// moves the PDF to the processed-PDF location.
        /// </summary>
        /// <remarks>
        /// Only the first matching file is processed per call. A
        /// <c>PDFExtractorException</c> during conversion is logged and not rethrown;
        /// an <c>IOException</c> while moving the PDF is logged and not rethrown.
        /// </remarks>
        public void Extract()
        {
            var rootFolder = FileLocation.CreateBaseFolderIsfNotExists();

            // The extension test is not linguistic, so compare ordinally instead of
            // relying on the current culture.
            var files = Directory.GetFiles(rootFolder + ConfigurationManager.AppSettings["PdfFileLocation"])
                                 .Where(x => x.EndsWith(".pdf", System.StringComparison.Ordinal))
                                 .ToArray();

            if (files.Length == 0)
            {
                Logger.Info("No Pdf File Found");
                return;
            }

            string pdfFile = files[0];
            FileInfo info  = new FileInfo(pdfFile);

            // Strip only the final extension so names with extra dots
            // (e.g. "report.v2.pdf") are not truncated at the first dot.
            string baseName = Path.GetFileNameWithoutExtension(info.Name);
            string xlsFile  = FileLocation.GetFilePath(FileLocationEnum.Xls) + baseName + ".xls";

            // The using block always disposes the extractor, including when the
            // conversion throws or when no document is loaded at the end.
            using (var extractor = new XLSExtractor())
            {
                // NOTE(review): license credentials are hard-coded in source;
                // consider moving them to configuration.
                extractor.RegistrationName = "NOT - FOR - RESALE - SINGLE - ENDUSER - ONLY - NO - PRIVATE - SUPPORT - [email protected]";
                extractor.RegistrationKey  = "10C9-AC43-B36E-997C-CCCF-BDEB-C9D";

                Logger.Info("Total file Loaded: " + files.Length);
                Logger.Info("Pdf file loaded: " + pdfFile);
                extractor.PageDataCaching = PageDataCaching.None;

                // Logged after the path has been computed, so the message carries
                // the real target path instead of an empty string.
                Logger.Info("Xsl file loaded: " + xlsFile);
                extractor.AutoAlignColumnsToHeader = true;
                Logger.Info("Pdf file extraction started");
                Logger.Info("Loaded file:" + pdfFile);

                try
                {
                    extractor.LoadDocumentFromFile(pdfFile);
                    extractor.SaveToXLSFile(xlsFile);
                    extractor.Reset();
                }
                catch (PDFExtractorException ex)
                {
                    Logger.Info("PDFExtractorException: " + ex.Message);
                }
            }

            // NOTE(review): this still runs after a failed conversion, as before;
            // confirm that ReadXls copes with a missing or incomplete XLS file.
            XlsReaderService.ReadXls(xlsFile);

            try
            {
                var destination = FileLocation.GetFilePath(FileLocationEnum.ProcessedPdf);
                info.MoveTo(destination + baseName + ".pdf");
            }
            catch (IOException ex)
            {
                Logger.Info("Delete Error: " + ex.Message);
            }
        }

C# (CSharp) XLSExtractor示例