Exemplo n.º 1
0
        /// <summary>
        /// Loads the PDF at <paramref name="pdfFileName"/> and collects one string per page,
        /// keyed by 1-based page number.
        /// </summary>
        /// <param name="pdfFileName">Path of the PDF file to read.</param>
        /// <returns>
        /// A dictionary whose keys run from 1 to the document's page count and whose values are
        /// whatever <c>GetText</c> returns for the stripper restricted to that single page.
        /// </returns>
        /// <exception cref="FileNotFoundException">
        /// <c>File.Exists</c> reports that <paramref name="pdfFileName"/> does not exist.
        /// </exception>
        public static Dictionary <int, string> Extract(string pdfFileName)
        {
            if (!File.Exists(pdfFileName))
            {
                // Use the (message, fileName) overload so the offending path is reported,
                // rather than passing the parameter's name as the message.
                throw new FileNotFoundException("The PDF file could not be found.", pdfFileName);
            }

            var        result      = new Dictionary <int, string>();
            PDDocument pdfDocument = PDDocument.load(pdfFileName);

            try
            {
                var pdfStripper = new PDFTextStripper();

                pdfStripper.setPageSeparator(Environment.NewLine + Environment.NewLine);

                // Read the page count once instead of on every loop iteration.
                int pageCount = pdfDocument.getNumberOfPages();

                for (int i = 1; i <= pageCount; i++)
                {
                    // Narrow the stripper to exactly page i before extracting.
                    pdfStripper.setStartPage(i);
                    pdfStripper.setEndPage(i);

                    result.Add(i, GetText(pdfStripper, pdfDocument));
                }
            }
            finally
            {
                // Close the document even when extraction throws, so it is not leaked.
                pdfDocument.close();
            }

            return(result);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Opens the PDF file named by <paramref name="pdfFileName"/> and builds a map from
        /// 1-based page number to the string extracted for that page.
        /// </summary>
        /// <param name="pdfFileName">Path of the PDF file to read.</param>
        /// <returns>
        /// A dictionary with one entry per page (keys 1..page count); each value is the result
        /// of calling <c>GetText</c> with the stripper limited to that page.
        /// </returns>
        /// <exception cref="FileNotFoundException">
        /// <c>File.Exists</c> reports that <paramref name="pdfFileName"/> does not exist.
        /// </exception>
        public static Dictionary <int, string> Extract(string pdfFileName)
        {
            if (!File.Exists(pdfFileName))
            {
                // Report the missing path through the fileName argument; the previous code
                // used the parameter's name as the exception message.
                throw new FileNotFoundException("The PDF file could not be found.", pdfFileName);
            }

            var        result      = new Dictionary <int, string>();
            PDDocument pdfDocument = PDDocument.load(pdfFileName);

            try
            {
                var pdfStripper = new PDFTextStripper();

                pdfStripper.setPageSeparator(Environment.NewLine + Environment.NewLine);

                // The page count is loop-invariant here, so query it a single time.
                int pageCount = pdfDocument.getNumberOfPages();

                for (int i = 1; i <= pageCount; i++)
                {
                    // Start page == end page restricts the stripper to page i only.
                    pdfStripper.setStartPage(i);
                    pdfStripper.setEndPage(i);

                    result.Add(i, GetText(pdfStripper, pdfDocument));
                }
            }
            finally
            {
                // Always release the document, including when a page fails to extract.
                pdfDocument.close();
            }

            return(result);
        }