Esempio n. 1
0
        public static void Run()
        {
            // ExStart:ExtractTextPage
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();
            // Open input PDF
            PdfExtractor pdfExtractor = new PdfExtractor();

            pdfExtractor.BindPdf(dataDir + "ExtractText-Page.pdf");

            // Use parameterless ExtractText method
            pdfExtractor.ExtractText();

            int pageNumber = 1;

            while (pdfExtractor.HasNextPageText())
            {
                MemoryStream tempMemoryStream = new MemoryStream();
                pdfExtractor.GetNextPageText(tempMemoryStream);
                string text = "";
                // Specify Unicode encoding type in StreamReader constructor
                using (StreamReader streamReader = new
                                                   StreamReader(tempMemoryStream, Encoding.Unicode))
                {
                    streamReader.BaseStream.Seek(0, SeekOrigin.Begin);
                    text = streamReader.ReadToEnd();
                }
                File.WriteAllText(dataDir + "output" + pageNumber + "_out_.txt", text);
                pageNumber++;
            }
            // ExEnd:ExtractTextPage
        }
Esempio n. 2
0
        public static void Main()
        {
            // The path to the documents directory.
            string dataDir = Path.GetFullPath("../../../Data/");
            //open input PDF
            PdfExtractor pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir+ "input.pdf");

            //use parameterless ExtractText method
            pdfExtractor.ExtractText();

            int pageNumber = 1;

            while (pdfExtractor.HasNextPageText())
            {
                MemoryStream tempMemoryStream = new MemoryStream();
                pdfExtractor.GetNextPageText(tempMemoryStream);
                string text = "";
                //specify Unicode encoding type in StreamReader constructor
                using (StreamReader streamReader = new
                StreamReader(tempMemoryStream, Encoding.Unicode))
                {
                    streamReader.BaseStream.Seek(0, SeekOrigin.Begin);
                    text = streamReader.ReadToEnd();
                }
                File.WriteAllText(dataDir+ "output" + pageNumber + ".txt", text);
                pageNumber++;
            }
        }
Esempio n. 3
0
        public static void Main()
        {
            // The path to the documents directory.
            string dataDir = Path.GetFullPath("../../../Data/");
            //open input PDF
            PdfExtractor pdfExtractor = new PdfExtractor();

            pdfExtractor.BindPdf(dataDir + "input.pdf");

            //use parameterless ExtractText method
            pdfExtractor.ExtractText();

            int pageNumber = 1;

            while (pdfExtractor.HasNextPageText())
            {
                MemoryStream tempMemoryStream = new MemoryStream();
                pdfExtractor.GetNextPageText(tempMemoryStream);
                string text = "";
                //specify Unicode encoding type in StreamReader constructor
                using (StreamReader streamReader = new
                                                   StreamReader(tempMemoryStream, Encoding.Unicode))
                {
                    streamReader.BaseStream.Seek(0, SeekOrigin.Begin);
                    text = streamReader.ReadToEnd();
                }
                File.WriteAllText(dataDir + "output" + pageNumber + ".txt", text);
                pageNumber++;
            }
        }
        public static void Run()
        {
            // ExStart:ExtractTextPage
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();
            // Open input PDF
            PdfExtractor pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir+ "ExtractText-Page.pdf");

            // Use parameterless ExtractText method
            pdfExtractor.ExtractText();

            int pageNumber = 1;

            while (pdfExtractor.HasNextPageText())
            {
                MemoryStream tempMemoryStream = new MemoryStream();
                pdfExtractor.GetNextPageText(tempMemoryStream);
                string text = "";
                // Specify Unicode encoding type in StreamReader constructor
                using (StreamReader streamReader = new
                StreamReader(tempMemoryStream, Encoding.Unicode))
                {
                    streamReader.BaseStream.Seek(0, SeekOrigin.Begin);
                    text = streamReader.ReadToEnd();
                }
                File.WriteAllText(dataDir+ "output" + pageNumber + "_out.txt", text);
                pageNumber++;
            }
            // ExEnd:ExtractTextPage
        }
        public static void Run()
        {
            // ExStart:PdfExtractorFeatures
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

            // Create an instance of PdfExtractor class
            PdfExtractor extractor = new PdfExtractor();

            // Set PDF file password
            extractor.Password = "";
            // Specify start and end pages of the PDF
            extractor.StartPage = 1;
            extractor.EndPage   = 10;

            // Bind PDF file with the extractor object
            extractor.BindPdf(dataDir + "inFile.pdf");
            // Extract all text from the PDF
            extractor.ExtractText();
            // Save extracted text in a text file
            extractor.GetText(dataDir + "PdfExtractorFeatures_text_out_.txt");

            // Text of individual pages can also be saved individually in single text files
            if (extractor.HasNextPageText())
            {
                extractor.GetNextPageText(dataDir + DateTime.Now.Ticks.ToString() + "_out_.txt");
            }

            // Extract images from PDF file
            extractor.ExtractImage();
            // Save each individual image in an image file
            if (extractor.HasNextImage())
            {
                extractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_out_.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
            }

            // Extract attachments
            extractor.ExtractAttachment();
            extractor.GetAttachment(dataDir);
            // ExEnd:PdfExtractorFeatures
        }
        public static void Run()
        {
            // ExStart:PdfExtractorFeatures
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

            // Create an instance of PdfExtractor class
            PdfExtractor extractor = new PdfExtractor();

            // Set PDF file password
            extractor.Password = "";
            // Specify start and end pages of the PDF
            extractor.StartPage = 1;
            extractor.EndPage = 10;

            // Bind PDF file with the extractor object
            extractor.BindPdf( dataDir +  "inFile.pdf");
            // Extract all text from the PDF
            extractor.ExtractText();
            // Save extracted text in a text file
            extractor.GetText(dataDir + "PdfExtractorFeatures_text_out.txt");

            // Text of individual pages can also be saved individually in single text files
            if (extractor.HasNextPageText())
            {
                extractor.GetNextPageText(dataDir + DateTime.Now.Ticks.ToString() + "_out.txt");
            }

            // Extract images from PDF file
            extractor.ExtractImage();
            // Save each individual image in an image file
            if (extractor.HasNextImage())
            {
                extractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_out.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
            }

            // Extract attachments
            extractor.ExtractAttachment();           
            extractor.GetAttachment(dataDir);
            // ExEnd:PdfExtractorFeatures                      
        }