/// <summary>
/// Extracts the text of "ExtractText-Page.pdf" one page at a time and writes each
/// page's text to its own "output{N}_out_.txt" file in the data directory,
/// where N is a counter starting at 1.
/// </summary>
public static void Run()
{
    // ExStart:ExtractTextPage
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "ExtractText-Page.pdf");
    try
    {
        // Use parameterless ExtractText method
        pdfExtractor.ExtractText();

        int pageNumber = 1;
        while (pdfExtractor.HasNextPageText())
        {
            string text;
            // Own the buffer in a using block so it is released even if
            // GetNextPageText throws before the reader is created.
            using (MemoryStream tempMemoryStream = new MemoryStream())
            {
                pdfExtractor.GetNextPageText(tempMemoryStream);

                // Rewind: the extractor left the position at the end of the written data.
                tempMemoryStream.Seek(0, SeekOrigin.Begin);

                // Specify Unicode encoding type in StreamReader constructor
                using (StreamReader streamReader = new StreamReader(tempMemoryStream, Encoding.Unicode))
                {
                    text = streamReader.ReadToEnd();
                }
            }

            File.WriteAllText(dataDir + "output" + pageNumber + "_out_.txt", text);
            pageNumber++;
        }
    }
    finally
    {
        // Close the extractor so the bound PDF is not kept open when
        // extraction or file writing fails part-way through.
        pdfExtractor.Close();
    }
    // ExEnd:ExtractTextPage
}
/// <summary>
/// Extracts the text of "input.pdf" (located in the "../../../Data/" directory,
/// resolved to a full path) one page at a time and writes each page's text to
/// "output{N}.txt" in the same directory, where N is a counter starting at 1.
/// </summary>
public static void Main()
{
    // The path to the documents directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "input.pdf");
    try
    {
        // Use parameterless ExtractText method
        pdfExtractor.ExtractText();

        int pageNumber = 1;
        while (pdfExtractor.HasNextPageText())
        {
            string text;
            // The buffer gets its own using block so it is released even if
            // GetNextPageText throws before the reader takes ownership of it.
            using (MemoryStream tempMemoryStream = new MemoryStream())
            {
                pdfExtractor.GetNextPageText(tempMemoryStream);

                // Rewind before reading back what the extractor wrote.
                tempMemoryStream.Seek(0, SeekOrigin.Begin);

                // Specify Unicode encoding type in StreamReader constructor
                using (StreamReader streamReader = new StreamReader(tempMemoryStream, Encoding.Unicode))
                {
                    text = streamReader.ReadToEnd();
                }
            }

            File.WriteAllText(dataDir + "output" + pageNumber + ".txt", text);
            pageNumber++;
        }
    }
    finally
    {
        // Always close the extractor so the bound PDF is released,
        // including when an exception interrupts the loop.
        pdfExtractor.Close();
    }
}
/// <summary>
/// Reads "input.pdf" from the "../../../Data/" directory (resolved to a full path),
/// extracts its text page by page, and saves every page as a separate
/// "output{N}.txt" file next to the input, with N counting up from 1.
/// </summary>
public static void Main()
{
    // The path to the documents directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "input.pdf");
    try
    {
        // Use parameterless ExtractText method
        pdfExtractor.ExtractText();

        int pageNumber = 1;
        while (pdfExtractor.HasNextPageText())
        {
            string text;
            // Dispose the temporary buffer deterministically, even when
            // GetNextPageText fails before a reader wraps the stream.
            using (MemoryStream tempMemoryStream = new MemoryStream())
            {
                pdfExtractor.GetNextPageText(tempMemoryStream);

                // Move back to the start so the reader sees the page text.
                tempMemoryStream.Seek(0, SeekOrigin.Begin);

                // Specify Unicode encoding type in StreamReader constructor
                using (StreamReader streamReader = new StreamReader(tempMemoryStream, Encoding.Unicode))
                {
                    text = streamReader.ReadToEnd();
                }
            }

            File.WriteAllText(dataDir + "output" + pageNumber + ".txt", text);
            pageNumber++;
        }
    }
    finally
    {
        // Close the extractor on every exit path so the bound PDF is released.
        pdfExtractor.Close();
    }
}
/// <summary>
/// Extracts the text of "ExtractText-Page.pdf" page by page and writes each
/// page's text to "output{N}_out.txt" in the data directory, with N counting
/// up from 1.
/// </summary>
public static void Run()
{
    // ExStart:ExtractTextPage
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "ExtractText-Page.pdf");
    try
    {
        // Use parameterless ExtractText method
        pdfExtractor.ExtractText();

        int pageNumber = 1;
        while (pdfExtractor.HasNextPageText())
        {
            string text;
            // Scope the buffer with using so it is freed even if
            // GetNextPageText throws before the reader is constructed.
            using (MemoryStream tempMemoryStream = new MemoryStream())
            {
                pdfExtractor.GetNextPageText(tempMemoryStream);

                // Rewind to the beginning before reading the extracted text back.
                tempMemoryStream.Seek(0, SeekOrigin.Begin);

                // Specify Unicode encoding type in StreamReader constructor
                using (StreamReader streamReader = new StreamReader(tempMemoryStream, Encoding.Unicode))
                {
                    text = streamReader.ReadToEnd();
                }
            }

            File.WriteAllText(dataDir + "output" + pageNumber + "_out.txt", text);
            pageNumber++;
        }
    }
    finally
    {
        // Close the extractor whether or not the loop completed, so the
        // bound PDF does not stay open after a failure.
        pdfExtractor.Close();
    }
    // ExEnd:ExtractTextPage
}
/// <summary>
/// Demonstrates several PdfExtractor features against "inFile.pdf": saving the
/// extracted text to a file, saving the text of one page, saving one image as
/// JPEG, and extracting attachments into the data directory.
/// </summary>
public static void Run()
{
    // ExStart:PdfExtractorFeatures
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

    // Create an instance of PdfExtractor class
    PdfExtractor extractor = new PdfExtractor();

    // Set PDF file password
    extractor.Password = "";

    // Specify start and end pages of the PDF
    extractor.StartPage = 1;
    extractor.EndPage = 10;

    // Bind PDF file with the extractor object
    extractor.BindPdf(dataDir + "inFile.pdf");
    try
    {
        // Extract all text from the PDF
        extractor.ExtractText();

        // Save extracted text in a text file
        extractor.GetText(dataDir + "PdfExtractorFeatures_text_out_.txt");

        // Page text can also be saved to individual files. Only the first available
        // page is written here; loop on HasNextPageText() to save every page.
        if (extractor.HasNextPageText())
        {
            extractor.GetNextPageText(dataDir + DateTime.Now.Ticks.ToString() + "_out_.txt");
        }

        // Extract images from PDF file
        extractor.ExtractImage();

        // Save an extracted image to an image file. Only the first available image
        // is written here; loop on HasNextImage() to save every image.
        if (extractor.HasNextImage())
        {
            extractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_out_.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
        }

        // Extract attachments
        extractor.ExtractAttachment();
        extractor.GetAttachment(dataDir);
    }
    finally
    {
        // Close the extractor on every exit path so the bound PDF is released
        // even when one of the extraction steps throws.
        extractor.Close();
    }
    // ExEnd:PdfExtractorFeatures
}
/// <summary>
/// Walks through the main PdfExtractor features using "inFile.pdf": writes the
/// extracted text to "PdfExtractorFeatures_text_out.txt", writes the text of one
/// page and one image (JPEG) to tick-stamped files, and extracts attachments
/// into the data directory.
/// </summary>
public static void Run()
{
    // ExStart:PdfExtractorFeatures
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

    // Create an instance of PdfExtractor class
    PdfExtractor extractor = new PdfExtractor();

    // Set PDF file password
    extractor.Password = "";

    // Specify start and end pages of the PDF
    extractor.StartPage = 1;
    extractor.EndPage = 10;

    // Bind PDF file with the extractor object
    extractor.BindPdf(dataDir + "inFile.pdf");
    try
    {
        // Extract all text from the PDF
        extractor.ExtractText();

        // Save extracted text in a text file
        extractor.GetText(dataDir + "PdfExtractorFeatures_text_out.txt");

        // Page text can also be saved to individual files. This sample writes only
        // the first available page; use a loop on HasNextPageText() for all pages.
        if (extractor.HasNextPageText())
        {
            extractor.GetNextPageText(dataDir + DateTime.Now.Ticks.ToString() + "_out.txt");
        }

        // Extract images from PDF file
        extractor.ExtractImage();

        // Save an extracted image to an image file. This sample writes only the
        // first available image; use a loop on HasNextImage() for all images.
        if (extractor.HasNextImage())
        {
            extractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_out.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
        }

        // Extract attachments
        extractor.ExtractAttachment();
        extractor.GetAttachment(dataDir);
    }
    finally
    {
        // Always close the extractor so the bound PDF is released, including
        // when an extraction step fails.
        extractor.Close();
    }
    // ExEnd:PdfExtractorFeatures
}