/// <summary>
/// Extracts the text of page 1 of "input.pdf" and writes it to "output.txt"
/// in the data directory.
/// </summary>
public static void Main()
{
    // Data directory, resolved against the current working directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Bind the source document and limit extraction to a single page.
    var extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "input.pdf");
    extractor.StartPage = 1;
    extractor.EndPage = 1;

    // Run the parameterless extraction and collect the result in memory.
    extractor.ExtractText();
    var buffer = new MemoryStream();
    extractor.GetText(buffer);

    // Decode the buffer as UTF-16 (Encoding.Unicode) from its beginning.
    // Disposing the reader also disposes the underlying buffer.
    string extracted;
    using (var reader = new StreamReader(buffer, Encoding.Unicode))
    {
        buffer.Position = 0;
        extracted = reader.ReadToEnd();
    }

    File.WriteAllText(dataDir + "output.txt", extracted);
}
/// <summary>
/// Extracts every image from "ExtractImages-Stream.pdf" through an in-memory
/// stream and saves each one as a separate "_out.jpg" file in the data directory.
/// </summary>
public static void Run()
{
    // ExStart:ExtractImagesStream
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "ExtractImages-Stream.pdf");

    // Extract images
    pdfExtractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images written within the same
    // timer tick, and FileMode.Create would then overwrite the earlier image.
    for (int imageIndex = 1; pdfExtractor.HasNextImage(); imageIndex++)
    {
        // Read image into memory stream
        using (MemoryStream memoryStream = new MemoryStream())
        {
            pdfExtractor.GetNextImage(memoryStream);

            // Write to disk, if you like, or use it otherwise.
            string outputPath = dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + "_out.jpg";
            using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
            {
                memoryStream.WriteTo(fileStream);
            }
        }
    }
    // ExEnd:ExtractImagesStream
}
/// <summary>
/// Extracts the text of "ExtractText-Page.pdf" page by page and writes each
/// page to its own "output&lt;N&gt;.txt" file in the data directory.
/// </summary>
public static void Run()
{
    // Documents directory supplied by the examples runner.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();

    // Bind the source document and run the parameterless extraction.
    var extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "ExtractText-Page.pdf");
    extractor.ExtractText();

    // Page numbers in the output file names start at 1.
    for (int page = 1; extractor.HasNextPageText(); page++)
    {
        var pageBuffer = new MemoryStream();
        extractor.GetNextPageText(pageBuffer);

        // Decode as UTF-16 from the start of the buffer; disposing the reader
        // also disposes the buffer.
        string pageText;
        using (var reader = new StreamReader(pageBuffer, Encoding.Unicode))
        {
            pageBuffer.Position = 0;
            pageText = reader.ReadToEnd();
        }

        File.WriteAllText(dataDir + "output" + page + ".txt", pageText);
    }
}
/// <summary>
/// Extracts the text of page 1 of "ExtractText-PageRange.pdf" and writes it
/// to "output.txt" in the data directory.
/// </summary>
public static void Run()
{
    // Documents directory supplied by the examples runner.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();

    // Bind the source document, then narrow the page range to page 1 only.
    var extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "ExtractText-PageRange.pdf");
    extractor.StartPage = 1;
    extractor.EndPage = 1;

    // Parameterless extraction; the result is collected in memory.
    extractor.ExtractText();
    var buffer = new MemoryStream();
    extractor.GetText(buffer);

    // Read the buffer back as UTF-16 text from position 0. Disposing the
    // reader also disposes the buffer.
    string extracted;
    using (var reader = new StreamReader(buffer, Encoding.Unicode))
    {
        buffer.Position = 0;
        extracted = reader.ReadToEnd();
    }

    File.WriteAllText(dataDir + "output.txt", extracted);
}
/// <summary>
/// Extracts the images found on pages 10 through 20 of "Input_new.pdf" via an
/// in-memory stream and saves each one as a ".jpg" file in the data directory.
/// </summary>
public static void Main()
{
    // The path to the documents directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "Input_new.pdf");

    // Set StartPage and EndPage properties to specify the range
    pdfExtractor.StartPage = 10;
    pdfExtractor.EndPage = 20;

    // Extract images
    pdfExtractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images written within the same
    // timer tick, and FileMode.Create would then overwrite the earlier image.
    for (int imageIndex = 1; pdfExtractor.HasNextImage(); imageIndex++)
    {
        // Read image into memory stream
        using (MemoryStream memoryStream = new MemoryStream())
        {
            pdfExtractor.GetNextImage(memoryStream);

            // Write to disk, if you like, or use it otherwise.
            string outputPath = dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".jpg";
            using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
            {
                memoryStream.WriteTo(fileStream);
            }
        }
    }
}
/// <summary>
/// Extracts every image from "input.pdf" via an in-memory stream and saves
/// each one as a ".jpg" file in the data directory.
/// </summary>
public static void Main()
{
    // The path to the documents directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "input.pdf");

    // Extract images
    pdfExtractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images written within the same
    // timer tick, and FileMode.Create would then overwrite the earlier image.
    for (int imageIndex = 1; pdfExtractor.HasNextImage(); imageIndex++)
    {
        // Read image into memory stream
        using (MemoryStream memoryStream = new MemoryStream())
        {
            pdfExtractor.GetNextImage(memoryStream);

            // Write to disk, if you like, or use it otherwise.
            string outputPath = dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".jpg";
            using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
            {
                memoryStream.WriteTo(fileStream);
            }
        }
    }
}
/// <summary>
/// Extracts the text of "ExtractText.pdf" and writes it to "output_out.txt"
/// in the data directory.
/// </summary>
public static void Run()
{
    // ExStart:ExtractText
    // Documents directory supplied by the examples runner.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();

    // Bind the source document and run the parameterless extraction.
    var extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "ExtractText.pdf");
    extractor.ExtractText();

    // Collect the extracted text in memory.
    var buffer = new MemoryStream();
    extractor.GetText(buffer);

    // Decode the buffer as UTF-16 (Encoding.Unicode) starting at position 0.
    // Disposing the reader also disposes the buffer.
    string extracted;
    using (var reader = new StreamReader(buffer, Encoding.Unicode))
    {
        buffer.Position = 0;
        extracted = reader.ReadToEnd();
    }

    File.WriteAllText(dataDir + "output_out.txt", extracted);
    // ExEnd:ExtractText
}
/// <summary>
/// Extracts the images found on page 2 of "ExtractImages-Page.pdf" via an
/// in-memory stream and saves each one as a ".jpg" file in the data directory.
/// </summary>
public static void Run()
{
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "ExtractImages-Page.pdf");

    // Set StartPage and EndPage to the single page to extract images from
    pdfExtractor.StartPage = 2;
    pdfExtractor.EndPage = 2;

    // Extract images
    pdfExtractor.ExtractImage();

    // Get extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images written within the same
    // timer tick, and FileMode.Create would then overwrite the earlier image.
    for (int imageIndex = 1; pdfExtractor.HasNextImage(); imageIndex++)
    {
        // Read image into memory stream
        using (MemoryStream memoryStream = new MemoryStream())
        {
            pdfExtractor.GetNextImage(memoryStream);

            // Write to disk, if you like, or use it otherwise.
            string outputPath = dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".jpg";
            using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
            {
                memoryStream.WriteTo(fileStream);
            }
        }
    }
}
/// <summary>
/// Extracts the text of "ExtractText-Page.pdf" page by page and writes each
/// page to its own "output&lt;N&gt;_out.txt" file in the data directory.
/// </summary>
public static void Run()
{
    // ExStart:ExtractTextPage
    // Documents directory supplied by the examples runner.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();

    // Bind the source document and run the parameterless extraction.
    var extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "ExtractText-Page.pdf");
    extractor.ExtractText();

    // Page numbers in the output file names start at 1.
    for (int page = 1; extractor.HasNextPageText(); page++)
    {
        var pageBuffer = new MemoryStream();
        extractor.GetNextPageText(pageBuffer);

        // Decode as UTF-16 from the start of the buffer; disposing the reader
        // also disposes the buffer.
        string pageText;
        using (var reader = new StreamReader(pageBuffer, Encoding.Unicode))
        {
            pageBuffer.Position = 0;
            pageText = reader.ReadToEnd();
        }

        File.WriteAllText(dataDir + "output" + page + "_out.txt", pageText);
    }
    // ExEnd:ExtractTextPage
}
/// <summary>
/// Extracts the images found on page 2 of "ExtractImages-Page.pdf" via an
/// in-memory stream and saves each one as a ".jpg" file in the data directory.
/// </summary>
public static void Run()
{
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "ExtractImages-Page.pdf");

    // Set StartPage and EndPage to the single page to extract images from
    pdfExtractor.StartPage = 2;
    pdfExtractor.EndPage = 2;

    // Extract images
    pdfExtractor.ExtractImage();

    // Get extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images written within the same
    // timer tick, and FileMode.Create would then overwrite the earlier image.
    for (int imageIndex = 1; pdfExtractor.HasNextImage(); imageIndex++)
    {
        // Read image into memory stream
        using (MemoryStream memoryStream = new MemoryStream())
        {
            pdfExtractor.GetNextImage(memoryStream);

            // Write to disk, if you like, or use it otherwise.
            string outputPath = dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".jpg";
            using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
            {
                memoryStream.WriteTo(fileStream);
            }
        }
    }
}
/// <summary>
/// Extracts every image from "ExtractImages-Stream.pdf" through an in-memory
/// stream and saves each one as a separate "_out.jpg" file in the data directory.
/// </summary>
public static void Run()
{
    // ExStart:ExtractImagesStream
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "ExtractImages-Stream.pdf");

    // Extract images
    pdfExtractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images written within the same
    // timer tick, and FileMode.Create would then overwrite the earlier image.
    for (int imageIndex = 1; pdfExtractor.HasNextImage(); imageIndex++)
    {
        // Read image into memory stream
        using (MemoryStream memoryStream = new MemoryStream())
        {
            pdfExtractor.GetNextImage(memoryStream);

            // Write to disk, if you like, or use it otherwise.
            string outputPath = dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + "_out.jpg";
            using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
            {
                memoryStream.WriteTo(fileStream);
            }
        }
    }
    // ExEnd:ExtractImagesStream
}
/// <summary>
/// Extracts the text of "input.pdf" page by page and writes each page to its
/// own "output&lt;N&gt;.txt" file in the data directory.
/// </summary>
public static void Main()
{
    // Data directory, resolved against the current working directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Bind the source document and run the parameterless extraction.
    var extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "input.pdf");
    extractor.ExtractText();

    // Page numbers in the output file names start at 1.
    for (int page = 1; extractor.HasNextPageText(); page++)
    {
        var pageBuffer = new MemoryStream();
        extractor.GetNextPageText(pageBuffer);

        // Decode as UTF-16 from the start of the buffer; disposing the reader
        // also disposes the buffer.
        string pageText;
        using (var reader = new StreamReader(pageBuffer, Encoding.Unicode))
        {
            pageBuffer.Position = 0;
            pageText = reader.ReadToEnd();
        }

        File.WriteAllText(dataDir + "output" + page + ".txt", pageText);
    }
}
/// <summary>
/// Reports on the console whether "FilledForm.pdf" contains text, images,
/// both, or neither.
/// </summary>
public static void Run()
{
    // ExStart:PdfContainsTextOrImages
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

    // Instantiate PdfExtractor object
    PdfExtractor extractor = new PdfExtractor();

    // Bind the input PDF document to extractor
    extractor.BindPdf(dataDir + "FilledForm.pdf");

    // Extract text from the input PDF document
    extractor.ExtractText();

    // Copy the extracted text into a memory stream; at least one byte of
    // output is treated as "contains text".
    // NOTE(review): if the extractor writes a byte-order mark even for empty
    // text, this check would always succeed - confirm against the library.
    bool containsText;
    using (MemoryStream ms = new MemoryStream())
    {
        extractor.GetText(ms);
        containsText = ms.Length >= 1;
    }

    // Extract images from the input PDF document
    extractor.ExtractImage();

    // A single HasNextImage call is enough to learn whether any image exists
    bool containsImage = extractor.HasNextImage();

    // Now find out whether this PDF is text only or image only
    if (containsText && containsImage)
    {
        Console.WriteLine("PDF contains both text and image");
    }
    else if (containsText)
    {
        Console.WriteLine("PDF contains text only");
    }
    else if (containsImage)
    {
        Console.WriteLine("PDF contains image only");
    }
    else
    {
        Console.WriteLine("PDF contains neither text nor image");
    }
    // ExEnd:PdfContainsTextOrImages
}
/// <summary>
/// Extracts the attachments of "input.pdf" and retrieves them using the
/// "output" path below the data directory.
/// </summary>
public static void Main()
{
    // Data directory, resolved against the current working directory.
    string dataDir = Path.GetFullPath("../../../Data/");
    string sourcePdf = dataDir + "input.pdf";
    string attachmentTarget = dataDir + ".\\output";

    // Bind the document, extract its attachments, then fetch them.
    var extractor = new PdfExtractor();
    extractor.BindPdf(sourcePdf);
    extractor.ExtractAttachment();
    extractor.GetAttachment(attachmentTarget);
}
/// <summary>
/// Extracts the attachments of "input.pdf" and retrieves them using the
/// "output" path below the data directory.
/// </summary>
public static void Main()
{
    // Data directory, resolved against the current working directory.
    string dataDir = Path.GetFullPath("../../../Data/");
    string sourcePdf = dataDir + "input.pdf";
    string attachmentTarget = dataDir + ".\\output";

    // Bind the document, extract its attachments, then fetch them.
    var extractor = new PdfExtractor();
    extractor.BindPdf(sourcePdf);
    extractor.ExtractAttachment();
    extractor.GetAttachment(attachmentTarget);
}
/// <summary>
/// Extracts the attachments of "ExtractAllAttachments.pdf" and retrieves them
/// using the "output" path below the data directory.
/// </summary>
public static void Run()
{
    // Documents directory supplied by the examples runner.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Attachments();
    string sourcePdf = dataDir + "ExtractAllAttachments.pdf";
    string attachmentTarget = dataDir + ".\\output";

    // Bind the document, extract its attachments, then fetch them.
    var extractor = new PdfExtractor();
    extractor.BindPdf(sourcePdf);
    extractor.ExtractAttachment();
    extractor.GetAttachment(attachmentTarget);
}
/// <summary>
/// Extracts the attachments of "ExtractAllAttachments.pdf" and retrieves them
/// using the "output" path below the data directory.
/// </summary>
public static void Run()
{
    // Documents directory supplied by the examples runner.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Attachments();
    string sourcePdf = dataDir + "ExtractAllAttachments.pdf";
    string attachmentTarget = dataDir + ".\\output";

    // Bind the document, extract its attachments, then fetch them.
    var extractor = new PdfExtractor();
    extractor.BindPdf(sourcePdf);
    extractor.ExtractAttachment();
    extractor.GetAttachment(attachmentTarget);
}
/// <summary>
/// Extracts every image from "input.pdf" and saves each one as a ".jpg" file
/// in the data directory.
/// </summary>
public static void Main()
{
    // The path to the documents directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "input.pdf");

    // Extract all the images
    pdfExtractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images saved within the same
    // timer tick, which would make two images target the same file.
    for (int imageIndex = 1; pdfExtractor.HasNextImage(); imageIndex++)
    {
        pdfExtractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".jpg");
    }
}
/// <summary>
/// Prints the name of every attachment of "input.pdf" to the console.
/// </summary>
public static void Main()
{
    // Data directory, resolved against the current working directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Bind the document and extract its attachments.
    var extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "input.pdf");
    extractor.ExtractAttachment();

    // View the returned names through the non-generic IList interface and
    // print them one per line.
    var names = (System.Collections.IList)extractor.GetAttachNames();
    for (int i = 0; i < names.Count; i++)
    {
        string name = (string)names[i];
        Console.WriteLine("Name : {0}", name);
    }
}
/// <summary>
/// Prints the name of every attachment of "GetAttachmentNames.pdf" to the console.
/// </summary>
public static void Run()
{
    // Documents directory supplied by the examples runner.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Attachments();

    // Bind the document and extract its attachments.
    var extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "GetAttachmentNames.pdf");
    extractor.ExtractAttachment();

    // View the returned names through the non-generic IList interface and
    // print them one per line.
    var names = (System.Collections.IList)extractor.GetAttachNames();
    for (int i = 0; i < names.Count; i++)
    {
        string name = (string)names[i];
        Console.WriteLine("Name : {0}", name);
    }
}
/// <summary>
/// Extracts every image from "ExtractImages.pdf" and saves each one as a
/// ".jpg" file in the data directory.
/// </summary>
public static void Run()
{
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "ExtractImages.pdf");

    // Extract all the images
    pdfExtractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images saved within the same
    // timer tick, which would make two images target the same file.
    for (int imageIndex = 1; pdfExtractor.HasNextImage(); imageIndex++)
    {
        pdfExtractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".jpg");
    }
}
/// <summary>
/// Extracts every image from "input.pdf" and saves each one as a ".jpg" file
/// in the data directory.
/// </summary>
public static void Main()
{
    // The path to the documents directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "input.pdf");

    // Extract all the images
    pdfExtractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images saved within the same
    // timer tick, which would make two images target the same file.
    for (int imageIndex = 1; pdfExtractor.HasNextImage(); imageIndex++)
    {
        pdfExtractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".jpg");
    }
}
/// <summary>
/// Extracts every image from "ExtractImages.pdf" and saves each one as a
/// ".jpg" file in the data directory.
/// </summary>
public static void Run()
{
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

    // Open input PDF
    PdfExtractor pdfExtractor = new PdfExtractor();
    pdfExtractor.BindPdf(dataDir + "ExtractImages.pdf");

    // Extract all the images
    pdfExtractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images saved within the same
    // timer tick, which would make two images target the same file.
    for (int imageIndex = 1; pdfExtractor.HasNextImage(); imageIndex++)
    {
        pdfExtractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".jpg");
    }
}
/// <summary>
/// Reports on the console whether "FilledForm.pdf" contains text, images,
/// both, or neither.
/// </summary>
public static void Run()
{
    // ExStart:PdfContainsTextOrImages
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

    // Instantiate PdfExtractor object
    PdfExtractor extractor = new PdfExtractor();

    // Bind the input PDF document to extractor
    extractor.BindPdf(dataDir + "FilledForm.pdf");

    // Extract text from the input PDF document
    extractor.ExtractText();

    // Copy the extracted text into a memory stream; at least one byte of
    // output is treated as "contains text".
    // NOTE(review): if the extractor writes a byte-order mark even for empty
    // text, this check would always succeed - confirm against the library.
    bool containsText;
    using (MemoryStream ms = new MemoryStream())
    {
        extractor.GetText(ms);
        containsText = ms.Length >= 1;
    }

    // Extract images from the input PDF document
    extractor.ExtractImage();

    // A single HasNextImage call is enough to learn whether any image exists
    bool containsImage = extractor.HasNextImage();

    // Now find out whether this PDF is text only or image only
    if (containsText && containsImage)
    {
        Console.WriteLine("PDF contains both text and image");
    }
    else if (containsText)
    {
        Console.WriteLine("PDF contains text only");
    }
    else if (containsImage)
    {
        Console.WriteLine("PDF contains image only");
    }
    else
    {
        Console.WriteLine("PDF contains neither text nor image");
    }
    // ExEnd:PdfContainsTextOrImages
}
/// <summary>
/// Prints the name of every attachment of "input.pdf" to the console.
/// </summary>
public static void Main()
{
    // Data directory, resolved against the current working directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Bind the document and extract its attachments.
    var extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "input.pdf");
    extractor.ExtractAttachment();

    // View the returned names through the non-generic IList interface and
    // print them one per line.
    var names = (System.Collections.IList)extractor.GetAttachNames();
    for (int i = 0; i < names.Count; i++)
    {
        string name = (string)names[i];
        Console.WriteLine("Name : {0}", name);
    }
}
/// <summary>
/// Prints the name of every attachment of "GetAttachmentNames.pdf" to the console.
/// </summary>
public static void Run()
{
    // Documents directory supplied by the examples runner.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Attachments();

    // Bind the document and extract its attachments.
    var extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "GetAttachmentNames.pdf");
    extractor.ExtractAttachment();

    // View the returned names through the non-generic IList interface and
    // print them one per line.
    var names = (System.Collections.IList)extractor.GetAttachNames();
    for (int i = 0; i < names.Count; i++)
    {
        string name = (string)names[i];
        Console.WriteLine("Name : {0}", name);
    }
}
/// <summary>
/// Extracts the images of "input.pdf" using the DefinedInResources extraction
/// mode and saves each one as a PNG file in the data directory.
/// </summary>
public static void Main()
{
    // The path to the documents directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Open input PDF
    PdfExtractor extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "input.pdf");

    // Specify Image Extraction Mode
    extractor.ExtractImageMode = ExtractImageMode.DefinedInResources;

    // Extract Images based on Image Extraction Mode
    extractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images saved within the same
    // timer tick, which would make two images target the same file.
    for (int imageIndex = 1; extractor.HasNextImage(); imageIndex++)
    {
        extractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".png", System.Drawing.Imaging.ImageFormat.Png);
    }
}
/// <summary>
/// Demonstrates the main PdfExtractor features on "inFile.pdf": saving the
/// whole extracted text, saving the text of each page, saving each image as
/// JPEG, and retrieving attachments into the data directory.
/// </summary>
public static void Run()
{
    // ExStart:PdfExtractorFeatures
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

    // Create an instance of PdfExtractor class
    PdfExtractor extractor = new PdfExtractor();

    // Set PDF file password
    extractor.Password = "";

    // Specify start and end pages of the PDF
    extractor.StartPage = 1;
    extractor.EndPage = 10;

    // Bind PDF file with the extractor object
    extractor.BindPdf(dataDir + "inFile.pdf");

    // Extract all text from the PDF
    extractor.ExtractText();

    // Save extracted text in a text file
    extractor.GetText(dataDir + "PdfExtractorFeatures_text_out_.txt");

    // Save the text of every page in its own file. A loop is required to
    // reach all pages (a single check only ever saved the first one); the
    // index keeps names unique when several files share the same clock tick.
    for (int pageIndex = 1; extractor.HasNextPageText(); pageIndex++)
    {
        extractor.GetNextPageText(dataDir + DateTime.Now.Ticks.ToString() + "_" + pageIndex + "_out_.txt");
    }

    // Extract images from PDF file
    extractor.ExtractImage();

    // Save each individual image in an image file (same looping and naming
    // rationale as for the page text above)
    for (int imageIndex = 1; extractor.HasNextImage(); imageIndex++)
    {
        extractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + "_out_.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
    }

    // Extract attachments
    extractor.ExtractAttachment();
    extractor.GetAttachment(dataDir);
    // ExEnd:PdfExtractorFeatures
}
/// <summary>
/// Demonstrates the main PdfExtractor features on "inFile.pdf": saving the
/// whole extracted text, saving the text of each page, saving each image as
/// JPEG, and retrieving attachments into the data directory.
/// </summary>
public static void Run()
{
    // ExStart:PdfExtractorFeatures
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

    // Create an instance of PdfExtractor class
    PdfExtractor extractor = new PdfExtractor();

    // Set PDF file password
    extractor.Password = "";

    // Specify start and end pages of the PDF
    extractor.StartPage = 1;
    extractor.EndPage = 10;

    // Bind PDF file with the extractor object
    extractor.BindPdf(dataDir + "inFile.pdf");

    // Extract all text from the PDF
    extractor.ExtractText();

    // Save extracted text in a text file
    extractor.GetText(dataDir + "PdfExtractorFeatures_text_out.txt");

    // Save the text of every page in its own file. A loop is required to
    // reach all pages (a single check only ever saved the first one); the
    // index keeps names unique when several files share the same clock tick.
    for (int pageIndex = 1; extractor.HasNextPageText(); pageIndex++)
    {
        extractor.GetNextPageText(dataDir + DateTime.Now.Ticks.ToString() + "_" + pageIndex + "_out.txt");
    }

    // Extract images from PDF file
    extractor.ExtractImage();

    // Save each individual image in an image file (same looping and naming
    // rationale as for the page text above)
    for (int imageIndex = 1; extractor.HasNextImage(); imageIndex++)
    {
        extractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + "_out.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
    }

    // Extract attachments
    extractor.ExtractAttachment();
    extractor.GetAttachment(dataDir);
    // ExEnd:PdfExtractorFeatures
}
/// <summary>
/// Extracts the images of "ExtractImageExtractionMode.pdf" using the
/// DefinedInResources extraction mode and saves each one as a PNG file in the
/// data directory.
/// </summary>
public static void Run()
{
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

    // Open input PDF
    PdfExtractor extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "ExtractImageExtractionMode.pdf");

    // Specify Image Extraction Mode
    extractor.ExtractImageMode = ExtractImageMode.DefinedInResources;

    // Extract Images based on Image Extraction Mode
    extractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images saved within the same
    // timer tick, which would make two images target the same file.
    for (int imageIndex = 1; extractor.HasNextImage(); imageIndex++)
    {
        extractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".png", System.Drawing.Imaging.ImageFormat.Png);
    }
}
/// <summary>
/// Extracts the images of "input.pdf" using the DefinedInResources extraction
/// mode and saves each one as a PNG file in the data directory.
/// </summary>
public static void Main()
{
    // The path to the documents directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Open input PDF
    PdfExtractor extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "input.pdf");

    // Specify Image Extraction Mode
    extractor.ExtractImageMode = ExtractImageMode.DefinedInResources;

    // Extract Images based on Image Extraction Mode
    extractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images saved within the same
    // timer tick, which would make two images target the same file.
    for (int imageIndex = 1; extractor.HasNextImage(); imageIndex++)
    {
        extractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + ".png", System.Drawing.Imaging.ImageFormat.Png);
    }
}
/// <summary>
/// Extracts the images of "ExtractImageExtractionMode.pdf" using the
/// DefinedInResources extraction mode and saves each one as an "_out.png"
/// file in the data directory.
/// </summary>
public static void Run()
{
    // ExStart:ExtractImageExtractionMode
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

    // Open input PDF
    PdfExtractor extractor = new PdfExtractor();
    extractor.BindPdf(dataDir + "ExtractImageExtractionMode.pdf");

    // Specify Image Extraction Mode
    extractor.ExtractImageMode = ExtractImageMode.DefinedInResources;

    // Extract Images based on Image Extraction Mode
    extractor.ExtractImage();

    // Get all the extracted images. The running index keeps file names unique:
    // DateTime.Now.Ticks alone can repeat for images saved within the same
    // timer tick, which would make two images target the same file.
    for (int imageIndex = 1; extractor.HasNextImage(); imageIndex++)
    {
        extractor.GetNextImage(dataDir + DateTime.Now.Ticks.ToString() + "_" + imageIndex + "_out.png", System.Drawing.Imaging.ImageFormat.Png);
    }
    // ExEnd:ExtractImageExtractionMode
}