/// <summary>
/// Extracts the text of every page of <c>sample1.pdf</c> into a separate file
/// (<c>page0.txt</c>, <c>page1.txt</c>, ...) and then opens the first output file
/// in the application associated with it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    int pageCount;

    // 'using' releases the extractor even when loading or extraction throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(@".\sample1.pdf");

        pageCount = extractor.GetPageCount();

        for (int i = 0; i < pageCount; i++)
        {
            // Any System.IO.Stream inheritor (e.g. MemoryStream) can be used here.
            // 'using' closes the file even if saving the page text fails.
            using (FileStream stream = new FileStream(@".\page" + i + ".txt", FileMode.Create))
            {
                // Save text from page #i to the file stream
                extractor.SavePageTextToStream(i, stream);
            }
        }
    }

    // Output files are numbered from zero, so the first one is page0.txt.
    // Skip opening when the document produced no pages (no file was written).
    if (pageCount > 0)
    {
        ProcessStartInfo processStartInfo = new ProcessStartInfo(@".\page0.txt");
        processStartInfo.UseShellExecute = true;
        Process.Start(processStartInfo);
    }
}
/// <summary>
/// Page load handler: extracts the text located in the first third (by width) of the
/// first page of <c>bin\columns.pdf</c> and writes it to the HTTP response inside a
/// <c>&lt;pre&gt;</c> element, then ends the response.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    String inputFile = Server.MapPath(@".\bin\columns.pdf");

    // 'using' guarantees the extractor is released for every request,
    // including requests where loading or extraction throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(inputFile);

        // Get dimensions of the first document page
        RectangleF rectangle = extractor.GetPageRectangle(0);

        // Keep the rectangle's origin and height; shrink the width to 1/3 of the page
        rectangle.Width = rectangle.Width / 3f;

        Response.Clear();
        Response.ContentType = "text/html";

        extractor.SetExtractionArea(rectangle);

        // NOTE(review): the extracted text is written as-is, without HTML encoding —
        // confirm this is acceptable for the documents being served.
        Response.Write("<pre>");

        // Save extracted text to output stream
        extractor.SavePageTextToStream(0, Response.OutputStream);

        Response.Write("</pre>");
    }

    // End the response only after the extractor has been released.
    Response.End();
}
/// <summary>
/// Extracts the text of every page of <c>sample1.pdf</c> into a separate file
/// (<c>page0.txt</c>, <c>page1.txt</c>, ...) and then opens the first output file
/// in the application associated with it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    int pageCount;

    // 'using' releases the extractor even when loading or extraction throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample1.pdf");

        pageCount = extractor.GetPageCount();

        for (int i = 0; i < pageCount; i++)
        {
            // 'using' closes the file even if saving the page text fails.
            using (FileStream fStream = new FileStream("page" + i.ToString() + ".txt", FileMode.Create))
            {
                // Save text from page #i to the file stream
                extractor.SavePageTextToStream(i, fStream);
            }
        }
    }

    // Output files are numbered from zero, so the first one is page0.txt.
    // Skip opening when the document produced no pages (no file was written).
    if (pageCount > 0)
    {
        System.Diagnostics.Process.Start("page0.txt");
    }
}
/// <summary>
/// Page load handler: splits the first page of <c>columns.pdf</c> into three vertical
/// columns of equal width and writes the text extracted from each column, in order,
/// to the HTTP response, then ends the response.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Number of equally wide columns the page is assumed to contain.
    const int ColumnCount = 3;

    // This test file will be copied to the project directory on the pre-build event (see the project properties).
    String inputFile = Server.MapPath("columns.pdf");

    // 'using' guarantees the extractor is released for every request,
    // including requests where loading or extraction throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(inputFile);

        // Read the dimensions of the very first page (zero index)
        float pageWidth = extractor.GetPageRect_Width(0);
        float pageHeight = extractor.GetPageRect_Height(0);

        // Width of a single column, assuming the columns are equally distributed on the page
        float columnWidth = pageWidth / ColumnCount;

        Response.Clear();
        Response.ContentType = "text/html";

        for (int i = 0; i < ColumnCount; i++)
        {
            // Set the extraction area to column #i: offset i column widths, full page height
            extractor.SetExtractionArea(i * columnWidth, 0, columnWidth, pageHeight);

            // Save extracted text to output stream
            extractor.SavePageTextToStream(0, Response.OutputStream);
        }
    }

    // End the response only after the extractor has been released.
    Response.End();
}