static void Main(string[] args) { // Create Bytescout.PDFExtractor.TextExtractor instance TextExtractor extractor = new TextExtractor(); extractor.RegistrationName = "demo"; extractor.RegistrationKey = "demo"; // Load sample PDF document extractor.LoadDocumentFromFile("columns.pdf"); // read width of the very first page (zero index) float pageWidth = extractor.GetPageRect_Width(0); float pageHeight = extractor.GetPageRect_Height(0); // now we are extracting content assuming we have 3 columns // equally distributed on pages // first calculate the width of the one column by dividing page width by number of columns (3) float columnWidth = pageWidth / 3f; // iterate through 3 columns for (int i = 0; i < 3; i++) { // set the extraction area to the #i column extractor.SetExtractionArea(i * columnWidth, 0, columnWidth, pageHeight); string outFileName = "columns-column" + i + ".txt"; extractor.SavePageTextToFile(0, outFileName); // Open output file in default associated application System.Diagnostics.Process.Start(outFileName); } }
protected void Page_Load(object sender, EventArgs e) { // This test file will be copied to the project directory on the pre-build event (see the project properties). String inputFile = Server.MapPath("columns.pdf"); // Create Bytescout.PDFExtractor.TextExtractor instance TextExtractor extractor = new TextExtractor(); extractor.RegistrationName = "demo"; extractor.RegistrationKey = "demo"; // Load sample PDF document extractor.LoadDocumentFromFile(inputFile); // read width of the very first page (zero index) float pageWidth = extractor.GetPageRect_Width(0); float pageHeight = extractor.GetPageRect_Height(0); // now we are extracting content assuming we have 3 columns // equally distributed on pages // first calculate the width of the one column by dividing page width by number of columns (3) float columnWidth = pageWidth / 3f; Response.Clear(); Response.ContentType = "text/html"; // iterate through 3 columns for (int i = 0; i < 3; i++) { // set the extraction area to the #i column extractor.SetExtractionArea(i * columnWidth, 0, columnWidth, pageHeight); // Save extracted text to output stream extractor.SavePageTextToStream(0, Response.OutputStream); } Response.End(); }