/// <summary>
/// Click handler that extracts the text of a page range from the demo PDF and
/// sends it to the browser as a UTF-8 text attachment named ExtractedText.txt.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">The event data (not used).</param>
protected void buttonExtractText_Click(object sender, EventArgs e)
{
    // location of the demo input document under the application root
    string inputPdfPath = Server.MapPath("~") + @"\DemoFiles\Pdf\InputPdf.pdf";

    // configure the text extractor: demo serial number first, then the extraction mode
    PdfTextExtract extractor = new PdfTextExtract();
    extractor.SerialNumber = "YCgJMTAE-BiwJAhIB-EhlWTlBA-UEBRQFBA-U1FOUVJO-WVlZWQ==";
    extractor.TextExtractMode = GetTextExtractMode();

    // page range typed by the user; an empty "to page" box is passed on as 0
    // NOTE(review): 0 presumably means "through the last page" - confirm with the PdfTextExtract docs
    int firstPage = int.Parse(textBoxFromPage.Text);
    int lastPage = 0;
    string lastPageInput = textBoxToPage.Text;
    if (lastPageInput.Length > 0)
    {
        lastPage = int.Parse(lastPageInput);
    }

    // run the extraction over the requested pages
    string extractedText = extractor.ExtractText(inputPdfPath, firstPage, lastPage);

    // build the download payload: UTF-8 byte order mark followed by the UTF-8 encoded text
    byte[] bom = new byte[] { 0xEF, 0xBB, 0xBF };
    byte[] encodedText = Encoding.UTF8.GetBytes(extractedText);
    byte[] payload = new byte[bom.Length + encodedText.Length];
    bom.CopyTo(payload, 0);
    encodedText.CopyTo(payload, bom.Length);

    var response = HttpContext.Current.Response;

    // describe the data format to the browser
    response.AddHeader("Content-Type", "text/plain; charset=UTF-8");

    // ask the browser to download the payload under a fixed file name
    string contentDisposition = String.Format("{0}; filename=ExtractedText.txt; size={1}", "attachment", payload.Length.ToString());
    response.AddHeader("Content-Disposition", contentDisposition);

    // send the bytes, then End() the response so ASP.NET stops processing the page
    response.BinaryWrite(payload);
    response.End();
}
/// <summary>
/// Extracts the text of a page range from the demo PDF and returns it as a
/// UTF-8 text file download named ExtractedText.txt.
/// </summary>
/// <param name="collection">
/// The posted form. "textBoxFromPage" must hold an integer; "textBoxToPage" may be
/// missing or empty, in which case 0 is passed to the extractor as the end page.
/// </param>
/// <returns>A file result carrying a UTF-8 byte order mark followed by the extracted text.</returns>
/// <exception cref="System.FormatException">A supplied page number is not a valid integer.</exception>
public ActionResult ExtractText(IFormCollection collection)
{
    // keep the posted form in the field as the original code does
    // NOTE(review): presumably read by GetTextExtractMode() - confirm
    m_formCollection = collection;

    // get the PDF file; Path.Combine picks the host's directory separator so the
    // path also resolves on non-Windows hosts (hard-coded '\' segments do not)
    string pdfFile = System.IO.Path.Combine(
        m_hostingEnvironment.ContentRootPath, "wwwroot", "DemoFiles", "Pdf", "InputPdf.pdf");

    // create the PDF text extractor
    PdfTextExtract pdfTextExtract = new PdfTextExtract();

    // set a demo serial number
    pdfTextExtract.SerialNumber = "YCgJMTAE-BiwJAhIB-EhlWTlBA-UEBRQFBA-U1FOUVJO-WVlZWQ==";

    // set the text extraction mode
    pdfTextExtract.TextExtractMode = GetTextExtractMode();

    int fromPdfPageNumber = int.Parse(collection["textBoxFromPage"]);

    // The implicit StringValues-to-string conversion yields null when the field was
    // not posted, so a missing "to page" is handled like an empty one instead of
    // failing on the [0] indexer of an empty StringValues.
    string toPageText = collection["textBoxToPage"];
    int toPdfPageNumber = string.IsNullOrEmpty(toPageText) ? 0 : int.Parse(toPageText);

    // extract the text from a range of pages of the PDF document
    string text = pdfTextExtract.ExtractText(pdfFile, fromPdfPageNumber, toPdfPageNumber);

    // get UTF-8 bytes
    byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);

    // the UTF-8 marker (byte order mark)
    byte[] utf8Marker = new byte[] { 0xEF, 0xBB, 0xBF };

    // the text document bytes with UTF-8 marker followed by UTF-8 bytes
    byte[] bytes = new byte[utf8Bytes.Length + utf8Marker.Length];
    Array.Copy(utf8Marker, 0, bytes, 0, utf8Marker.Length);
    Array.Copy(utf8Bytes, 0, bytes, utf8Marker.Length, utf8Bytes.Length);

    // return the bytes as a downloadable text file
    FileResult fileResult = new FileContentResult(bytes, "text/plain; charset=UTF-8");
    fileResult.FileDownloadName = "ExtractedText.txt";

    return fileResult;
}