示例#1
0
        /// <summary>
        /// Click handler that extracts the text of a page range from the demo PDF
        /// and sends it to the browser as a UTF-8 text file attachment.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">The event data (not used).</param>
        protected void buttonExtractText_Click(object sender, EventArgs e)
        {
            // Resolve the location of the input PDF under the application root.
            string inputPdfPath = Server.MapPath("~") + @"\DemoFiles\Pdf\InputPdf.pdf";

            // Build the extractor with the demo serial number and the selected extraction mode.
            PdfTextExtract extractor = new PdfTextExtract
            {
                SerialNumber = "YCgJMTAE-BiwJAhIB-EhlWTlBA-UEBRQFBA-U1FOUVJO-WVlZWQ==",
                TextExtractMode = GetTextExtractMode()
            };

            // Read the page range; an empty "to page" box is passed on as 0.
            int firstPage = int.Parse(textBoxFromPage.Text);
            int lastPage = 0;
            if (textBoxToPage.Text.Length > 0)
            {
                lastPage = int.Parse(textBoxToPage.Text);
            }

            // Pull the text out of the requested pages.
            string extractedText = extractor.ExtractText(inputPdfPath, firstPage, lastPage);

            // Assemble the download payload: the UTF-8 marker followed by the encoded text.
            byte[] encodedText = Encoding.UTF8.GetBytes(extractedText);
            byte[] marker = new byte[] { 0xEF, 0xBB, 0xBF };
            byte[] payload = new byte[marker.Length + encodedText.Length];
            marker.CopyTo(payload, 0);
            encodedText.CopyTo(payload, marker.Length);

            var response = HttpContext.Current.Response;

            // Describe the data format to the browser.
            response.AddHeader("Content-Type", "text/plain; charset=UTF-8");

            // Ask the browser to save the payload as a named attachment.
            string disposition = String.Format("{0}; filename=ExtractedText.txt; size={1}", "attachment", payload.Length.ToString());
            response.AddHeader("Content-Disposition", disposition);

            // Send the payload, then call End() to stop ASP.NET page processing.
            response.BinaryWrite(payload);
            response.End();
        }
示例#2
0
        /// <summary>
        /// Extracts the text of a page range from the demo PDF document and returns it
        /// as a downloadable UTF-8 text file named "ExtractedText.txt".
        /// </summary>
        /// <param name="collection">
        /// The posted form. "textBoxFromPage" must hold an integer; "textBoxToPage" may be
        /// missing or empty, in which case 0 is passed to the extractor as the end page.
        /// </param>
        /// <returns>A file result carrying the UTF-8 marker followed by the extracted text.</returns>
        /// <exception cref="FormatException">A supplied page number is not a valid integer.</exception>
        /// <exception cref="ArgumentNullException">The "textBoxFromPage" field is missing from the form.</exception>
        public ActionResult ExtractText(IFormCollection collection)
        {
            m_formCollection = collection;

            // Build the PDF path with Path.Combine so the platform's directory separator is
            // used; hard-coded backslashes do not resolve on non-Windows hosts.
            string pdfFile = System.IO.Path.Combine(
                m_hostingEnvironment.ContentRootPath, "wwwroot", "DemoFiles", "Pdf", "InputPdf.pdf");

            // create the PDF text extractor
            PdfTextExtract pdfTextExtract = new PdfTextExtract();

            // set a demo serial number
            pdfTextExtract.SerialNumber = "YCgJMTAE-BiwJAhIB-EhlWTlBA-UEBRQFBA-U1FOUVJO-WVlZWQ==";

            // set the text extraction mode
            pdfTextExtract.TextExtractMode = GetTextExtractMode();

            int fromPdfPageNumber = int.Parse(collection["textBoxFromPage"]);

            // Check that the field exists before indexing into it: a form without
            // "textBoxToPage" yields an empty value list, and indexing [0] on it would throw.
            // A missing field is treated the same as an empty one.
            // NOTE(review): 0 presumably means "through the last page" - confirm against the
            // PdfTextExtract documentation.
            var toPageValues = collection["textBoxToPage"];
            bool hasToPage = toPageValues.Count > 0 && !string.IsNullOrEmpty(toPageValues[0]);
            int toPdfPageNumber = hasToPage ? int.Parse(toPageValues) : 0;

            // extract the text from a range of pages of the PDF document
            string text = pdfTextExtract.ExtractText(pdfFile, fromPdfPageNumber, toPdfPageNumber);

            // get UTF-8 bytes
            byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);

            // the UTF-8 marker
            byte[] utf8Marker = new byte[] { 0xEF, 0xBB, 0xBF };

            // the text document bytes with UTF-8 marker followed by UTF-8 bytes
            byte[] bytes = new byte[utf8Bytes.Length + utf8Marker.Length];
            Array.Copy(utf8Marker, 0, bytes, 0, utf8Marker.Length);
            Array.Copy(utf8Bytes, 0, bytes, utf8Marker.Length, utf8Bytes.Length);

            // Return the bytes as a plain-text download with a fixed file name.
            FileResult fileResult = new FileContentResult(bytes, "text/plain; charset=UTF-8");

            fileResult.FileDownloadName = "ExtractedText.txt";

            return fileResult;
        }