/// <summary>
/// Extracts the text of every page of sample1.pdf into its own file
/// (page0.txt, page1.txt, ...) and then opens the first of those files
/// in the application associated with .txt files.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
        {
            int pageCount;

            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();

            try
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@".\sample1.pdf");

                // Get page count
                pageCount = extractor.GetPageCount();

                for (int i = 0; i < pageCount; i++)
                {
                    // Create new stream. You can use MemoryStream or any other System.IO.Stream inheritor.
                    // The using statement closes the file even if the extraction throws.
                    using (FileStream stream = new FileStream(@".\page" + i + ".txt", FileMode.Create))
                    {
                        // Save text from page to the file stream
                        extractor.SavePageTextToStream(i, stream);
                    }
                }
            }
            finally
            {
                // Cleanup, also when loading or extraction fails
                extractor.Dispose();
            }

            // Output files are numbered from zero, so the first one is page0.txt.
            // Skip opening it when the document produced no pages at all.
            if (pageCount > 0)
            {
                // Open first output file in default associated application
                ProcessStartInfo processStartInfo = new ProcessStartInfo(@".\page0.txt");

                processStartInfo.UseShellExecute = true;
                Process.Start(processStartInfo);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Writes the text found in one third of the width of the first page of
        /// columns.pdf to the HTTP response, wrapped in a &lt;pre&gt; element.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            String inputFile = Server.MapPath(@".\bin\columns.pdf");

            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();

            try
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // Get dimensions of the first document page
                RectangleF rectangle = extractor.GetPageRectangle(0);

                // Get text from 1/3 of the page: the rectangle keeps its origin
                // and height, only its width is reduced to a third.
                rectangle.Width = rectangle.Width / 3f;

                Response.Clear();
                Response.ContentType = "text/html";

                extractor.SetExtractionArea(rectangle);

                Response.Write("<pre>");

                // Save extracted text to output stream
                // NOTE(review): the text is emitted without HTML encoding; confirm
                // that is acceptable for the documents served here.
                extractor.SavePageTextToStream(0, Response.OutputStream);

                Response.Write("</pre>");

                Response.End();
            }
            finally
            {
                // Release the extractor on every request, including when loading or
                // extraction throws or Response.End() terminates page execution.
                extractor.Dispose();
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Saves the text of each page of sample1.pdf to a separate file
        /// (page0.txt, page1.txt, ...) in the current directory and then opens
        /// the first of those files in its associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            int pageCount;

            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();

            try
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample1.pdf");

                // Get page count
                pageCount = extractor.GetPageCount();

                for (int i = 0; i < pageCount; i++)
                {
                    // create new file stream; the using statement closes it even on failure
                    using (FileStream fStream = new FileStream("page" + i.ToString() + ".txt", FileMode.Create))
                    {
                        // save text from page #i to the file stream
                        extractor.SavePageTextToStream(i, fStream);
                    }
                }
            }
            finally
            {
                // release the extractor whether or not extraction succeeded
                extractor.Dispose();
            }

            // Files are numbered from zero, so the first output file is page0.txt.
            // Nothing is opened when the document has no pages.
            if (pageCount > 0)
            {
                // Open first output file in default associated application
                System.Diagnostics.Process.Start("page0.txt");
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Splits the first page of columns.pdf into three equal-width vertical
        /// strips and writes the text of each strip, left to right, to the HTTP
        /// response.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // now we are extracting content assuming we have 3 columns
            // equally distributed on pages
            const int columnCount = 3;

            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath("columns.pdf");

            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();

            try
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // read width and height of the very first page (zero index)
                float pageWidth  = extractor.GetPageRect_Width(0);
                float pageHeight = extractor.GetPageRect_Height(0);

                // first calculate the width of the one column by dividing page width by number of columns
                float columnWidth = pageWidth / columnCount;

                Response.Clear();
                Response.ContentType = "text/html";

                // iterate through the columns
                for (int i = 0; i < columnCount; i++)
                {
                    // set the extraction area to the #i column
                    extractor.SetExtractionArea(i * columnWidth, 0, columnWidth, pageHeight);

                    // Save extracted text to output stream
                    extractor.SavePageTextToStream(0, Response.OutputStream);
                }

                Response.End();
            }
            finally
            {
                // Release the extractor on every request, including when loading or
                // extraction throws or Response.End() terminates page execution.
                extractor.Dispose();
            }
        }