PdfExtractor C# (CSharp)代码示例

示例#1

0

显示文件

文件： Program.cs 项目： Ravivishnubhotla/Aspose_Pdf_NET

        /// <summary>
        /// Extracts every image from "input.pdf" in the data directory and writes each one
        /// to a ".jpg" file named after the current tick count.
        /// </summary>
        public static void Main()
        {
            // The path to the documents directory.
            string dataDir = Path.GetFullPath("../../../Data/");

            // Open input PDF.
            PdfExtractor pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "input.pdf");

            // Extract images, then pull them out one at a time.
            pdfExtractor.ExtractImage();
            while (pdfExtractor.HasNextImage())
            {
                // Both streams are wrapped in using blocks so they are released
                // even when reading or writing an image throws.
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    // Read the next image into memory.
                    pdfExtractor.GetNextImage(memoryStream);

                    // Write to disk, if you like, or use the stream otherwise.
                    string outputPath = dataDir + DateTime.Now.Ticks.ToString() + ".jpg";
                    using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
                    {
                        memoryStream.WriteTo(fileStream);
                    }
                }
            }
        }

示例#2

0

显示文件

        /// <summary>
        /// Extracts the text of page 1 of "input.pdf" and writes it to "output.txt"
        /// in the data directory.
        /// </summary>
        public static void Main()
        {
            // Resolve the documents directory.
            var dataDir = Path.GetFullPath("../../../Data/");

            // Bind the source PDF to a new extractor.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "input.pdf");

            // Restrict extraction to the first page.
            pdfExtractor.StartPage = 1;
            pdfExtractor.EndPage = 1;

            // Run the parameterless ExtractText and collect the result in memory.
            pdfExtractor.ExtractText();
            var buffer = new MemoryStream();
            pdfExtractor.GetText(buffer);

            string extracted;

            // Decode the buffer with Encoding.Unicode, reading from the beginning.
            using (var reader = new StreamReader(buffer, Encoding.Unicode))
            {
                buffer.Position = 0;
                extracted = reader.ReadToEnd();
            }

            File.WriteAllText(dataDir + "output.txt", extracted);
        }

示例#3

0

显示文件

文件： ExtractImagesStream.cs 项目： aspose-pdf/Aspose.Pdf-for-.NET

        /// <summary>
        /// Extracts every image from "ExtractImages-Stream.pdf" through an in-memory stream
        /// and writes each one to a "_out.jpg" file named after the current tick count.
        /// </summary>
        public static void Run()
        {
            // ExStart:ExtractImagesStream
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

            // Open input PDF
            PdfExtractor pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "ExtractImages-Stream.pdf");

            // Extract images
            pdfExtractor.ExtractImage();

            // Get all the extracted images
            while (pdfExtractor.HasNextImage())
            {
                // using blocks guarantee both streams are released even if a read
                // or write fails part-way through.
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    // Read image into memory stream
                    pdfExtractor.GetNextImage(memoryStream);

                    // Write to disk, if you like, or use it otherwise.
                    string outputPath = dataDir + DateTime.Now.Ticks.ToString() + "_out.jpg";
                    using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
                    {
                        memoryStream.WriteTo(fileStream);
                    }
                }
            }
            // ExEnd:ExtractImagesStream
        }

示例#4

0

显示文件

文件： ExtractTextPage.cs 项目： zaheertariq/Aspose_Pdf_NET-1

        /// <summary>
        /// Extracts text from "ExtractText-Page.pdf" page by page and writes each page
        /// to "output{n}.txt" in the data directory, numbering from 1.
        /// </summary>
        public static void Run()
        {
            // Documents directory for the text examples.
            var dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();

            // Bind the input PDF.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "ExtractText-Page.pdf");

            // Parameterless ExtractText call.
            pdfExtractor.ExtractText();

            // One output file per page for as long as the extractor reports more page text.
            for (int pageNumber = 1; pdfExtractor.HasNextPageText(); pageNumber++)
            {
                var pageBuffer = new MemoryStream();
                pdfExtractor.GetNextPageText(pageBuffer);

                string pageText;

                // Decode with Encoding.Unicode from the start of the buffer.
                using (var reader = new StreamReader(pageBuffer, Encoding.Unicode))
                {
                    pageBuffer.Position = 0;
                    pageText = reader.ReadToEnd();
                }

                var outputPath = dataDir + "output" + pageNumber + ".txt";
                File.WriteAllText(outputPath, pageText);
            }
        }

示例#5

0

显示文件

文件： Program.cs 项目： Ravivishnubhotla/Aspose_Pdf_NET

        /// <summary>
        /// Extracts text from "input.pdf" page by page and writes each page to
        /// "output{n}.txt" in the data directory, numbering from 1.
        /// </summary>
        public static void Main()
        {
            // Resolve the documents directory.
            var dataDir = Path.GetFullPath("../../../Data/");

            // Bind the input PDF.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "input.pdf");

            // Parameterless ExtractText call.
            pdfExtractor.ExtractText();

            var pageIndex = 1;
            while (pdfExtractor.HasNextPageText())
            {
                var pageBuffer = new MemoryStream();
                pdfExtractor.GetNextPageText(pageBuffer);

                string pageText;

                // Read the buffer from its beginning using Encoding.Unicode.
                using (var reader = new StreamReader(pageBuffer, Encoding.Unicode))
                {
                    pageBuffer.Position = 0;
                    pageText = reader.ReadToEnd();
                }

                var targetFile = dataDir + "output" + pageIndex + ".txt";
                File.WriteAllText(targetFile, pageText);
                pageIndex++;
            }
        }

示例#6

0

显示文件

文件： Program.cs 项目： Ravivishnubhotla/Aspose_Pdf_NET

        /// <summary>
        /// Extracts the images found on pages 10 through 20 of "Input_new.pdf" and writes
        /// each one to a ".jpg" file named after the current tick count.
        /// </summary>
        public static void Main()
        {
            // The path to the documents directory.
            string dataDir = Path.GetFullPath("../../../Data/");

            // Open input PDF.
            PdfExtractor pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "Input_new.pdf");

            // Set StartPage and EndPage properties to specify the page range.
            pdfExtractor.StartPage = 10;
            pdfExtractor.EndPage = 20;

            // Extract images, then pull them out one at a time.
            pdfExtractor.ExtractImage();
            while (pdfExtractor.HasNextImage())
            {
                // using blocks release both streams even when an exception is thrown.
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    // Read the next image into memory.
                    pdfExtractor.GetNextImage(memoryStream);

                    // Write to disk, if you like, or use the stream otherwise.
                    string outputPath = dataDir + DateTime.Now.Ticks.ToString() + ".jpg";
                    using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
                    {
                        memoryStream.WriteTo(fileStream);
                    }
                }
            }
        }

示例#7

0

显示文件

        /// <summary>
        /// Searches "sample.pdf" for the phrase "text for search" (case-insensitive,
        /// whole words only) and prints each hit's text, page id and rectangle X/Y.
        /// </summary>
        public static void SearchText()
        {
            // Replace the placeholder with the trial key from trial-license.txt in the download package.
            // This registration call has to come before any other library code.
            LicenseManager.SetKey("trial key");

            // Copy the "x86" and "x64" folders from the download package to your .NET project Bin folder.
            PdfExtractor extractor = new PdfExtractor("sample.pdf");

            // Ignore upper/lower case differences.
            extractor.MatchCase = false;
            // Match whole words only.
            extractor.MatchWholeWord = true;

            // Search the whole document.
            // To search only the first page instead: extractor.SearchText("text for search", 0)
            List<TextInfo> matches = extractor.SearchText("text for search");

            foreach (TextInfo match in matches)
            {
                string line = match.Text + "-" + match.PageId + "-" + match.Rect.X + "-" + match.Rect.Y;
                Console.WriteLine(line);
            }
        }

示例#8

0

显示文件

文件： FolderList.cs 项目： janosymarton/CoolTool

        /// <summary>
        /// Opens the PDF at <paramref name="filePath"/> and fills <paramref name="fo"/> with
        /// its page count, embedded-file count, image count, open-password flag, word count
        /// and character count.
        /// </summary>
        /// <param name="fo">File object whose statistics fields are populated.</param>
        /// <param name="filePath">Path of the PDF file to analyse.</param>
        /// <remarks>
        /// Text is exported to a temporary file in a uniquely named sub-folder of
        /// <c>tmpFolder</c>. The document is disposed and the temporary file and folder are
        /// removed even when extraction or counting throws.
        /// </remarks>
        private void ParsePDF(ref FileObject fo, string filePath)
        {
            using (Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document(filePath))
            {
                PdfFileInfo            pi  = new PdfFileInfo(pdfDocument);
                PdfExtractor           pe  = new PdfExtractor(pdfDocument);
                ImagePlacementAbsorber abs = new ImagePlacementAbsorber();

                fo.pageCount         = pi.NumberOfPages;
                fo.embeddedDocsCount = pdfDocument.EmbeddedFiles.Count;

                // The absorber visits every page and collects the image placements.
                pdfDocument.Pages.Accept(abs);
                fo.imageCount  = abs.ImagePlacements.Count;
                fo.hasPassword = pi.HasOpenPassword;

                pe.ExtractText(Encoding.ASCII);

                // A fresh GUID-named folder keeps this export separate from any other.
                string tmpFolderToExtract = tmpFolder + "\\" + Guid.NewGuid();
                string tmpTextFile        = tmpFolderToExtract + "\\" + "tmpTextexport.txt";

                Directory.CreateDirectory(tmpFolderToExtract);
                try
                {
                    pe.GetText(tmpTextFile);
                    fo.wordCount      = GetWordCount(tmpTextFile);
                    fo.characterCount = GetCharCount(tmpTextFile);
                }
                finally
                {
                    // Always clean up the temporary export, including on failure.
                    if (File.Exists(tmpTextFile))
                    {
                        File.Delete(tmpTextFile);
                    }
                    if (Directory.Exists(tmpFolderToExtract))
                    {
                        // Recursive delete so a partially written export cannot block cleanup.
                        Directory.Delete(tmpFolderToExtract, true);
                    }
                }
            }
        }

示例#9

0

显示文件

文件： ExtractTextPageRange.cs 项目： joyang1/Aspose_Pdf_NET

        /// <summary>
        /// Extracts the text of page 1 of "ExtractText-PageRange.pdf" and writes it to
        /// "output.txt" in the data directory.
        /// </summary>
        public static void Run()
        {
            // Documents directory for the text examples.
            var dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();

            // Bind the input PDF.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "ExtractText-PageRange.pdf");

            // Restrict the range to the first page.
            pdfExtractor.StartPage = 1;
            pdfExtractor.EndPage = 1;

            // Parameterless ExtractText, then collect the text in memory.
            pdfExtractor.ExtractText();
            var buffer = new MemoryStream();
            pdfExtractor.GetText(buffer);

            string extracted;

            // Decode with Encoding.Unicode, starting from the beginning of the buffer.
            using (var reader = new StreamReader(buffer, Encoding.Unicode))
            {
                buffer.Position = 0;
                extracted = reader.ReadToEnd();
            }

            File.WriteAllText(dataDir + "output.txt", extracted);
        }

示例#10

0

显示文件

文件： ExtractImagesPage.cs 项目： joyang1/Aspose_Pdf_NET

        /// <summary>
        /// Extracts the images found on page 2 of "ExtractImages-Page.pdf" and writes each
        /// one to a ".jpg" file named after the current tick count.
        /// </summary>
        public static void Run()
        {
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

            // Open input PDF.
            PdfExtractor pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "ExtractImages-Page.pdf");

            // Set StartPage and EndPage to the page number you want to extract images from.
            pdfExtractor.StartPage = 2;
            pdfExtractor.EndPage = 2;

            // Extract images, then pull them out one at a time.
            pdfExtractor.ExtractImage();
            while (pdfExtractor.HasNextImage())
            {
                // using blocks release both streams even when an exception is thrown.
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    // Read the next image into memory.
                    pdfExtractor.GetNextImage(memoryStream);

                    // Write to disk, if you like, or use the stream otherwise.
                    string outputPath = dataDir + DateTime.Now.Ticks.ToString() + ".jpg";
                    using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
                    {
                        memoryStream.WriteTo(fileStream);
                    }
                }
            }
        }

示例#11

0

显示文件

文件： ExtractTextPage.cs 项目： aspose-pdf/Aspose.Pdf-for-.NET

        /// <summary>
        /// Extracts text from "ExtractText-Page.pdf" page by page and writes each page to
        /// "output{n}_out.txt" in the data directory, numbering from 1.
        /// </summary>
        public static void Run()
        {
            // ExStart:ExtractTextPage
            // Documents directory for the text examples.
            var dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();

            // Bind the input PDF.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "ExtractText-Page.pdf");

            // Parameterless ExtractText call.
            pdfExtractor.ExtractText();

            // One output file per page for as long as the extractor reports more page text.
            for (int pageNumber = 1; pdfExtractor.HasNextPageText(); pageNumber++)
            {
                var pageBuffer = new MemoryStream();
                pdfExtractor.GetNextPageText(pageBuffer);

                string pageText;

                // Decode with Encoding.Unicode from the start of the buffer.
                using (var reader = new StreamReader(pageBuffer, Encoding.Unicode))
                {
                    pageBuffer.Position = 0;
                    pageText = reader.ReadToEnd();
                }

                var outputPath = dataDir + "output" + pageNumber + "_out.txt";
                File.WriteAllText(outputPath, pageText);
            }
            // ExEnd:ExtractTextPage
        }

示例#12

0

显示文件

        /// <summary>
        /// Extracts the images found on page 2 of "ExtractImages-Page.pdf" and writes each
        /// one to a ".jpg" file named after the current tick count.
        /// </summary>
        public static void Run()
        {
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

            // Open input PDF.
            PdfExtractor pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "ExtractImages-Page.pdf");

            // Set StartPage and EndPage to the page number you want to extract images from.
            pdfExtractor.StartPage = 2;
            pdfExtractor.EndPage   = 2;

            // Extract images, then pull them out one at a time.
            pdfExtractor.ExtractImage();
            while (pdfExtractor.HasNextImage())
            {
                // Dispose both streams deterministically, including on failure.
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    // Read the next image into memory.
                    pdfExtractor.GetNextImage(memoryStream);

                    // Write to disk, if you like, or use the stream otherwise.
                    string outputPath = dataDir + DateTime.Now.Ticks.ToString() + ".jpg";
                    using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
                    {
                        memoryStream.WriteTo(fileStream);
                    }
                }
            }
        }

示例#13

0

显示文件

文件： ExtractText.cs 项目： wuhensoft/Aspose.Pdf-for-.NET

        /// <summary>
        /// Extracts the text of "ExtractText.pdf" and writes it to "output_out.txt"
        /// in the data directory.
        /// </summary>
        public static void Run()
        {
            // ExStart:ExtractText
            // Documents directory for the text examples.
            var dataDir = RunExamples.GetDataDir_AsposePdfFacades_Text();

            // Bind the input PDF.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "ExtractText.pdf");

            // Parameterless ExtractText, then collect the text in memory.
            pdfExtractor.ExtractText();
            var buffer = new MemoryStream();
            pdfExtractor.GetText(buffer);

            string extracted;

            // Decode with Encoding.Unicode, starting from the beginning of the buffer.
            using (var reader = new StreamReader(buffer, Encoding.Unicode))
            {
                buffer.Position = 0;
                extracted = reader.ReadToEnd();
            }

            File.WriteAllText(dataDir + "output_out.txt", extracted);
            // ExEnd:ExtractText
        }

示例#14

0

显示文件

文件： ExtractImagesStream.cs 项目： wuhensoft/Aspose.Pdf-for-.NET

        /// <summary>
        /// Extracts every image from "ExtractImages-Stream.pdf" through an in-memory stream
        /// and writes each one to a "_out.jpg" file named after the current tick count.
        /// </summary>
        public static void Run()
        {
            // ExStart:ExtractImagesStream
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

            // Open input PDF
            PdfExtractor pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "ExtractImages-Stream.pdf");

            // Extract images
            pdfExtractor.ExtractImage();

            // Get all the extracted images
            while (pdfExtractor.HasNextImage())
            {
                // Dispose both streams deterministically, including on failure.
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    // Read image into memory stream
                    pdfExtractor.GetNextImage(memoryStream);

                    // Write to disk, if you like, or use it otherwise.
                    string outputPath = dataDir + DateTime.Now.Ticks.ToString() + "_out.jpg";
                    using (FileStream fileStream = new FileStream(outputPath, FileMode.Create))
                    {
                        memoryStream.WriteTo(fileStream);
                    }
                }
            }
            // ExEnd:ExtractImagesStream
        }

示例#15

0

显示文件

文件： PdfContainsTextOrImages.cs 项目： wuhensoft/Aspose.Pdf-for-.NET

        /// <summary>
        /// Reports on the console whether "FilledForm.pdf" contains text, images, both,
        /// or neither, based on what the extractor returns.
        /// </summary>
        public static void Run()
        {
            // ExStart:PdfContainsTextOrImages
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

            // Instantiate PdfExtractor object and bind the input PDF document to it
            PdfExtractor extractor = new PdfExtractor();
            extractor.BindPdf(dataDir + "FilledForm.pdf");

            // Extract text from the input PDF document
            extractor.ExtractText();

            bool containsText;

            // Copy the extracted text into a memory stream (nothing is written to disk);
            // the stream is only needed to measure how much text came back.
            using (MemoryStream ms = new MemoryStream())
            {
                extractor.GetText(ms);
                // Any non-empty stream counts as "contains text"
                containsText = ms.Length >= 1;
            }

            // Extract images from the input PDF document
            extractor.ExtractImage();

            // A single HasNextImage check is enough: one available image means the PDF has images
            bool containsImage = extractor.HasNextImage();

            // Now find out whether this PDF is text only or image only
            if (containsText && !containsImage)
            {
                Console.WriteLine("PDF contains text only");
            }
            else if (!containsText && containsImage)
            {
                Console.WriteLine("PDF contains image only");
            }
            else if (containsText && containsImage)
            {
                Console.WriteLine("PDF contains both text and image");
            }
            else
            {
                Console.WriteLine("PDF contains neither text or nor image");
            }
            // ExEnd:PdfContainsTextOrImages
        }

示例#16

0

显示文件

        /// <summary>
        /// Runs image-text extraction on the file at <paramref name="path"/> and asserts that
        /// the first code found by the RPD extractor equals <paramref name="code"/>.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <param name="code">Code expected to be the first one extracted.</param>
        public void ImageExtractPdfTextTest(string path, string code)
        {
            var pdfExtractor = new PdfExtractor(new ContentImageExtractor());

            // Both string lists are empty; only the code pattern is configured.
            var config          = new RpdExtractorConfig(new List<string>(), new List<string>(), @"(?<code>\d\d\.0\d\.\d\d)($|\D)");
            var contentAnalyzer = new RpdContentExtractor(config);
            var fileBytes       = File.ReadAllBytes(path);

            // Blocks on the extraction result.
            var imageText = pdfExtractor.ExtractImageText(fileBytes, path).Result;
            var analysis  = contentAnalyzer.Extract(imageText.Content);

            Assert.True(analysis.Codes.Count > 0);
            Assert.AreEqual(code, analysis.Codes.First());
        }

示例#17

0

显示文件

文件： Program.cs 项目： Ravivishnubhotla/Aspose_Pdf_NET

        /// <summary>
        /// Extracts the attachments of "input.pdf" and saves them under the
        /// ".\output" location relative to the data directory.
        /// </summary>
        public static void Main()
        {
            // Resolve the documents directory.
            var dataDir = Path.GetFullPath("../../../Data/");

            // Bind the document.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "input.pdf");

            // Extract the attachments, then store them at the output location.
            pdfExtractor.ExtractAttachment();
            var outputLocation = dataDir + ".\\output";
            pdfExtractor.GetAttachment(outputLocation);
        }

示例#18

0

显示文件

文件： PdfExtractorsTests.cs 项目： OnlyFart/RpdParser

        /// <summary>
        /// Extracts text from the file at <paramref name="path"/> and asserts that the RPD
        /// extractor finds <paramref name="code"/> first and reports the document type Rpd.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <param name="plus">Single entry placed in the first string list of the extractor config.</param>
        /// <param name="code">Code expected to be the first one extracted.</param>
        public void PdfExtractTextRpdTest(string path, string plus, string code)
        {
            var extractor = new PdfExtractor(new ContentImageExtractor());

            // The first list carries the supplied entry; the second list is empty.
            var firstList       = new List<string> { plus };
            var config          = new RpdExtractorConfig(firstList, new List<string>(), @"(?<code>\d\d\.0\d\.\d\d)($|\D)");
            var contentAnalyzer = new RpdContentExtractor(config);
            var fileBytes       = File.ReadAllBytes(path);

            var extractedText = extractor.ExtractText(fileBytes, ".pdf");
            var analysis      = contentAnalyzer.Extract(extractedText.Content);

            Assert.True(analysis.Codes.Count > 0);
            Assert.AreEqual(code, analysis.Codes.First());
            Assert.AreEqual(DocumentType.Rpd, analysis.DocumentType);
        }

示例#19

0

显示文件

        /// <summary>
        /// Extracts the attachments of "ExtractAllAttachments.pdf" and saves them under
        /// the ".\output" location relative to the data directory.
        /// </summary>
        public static void Run()
        {
            // Documents directory for the attachment examples.
            var dataDir = RunExamples.GetDataDir_AsposePdfFacades_Attachments();

            // Bind the document.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "ExtractAllAttachments.pdf");

            // Extract the attachments, then store them at the output location.
            pdfExtractor.ExtractAttachment();
            var outputLocation = dataDir + ".\\output";
            pdfExtractor.GetAttachment(outputLocation);
        }

示例#20

0

显示文件

        /// <summary>
        /// Extracts the attachments of "input.pdf" and saves them under the
        /// ".\output" location relative to the data directory.
        /// </summary>
        public static void Main()
        {
            // Resolve the documents directory.
            var dataDir = Path.GetFullPath("../../../Data/");
            var inputFile = dataDir + "input.pdf";

            // Bind the document.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(inputFile);

            // Extract the attachments.
            pdfExtractor.ExtractAttachment();

            // Store the extracted attachments.
            pdfExtractor.GetAttachment(dataDir + ".\\output");
        }

示例#21

0

显示文件

文件： ExtractAllAttachments.cs 项目： joyang1/Aspose_Pdf_NET

        /// <summary>
        /// Extracts the attachments of "ExtractAllAttachments.pdf" and saves them under
        /// the ".\output" location relative to the data directory.
        /// </summary>
        public static void Run()
        {
            // Documents directory for the attachment examples.
            var dataDir = RunExamples.GetDataDir_AsposePdfFacades_Attachments();
            var inputFile = dataDir + "ExtractAllAttachments.pdf";

            // Bind the document.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(inputFile);

            // Extract the attachments.
            pdfExtractor.ExtractAttachment();

            // Store the extracted attachments.
            pdfExtractor.GetAttachment(dataDir + ".\\output");
        }

示例#22

0

显示文件

文件： Program.cs 项目： Ravivishnubhotla/Aspose_Pdf_NET

        /// <summary>
        /// Extracts every image from "input.pdf" and saves each one directly to a ".jpg"
        /// file named after the current tick count.
        /// </summary>
        public static void Main()
        {
            // Resolve the documents directory.
            var dataDir = Path.GetFullPath("../../../Data/");

            // Bind the input PDF.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "input.pdf");

            // Extract all the images.
            pdfExtractor.ExtractImage();

            // Save the extracted images one by one.
            while (pdfExtractor.HasNextImage())
            {
                var imagePath = dataDir + DateTime.Now.Ticks.ToString() + ".jpg";
                pdfExtractor.GetNextImage(imagePath);
            }
        }

示例#23

0

显示文件

文件： Program.cs 项目： Ravivishnubhotla/Aspose_Pdf_NET

        /// <summary>
        /// Prints the names of the attachments found in "input.pdf" to the console.
        /// </summary>
        public static void Main()
        {
            // Resolve the documents directory.
            var dataDir = Path.GetFullPath("../../../Data/");

            // Bind the document.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "input.pdf");

            // Extract the attachments.
            pdfExtractor.ExtractAttachment();

            // Fetch the attachment names as a non-generic list.
            var names = (System.Collections.IList)pdfExtractor.GetAttachNames();

            foreach (string name in names)
            {
                Console.WriteLine("Name : {0}", name);
            }
        }

示例#24

0

显示文件

文件： TextHelper.cs 项目： iditect/pdf-tutorial

        /// <summary>
        /// Converts each page of "sample.pdf" to text, writes every page to "{index}.txt"
        /// as UTF-8, and accumulates the text of all pages in a string builder.
        /// </summary>
        public static void PDF2Text()
        {
            // Copy the "x86" and "x64" folders from the download package to your .NET project Bin folder.
            var extractor = new PdfExtractor("sample.pdf");

            // Collects the text of the whole document.
            var allText = new StringBuilder();

            var pageIndex = 0;
            while (pageIndex < extractor.PageCount)
            {
                // Extract the page text from the PDF with its original layout.
                var pageText = extractor.PageToText(pageIndex);

                // Save the page text to a local file; it could also be kept in memory for other use.
                var pageFile = pageIndex.ToString() + ".txt";
                File.WriteAllText(pageFile, pageText, Encoding.UTF8);

                // Append this page to the whole-document text.
                allText.Append(pageText);

                pageIndex++;
            }
        }

示例#25

0

显示文件

文件： GetAttachmentNames.cs 项目： joyang1/Aspose_Pdf_NET

        /// <summary>
        /// Prints the names of the attachments found in "GetAttachmentNames.pdf" to the console.
        /// </summary>
        public static void Run()
        {
            // Documents directory for the attachment examples.
            var dataDir = RunExamples.GetDataDir_AsposePdfFacades_Attachments();

            // Bind the document.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "GetAttachmentNames.pdf");

            // Extract the attachments.
            pdfExtractor.ExtractAttachment();

            // Fetch the attachment names as a non-generic list.
            var names = (System.Collections.IList)pdfExtractor.GetAttachNames();

            foreach (string name in names)
            {
                Console.WriteLine("Name : {0}", name);
            }
        }

示例#26

0

显示文件

        /// <summary>
        /// Extracts every image from "input.pdf" and saves each one directly to a ".jpg"
        /// file named after the current tick count.
        /// </summary>
        public static void Main()
        {
            // Resolve the documents directory.
            var dataDir = Path.GetFullPath("../../../Data/");
            var inputFile = dataDir + "input.pdf";

            // Bind the input PDF.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(inputFile);

            // Extract all the images.
            pdfExtractor.ExtractImage();

            // Save the extracted images one by one.
            while (pdfExtractor.HasNextImage())
            {
                var imagePath = dataDir + DateTime.Now.Ticks.ToString() + ".jpg";
                pdfExtractor.GetNextImage(imagePath);
            }
        }

示例#27

0

显示文件

        /// <summary>
        /// Extracts every image from "ExtractImages.pdf" and saves each one directly to a
        /// ".jpg" file named after the current tick count.
        /// </summary>
        public static void Run()
        {
            // Documents directory for the image examples.
            var dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();
            var inputFile = dataDir + "ExtractImages.pdf";

            // Bind the input PDF.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(inputFile);

            // Extract all the images.
            pdfExtractor.ExtractImage();

            // Save the extracted images one by one.
            while (pdfExtractor.HasNextImage())
            {
                var imagePath = dataDir + DateTime.Now.Ticks.ToString() + ".jpg";
                pdfExtractor.GetNextImage(imagePath);
            }
        }

示例#28

0

显示文件

文件： ExtractImages.cs 项目： joyang1/Aspose_Pdf_NET

        /// <summary>
        /// Extracts every image from "ExtractImages.pdf" and saves each one directly to a
        /// ".jpg" file named after the current tick count.
        /// </summary>
        public static void Run()
        {
            // Documents directory for the image examples.
            var dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

            // Bind the input PDF.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "ExtractImages.pdf");

            // Extract all the images.
            pdfExtractor.ExtractImage();

            // Save the extracted images one by one.
            while (pdfExtractor.HasNextImage())
            {
                var imagePath = dataDir + DateTime.Now.Ticks.ToString() + ".jpg";
                pdfExtractor.GetNextImage(imagePath);
            }
        }

示例#29

0

显示文件

文件： PdfContainsTextOrImages.cs 项目： aspose-pdf/Aspose.Pdf-for-.NET

        /// <summary>
        /// Reports on the console whether "FilledForm.pdf" contains text, images, both,
        /// or neither, based on what the extractor returns.
        /// </summary>
        public static void Run()
        {
            // ExStart:PdfContainsTextOrImages
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

            // Instantiate PdfExtractor object and bind the input PDF document to it
            PdfExtractor extractor = new PdfExtractor();
            extractor.BindPdf(dataDir + "FilledForm.pdf");

            // Extract text from the input PDF document
            extractor.ExtractText();

            bool containsText;

            // Copy the extracted text into a memory stream (nothing is written to disk);
            // the stream is only needed to measure how much text came back.
            using (MemoryStream ms = new MemoryStream())
            {
                extractor.GetText(ms);
                // Any non-empty stream counts as "contains text"
                containsText = ms.Length >= 1;
            }

            // Extract images from the input PDF document
            extractor.ExtractImage();

            // A single HasNextImage check is enough: one available image means the PDF has images
            bool containsImage = extractor.HasNextImage();

            // Now find out whether this PDF is text only or image only
            if (containsText && !containsImage)
            {
                Console.WriteLine("PDF contains text only");
            }
            else if (!containsText && containsImage)
            {
                Console.WriteLine("PDF contains image only");
            }
            else if (containsText && containsImage)
            {
                Console.WriteLine("PDF contains both text and image");
            }
            else
            {
                Console.WriteLine("PDF contains neither text or nor image");
            }
            // ExEnd:PdfContainsTextOrImages
        }

示例#30

0

显示文件

文件： Program.cs 项目： Ravivishnubhotla/Aspose_Pdf_NET

        /// <summary>
        /// Prints the names of the attachments found in "input.pdf" to the console.
        /// </summary>
        public static void Main()
        {
            // Resolve the documents directory.
            var dataDir = Path.GetFullPath("../../../Data/");
            var inputFile = dataDir + "input.pdf";

            // Bind the document.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(inputFile);

            // Extract the attachments.
            pdfExtractor.ExtractAttachment();

            // Fetch the attachment names as a non-generic list and print each one.
            var names = (System.Collections.IList)pdfExtractor.GetAttachNames();
            foreach (string name in names)
            {
                Console.WriteLine("Name : {0}", name);
            }
        }

示例#31

0

显示文件

        /// <summary>
        /// Prints the names of the attachments found in "GetAttachmentNames.pdf" to the console.
        /// </summary>
        public static void Run()
        {
            // Documents directory for the attachment examples.
            var dataDir = RunExamples.GetDataDir_AsposePdfFacades_Attachments();
            var inputFile = dataDir + "GetAttachmentNames.pdf";

            // Bind the document.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(inputFile);

            // Extract the attachments.
            pdfExtractor.ExtractAttachment();

            // Fetch the attachment names as a non-generic list and print each one.
            var names = (System.Collections.IList)pdfExtractor.GetAttachNames();
            foreach (string name in names)
            {
                Console.WriteLine("Name : {0}", name);
            }
        }

示例#32

0

显示文件

文件： Program.cs 项目： Ravivishnubhotla/Aspose_Pdf_NET

        /// <summary>
        /// Extracts images from "input.pdf" using the DefinedInResources extraction mode and
        /// saves each one as a ".png" file named after the current tick count.
        /// </summary>
        public static void Main()
        {
            // Resolve the documents directory.
            var dataDir = Path.GetFullPath("../../../Data/");

            // Bind the input PDF.
            var pdfExtractor = new PdfExtractor();
            pdfExtractor.BindPdf(dataDir + "input.pdf");

            // Choose the image extraction mode before extracting.
            pdfExtractor.ExtractImageMode = ExtractImageMode.DefinedInResources;

            // Extract images according to the selected mode.
            pdfExtractor.ExtractImage();

            // Save every extracted image in PNG format.
            while (pdfExtractor.HasNextImage())
            {
                var imagePath = dataDir + DateTime.Now.Ticks.ToString() + ".png";
                pdfExtractor.GetNextImage(imagePath, System.Drawing.Imaging.ImageFormat.Png);
            }
        }

示例#33

0

显示文件

文件： PdfExtractorFeatures.cs 项目： pikaih/Aspose.Pdf-for-.NET

        /// <summary>
        /// Walks through the main PdfExtractor features on "inFile.pdf": saves the text of the
        /// selected page range to one file, saves the text of each page to its own file, saves
        /// every extracted image as JPEG, and writes the attachments to the data directory.
        /// </summary>
        public static void Run()
        {
            // ExStart:PdfExtractorFeatures
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

            // Create an instance of PdfExtractor class
            PdfExtractor extractor = new PdfExtractor();

            // Set PDF file password
            extractor.Password = "";
            // Specify start and end pages of the PDF
            extractor.StartPage = 1;
            extractor.EndPage   = 10;

            // Bind PDF file with the extractor object
            extractor.BindPdf(dataDir + "inFile.pdf");
            // Extract all text from the PDF
            extractor.ExtractText();
            // Save extracted text in a text file
            extractor.GetText(dataDir + "PdfExtractorFeatures_text_out_.txt");

            // One timestamp for the whole run; the running indexes below keep the file names
            // unique even when several files are written within the same clock tick.
            string runStamp = DateTime.Now.Ticks.ToString();

            // Text of individual pages can also be saved individually in single text files.
            // Loop until the extractor reports no further page text, so every page is written
            // (a single 'if' would only save the first one).
            int pageIndex = 0;
            while (extractor.HasNextPageText())
            {
                pageIndex++;
                extractor.GetNextPageText(dataDir + runStamp + "_" + pageIndex + "_out_.txt");
            }

            // Extract images from PDF file
            extractor.ExtractImage();
            // Save each individual image in an image file; loop so that all images are written.
            int imageIndex = 0;
            while (extractor.HasNextImage())
            {
                imageIndex++;
                extractor.GetNextImage(dataDir + runStamp + "_" + imageIndex + "_out_.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
            }

            // Extract attachments
            extractor.ExtractAttachment();
            extractor.GetAttachment(dataDir);
            // ExEnd:PdfExtractorFeatures
        }

示例#34

0

显示文件

文件： PdfExtractorFeatures.cs 项目： aspose-pdf/Aspose.Pdf-for-.NET

        /// <summary>
        /// Walks through the main PdfExtractor features on "inFile.pdf": saves the text of the
        /// selected page range to one file, saves the text of each page to its own file, saves
        /// every extracted image as JPEG, and writes the attachments to the data directory.
        /// </summary>
        public static void Run()
        {
            // ExStart:PdfExtractorFeatures
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_TechnicalArticles();

            // Create an instance of PdfExtractor class
            PdfExtractor extractor = new PdfExtractor();

            // Set PDF file password
            extractor.Password = "";
            // Specify start and end pages of the PDF
            extractor.StartPage = 1;
            extractor.EndPage = 10;

            // Bind PDF file with the extractor object
            extractor.BindPdf(dataDir + "inFile.pdf");
            // Extract all text from the PDF
            extractor.ExtractText();
            // Save extracted text in a text file
            extractor.GetText(dataDir + "PdfExtractorFeatures_text_out.txt");

            // One timestamp for the whole run; the running indexes below keep the file names
            // unique even when several files are written within the same clock tick.
            string runStamp = DateTime.Now.Ticks.ToString();

            // Text of individual pages can also be saved individually in single text files.
            // Loop until the extractor reports no further page text, so every page is written
            // (a single 'if' would only save the first one).
            int pageIndex = 0;
            while (extractor.HasNextPageText())
            {
                pageIndex++;
                extractor.GetNextPageText(dataDir + runStamp + "_" + pageIndex + "_out.txt");
            }

            // Extract images from PDF file
            extractor.ExtractImage();
            // Save each individual image in an image file; loop so that all images are written.
            int imageIndex = 0;
            while (extractor.HasNextImage())
            {
                imageIndex++;
                extractor.GetNextImage(dataDir + runStamp + "_" + imageIndex + "_out.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
            }

            // Extract attachments
            extractor.ExtractAttachment();
            extractor.GetAttachment(dataDir);
            // ExEnd:PdfExtractorFeatures
        }

示例#35

0

显示文件

        /// <summary>
        /// Converts "sample.pdf" to text page by page: each page's text is written to a UTF-8
        /// file named after the page's zero-based index ("0.txt", "1.txt", ...) and is also
        /// appended to an in-memory buffer holding the text of the whole document.
        /// </summary>
        public static void PDF2Text()
        {
            // Replace "trial key" with the key from trial-license.txt in the download package.
            // This license registration line has to come before any other library code.
            LicenseManager.SetKey("trial key");

            // Copy the "x86" and "x64" folders from the download package to your .NET project Bin folder.
            var pdf = new PdfExtractor("sample.pdf");

            // Buffer collecting the text of the whole document.
            var wholeText = new StringBuilder();

            int pageIndex = 0;
            while (pageIndex < pdf.PageCount)
            {
                // Extract the text of the current page.
                string currentPageText = pdf.PageToText(pageIndex);

                // The page text can be saved to a local file, or kept in memory for other use.
                File.WriteAllText(pageIndex + ".txt", currentPageText, Encoding.UTF8);

                // Add this page to the running document text.
                wholeText.Append(currentPageText);

                pageIndex++;
            }
        }

示例#36

0

显示文件

文件： Program.cs 项目： Ravivishnubhotla/Aspose_Pdf_NET

        /// <summary>
        /// Extracts the images of "input.pdf" using the DefinedInResources extraction mode and
        /// saves every extracted image as a PNG file in the data directory.
        /// </summary>
        public static void Main()
        {
            // The path to the documents directory.
            string dataDir = Path.GetFullPath("../../../Data/");

            // Open input PDF
            PdfExtractor extractor = new PdfExtractor();
            extractor.BindPdf(dataDir + "input.pdf");

            // Specify Image Extraction Mode
            extractor.ExtractImageMode = ExtractImageMode.DefinedInResources;

            // Extract Images based on Image Extraction Mode
            extractor.ExtractImage();

            // Use one timestamp for the whole run plus a running index. DateTime.Now only advances
            // at the system timer resolution, so reading fresh ticks for every image can produce
            // the same file name twice and silently overwrite an image saved a moment earlier.
            string runStamp = DateTime.Now.Ticks.ToString();
            int imageIndex = 0;

            // Get all the extracted images
            while (extractor.HasNextImage())
            {
                imageIndex++;
                string imagePath = dataDir + runStamp + "_" + imageIndex + ".png";
                extractor.GetNextImage(imagePath, System.Drawing.Imaging.ImageFormat.Png);
            }
        }

示例#37

0

显示文件

        /// <summary>
        /// Extracts the images of "ExtractImageExtractionMode.pdf" using the DefinedInResources
        /// extraction mode and saves every extracted image as a PNG file in the images data directory.
        /// </summary>
        public static void Run()
        {
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

            // Open input PDF
            PdfExtractor extractor = new PdfExtractor();
            extractor.BindPdf(dataDir + "ExtractImageExtractionMode.pdf");

            // Specify Image Extraction Mode
            extractor.ExtractImageMode = ExtractImageMode.DefinedInResources;

            // Extract Images based on Image Extraction Mode
            extractor.ExtractImage();

            // Use one timestamp for the whole run plus a running index. DateTime.Now only advances
            // at the system timer resolution, so reading fresh ticks for every image can produce
            // the same file name twice and silently overwrite an image saved a moment earlier.
            string runStamp = DateTime.Now.Ticks.ToString();
            int imageIndex = 0;

            // Get all the extracted images
            while (extractor.HasNextImage())
            {
                imageIndex++;
                string imagePath = dataDir + runStamp + "_" + imageIndex + ".png";
                extractor.GetNextImage(imagePath, System.Drawing.Imaging.ImageFormat.Png);
            }
        }

示例#38

0

显示文件

文件： TextHelper.cs 项目： iditect/pdf-tutorial

        /// <summary>
        /// Searches the whole of "sample.pdf" for the phrase "text for search" (case-insensitive,
        /// whole words only) and prints one line per hit in the form "text-pageId-x-y".
        /// </summary>
        public static void SearchText()
        {
            // Copy the "x86" and "x64" folders from the download package to your .NET project Bin folder.
            var pdf = new PdfExtractor("sample.pdf");

            // Ignore upper/lower case differences while matching.
            pdf.MatchCase = false;
            // Accept whole-word matches only.
            pdf.MatchWholeWord = true;

            // Search the whole document. To search only the first page, pass the page index
            // as a second argument instead: pdf.SearchText("text for search", 0)
            List<TextInfo> hits = pdf.SearchText("text for search");

            for (int hitIndex = 0; hitIndex < hits.Count; hitIndex++)
            {
                TextInfo hit = hits[hitIndex];

                // Matched text, page id and the X/Y of the hit rectangle, dash-separated.
                string line = hit.Text + "-" + hit.PageId + "-" + hit.Rect.X + "-" + hit.Rect.Y;
                Console.WriteLine(line);
            }
        }

示例#39

0

显示文件

文件： ExtractImageExtractionMode.cs 项目： aspose-pdf/Aspose.Pdf-for-.NET

        /// <summary>
        /// Extracts the images of "ExtractImageExtractionMode.pdf" using the DefinedInResources
        /// extraction mode and saves every extracted image as a PNG file in the images data directory.
        /// </summary>
        public static void Run()
        {
            // ExStart:ExtractImageExtractionMode
            // The path to the documents directory.
            string dataDir = RunExamples.GetDataDir_AsposePdfFacades_Images();

            // Open input PDF
            PdfExtractor extractor = new PdfExtractor();
            extractor.BindPdf(dataDir + "ExtractImageExtractionMode.pdf");

            // Specify Image Extraction Mode
            extractor.ExtractImageMode = ExtractImageMode.DefinedInResources;

            // Extract Images based on Image Extraction Mode
            extractor.ExtractImage();

            // Use one timestamp for the whole run plus a running index. DateTime.Now only advances
            // at the system timer resolution, so reading fresh ticks for every image can produce
            // the same file name twice and silently overwrite an image saved a moment earlier.
            string runStamp = DateTime.Now.Ticks.ToString();
            int imageIndex = 0;

            // Get all the extracted images
            while (extractor.HasNextImage())
            {
                imageIndex++;
                string imagePath = dataDir + runStamp + "_" + imageIndex + "_out.png";
                extractor.GetNextImage(imagePath, System.Drawing.Imaging.ImageFormat.Png);
            }
            // ExEnd:ExtractImageExtractionMode
        }

示例#40

0

显示文件

文件： Convert.cs 项目： vaginessa/open

 /// <summary>
 /// Extracts the text of the current document page by page (UTF-8) into a single output file
 /// in the configured target directory, reporting progress through <paramref name="progress"/>.
 /// </summary>
 /// <param name="progress">Progress callback invoked on <paramref name="dlg"/> with a percentage; may be null to disable reporting.</param>
 /// <param name="dlg">Form used to marshal the progress callback via <c>Invoke</c>.</param>
 /// <param name="fileType">Source extension: ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt" or ".pptx".</param>
 /// <remarks>
 /// Best-effort: any exception is swallowed and the method returns without reporting 100%.
 /// The output stream is released in every case, so a failed run no longer leaves the
 /// output file locked by a leaked handle.
 /// </remarks>
 private void pdf_to_txt(save_progress progress, System.Windows.Forms.Form dlg, string fileType)
 {
     try
     {
         Aspose.Pdf.Document document = null;

         // Percentage already accounted for before text extraction starts: 0 for a PDF source,
         // 50 when the source first goes through one of the *_to_pdf helpers.
         int progressOffset = 0;
         if (fileType == ".pdf")
         {
             document       = this.pdf_doc;
             progressOffset = 0;
         }
         else if ((fileType == ".doc") || (fileType == ".docx"))
         {
             document       = this.doc_to_pdf(progress, dlg, 0);
             progressOffset = 50;
         }
         else if ((fileType == ".xls") || (fileType == ".xlsx"))
         {
             document       = this.xls_to_pdf(progress, dlg, 0);
             progressOffset = 50;
         }
         else if ((fileType == ".ppt") || (fileType == ".pptx"))
         {
             document       = this.ppt_to_pdf(progress, dlg, 0);
             progressOffset = 50;
         }
         // NOTE(review): any other fileType leaves document null; the failure that follows is
         // swallowed by the catch below.

         PdfExtractor extractor  = new PdfExtractor(document);
         string       outputPath = this.global_config.target_dic + Path.GetFileNameWithoutExtension(this.file_path) + this.get_suffix();

         // 'using' closes the stream on both the success and the exception path.
         using (FileStream outputStream = new FileStream(outputPath, FileMode.Create))
         {
             // NOTE(review): 0 is presumably the library's plain-text mode - confirm against the Aspose docs.
             extractor.ExtractTextMode = 0;
             if (progress != null)
             {
                 dlg.Invoke(progress, new object[] { progressOffset });
             }

             // The page count does not change while extracting, so read it once.
             int pageCount = document.Pages.Count;
             for (int i = 1; i <= pageCount; i++)
             {
                 // Extract one page at a time and append its text to the shared output stream.
                 extractor.StartPage = i;
                 extractor.EndPage   = i;
                 extractor.ExtractText(Encoding.UTF8);
                 extractor.GetText(outputStream);
                 if (progress != null)
                 {
                     // Scale the page position into the remaining range:
                     // offset 0 -> i * 100 / pageCount, offset 50 -> 50 + i * 50 / pageCount.
                     int percent = ((i * (100 - progressOffset)) / pageCount) + progressOffset;
                     dlg.Invoke(progress, new object[] { percent });
                 }
             }
         }
     }
     catch (Exception)
     {
         // Deliberate best-effort: give up silently and skip the final 100% notification.
         return;
     }
     if (progress != null)
     {
         dlg.Invoke(progress, new object[] { 100 });
     }
 }

C# (CSharp) PdfExtractor示例