Exemplo n.º 1
0
        /// <summary>
        /// Reads the hard-coded PDF, extracts the text of every page and stores the
        /// resulting (page number, page text) pairs in <c>document</c>.
        /// </summary>
        void LoadDocument()
        {
            byte[] pdfBytes = convertDocToByteArray(@"D:\Code Project\Subrip\Document\HSK Book.pdf");
            List <Tuple <int, string> > contents = new List <Tuple <int, string> >();

            // Dispose the stream, reader and document deterministically, even when
            // text extraction throws part-way through the file.
            using (MemoryStream memory = new MemoryStream(pdfBytes))
            using (iText.Kernel.Pdf.PdfReader iTextReader = new iText.Kernel.Pdf.PdfReader(memory))
            using (iText.Kernel.Pdf.PdfDocument pdfDoc = new iText.Kernel.Pdf.PdfDocument(iTextReader))
            {
                int numberOfPages = pdfDoc.GetNumberOfPages();

                // iText page numbers are 1-based.
                for (int page = 1; page <= numberOfPages; page++)
                {
                    // A fresh strategy per page keeps the text of different pages separate.
                    iText.Kernel.Pdf.Canvas.Parser.Listener.ITextExtractionStrategy strategy =
                        new iText.Kernel.Pdf.Canvas.Parser.Listener.LocationTextExtractionStrategy();

                    string currentText = iText.Kernel.Pdf.Canvas.Parser.PdfTextExtractor.GetTextFromPage(pdfDoc.GetPage(page), strategy);

                    // Kept from the original: a UTF-8 round trip of the extracted text.
                    // The former UTF8 -> UTF8 Encoding.Convert step in between added nothing.
                    currentText = Encoding.UTF8.GetString(Encoding.UTF8.GetBytes(currentText));

                    contents.Add(new Tuple <int, string>(page, currentText));
                }
            }

            // Publish the result only after every page has been read successfully.
            document = contents;
        }
        /// <summary>
        /// Writes the raw content bytes of every page of the given PDF, decoded as UTF-8,
        /// to a text file named "{file name}_Page_{page number}.txt" in the PDF's folder.
        /// </summary>
        /// <param name="file">The PDF file to read.</param>
        /// <returns>
        /// The list of stock advice. Nothing in this method adds to it, so it is
        /// always empty at the moment.
        /// </returns>
        public IEnumerable <StockAdvice> ReadStockAdvice(FileInfo file)
        {
            var stockAdvices = new List <StockAdvice>();

            // using guarantees the file handle is released even if a page fails to export.
            using (var reader = new iText.Kernel.Pdf.PdfReader(file))
            using (var doc = new iText.Kernel.Pdf.PdfDocument(reader))
            {
                var pageCount = doc.GetNumberOfPages();
                var folder    = file.Directory;

                // Pages are numbered 1..pageCount inclusive; the previous "<" bound
                // silently skipped the last page (and the only page of a one-page file).
                for (int pageNum = 1; pageNum <= pageCount; pageNum++)
                {
                    var    page        = doc.GetPage(pageNum);
                    var    pageData    = page.GetContentBytes();
                    string pageContent = Encoding.UTF8.GetString(pageData);
                    var    fileName    = Path.Combine(folder.FullName, file.Name + "_Page_" + pageNum + ".txt");
                    WritePageContentToFile(fileName, pageContent);
                }
            }

            return(stockAdvices);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Parses a PDF held in memory and returns one <c>Page</c> per PDF page, carrying
        /// the rotated page size and the blocks collected by a <c>CustomEventListener</c>.
        /// </summary>
        /// <param name="contents">The raw bytes of the PDF file.</param>
        /// <returns>The pages in document order.</returns>
        public Page[] GetBlocks(byte[] contents)
        {
            var result = new List <Page>();

            using (var input = new System.IO.MemoryStream(contents))
            using (var reader = new iText.Kernel.Pdf.PdfReader(input))
            using (var pdf = new iText.Kernel.Pdf.PdfDocument(reader))
            {
                int total = pdf.GetNumberOfPages();

                // iText page numbers start at 1.
                for (int number = 1; number <= total; number++)
                {
                    var source = pdf.GetPage(number);
                    var target = new Page();

                    // Use the size with rotation applied.
                    var size = source.GetPageSizeWithRotation();
                    target.Height = size.GetHeight();
                    target.Width  = size.GetWidth();

                    // Run the page content through the listener to collect its blocks.
                    var listener = new CustomEventListener();
                    new PdfCanvasProcessor(listener).ProcessPageContent(source);
                    target.Blocks = listener.Blocks.ToArray();

                    result.Add(target);
                }
            }

            return result.ToArray();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads a PDF file from the given path and extracts the text of its leading pages.
        /// </summary>
        /// <param name="path">The path to the file.</param>
        /// <param name="pageCount">
        /// The number of pages to read (0 = all, 1 by default). Values larger than the
        /// document are clamped to its page count; negative values are treated like 1.
        /// </param>
        /// <returns>A <c>DocumentTree</c> with one entry per page read, in page order.</returns>
        public static DocumentTree PdfToText(string path, int pageCount = 1)
        {
            var pages = new DocumentTree();

            // Both objects are closed by their using blocks, so no explicit Close() is needed.
            using (iText.Kernel.Pdf.PdfReader reader = new iText.Kernel.Pdf.PdfReader(path))
            {
                using (iText.Kernel.Pdf.PdfDocument pdfDocument = new iText.Kernel.Pdf.PdfDocument(reader))
                {
                    int totalPages = pdfDocument.GetNumberOfPages();

                    // set up pages to read
                    int pagesToRead = pageCount > 0 ? pageCount : 1;
                    if (pageCount == 0 || pagesToRead > totalPages)
                    {
                        pagesToRead = totalPages;
                    }

                    // for each page to read...
                    for (int i = 1; i <= pagesToRead; ++i)
                    {
                        // The strategy accumulates the text it is fed, so a new instance is
                        // required per page; sharing one made every page repeat the text of
                        // all pages before it.
                        var strategy = new iText.Kernel.Pdf.Canvas.Parser.Listener.LocationTextExtractionStrategy();

                        // get the page and save it
                        var page = pdfDocument.GetPage(i);
                        var txt  = iText.Kernel.Pdf.Canvas.Parser.PdfTextExtractor.GetTextFromPage(page, strategy);
                        pages.Add(txt);
                    }
                }
            }
            return(pages);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Builds the book record for a PDF file by scanning its first pages for an ISBN.
        /// </summary>
        /// <param name="filepath">Path of the PDF file to inspect.</param>
        /// <returns>
        /// The base-class book with <c>Isbnsearch</c> set when an ISBN-like string is found;
        /// otherwise the base-class book with <c>SearchPhrase</c> set to at most the first
        /// 500 characters of the last scanned page. When the file does not exist the
        /// unmodified base-class result is returned, and a new <c>PocoBook</c> is returned
        /// when an <c>IOException</c> occurs.
        /// </returns>
        public override BookAtHome GetPocoBook(string filepath)
        {
            iText.Kernel.Pdf.PdfReader   reader = null;
            iText.Kernel.Pdf.PdfDocument pDoc   = null;
            const int extracted_phrase_len      = 500;

            try
            {
                if (File.Exists(filepath))
                {
                    reader = new iText.Kernel.Pdf.PdfReader(filepath);
                    pDoc   = new iText.Kernel.Pdf.PdfDocument(reader);
                    int    nPages      = pDoc.GetNumberOfPages();
                    // Documents shorter than 15 pages are scanned completely, longer ones
                    // only up to page 10.
                    int    maxsearch   = nPages < 15 ? nPages : 10;
                    string currentText = string.Empty;
                    for (int i = 1; i <= maxsearch; i++)
                    {
                        ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                        currentText = PdfTextExtractor.GetTextFromPage(pDoc.GetPage(i), strategy);

                        // NOTE(review): round trip through Encoding.Default kept from the original;
                        // it may be lossy where Encoding.Default is not UTF-8 - confirm it is still wanted.
                        currentText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(currentText)));

                        // Ordinal, case-insensitive pre-check. The former ToLower() used the
                        // current culture, which lowercases "I" to a dotless i under Turkish
                        // casing rules and therefore never found "isbn".
                        if (currentText.Contains("isbn", System.StringComparison.OrdinalIgnoreCase))
                        {
                            string pattern = @"ISBN(-1(?:(0)|3))?:?\x20(\s)*[0-9]+[- ][0-9]+[- ][0-9]+[- ][0-9]*[- ]*[xX0-9]";

                            // Colons are stripped and dashes turned into spaces before matching.
                            Match m = Regex.Match(currentText.Replace(":", "").Replace("-", " "), pattern);

                            if (m.Success)
                            {
                                PocoBook retBook = base.GetPocoBook(filepath) as PocoBook;
                                retBook.Isbnsearch = m.Value;
                                return(retBook);
                            }
                        }
                    }

                    // No ISBN found: fall back to a search phrase taken from the last scanned page.
                    PocoBook abook = base.GetPocoBook(filepath) as PocoBook;
                    int      len   = currentText.Length < extracted_phrase_len ? currentText.Length : extracted_phrase_len;
                    abook.SearchPhrase = currentText.Substring(0, len);

                    return(abook);
                }
            }
            catch (IOException)
            {
                return(new PocoBook(filepath));
            }
            finally
            {
                // Release the document before the reader it was built on; each is disposed
                // exactly once (previously the reader was disposed first and closed again).
                ((IDisposable)pDoc)?.Dispose();
                ((IDisposable)reader)?.Dispose();
            }

            return(base.GetPocoBook(filepath));
        }
Exemplo n.º 6
0
        /// <summary>
        /// Generates text signatures with iText 7: stamps four signature images and four
        /// text labels at fixed positions on page 1 of the source PDF and writes the
        /// result to the target PDF.
        /// </summary>
        public void ConvertPdf1()
        {
            string sourcePath = "C:\\test\\source.pdf";
            string targetPath = "C:\\test\\target.pdf";
            string fontPath   = "C:\\Windows\\Fonts\\simkai.ttf";

            string[] signPaths =
            {
                @"C:\Users\Administrator\Desktop\a.png",
                @"C:\Users\Administrator\Desktop\b.png",
                @"C:\Users\Administrator\Desktop\c.png",
                @"C:\Users\Administrator\Desktop\d.png"
            };

            // Bottom coordinates of the images and of the text labels, top to bottom.
            float[]  imageBottoms = { 93, 73, 53, 33 };
            string[] labels       = { "签名1", "签名2", "签名3", "签名4" };
            float[]  labelBottoms = { 90, 70, 50, 30 };

            // Input PDF, output PDF, the PDF document and the layout document on top of it.
            using (iText.Kernel.Pdf.PdfReader reader = new iText.Kernel.Pdf.PdfReader(sourcePath))
            using (iText.Kernel.Pdf.PdfWriter writer = new iText.Kernel.Pdf.PdfWriter(targetPath))
            using (iText.Kernel.Pdf.PdfDocument pdfDocument = new iText.Kernel.Pdf.PdfDocument(reader, writer))
            using (iText.Layout.Document document = new iText.Layout.Document(pdfDocument))
            {
                // Load all images from disk before anything is added to the document.
                var images = new iText.Layout.Element.Image[signPaths.Length];
                for (int i = 0; i < signPaths.Length; i++)
                {
                    images[i] = new iText.Layout.Element.Image(iText.IO.Image.ImageDataFactory.Create(signPaths[i]));
                }

                // Draw each image at an absolute position on the page, scaling it as well.
                // The coordinates nearly match those of the text, slightly to the left and up.
                // The scaled width equals the width passed afterwards (200 in this example).
                // The scaled height is the vertical distance between two signatures, e.g. 93-73=20.
                // The sample signature images are 400px * 150px; similar sizes give the best result.
                for (int i = 0; i < images.Length; i++)
                {
                    document.Add(images[i].ScaleToFit(200, 20).SetFixedPosition(1, 3089, imageBottoms[i], 200));
                }

                // Load the font.
                iText.Kernel.Font.PdfFont font = iText.Kernel.Font.PdfFontFactory.CreateFont(fontPath, iText.IO.Font.PdfEncodings.IDENTITY_H, true);

                // Add the text labels.
                for (int i = 0; i < labels.Length; i++)
                {
                    document.Add(new iText.Layout.Element.Paragraph(labels[i]).SetFont(font).SetFontSize(12).SetFixedPosition(1, 3090, labelBottoms[i], 200));
                }
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Compares two PDF files by catalog with iText's <c>CompareTool</c> and writes
        /// every difference found to the test context.
        /// </summary>
        /// <param name="testContext">Test context that receives the diff output.</param>
        /// <param name="filenameOutput">Path of the PDF produced by the test.</param>
        /// <param name="filenameShall">Path of the expected PDF.</param>
        /// <param name="nAcceptedDifferences">Largest number of differences that still counts as a match.</param>
        /// <returns><c>true</c> when the number of differences does not exceed <paramref name="nAcceptedDifferences"/>.</returns>
        public static bool CompareAgainstPdf(TestContext testContext, string filenameOutput, string filenameShall, int nAcceptedDifferences = 0)
        {
            // All four objects hold on to the underlying files, so they are released
            // deterministically instead of being left open after the comparison.
            using (var pdfReaderOutput = new iText.Kernel.Pdf.PdfReader(filenameOutput))
            using (var pdfOutput = new iText.Kernel.Pdf.PdfDocument(pdfReaderOutput))
            using (var pdfReaderShall = new iText.Kernel.Pdf.PdfReader(filenameShall))
            using (var pdfShall = new iText.Kernel.Pdf.PdfDocument(pdfReaderShall))
            {
                var ct     = new iText.Kernel.Utils.CompareTool();
                var result = ct.CompareByCatalog(pdfOutput, pdfShall);

                // Report and count the differences while both documents are still open.
                var differences = result.GetDifferences();

                testContext.WriteLine(string.Format("Diff of {0} <-> {1}", filenameOutput, filenameShall));
                foreach (var dif in differences)
                {
                    testContext.WriteLine(dif.Value);
                }
                return(differences.Count <= nAcceptedDifferences);
            }
        }
Exemplo n.º 8
0
            //[Benchmark]
            //public void iText_Split_125Mb_gt_7500pages_10_pages()
            //{
            //    RuniTextBenchmark("sample_125Mb_gt_7500pages.pdf", 10);
            //}

            /// <summary>
            /// Splits a PDF from the root folder into documents of a fixed page count using
            /// iText and closes every resulting document.
            /// </summary>
            /// <param name="fileToSplit">File name of the PDF, relative to the root folder.</param>
            /// <param name="splitByPagesNumber">Number of pages per split document.</param>
            /// <param name="pagesCountToProcess">Not used by this method.</param>
            public void RuniTextBenchmark(string fileToSplit, int splitByPagesNumber, int?pagesCountToProcess = null)
            {
                var srcFile = Path.Combine(_rootFolder, fileToSplit);
                var file    = new FileInfo(srcFile);

                // Same result as cutting at the last '.', but does not throw for a
                // file name that has no extension.
                var name    = Path.GetFileNameWithoutExtension(file.Name);

                using (var reader = new iText.Kernel.Pdf.PdfReader(srcFile))
                using (var doc = new iText.Kernel.Pdf.PdfDocument(reader))
                {
                    // doc is closed by its using block, also when splitting throws.
                    var splitter          = new CustomFileSplitter(doc, _resultsiTextFolder, name);
                    var splittedDocuments = splitter.SplitByPageCount(splitByPagesNumber);

                    foreach (var sd in splittedDocuments)
                    {
                        sd.Close();
                    }
                }
            }
Exemplo n.º 9
0
        /// <summary>
        /// Opens a PDF with PdfSharp. If PdfSharp rejects the file, the PDF is first
        /// rewritten with iText as PDF 1.4 without full compression and with flattened
        /// form fields, and the rewritten copy is opened instead.
        /// </summary>
        /// <param name="inputStream">Stream holding the PDF; it is rewound before reading.</param>
        /// <param name="openMode">The PdfSharp open mode to use.</param>
        /// <returns>The opened PdfSharp document.</returns>
        public static PdfDocument CompatibleOpen(MemoryStream inputStream, PdfDocumentOpenMode openMode)
        {
            PdfDocument pdfDocument = null;

            inputStream.Position = 0;

            try
            {
                pdfDocument = PdfReader.Open(inputStream, openMode);
            }
            catch (PdfSharp.Pdf.IO.PdfReaderException)
            {
                // PdfSharp could not read the file: re-save it with iText and retry.
                inputStream.Position = 0;
                MemoryStream outputStream = new MemoryStream();

                // Configure the writer completely before it is created, so the settings
                // are in place from the moment the document is opened.
                iText.Kernel.Pdf.WriterProperties writerProperties = new iText.Kernel.Pdf.WriterProperties();
                writerProperties.SetPdfVersion(iText.Kernel.Pdf.PdfVersion.PDF_1_4);
                writerProperties.SetFullCompressionMode(false);

                // using closes the reader and the stamper even when flattening fails.
                using (iText.Kernel.Pdf.PdfReader pdfReader = new iText.Kernel.Pdf.PdfReader(inputStream))
                using (iText.Kernel.Pdf.PdfDocument pdfStamper = new iText.Kernel.Pdf.PdfDocument(pdfReader, new iText.Kernel.Pdf.PdfWriter(outputStream, writerProperties)))
                {
                    // The output stream must stay open so it can be read back below.
                    pdfStamper.GetWriter().SetCloseStream(false);

                    iText.Forms.PdfAcroForm pdfForm = iText.Forms.PdfAcroForm.GetAcroForm(pdfStamper, true);
                    if (pdfForm != null)
                    {
                        pdfForm.FlattenFields();
                    }
                }

                // Rewind explicitly, mirroring how the input stream is handled above.
                outputStream.Position = 0;
                pdfDocument = PdfReader.Open(outputStream, openMode);
            }
            return(pdfDocument);
        }