예제 #1
0
        /// <summary>
        /// Lazily enumerates the paragraphs of a portable document, page by page.
        /// </summary>
        /// <remarks>
        /// This is an iterator: the document is opened when enumeration starts and the
        /// reader/document are disposed when enumeration finishes or the enumerator is disposed.
        /// </remarks>
        /// <param name="file">The portable document file stream.</param>
        /// <returns>A <see cref="IEnumerable{T}"/> of non-empty paragraphs, in page order.</returns>
        public static IEnumerable <string> Paragraphs(Stream file)
        {
            using (iText7.PdfReader pdfReader = new iText7.PdfReader(file))
            using (iText7.PdfDocument pdf = new iText7.PdfDocument(pdfReader))
            {
                // NOTE(review): paragraphs are delimited by the platform newline; this presumes
                // Common.CleanPdfText emits Environment.NewLine between paragraphs - confirm.
                string[] paragraphSeparators = { Environment.NewLine };

                int pageCount = pdf.GetNumberOfPages();

                // Page numbers are 1-based.
                for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    string rawText     = iText7.Canvas.Parser.PdfTextExtractor.GetTextFromPage(pdf.GetPage(pageNumber));
                    string cleanedText = Common.CleanPdfText(rawText);

                    foreach (string paragraph in cleanedText.Split(paragraphSeparators, StringSplitOptions.RemoveEmptyEntries))
                    {
                        yield return paragraph;
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Writes a tagged two-page document to disk, reopens it read-only, and asserts the
        /// next struct-parent index of the document plus the struct-parent index and next
        /// MCID reported for page 1.
        /// </summary>
        public virtual void StructElemTest03()
        {
            string outputPath = destinationFolder + "structElemTest03.pdf";

            PdfWriter pdfWriter = new PdfWriter(new FileStream(outputPath, FileMode.Create));
            pdfWriter.SetCompressionLevel(CompressionConstants.NO_COMPRESSION);

            PdfDocument pdf = new PdfDocument(pdfWriter);
            pdf.SetTagged();
            // Map the custom "Chunk" role onto the standard Span role.
            pdf.GetStructTreeRoot().GetRoleMap().Put(new PdfName("Chunk"), PdfName.Span);
            PdfStructElem root = pdf.GetStructTreeRoot().AddKid(new PdfStructElem(pdf, PdfName.Document));

            // The second page is only added after the first page's canvas has been released.
            PdfPage firstPage = pdf.AddNewPage();
            WriteTaggedHelloWorld03(pdf, root, firstPage, FontConstants.COURIER);
            PdfPage secondPage = pdf.AddNewPage();
            WriteTaggedHelloWorld03(pdf, root, secondPage, FontConstants.HELVETICA);

            firstPage.Flush();
            secondPage.Flush();
            pdf.Close();

            // Reopen the file that was just written and inspect its tagging counters.
            pdf = new PdfDocument(new PdfReader(new FileStream(outputPath, FileMode.Open, FileAccess.Read)));
            NUnit.Framework.Assert.AreEqual(2, (int)pdf.GetNextStructParentIndex());

            PdfPage reopenedPage = pdf.GetPage(1);
            NUnit.Framework.Assert.AreEqual(0, reopenedPage.GetStructParentIndex());
            NUnit.Framework.Assert.AreEqual(2, reopenedPage.GetNextMcid());
            pdf.Close();
        }

        /// <summary>
        /// Draws "Hello " and "World" on <paramref name="page"/> as two tagged pieces of content
        /// (a Span and a "Chunk") under a new paragraph element added to <paramref name="parent"/>.
        /// </summary>
        /// <param name="pdf">The document that owns the structure elements.</param>
        /// <param name="parent">The structure element receiving the new paragraph.</param>
        /// <param name="page">The page to draw on and to associate the tags with.</param>
        /// <param name="fontName">The font name handed to the font factory.</param>
        private static void WriteTaggedHelloWorld03(PdfDocument pdf, PdfStructElem parent, PdfPage page, string fontName)
        {
            PdfCanvas canvas = new PdfCanvas(page);

            canvas.BeginText();
            canvas.SetFontAndSize(PdfFontFactory.CreateFont(fontName), 24);
            canvas.SetTextMatrix(1, 0, 0, 1, 32, 512);

            PdfStructElem paragraph = parent.AddKid(new PdfStructElem(pdf, PdfName.P));

            PdfStructElem plainSpan = paragraph.AddKid(new PdfStructElem(pdf, PdfName.Span, page));
            canvas.OpenTag(new CanvasTag(plainSpan.AddKid(new PdfMcrNumber(page, plainSpan))));
            canvas.ShowText("Hello ");
            canvas.CloseTag();

            PdfStructElem chunkSpan = paragraph.AddKid(new PdfStructElem(pdf, new PdfName("Chunk"), page));
            canvas.OpenTag(new CanvasTag(chunkSpan.AddKid(new PdfMcrNumber(page, chunkSpan))));
            canvas.ShowText("World");
            canvas.CloseTag();

            canvas.EndText();
            canvas.Release();
        }
예제 #3
0
        /// <summary>
        /// Builds a tagged one-page document in memory, reopens those bytes for modification,
        /// adds two more tagged spans to the existing paragraph (one inserted at index 1, one
        /// appended), and compares the written file against the reference document.
        /// </summary>
        public virtual void StructElemTest04()
        {
            // Phase 1: create the initial tagged document in memory.
            MemoryStream buffer        = new MemoryStream();
            PdfWriter    initialWriter = new PdfWriter(buffer);
            initialWriter.SetCompressionLevel(CompressionConstants.NO_COMPRESSION);

            PdfDocument initialPdf = new PdfDocument(initialWriter);
            initialPdf.SetTagged();
            // Map the custom "Chunk" role onto the standard Span role.
            initialPdf.GetStructTreeRoot().GetRoleMap().Put(new PdfName("Chunk"), PdfName.Span);
            PdfStructElem root = initialPdf.GetStructTreeRoot().AddKid(new PdfStructElem(initialPdf, PdfName.Document));

            PdfPage   initialPage   = initialPdf.AddNewPage();
            PdfCanvas initialCanvas = new PdfCanvas(initialPage);
            initialCanvas.BeginText();
            initialCanvas.SetFontAndSize(PdfFontFactory.CreateFont(StandardFonts.COURIER), 24);
            initialCanvas.SetTextMatrix(1, 0, 0, 1, 32, 512);

            PdfStructElem paragraph = root.AddKid(new PdfStructElem(initialPdf, PdfName.P));
            ShowTaggedText04(initialCanvas, initialPage,
                             paragraph.AddKid(new PdfStructElem(initialPdf, PdfName.Span, initialPage)), "Hello ");
            ShowTaggedText04(initialCanvas, initialPage,
                             paragraph.AddKid(new PdfStructElem(initialPdf, new PdfName("Chunk"), initialPage)), "World");

            initialCanvas.EndText();
            initialCanvas.Release();
            initialPage.Flush();
            initialPdf.Close();

            // Phase 2: reopen the produced bytes and write the modified document to disk.
            PdfReader reader      = new PdfReader(new MemoryStream(buffer.ToArray()));
            PdfWriter stampWriter = new PdfWriter(destinationFolder + "structElemTest04.pdf");
            stampWriter.SetCompressionLevel(CompressionConstants.NO_COMPRESSION);

            PdfDocument   stampedPdf        = new PdfDocument(reader, stampWriter);
            PdfPage       stampedPage       = stampedPdf.GetPage(1);
            PdfCanvas     stampedCanvas     = new PdfCanvas(stampedPage);
            // First kid of the first kid of the structure tree root.
            PdfStructElem existingParagraph = (PdfStructElem)stampedPdf.GetStructTreeRoot().GetKids()[0].GetKids()[0];

            stampedCanvas.BeginText();
            stampedCanvas.SetFontAndSize(PdfFontFactory.CreateFont(StandardFonts.COURIER), 24);
            stampedCanvas.SetTextMatrix(1, 0, 0, 1, 32, 490);

            // New span inserted at index 1 of the existing paragraph's kids.
            ShowTaggedText04(stampedCanvas, stampedPage,
                             existingParagraph.AddKid(1, new PdfStructElem(stampedPdf, PdfName.Span, stampedPage)), "text1");
            // New span appended after the existing kids.
            ShowTaggedText04(stampedCanvas, stampedPage,
                             existingParagraph.AddKid(new PdfStructElem(stampedPdf, PdfName.Span, stampedPage)), "text2");

            stampedCanvas.EndText();
            stampedCanvas.Release();
            stampedPage.Flush();
            stampedPdf.Close();

            CompareResult("structElemTest04.pdf", "cmp_structElemTest04.pdf", "diff_structElem_04_");
        }

        /// <summary>
        /// Shows <paramref name="text"/> on the canvas inside a tag bound to a new
        /// marked-content reference added to <paramref name="span"/>.
        /// </summary>
        /// <param name="canvas">The canvas to draw on; a text block must already be open.</param>
        /// <param name="page">The page the marked-content reference belongs to.</param>
        /// <param name="span">The structure element that receives the marked-content reference.</param>
        /// <param name="text">The text to show.</param>
        private static void ShowTaggedText04(PdfCanvas canvas, PdfPage page, PdfStructElem span, string text)
        {
            canvas.OpenTag(new CanvasTag(span.AddKid(new PdfMcrNumber(page, span))));
            canvas.ShowText(text);
            canvas.CloseTag();
        }
예제 #4
0
 /// <summary>
 /// Returns the position of the first occurrence of the specified page
 /// in this tree, or 0 if this tree does not contain the page.
 /// </summary>
 /// <param name="page">The page to look up.</param>
 /// <returns>The index reported by the page list, plus one.</returns>
 public virtual int GetPageNumber(PdfPage page)
 {
     // Presumably IndexOf is zero-based and reports -1 for a missing page, which is what
     // makes the +1 shift produce 0 for "not found" - verify against the type of `pages`.
     int indexInTree = pages.IndexOf(page);

     return indexInTree + 1;
 }