/// <summary>
/// Lazily enumerates the paragraphs of the portable document, page by page.
/// </summary>
/// <remarks>
/// For every page the text is extracted, passed through <c>Common.CleanPdfText</c> and then
/// split into paragraphs on line breaks; empty entries are dropped.
/// Both "\r\n" and "\n" are treated as line breaks, so the result does not depend on the
/// value of <c>Environment.NewLine</c> on the machine running the code.
/// Because this is an iterator, the document is only opened once enumeration starts, and the
/// reader and document are disposed when enumeration finishes or the enumerator is disposed.
/// </remarks>
/// <param name="file">The portable document file stream.</param>
/// <returns>A <see cref="IEnumerable{T}"/> of paragraphs.</returns>
public static IEnumerable<string> Paragraphs(Stream file)
{
    // Platform-independent separators. "\r\n" is listed first so a Windows line break is
    // consumed as one separator instead of leaving a stray '\r' on the previous paragraph.
    // NOTE(review): the iText extractor appears to emit '\n' between lines regardless of
    // platform, which is why Environment.NewLine alone is not sufficient - confirm.
    string[] lineBreaks = { "\r\n", "\n" };

    using (iText7.PdfReader reader = new iText7.PdfReader(file))
    using (iText7.PdfDocument doc = new iText7.PdfDocument(reader))
    {
        int numberOfPages = doc.GetNumberOfPages();

        // PDF page numbers are 1-based.
        for (int pageNumber = 1; pageNumber <= numberOfPages; pageNumber++)
        {
            iText7.PdfPage page = doc.GetPage(pageNumber);
            string pageText = iText7.Canvas.Parser.PdfTextExtractor.GetTextFromPage(page);
            pageText = Common.CleanPdfText(pageText);

            // A page without any text contributes no paragraphs.
            if (string.IsNullOrEmpty(pageText))
            {
                continue;
            }

            // Parse paragraphs.
            foreach (string paragraph in pageText.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries))
            {
                yield return paragraph;
            }
        }
    }
}
/// <summary>
/// Writes a tagged two-page document, each page holding a paragraph with a "Span" and a
/// custom "Chunk" (role-mapped to Span) element, then re-opens the file and checks the
/// structure-parent bookkeeping: next struct parent index of the document, and struct parent
/// index / next MCID of the first page.
/// </summary>
public virtual void StructElemTest03()
{
    string outputPath = destinationFolder + "structElemTest03.pdf";

    // ---- Create the tagged document -------------------------------------------------
    FileStream output = new FileStream(outputPath, FileMode.Create);
    PdfWriter pdfWriter = new PdfWriter(output);
    pdfWriter.SetCompressionLevel(CompressionConstants.NO_COMPRESSION);
    PdfDocument pdf = new PdfDocument(pdfWriter);
    pdf.SetTagged();

    // Custom role "Chunk" is mapped onto the standard Span role.
    pdf.GetStructTreeRoot().GetRoleMap().Put(new PdfName("Chunk"), PdfName.Span);
    PdfStructElem documentElem = pdf.GetStructTreeRoot().AddKid(new PdfStructElem(pdf, PdfName.Document));

    // ---- First page (Courier) -------------------------------------------------------
    PdfPage firstPage = pdf.AddNewPage();
    PdfCanvas firstCanvas = new PdfCanvas(firstPage);
    firstCanvas.BeginText();
    firstCanvas.SetFontAndSize(PdfFontFactory.CreateFont(FontConstants.COURIER), 24);
    firstCanvas.SetTextMatrix(1, 0, 0, 1, 32, 512);

    PdfStructElem firstParagraph = documentElem.AddKid(new PdfStructElem(pdf, PdfName.P));

    PdfStructElem firstHello = firstParagraph.AddKid(new PdfStructElem(pdf, PdfName.Span, firstPage));
    PdfMcrNumber firstHelloMcr = new PdfMcrNumber(firstPage, firstHello);
    firstCanvas.OpenTag(new CanvasTag(firstHello.AddKid(firstHelloMcr)));
    firstCanvas.ShowText("Hello ");
    firstCanvas.CloseTag();

    PdfStructElem firstWorld = firstParagraph.AddKid(new PdfStructElem(pdf, new PdfName("Chunk"), firstPage));
    PdfMcrNumber firstWorldMcr = new PdfMcrNumber(firstPage, firstWorld);
    firstCanvas.OpenTag(new CanvasTag(firstWorld.AddKid(firstWorldMcr)));
    firstCanvas.ShowText("World");
    firstCanvas.CloseTag();

    firstCanvas.EndText();
    firstCanvas.Release();

    // ---- Second page (Helvetica) ----------------------------------------------------
    PdfPage secondPage = pdf.AddNewPage();
    PdfCanvas secondCanvas = new PdfCanvas(secondPage);
    secondCanvas.BeginText();
    secondCanvas.SetFontAndSize(PdfFontFactory.CreateFont(FontConstants.HELVETICA), 24);
    secondCanvas.SetTextMatrix(1, 0, 0, 1, 32, 512);

    PdfStructElem secondParagraph = documentElem.AddKid(new PdfStructElem(pdf, PdfName.P));

    PdfStructElem secondHello = secondParagraph.AddKid(new PdfStructElem(pdf, PdfName.Span, secondPage));
    PdfMcrNumber secondHelloMcr = new PdfMcrNumber(secondPage, secondHello);
    secondCanvas.OpenTag(new CanvasTag(secondHello.AddKid(secondHelloMcr)));
    secondCanvas.ShowText("Hello ");
    secondCanvas.CloseTag();

    PdfStructElem secondWorld = secondParagraph.AddKid(new PdfStructElem(pdf, new PdfName("Chunk"), secondPage));
    PdfMcrNumber secondWorldMcr = new PdfMcrNumber(secondPage, secondWorld);
    secondCanvas.OpenTag(new CanvasTag(secondWorld.AddKid(secondWorldMcr)));
    secondCanvas.ShowText("World");
    secondCanvas.CloseTag();

    secondCanvas.EndText();
    secondCanvas.Release();

    firstPage.Flush();
    secondPage.Flush();
    pdf.Close();

    // ---- Re-open the result and verify ----------------------------------------------
    FileStream input = new FileStream(outputPath, FileMode.Open, FileAccess.Read);
    PdfReader pdfReader = new PdfReader(input);
    pdf = new PdfDocument(pdfReader);

    // Two pages were tagged, so the next free struct parent index is expected to be 2.
    NUnit.Framework.Assert.AreEqual(2, (int)pdf.GetNextStructParentIndex());

    // The first page is expected to have index 0 and, with two marked-content
    // references already on it, 2 as its next MCID.
    PdfPage reopenedFirstPage = pdf.GetPage(1);
    NUnit.Framework.Assert.AreEqual(0, reopenedFirstPage.GetStructParentIndex());
    NUnit.Framework.Assert.AreEqual(2, reopenedFirstPage.GetNextMcid());

    pdf.Close();
}
/// <summary>
/// Builds a tagged one-page document in memory (a paragraph with a "Span" and a
/// role-mapped "Chunk"), re-opens it in stamping mode, adds one span between the two
/// existing ones and one at the end of the paragraph, and compares the written file
/// against the reference document.
/// </summary>
public virtual void StructElemTest04()
{
    // ---- Step 1: create the source document in memory -------------------------------
    MemoryStream inMemoryPdf = new MemoryStream();
    PdfWriter pdfWriter = new PdfWriter(inMemoryPdf);
    pdfWriter.SetCompressionLevel(CompressionConstants.NO_COMPRESSION);
    PdfDocument pdf = new PdfDocument(pdfWriter);
    pdf.SetTagged();

    // Custom role "Chunk" is mapped onto the standard Span role.
    pdf.GetStructTreeRoot().GetRoleMap().Put(new PdfName("Chunk"), PdfName.Span);
    PdfStructElem documentElem = pdf.GetStructTreeRoot().AddKid(new PdfStructElem(pdf, PdfName.Document));

    PdfPage currentPage = pdf.AddNewPage();
    PdfCanvas pageCanvas = new PdfCanvas(currentPage);
    pageCanvas.BeginText();
    pageCanvas.SetFontAndSize(PdfFontFactory.CreateFont(StandardFonts.COURIER), 24);
    pageCanvas.SetTextMatrix(1, 0, 0, 1, 32, 512);

    PdfStructElem paragraphElem = documentElem.AddKid(new PdfStructElem(pdf, PdfName.P));

    PdfStructElem helloSpan = paragraphElem.AddKid(new PdfStructElem(pdf, PdfName.Span, currentPage));
    PdfMcrNumber helloMcr = new PdfMcrNumber(currentPage, helloSpan);
    pageCanvas.OpenTag(new CanvasTag(helloSpan.AddKid(helloMcr)));
    pageCanvas.ShowText("Hello ");
    pageCanvas.CloseTag();

    PdfStructElem worldChunk = paragraphElem.AddKid(new PdfStructElem(pdf, new PdfName("Chunk"), currentPage));
    PdfMcrNumber worldMcr = new PdfMcrNumber(currentPage, worldChunk);
    pageCanvas.OpenTag(new CanvasTag(worldChunk.AddKid(worldMcr)));
    pageCanvas.ShowText("World");
    pageCanvas.CloseTag();

    pageCanvas.EndText();
    pageCanvas.Release();
    currentPage.Flush();
    pdf.Close();

    // ---- Step 2: re-open the bytes and write the modified copy to disk --------------
    byte[] sourceBytes = inMemoryPdf.ToArray();
    PdfReader pdfReader = new PdfReader(new MemoryStream(sourceBytes));
    pdfWriter = new PdfWriter(destinationFolder + "structElemTest04.pdf");
    pdfWriter.SetCompressionLevel(CompressionConstants.NO_COMPRESSION);
    pdf = new PdfDocument(pdfReader, pdfWriter);

    currentPage = pdf.GetPage(1);
    pageCanvas = new PdfCanvas(currentPage);

    // First kid of the first kid of the root: the paragraph created in step 1.
    PdfStructElem existingParagraph = (PdfStructElem)pdf.GetStructTreeRoot().GetKids()[0].GetKids()[0];

    pageCanvas.BeginText();
    pageCanvas.SetFontAndSize(PdfFontFactory.CreateFont(StandardFonts.COURIER), 24);
    pageCanvas.SetTextMatrix(1, 0, 0, 1, 32, 490);

    // Insert a span between the two existing ones (kid index 1).
    PdfStructElem addedSpan = existingParagraph.AddKid(1, new PdfStructElem(pdf, PdfName.Span, currentPage));
    PdfMcrNumber insertedMcr = new PdfMcrNumber(currentPage, addedSpan);
    pageCanvas.OpenTag(new CanvasTag(addedSpan.AddKid(insertedMcr)));
    pageCanvas.ShowText("text1");
    pageCanvas.CloseTag();

    // Append another span at the end of the paragraph.
    addedSpan = existingParagraph.AddKid(new PdfStructElem(pdf, PdfName.Span, currentPage));
    PdfMcrNumber appendedMcr = new PdfMcrNumber(currentPage, addedSpan);
    pageCanvas.OpenTag(new CanvasTag(addedSpan.AddKid(appendedMcr)));
    pageCanvas.ShowText("text2");
    pageCanvas.CloseTag();

    pageCanvas.EndText();
    pageCanvas.Release();
    currentPage.Flush();
    pdf.Close();

    // ---- Step 3: compare with the reference file ------------------------------------
    CompareResult("structElemTest04.pdf", "cmp_structElemTest04.pdf", "diff_structElem_04_");
}
/// <summary>
/// Gets the 1-based number of the specified page within this tree.
/// </summary>
/// <param name="page">The page to look up.</param>
/// <returns>
/// The position of the first occurrence of <paramref name="page"/> in this tree, counted
/// from 1, or 0 if this tree does not contain the page.
/// </returns>
public virtual int GetPageNumber(PdfPage page)
{
    // Page numbers start at 1, hence the shift of the zero-based list position.
    int zeroBasedIndex = pages.IndexOf(page);
    return zeroBasedIndex + 1;
}