// Writes a one-page A4 PDF to OUTSPTRIMCT containing a single underlined,
// keep-together paragraph, then feeds page 1 of that file through a
// MyTextRenderListener and checks the length of the text it collected.
public virtual void SpaceTrimColumnTextTest()
{
    // --- Build the document -------------------------------------------------
    Document doc = new Document(PageSize.A4, 50, 30, 50, 30);
    PdfWriter writer = PdfWriter.GetInstance(doc, new FileStream(OUTSPTRIMCT, FileMode.Create));
    doc.Open();

    Phrase under = new Phrase();
    under.Font = new Font(Font.FontFamily.TIMES_ROMAN, 12, Font.UNDERLINE);
    under.Add(new Chunk(" 1 1 9 "));

    Paragraph underlineTest = new Paragraph(under);
    underlineTest.KeepTogether = true;
    doc.Add(underlineTest);

    doc.Close();
    writer.Close();

    // --- Read it back and extract the text of page 1 ------------------------
    PdfReader reader = new PdfReader(OUTSPTRIMCT);
    try
    {
        MyTextRenderListener listener = new MyTextRenderListener();
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
        PdfDictionary pageDic = reader.GetPageN(1);
        PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
        processor.ProcessContent(ContentByteUtils.GetContentBytesForPage(reader, 1), resourcesDic);

        // AreEqual (rather than IsTrue on a comparison) reports the actual
        // length when the check fails.
        Assert.AreEqual(60, listener.GetText().Length, "Unexpected text length");
    }
    finally
    {
        // Release the reader even when processing or the assertion throws.
        reader.Close();
    }
}
/// <summary>
/// Runs the content stream of one page of a PDF file through a
/// <c>MyTextRenderListener</c> and returns the text the listener accumulated.
/// </summary>
/// <param name="pdfFilename">Path of the PDF file to open.</param>
/// <param name="pageNumber">Number of the page to process, as passed to <c>PdfReader.GetPageN</c>.</param>
/// <returns>The string form of the listener's <c>Text</c> member after processing.</returns>
public static string ExtractText(string pdfFilename, int pageNumber)
{
    PdfReader reader = new PdfReader(pdfFilename);
    try
    {
        MyTextRenderListener listener = new MyTextRenderListener();
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);

        PdfDictionary pageDic = reader.GetPageN(pageNumber);
        PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);

        // Content bytes must come from the same page as the resources; the
        // page index was previously hard-coded to 1 here.
        byte[] contentBytes = ContentByteUtils.GetContentBytesForPage(reader, pageNumber);
        processor.ProcessContent(contentBytes, resourcesDic);

        return listener.Text.ToString();
    }
    finally
    {
        // Release the reader on both the normal and the exceptional path.
        reader.Close();
    }
}
// Processes page 1 of the "WeirdHyphens.pdf" test resource, collecting the
// text chunks into a list, and checks the chunk at index 18.
virtual public void WeirdHyphensTest()
{
    PdfReader reader = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, "WeirdHyphens.pdf");
    try
    {
        List<String> textChunks = new List<String>();
        IRenderListener listener = new MyTextRenderListener(textChunks);
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
        PdfDictionary pageDic = reader.GetPageN(1);
        PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
        processor.ProcessContent(ContentByteUtils.GetContentBytesForPage(reader, 1), resourcesDic);

        // This assertion makes sure that the encoding has been read properly
        // from the FontDescriptor. If not, the value will be "\u0000 14".
        Assert.AreEqual("\u0096 14", textChunks[18]);
    }
    finally
    {
        // Close the reader even when processing or the assertion throws.
        reader.Close();
    }
}
/// <summary>
/// Opens the PDF at <paramref name="src"/>, processes the content stream of
/// page 1 with a <c>MyTextRenderListener</c> that is handed a text writer over
/// <paramref name="dest"/>, then flushes and closes the writer.
/// </summary>
/// <param name="src">Path of the PDF file to read.</param>
/// <param name="dest">Path of the output file; it is created, or overwritten if it exists (<c>FileMode.Create</c>).</param>
/// <remarks>
/// What is written to the output is decided by <c>MyTextRenderListener</c>,
/// which is not visible from this method. Only page 1 is processed.
/// </remarks>
public void extractSnippets(String src, String dest)
{
    // The using block guarantees the writer (and its underlying file stream)
    // is closed even if opening or processing the PDF throws.
    using (TextWriter output = new StreamWriter(new FileStream(dest, FileMode.Create)))
    {
        PdfReader reader = new PdfReader(src);
        try
        {
            IRenderListener listener = new MyTextRenderListener(output);
            PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
            PdfDictionary pageDic = reader.GetPageN(1);
            PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
            processor.ProcessContent(ContentByteUtils.GetContentBytesForPage(reader, 1), resourcesDic);
            output.Flush();
        }
        finally
        {
            // Release the reader on both the normal and the exceptional path.
            reader.Close();
        }
    }
}
// Reads page 1 of the "WeirdHyphens.pdf" test resource through a
// MyTextRenderListener that appends to a list of strings, then verifies the
// entry at index 18.
virtual public void WeirdHyphensTest()
{
    List<String> textChunks = new List<String>();
    PdfReader reader = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, "WeirdHyphens.pdf");
    try
    {
        IRenderListener listener = new MyTextRenderListener(textChunks);
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);

        PdfDictionary pageDic = reader.GetPageN(1);
        PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
        byte[] pageContent = ContentByteUtils.GetContentBytesForPage(reader, 1);
        processor.ProcessContent(pageContent, resourcesDic);

        // This assertion makes sure that the encoding has been read properly
        // from the FontDescriptor. If not, the value will be "\u0000 14".
        Assert.AreEqual("\u0096 14", textChunks[18]);
    }
    finally
    {
        // Previously the reader was closed only after a passing assertion;
        // closing it here also covers failures and exceptions.
        reader.Close();
    }
}