/// <summary>Runs a text extraction strategy over a single page and returns the text it collected.</summary>
/// <remarks>
/// A <see cref="PdfCanvasProcessor"/> is built from the strategy and the supplied additional
/// content operators, the page content is processed through it, and the strategy's resultant
/// text is returned. The custom IContentOperators can influence how (and whether or not) the
/// PDF instructions will be parsed.
/// </remarks>
/// <param name="page">the page for the text to be extracted from</param>
/// <param name="strategy">the strategy to use for extracting text</param>
/// <param name="additionalContentOperators">
/// an optional map of custom <see cref="IContentOperator"/>s for rendering instructions
/// </param>
/// <returns>the extracted text</returns>
public static String GetTextFromPage(PdfPage page, ITextExtractionStrategy strategy,
    IDictionary<String, IContentOperator> additionalContentOperators)
{
    PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(strategy, additionalContentOperators);
    canvasProcessor.ProcessPageContent(page);
    String extractedText = strategy.GetResultantText();
    return extractedText;
}
// Processes the first page of simple_text.pdf with a CharacterPositionEventListener.
// Any verification happens inside the listener; this method only drives the processor.
public virtual void TestCharacterRenderInfos()
{
    // Dispose the document when done; the original never closed it.
    using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "simple_text.pdf")))
    {
        PdfCanvasProcessor parser = new PdfCanvasProcessor(new TextRenderInfoTest.CharacterPositionEventListener());
        parser.ProcessPageContent(pdfDocument.GetPage(FIRST_PAGE));
    }
}
/// <summary>Feeds the content of one page to the given listener.</summary>
/// <remarks>
/// The listener and the additional content operators are handed to a new
/// <see cref="PdfCanvasProcessor"/>, which then processes the requested page of the
/// document held by this instance. Custom IContentOperators can influence how (and
/// whether or not) the PDF instructions will be parsed.
/// </remarks>
/// <param name="pageNumber">the page number to process</param>
/// <param name="renderListener">the listener that will receive render callbacks</param>
/// <param name="additionalContentOperators">an optional map of custom ContentOperators for rendering instructions</param>
/// <returns>the provided renderListener</returns>
public virtual E ProcessContent<E>(int pageNumber, E renderListener,
    IDictionary<String, IContentOperator> additionalContentOperators)
    where E : IEventListener
{
    PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(renderListener, additionalContentOperators);
    PdfPage targetPage = pdfDocument.GetPage(pageNumber);
    canvasProcessor.ProcessPageContent(targetPage);
    return renderListener;
}
// Processes page 1 of closingEmptyPath.pdf with a no-op listener.
// The test passes as long as processing completes without throwing.
public virtual void TestClosingEmptyPath()
{
    String fileName = "closingEmptyPath.pdf";
    // Dispose the document when done; the original never closed it.
    using (PdfDocument document = new PdfDocument(new PdfReader(sourceFolder + fileName)))
    {
        PdfCanvasProcessor processor = new PdfCanvasProcessor(new PdfCanvasProcessorTest.NoOpEventListener());
        // Assert that no exception is thrown when an empty path is handled
        processor.ProcessPageContent(document.GetPage(1));
    }
}
/// <summary>
/// Processes a PDF page to retrieve table data from it.
/// </summary>
/// <remarks>
/// Work is only done when <paramref name="withBorder"/> is true: the page is stored,
/// its content is run through a <see cref="PdfCanvasProcessor"/> that reports to this
/// listener, and the tables are then derived from the collected borders.
/// </remarks>
/// <param name="pdfPage">the pdf page to process</param>
/// <param name="withBorder">true if tables are fully bordered, false otherwise</param>
public FilterTableEventListener(PdfPage pdfPage, bool withBorder)
{
    // Nothing is processed (and the page is not stored) for tables without full borders.
    if (!withBorder)
    {
        return;
    }

    this.pdfPage = pdfPage;
    new PdfCanvasProcessor(this).ProcessPageContent(pdfPage);
    GetTablesFromborders();
}
// Opens the given PDF and runs its first page through an AnalyzeTextListener.
static void AnalyzeTextFromListener(string filename)
{
    using (PdfDocument document = new PdfDocument(new PdfReader(filename)))
    {
        PdfPage firstPage = document.GetFirstPage();
        PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(new AnalyzeTextListener());
        canvasProcessor.ProcessPageContent(firstPage);
    }
}
/// <summary>Extracts the text of the first page of a PDF using a LocationTextExtractionStrategy.</summary>
/// <param name="fileName">path of the PDF file to read</param>
/// <returns>the text collected by the strategy for the first page</returns>
public static string GetResultantText(string fileName)
{
    using (PdfDocument document = new PdfDocument(new PdfReader(fileName)))
    {
        LocationTextExtractionStrategy extraction = new LocationTextExtractionStrategy();
        PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(extraction);
        canvasProcessor.ProcessPageContent(document.GetFirstPage());
        return extraction.GetResultantText();
    }
}
// Opens the given PDF and runs its first page through a UserPathListener.
static void ShowLinesFromListener(string filename)
{
    using (PdfDocument document = new PdfDocument(new PdfReader(filename)))
    {
        PdfPage firstPage = document.GetFirstPage();
        PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(new UserPathListener());
        canvasProcessor.ProcessPageContent(firstPage);
    }
}
// Runs every page of yaxiststar.pdf through a fresh PdfCanvasProcessor backed by an
// _IEventListener_40 instance. The document is opened with an in-memory writer.
public virtual void ContentStreamProcessorTest()
{
    PdfReader reader = new PdfReader(sourceFolder + "yaxiststar.pdf");
    PdfWriter writer = new PdfWriter(new ByteArrayOutputStream());
    PdfDocument document = new PdfDocument(reader, writer);

    for (int pageNumber = 1; pageNumber <= document.GetNumberOfPages(); ++pageNumber)
    {
        PdfPage currentPage = document.GetPage(pageNumber);
        // A new processor (and listener) is used for each page.
        new PdfCanvasProcessor(new _IEventListener_40()).ProcessPageContent(currentPage);
    }
}
// Expects processing page 1 of expectedByteAlignedTiffImageExtraction.pdf to fail with an
// iText.IO.IOException carrying the ExpectedTrailingZeroBitsForByteAlignedLines message.
public virtual void ExpectedByteAlignedTiffImageExtractionTest()
{
    NUnit.Framework.Assert.That(() =>
    {
        // Byte-aligned image is expected in pdf file, but in fact it's not
        String sourcePdf = sourceFolder + "expectedByteAlignedTiffImageExtraction.pdf";
        PdfDocument document = new PdfDocument(new PdfReader(sourcePdf));
        GetImageBytesTest.ImageExtractor imageExtractor = new GetImageBytesTest.ImageExtractor(this);
        PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(imageExtractor);
        canvasProcessor.ProcessPageContent(document.GetPage(1));
    }
    , NUnit.Framework.Throws.InstanceOf<iText.IO.IOException>().With.Message.EqualTo(
        MessageFormatUtil.Format(iText.IO.IOException.ExpectedTrailingZeroBitsForByteAlignedLines)));
}
/// <summary>Processes a page with the given location strategy and collects the rectangle of every location found.</summary>
/// <param name="strategy">strategy that receives the page's render events and reports locations</param>
/// <param name="page">page whose content is processed</param>
/// <returns>one rectangle per location reported by the strategy, in the strategy's order</returns>
private ICollection<Rectangle> ProcessPage(ILocationExtractionStrategy strategy, PdfPage page)
{
    PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(strategy);
    canvasProcessor.ProcessPageContent(page);

    IList<Rectangle> rectangles = new List<Rectangle>();
    foreach (IPdfTextLocation location in strategy.GetResultantLocations())
    {
        Rectangle bounds = location.GetRectangle();
        rectangles.Add(bounds);
    }
    return rectangles;
}
/// <summary>Get extraction strategy for given document.</summary>
/// <remarks>
/// Creates an <c>ExtractionStrategy</c> for the given layer name, applies the
/// <paramref name="useActualText"/> setting and processes the first page of the PDF with it.
/// The document is disposed even when processing throws.
/// </remarks>
/// <param name="pdfPath">path of the PDF file to read</param>
/// <param name="layerName">layer name passed to the strategy</param>
/// <param name="useActualText">value passed to the strategy's SetUseActualText</param>
/// <returns>the strategy after it has processed the first page</returns>
public static ExtractionStrategy GetExtractionStrategy(String pdfPath, String layerName, bool useActualText)
{
    // 'using' replaces the trailing Close() call, which was skipped whenever processing threw.
    using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath)))
    {
        ExtractionStrategy strategy = new ExtractionStrategy(layerName);
        strategy.SetUseActualText(useActualText);
        PdfCanvasProcessor processor = new PdfCanvasProcessor(strategy);
        processor.ProcessPageContent(pdfDocument.GetFirstPage());
        return strategy;
    }
}
// Processes page 1 of the Type 3 font sample with a CharacterPositionEventListener and
// checks the glyph width recorded by the listener.
public virtual void CheckBboxCalculationForType3FontsWithFontMatrix02()
{
    String inputPdf = sourceFolder + "checkBboxCalculationForType3FontsWithFontMatrix02.pdf";
    // Dispose the document when done; the original never closed it.
    using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputPdf)))
    {
        GlyphBboxCalculationTest.CharacterPositionEventListener listener =
            new GlyphBboxCalculationTest.CharacterPositionEventListener();
        PdfCanvasProcessor processor = new PdfCanvasProcessor(listener);
        processor.ProcessPageContent(pdfDocument.GetPage(1));
        // font size (36) * |fontMatrix| (1) * glyph width (0.6) = 21.6
        NUnit.Framework.Assert.AreEqual(21.6, listener.glyphWith, 1e-5);
    }
}
// Expects processing the first page of circularReferencesInResources.pdf with a no-op
// listener to end in an OutOfMemoryException.
public virtual void ParseCircularReferencesInResourcesTest()
{
    NUnit.Framework.Assert.That(() =>
    {
        String sourcePdf = sourceFolder + "circularReferencesInResources.pdf";
        PdfDocument document = new PdfDocument(new PdfReader(sourcePdf));
        PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(new PdfCanvasProcessorTest.NoOpEventListener());
        PdfPage firstPage = document.GetFirstPage();
        canvasProcessor.ProcessPageContent(firstPage);
        document.Close();
    }
    , NUnit.Framework.Throws.InstanceOf<OutOfMemoryException>());
}
/// <summary>Processes content from the specified page number using the specified listener.</summary>
/// <remarks>
/// Processes content from the specified page number using the specified listener.
/// Also allows registration of custom ContentOperators: each entry of
/// <paramref name="additionalContentOperators"/> is registered on the processor before
/// the page is processed. A null map is treated as "no additional operators".
/// </remarks>
/// <param name="pageNumber">the page number to process</param>
/// <param name="renderListener">the listener that will receive render callbacks</param>
/// <param name="additionalContentOperators">an optional map of custom ContentOperators for rendering instructions; may be null</param>
/// <returns>the provided renderListener</returns>
public virtual E ProcessContent<E>(int pageNumber, E renderListener,
    IDictionary<String, IContentOperator> additionalContentOperators)
    where E : IEventListener
{
    PdfCanvasProcessor processor = new PdfCanvasProcessor(renderListener);
    // The map is documented as optional, so guard against null instead of letting
    // the foreach throw a NullReferenceException.
    if (additionalContentOperators != null)
    {
        foreach (KeyValuePair<String, IContentOperator> entry in additionalContentOperators)
        {
            processor.RegisterContentOperator(entry.Key, entry.Value);
        }
    }
    processor.ProcessPageContent(pdfDocument.GetPage(pageNumber));
    return renderListener;
}
// Processes page 1 of the Type 3 font sample and checks the average width reported by the
// font program of the first text render info captured by the listener.
public virtual void CheckAverageBboxCalculationForType3FontsWithFontMatrix01Test()
{
    String inputPdf = sourceFolder + "checkAverageBboxCalculationForType3FontsWithFontMatrix01.pdf";
    // Dispose the document when done; the original never closed it.
    using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputPdf)))
    {
        GlyphBboxCalculationTest.CharacterPositionEventListener listener =
            new GlyphBboxCalculationTest.CharacterPositionEventListener();
        PdfCanvasProcessor processor = new PdfCanvasProcessor(listener);
        processor.ProcessPageContent(pdfDocument.GetPage(1));
        // The assertion stays inside the using block so the font is inspected while the document is open.
        NUnit.Framework.Assert.AreEqual(600,
            listener.firstTextRenderInfo.GetFont().GetFontProgram().GetAvgWidth(), 0.01f);
    }
}
// Converts a small HTML page with a blue body background to PDF through the external
// converter executable, then counts the rectangles found on the single output page:
// one is expected when a background should be rendered, none otherwise.
public void BackgroundTest(string exeFileName, string commandLineParameter, bool expectedBackground)
{
    HtmlToPdfRunner runner = new HtmlToPdfRunner(exeFileName);
    string html = @"<!DOCTYPE html> <html> <head> </head> <body style=""background-color:blue;""> Test Page </body> </html>";

    using (TempHtmlFile htmlFile = new TempHtmlFile(html))
    using (TempPdfFile pdfFile = new TempPdfFile(this.TestContext))
    {
        // Optional switch first (followed by a space), then the quoted input and output paths.
        string optionPrefix = string.IsNullOrEmpty(commandLineParameter)
            ? string.Empty
            : $"{commandLineParameter} ";
        string commandLine = optionPrefix + $"\"{htmlFile.FilePath}\" \"{pdfFile.FilePath}\"";

        HtmlToPdfRunResult result = runner.Run(commandLine);
        Assert.AreEqual(0, result.ExitCode, result.Output);

        using (PdfReader pdfReader = new PdfReader(pdfFile.FilePath))
        using (PdfDocument pdfDocument = new PdfDocument(pdfReader))
        {
            Assert.AreEqual(1, pdfDocument.GetNumberOfPages());

            RectangleFinder rectangleFinder = new RectangleFinder();
            new PdfCanvasProcessor(rectangleFinder).ProcessPageContent(pdfDocument.GetPage(1));

            ICollection<Rectangle> boxes = rectangleFinder.GetBoundingBoxes();
            int expectedBoxCount = expectedBackground ? 1 : 0;
            Assert.AreEqual(expectedBoxCount, boxes.Count);
        }
    }
}
/// <summary>Get text from layer specified by name from page.</summary>
/// <remarks>
/// Opens the PDF with <c>PdfOcrMetaInfo</c> event-counting meta info, processes the requested
/// page with an <c>ExtractionStrategy</c> created for <paramref name="layerName"/>, and returns
/// the strategy's resultant text. The document is disposed even when processing throws.
/// </remarks>
/// <param name="pdfPath">path of the PDF file to read</param>
/// <param name="layerName">layer name passed to the extraction strategy</param>
/// <param name="page">number of the page to process</param>
/// <param name="useActualText">value passed to the strategy's SetUseActualText</param>
/// <returns>the text collected by the strategy</returns>
protected internal virtual String GetTextFromPdfLayer(String pdfPath, String layerName, int page, bool useActualText)
{
    IntegrationTestHelper.ExtractionStrategy textExtractionStrategy =
        new IntegrationTestHelper.ExtractionStrategy(layerName);
    textExtractionStrategy.SetUseActualText(useActualText);

    // 'using' replaces the trailing Close() call, which was skipped whenever processing threw.
    using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath),
        new DocumentProperties().SetEventCountingMetaInfo(new PdfOcrMetaInfo())))
    {
        PdfCanvasProcessor processor = new PdfCanvasProcessor(textExtractionStrategy);
        processor.ProcessPageContent(pdfDocument.GetPage(page));
    }
    return textExtractionStrategy.GetResultantText();
}
/// <summary>Extracts the text of the first page of the PDF at <paramref name="path"/>.</summary>
/// <remarks>
/// Uses a LocationTextExtractionStrategy. Only the first page is read.
/// The document is disposed even when processing throws.
/// </remarks>
/// <param name="path">path of the PDF file to read</param>
/// <returns>the text collected by the strategy for the first page</returns>
public static string GetPDFFromFile(string path)
{
    LocationTextExtractionStrategy strategy = new LocationTextExtractionStrategy();
    // 'using' replaces the trailing Close() call, which was skipped whenever processing threw.
    using (PdfDocument pdfDoc = new PdfDocument(new PdfReader(path)))
    {
        PdfCanvasProcessor parser = new PdfCanvasProcessor(strategy);
        // Known limitation: read more than one page. Sample documents are all one page long.
        parser.ProcessPageContent(pdfDoc.GetFirstPage());
    }
    return strategy.GetResultantText();
}
// Extracts the text of the first page of noninvertibleMatrix.pdf with a
// LocationTextExtractionStrategy and compares it with the expected "Hello World!" layout.
public virtual void TestNoninvertibleMatrix()
{
    PdfDocument document = new PdfDocument(new PdfReader(sourceFolder + "noninvertibleMatrix.pdf"));
    LocationTextExtractionStrategy extraction = new LocationTextExtractionStrategy();
    PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(extraction);

    canvasProcessor.ProcessPageContent(document.GetFirstPage());
    String actualText = extraction.GetResultantText();
    document.Close();

    String expectedText = "Hello World!\nHello World!\nHello World!\nHello World! Hello World! Hello World!";
    NUnit.Framework.Assert.AreEqual(expectedText, actualText);
}
// Runs every page of transparentText.pdf through a RecordEveryTextRenderEvent listener that
// fills the textRenderInfo map, then checks the recorded "FillOpacity" value.
public virtual void ProcessGraphicsStateResourceOperatorFillOpacityTest()
{
    float? expOpacity = 0.5f;
    IDictionary<String, Object> textRenderInfo = new Dictionary<String, Object>();

    // Dispose the document when done; the original never closed it.
    using (PdfDocument document = new PdfDocument(new PdfReader(sourceFolder + "transparentText.pdf")))
    {
        for (int i = 1; i <= document.GetNumberOfPages(); ++i)
        {
            PdfPage page = document.GetPage(i);
            PdfCanvasProcessor processor = new PdfCanvasProcessor(
                new PdfCanvasProcessorIntegrationTest.RecordEveryTextRenderEvent(textRenderInfo));
            processor.ProcessPageContent(page);
        }
    }

    NUnit.Framework.Assert.AreEqual(expOpacity, textRenderInfo.Get("FillOpacity"), "Expected fill opacity not found");
}
// For every page of the invalid-pattern sample, processes the page with a
// ColorParsingEventListener and asserts that the encountered path has no fill color.
public virtual void PatternColorParsingNotValidPdfTest()
{
    String inputFile = sourceFolder + "patternColorParsingNotValidPdfTest.pdf";
    // Dispose the document when done; the original never closed it.
    using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputFile)))
    {
        for (int i = 1; i <= pdfDocument.GetNumberOfPages(); ++i)
        {
            PdfPage page = pdfDocument.GetPage(i);
            PdfCanvasProcessorIntegrationTest.ColorParsingEventListener colorParsingEventListener =
                new PdfCanvasProcessorIntegrationTest.ColorParsingEventListener();
            PdfCanvasProcessor processor = new PdfCanvasProcessor(colorParsingEventListener);
            processor.ProcessPageContent(page);
            Color renderInfo = colorParsingEventListener.GetEncounteredPath().GetFillColor();
            NUnit.Framework.Assert.IsNull(renderInfo);
        }
    }
}
/// <summary>Extracts the text of every page of a PDF, one list entry per page.</summary>
/// <param name="filename">path of the PDF file to read</param>
/// <returns>the per-page text in page order, each produced by a fresh LocationTextExtractionStrategy</returns>
private ICollection<string> GetPDF(string filename)
{
    List<string> textByPage = new List<string>();
    using (PdfDocument document = new PdfDocument(new PdfReader(filename)))
    {
        int pageCount = document.GetNumberOfPages();
        int pageNumber = 1;
        while (pageNumber <= pageCount)
        {
            // A new strategy per page keeps each entry limited to that page's text.
            LocationTextExtractionStrategy extraction = new LocationTextExtractionStrategy();
            new PdfCanvasProcessor(extraction).ProcessPageContent(document.GetPage(pageNumber));
            textByPage.Add(extraction.GetResultantText());
            pageNumber++;
        }
    }
    return textByPage;
}
// For every page of the valid-pattern sample, processes the page with a
// ColorParsingEventListener and asserts that the fill color's color space is a Pattern.
public virtual void PatternColorParsingValidPdfTest()
{
    String inputFile = sourceFolder + "patternColorParsingValidPdfTest.pdf";
    // Dispose the document when done; the original never closed it.
    using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputFile)))
    {
        for (int i = 1; i <= pdfDocument.GetNumberOfPages(); ++i)
        {
            PdfPage page = pdfDocument.GetPage(i);
            PdfCanvasProcessorIntegrationTest.ColorParsingEventListener colorParsingEventListener =
                new PdfCanvasProcessorIntegrationTest.ColorParsingEventListener();
            PdfCanvasProcessor processor = new PdfCanvasProcessor(colorParsingEventListener);
            processor.ProcessPageContent(page);
            PathRenderInfo renderInfo = colorParsingEventListener.GetEncounteredPath();
            PdfColorSpace colorSpace = renderInfo.GetGraphicsState().GetFillColor().GetColorSpace();
            NUnit.Framework.Assert.IsTrue(colorSpace is PdfSpecialCs.Pattern);
        }
    }
}
// Processes the first page of type3font_text.pdf with a TextPositionEventListener and checks
// that the first recorded line segment starts and ends (first coordinate only) within 0.5 of
// the reference segment.
public virtual void TestType3FontWidth()
{
    String inFile = "type3font_text.pdf";
    LineSegment origLineSegment = new LineSegment(
        new Vector(20.3246f, 769.4974f, 1.0f),
        new Vector(151.22923f, 769.4974f, 1.0f));

    // Dispose the document when done; the original never closed it.
    using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + inFile)))
    {
        TextRenderInfoTest.TextPositionEventListener renderListener =
            new TextRenderInfoTest.TextPositionEventListener();
        PdfCanvasProcessor processor = new PdfCanvasProcessor(renderListener);
        processor.ProcessPageContent(pdfDocument.GetPage(FIRST_PAGE));

        NUnit.Framework.Assert.AreEqual(
            renderListener.GetLineSegments()[FIRST_ELEMENT_INDEX].GetStartPoint().Get(FIRST_ELEMENT_INDEX),
            origLineSegment.GetStartPoint().Get(FIRST_ELEMENT_INDEX), 1 / 2f);
        NUnit.Framework.Assert.AreEqual(
            renderListener.GetLineSegments()[FIRST_ELEMENT_INDEX].GetEndPoint().Get(FIRST_ELEMENT_INDEX),
            origLineSegment.GetEndPoint().Get(FIRST_ELEMENT_INDEX), 1 / 2f);
    }
}
/// <summary>Writes the text of every page of a PDF to a single UTF-8 text file.</summary>
/// <remarks>
/// Each page is extracted with its own LocationTextExtractionStrategy and the resulting
/// text is appended to the output in page order, with no separator added between pages.
/// The output file is created, or truncated if it already exists.
/// </remarks>
/// <param name="absoluteFilePath">path of the PDF file to read</param>
/// <param name="outputPath">path of the text file to write</param>
public void ToTxt(string absoluteFilePath, string outputPath)
{
    using (var pdfDocument = new PdfDocument(new PdfReader(absoluteFilePath)))
    // Open the output once, truncating it. Re-opening with File.OpenWrite for every page
    // restarted at offset 0 each time, so pages overwrote each other and stale bytes
    // from longer earlier content were left behind.
    using (var fos = System.IO.File.Create(outputPath))
    {
        for (var pageIndex = 1; pageIndex <= pdfDocument.GetNumberOfPages(); pageIndex++)
        {
            var strategy = new LocationTextExtractionStrategy();
            var parser = new PdfCanvasProcessor(strategy);
            parser.ProcessPageContent(pdfDocument.GetPage(pageIndex));

            var array = Encoding.UTF8.GetBytes(strategy.GetResultantText());
            fos.Write(array, 0, array.Length);
        }
        fos.Flush();
    }
}
// Converts the PDF at SRC to a UTF-8 text file at dest.
// All pages are fed to one SimpleTextExtractionStrategy, and the strategy's resultant text
// after the last page is written to dest, which is created or truncated.
public static void getTextFromPdf(String dest)
{
    SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

    // Dispose the document when done; the original never closed it.
    using (PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC)))
    {
        PdfCanvasProcessor parser = new PdfCanvasProcessor(strategy);
        for (var i = 1; i <= pdfDoc.GetNumberOfPages(); i++)
        {
            parser.ProcessPageContent(pdfDoc.GetPage(i));
        }
    }

    // Write once, after the last page. The original re-opened dest with OpenOrCreate on every
    // iteration and rewrote it from offset 0 without truncating, so only the final write
    // mattered and stale trailing bytes of older, longer content could survive.
    byte[] array = Encoding.UTF8.GetBytes(strategy.GetResultantText());
    using (FileStream stream = new FileStream(dest, FileMode.Create))
    {
        stream.Write(array, 0, array.Length);
    }
}
// Creates an OCR PDF from pantone_blue.jpg, then checks that the text extracted for the
// "Text Layer" layer on the first page of the result is empty.
public virtual void TestImageWithoutText()
{
    String testName = "testImageWithoutText";
    String imagePath = TEST_IMAGES_DIRECTORY + "pantone_blue.jpg";
    String outputPdfPath = GetTargetDirectory() + testName + ".pdf";

    // Produce the PDF from the single input image and close it right away.
    FileInfo imageFile = new FileInfo(imagePath);
    OcrPdfCreator pdfCreator = new OcrPdfCreator(tesseractReader);
    pdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(imageFile), new PdfWriter(outputPdfPath)).Close();

    // Re-open the result and extract the text of the layer.
    PdfDocument resultDocument = new PdfDocument(new PdfReader(outputPdfPath));
    IntegrationTestHelper.ExtractionStrategy layerStrategy =
        new IntegrationTestHelper.ExtractionStrategy("Text Layer");
    new PdfCanvasProcessor(layerStrategy).ProcessPageContent(resultDocument.GetFirstPage());
    resultDocument.Close();

    NUnit.Framework.Assert.AreEqual("", layerStrategy.GetResultantText());
}
// Processes the first page of inlineImageExtraction.pdf with an InlineImageEventListener and checks that:
//  - exactly one inline image was collected;
//  - its bytes equal the contents of imgtest.dat;
//  - its dictionary matches an expected dictionary with BitsPerComponent 8, Height 50, Width 50 and
//    an Indexed color space built from DeviceRGB, 255 and the literal lookup string below.
// indexedCsLookupData is spelled out literally and must not be edited or re-encoded.
public virtual void ExtractSingleInlineImageWithIndexedColorSpaceTest() { PdfDocument pdf = new PdfDocument(new PdfReader(sourceFolder + "inlineImageExtraction.pdf")); InlineImageExtractionTest.InlineImageEventListener eventListener = new InlineImageExtractionTest.InlineImageEventListener (); PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(eventListener); canvasProcessor.ProcessPageContent(pdf.GetFirstPage()); pdf.Close(); IList <PdfStream> inlineImages = eventListener.GetInlineImages(); NUnit.Framework.Assert.AreEqual(1, inlineImages.Count); byte[] imgBytes = inlineImages[0].GetBytes(); byte[] cmpImgBytes = File.ReadAllBytes(Path.Combine(sourceFolder, "imgtest.dat")); NUnit.Framework.Assert.AreEqual(cmpImgBytes, imgBytes); PdfDictionary expectedDict = new PdfDictionary(); expectedDict.Put(PdfName.BitsPerComponent, new PdfNumber(8)); expectedDict.Put(PdfName.Height, new PdfNumber(50)); expectedDict.Put(PdfName.Width, new PdfNumber(50)); String indexedCsLookupData = "\u007F\u007F\u007Fïïï\u000F\u000F\u000F???¿¿¿ÏÏÏ///\u001F\u001F\u001F___ßßß" + "\u009F\u009F\u009FOOO¯¯¯ooo\u008F\u008F\u008F°°µ::<ââàuuy,,-ÜÜâ\u000E\u000E\u000Fúúû\u001D\u001D\u001E" + "ððõXXZ::?\u0004\u0004\u0004226!!$IIK\u0019\u0019\u001Býýþ\u0092\u0092\u0097õõø\f\f\r" + "))-÷÷úììòÍÍÓ66;\b\b\t\u0084\u0084\u0088¡¡¦îîô\u0014\u0014\u0016òòö\u0010\u0010\u0012¾¾Äffiüüýóó÷..2ûûü" + "ööù%%)ííó\u001D\u001D\u001F>>Døøúññö\u000E\u000E\u000Eééç\u008D\u008D\u008CÓÓÒCCI©©¨\u009B\u009B\u009A" + "òòñôôózz|888÷÷÷ììëÝÝãµµ¸bbb\u0095\u0095\u0098··¶ûûûºº¼\u0089\u0089\u008Bååãêêë==>ÑÑÖ***qqpààåZZ\\õõõ" + "\u007F\u007F~\u008E\u008E\u008E\u001E\u001E\u001FÀÀÅååèÆÆÅççåÇÇÊ\u001C\u001C\u001C]]^±±¶TTTççêÉÉÇFFFáá" + "æÅÅÄyy{ÍÍÎÐÐÕ^^^vvyîîí\u0087\u0087\u008A}}}xxzÊÊËjjl--.ëëò\u0000\u0000\u0000ÿÿÿ{{{|||}}}~~~\u007F\u007F" + "\u007F\u0080\u0080\u0080\u0081\u0081\u0081\u0082\u0082\u0082\u0083\u0083\u0083\u0084\u0084\u0084\u0085" + 
"\u0085\u0085\u0086\u0086\u0086\u0087\u0087\u0087\u0088\u0088\u0088\u0089\u0089\u0089\u008A\u008A\u008A" + "\u008B\u008B\u008B\u008C\u008C\u008C\u008D\u008D\u008D\u008E\u008E\u008E\u008F\u008F\u008F\u0090\u0090" + "\u0090\u0091\u0091\u0091\u0092\u0092\u0092\u0093\u0093\u0093\u0094\u0094\u0094\u0095\u0095\u0095\u0096" + "\u0096\u0096\u0097\u0097\u0097\u0098\u0098\u0098\u0099\u0099\u0099\u009A\u009A\u009A\u009B\u009B\u009B" + "\u009C\u009C\u009C\u009D\u009D\u009D\u009E\u009E\u009E\u009F\u009F\u009F ¡¡¡¢¢¢£££¤¤¤¥¥¥¦¦¦§§§¨¨¨©©©" + "ªªª«««¬¬¬\u00AD\u00AD\u00AD®®®¯¯¯°°°±±±²²²³³³´´´µµµ¶¶¶···¸¸¸¹¹¹ººº»»»¼¼¼½½½¾¾¾¿¿¿ÀÀÀÁÁÁÂÂÂÃÃÃÄÄÄÅÅÅÆÆÆ" + "ÇÇÇÈÈÈÉÉÉÊÊÊËËËÌÌÌÍÍÍÎÎÎÏÏÏÐÐÐÑÑÑÒÒÒÓÓÓÔÔÔÕÕÕÖÖÖ×××ØØØÙÙÙÚÚÚÛÛÛÜÜÜÝÝÝÞÞÞßßßàààáááâââãããäääåååæææçççèèè" + "éééêêêëëëìììíííîîîïïïðððñññòòòóóóôôôõõõööö÷÷÷øøøùùùúúúûûûüüüýýýþþþÿÿÿ"; PdfSpecialCs.Indexed expectedIndexedCs = new PdfSpecialCs.Indexed(PdfName.DeviceRGB, 255, new PdfString(indexedCsLookupData )); expectedDict.Put(PdfName.ColorSpace, expectedIndexedCs.GetPdfObject()); NUnit.Framework.Assert.IsTrue(new CompareTool().CompareDictionaries(inlineImages[0], expectedDict)); }
// Runs every page of tableWithImageAndText.pdf through a RecordEveryHighLevelEventListener
// that appends to a shared log, then compares that log with the UTF-8 contents of
// contentStreamProcessorTest_events_log.dat. The document is opened with an in-memory writer.
public virtual void ContentStreamProcessorTest()
{
    PdfReader reader = new PdfReader(sourceFolder + "tableWithImageAndText.pdf");
    PdfWriter writer = new PdfWriter(new ByteArrayOutputStream());
    PdfDocument document = new PdfDocument(reader, writer);

    StringBuilder recordedEvents = new StringBuilder();
    for (int pageNumber = 1; pageNumber <= document.GetNumberOfPages(); ++pageNumber)
    {
        PdfPage currentPage = document.GetPage(pageNumber);
        // A new processor and listener per page, all writing into the same log.
        PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(
            new PdfCanvasProcessorIntegrationTest.RecordEveryHighLevelEventListener(recordedEvents));
        canvasProcessor.ProcessPageContent(currentPage);
    }

    String expectedLogPath = System.IO.Path.Combine(sourceFolder + "contentStreamProcessorTest_events_log.dat");
    byte[] expectedLogBytes = File.ReadAllBytes(expectedLogPath);
    String expectedEvents = iText.IO.Util.JavaUtil.GetStringForBytes(expectedLogBytes, System.Text.Encoding.UTF8);
    NUnit.Framework.Assert.AreEqual(expectedEvents, recordedEvents.ToString());
}