/// <summary>
/// Searches every page of <c>ligature.pdf</c> for the pattern "ca" and verifies that exactly
/// one location is reported, with the expected text and bounding rectangle.
/// </summary>
/// <remarks>
/// The document is closed in a <c>finally</c> block so that a failing assertion (or an
/// exception during content processing) does not leave it open.
/// </remarks>
public virtual void TestLigatureBeforeLigature() {
    System.Console.Out.WriteLine(new FileInfo(sourceFolder).FullName);
    PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "ligature.pdf"));
    try {
        // build strategy
        RegexBasedLocationExtractionStrategy extractionStrategy = new RegexBasedLocationExtractionStrategy("ca");

        // get locations
        IList<IPdfTextLocation> locationList = new List<IPdfTextLocation>();
        for (int x = 1; x <= pdfDocument.GetNumberOfPages(); x++) {
            new PdfCanvasProcessor(extractionStrategy).ProcessPageContent(pdfDocument.GetPage(x));
            foreach (IPdfTextLocation location in extractionStrategy.GetResultantLocations()) {
                if (location != null) {
                    locationList.Add(location);
                }
            }
        }

        // compare
        NUnit.Framework.Assert.AreEqual(1, locationList.Count);
        IPdfTextLocation loc = locationList[0];
        NUnit.Framework.Assert.AreEqual("ca", loc.GetText());
        Rectangle rect = loc.GetRectangle();
        NUnit.Framework.Assert.AreEqual(36, rect.GetX(), 0.0001);
        NUnit.Framework.Assert.AreEqual(655.4600, rect.GetY(), 0.0001);
        NUnit.Framework.Assert.AreEqual(25.1000, rect.GetWidth(), 0.0001);
        NUnit.Framework.Assert.AreEqual(20, rect.GetHeight(), 0.0001);
    }
    finally {
        // Runs even when an assertion above throws, so the document is always closed.
        pdfDocument.Close();
    }
}
/// <summary>
/// Searches every page of <c>in01.pdf</c> for the literal placeholder <c>{{Signature}}</c>
/// (passed as a pre-compiled regex) and verifies that exactly one location is reported, with
/// the expected text and bounding rectangle.
/// </summary>
/// <remarks>
/// Rectangle coordinates are compared after truncation to <c>int</c>, so only the integral
/// part of each value is checked. The document is closed in a <c>finally</c> block so that a
/// failing assertion does not leave it open.
/// </remarks>
public virtual void Test01() {
    System.Console.Out.WriteLine(new FileInfo(sourceFolder).FullName);
    PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "in01.pdf"));
    try {
        // build strategy
        RegexBasedLocationExtractionStrategy extractionStrategy = new RegexBasedLocationExtractionStrategy(
            iText.IO.Util.StringUtil.RegexCompile("\\{\\{Signature\\}\\}"));

        // get locations
        IList<IPdfTextLocation> locationList = new List<IPdfTextLocation>();
        for (int x = 1; x <= pdfDocument.GetNumberOfPages(); x++) {
            new PdfCanvasProcessor(extractionStrategy).ProcessPageContent(pdfDocument.GetPage(x));
            foreach (IPdfTextLocation location in extractionStrategy.GetResultantLocations()) {
                if (location != null) {
                    locationList.Add(location);
                }
            }
        }

        // compare
        NUnit.Framework.Assert.AreEqual(1, locationList.Count);
        IPdfTextLocation loc = locationList[0];
        NUnit.Framework.Assert.AreEqual("{{Signature}}", loc.GetText());
        NUnit.Framework.Assert.AreEqual(23, (int)loc.GetRectangle().GetX());
        NUnit.Framework.Assert.AreEqual(375, (int)loc.GetRectangle().GetY());
        NUnit.Framework.Assert.AreEqual(55, (int)loc.GetRectangle().GetWidth());
        NUnit.Framework.Assert.AreEqual(11, (int)loc.GetRectangle().GetHeight());
    }
    finally {
        // close: runs even when an assertion above throws, so the document is always closed.
        pdfDocument.Close();
    }
}
/// <summary>
/// Searches every page of <c>rotatedText.pdf</c> for the pattern "abc" and verifies that two
/// locations are reported whose rectangles match the expected ones within the tolerance of
/// <c>Rectangle.EqualsWithEpsilon</c>.
/// </summary>
/// <remarks>
/// The document is closed in a <c>finally</c> block so that a failing assertion does not
/// leave it open.
/// </remarks>
public virtual void TestRotatedText() {
    PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "rotatedText.pdf"));
    try {
        // build strategy
        RegexBasedLocationExtractionStrategy extractionStrategy = new RegexBasedLocationExtractionStrategy("abc");

        // get locations
        IList<IPdfTextLocation> locationList = new List<IPdfTextLocation>();
        for (int x = 1; x <= pdfDocument.GetNumberOfPages(); x++) {
            new PdfCanvasProcessor(extractionStrategy).ProcessPageContent(pdfDocument.GetPage(x));
            foreach (IPdfTextLocation location in extractionStrategy.GetResultantLocations()) {
                if (location != null) {
                    locationList.Add(location);
                }
            }
        }

        // compare
        NUnit.Framework.Assert.AreEqual(2, locationList.Count);
        NUnit.Framework.Assert.IsTrue(locationList[0].GetRectangle().EqualsWithEpsilon(
            new Rectangle(188.512f, 450f, 14.800003f, 25.791992f)));
        NUnit.Framework.Assert.IsTrue(locationList[1].GetRectangle().EqualsWithEpsilon(
            new Rectangle(36f, 746.688f, 25.792f, 14.799988f)));
    }
    finally {
        // Runs even when an assertion above throws, so the document is always closed.
        pdfDocument.Close();
    }
}
/// <summary>
/// Runs a pattern consisting of a single space over page 1 of
/// <c>regexWithWhiteSpacesTest.pdf</c> and expects the strategy to report no locations.
/// </summary>
public virtual void RegexWithOnlyWhiteSpace() {
    string inputPath = sourceFolder + "regexWithWhiteSpacesTest.pdf";
    PdfReader reader = new PdfReader(inputPath);
    PdfDocument document = new PdfDocument(reader);

    RegexBasedLocationExtractionStrategy whitespaceStrategy = new RegexBasedLocationExtractionStrategy(" ");
    PdfCanvasProcessor processor = new PdfCanvasProcessor(whitespaceStrategy);
    processor.ProcessPageContent(document.GetPage(1));

    // Copy the results into a list before closing the document; the assertion only
    // needs the copied collection.
    IList<IPdfTextLocation> found = new List<IPdfTextLocation>(whitespaceStrategy.GetResultantLocations());
    document.Close();

    NUnit.Framework.Assert.AreEqual(0, found.Count);
}
/// <summary>
/// Runs the pattern <c>\nstart\n</c> over page 1 of <c>regexWithNewLinesTest.pdf</c> and
/// expects a single location whose text is "start" surrounded by newlines and whose
/// rectangle matches the expected one within the tolerance of
/// <c>Rectangle.EqualsWithEpsilon</c>.
/// </summary>
public virtual void RegexWithNewLinesTest() {
    string inputPath = sourceFolder + "regexWithNewLinesTest.pdf";
    PdfReader reader = new PdfReader(inputPath);
    PdfDocument document = new PdfDocument(reader);

    RegexBasedLocationExtractionStrategy newLineStrategy = new RegexBasedLocationExtractionStrategy("\\nstart\\n");
    PdfCanvasProcessor processor = new PdfCanvasProcessor(newLineStrategy);
    processor.ProcessPageContent(document.GetPage(1));

    // Copy the results into a list before closing the document; the assertions only
    // need the copied collection.
    IList<IPdfTextLocation> found = new List<IPdfTextLocation>(newLineStrategy.GetResultantLocations());
    document.Close();

    NUnit.Framework.Assert.AreEqual(1, found.Count);
    NUnit.Framework.Assert.AreEqual("\nstart\n", found[0].GetText());

    Rectangle expectedBounds = new Rectangle(56.8f, 729.5970f, 20.6159f, 13.2839f);
    Rectangle actualBounds = found[0].GetRectangle();
    NUnit.Framework.Assert.IsTrue(expectedBounds.EqualsWithEpsilon(actualBounds));
}