Exemple #1
0
 /// <summary>
 /// Exports page 4 of the test document through <c>AltoXmlTextExporter</c> and checks that
 /// the result is non-null and can be loaded into an <c>XDocument</c>.
 /// </summary>
 public void CanExportAltoXmlFormat()
 {
     using (var pdf = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff))
     {
         // Build the exporter from a nearest-neighbour word extractor and Docstrum page segmenter.
         var wordExtractor = new NearestNeighbourWordExtractor();
         var pageSegmenter = new DocstrumBoundingBoxes();
         var altoExporter  = new AltoXmlTextExporter(wordExtractor, pageSegmenter);

         var altoXml = altoExporter.Get(pdf.GetPage(4), true);
         Assert.NotNull(altoXml);

         // Round-trip the exported text through an XML reader; loading throws if it is not well-formed.
         var utf8Bytes = Encoding.UTF8.GetBytes(altoXml);
         using (var buffer = new MemoryStream(utf8Bytes))
         {
             using (var reader = new XmlTextReader(buffer))
             {
                 var parsed = XDocument.Load(reader);
                 Assert.NotNull(parsed);
             }
         }
     }
 }
Exemple #2
0
 /// <summary>
 /// Exports page 16 of the test document through <c>AltoXmlTextExporter</c> and checks that
 /// the result is non-null and can be loaded into an <c>XDocument</c>.
 /// </summary>
 /// <remarks>
 /// Page 16 contains an unprintable string and a single line of text which causes problems for Docstrum.
 /// </remarks>
 public void CanExportAltoXmlFormatPage16()
 {
     using (var pdf = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff))
     {
         // Build the exporter from a nearest-neighbour word extractor and Docstrum page segmenter.
         var wordExtractor = new NearestNeighbourWordExtractor();
         var pageSegmenter = new DocstrumBoundingBoxes();
         var altoExporter  = new AltoXmlTextExporter(wordExtractor, pageSegmenter);

         var altoXml = altoExporter.Get(pdf.GetPage(16), true);
         Assert.NotNull(altoXml);

         // Round-trip the exported text through an XML reader; loading throws if it is not well-formed.
         var utf8Bytes = Encoding.UTF8.GetBytes(altoXml);
         using (var buffer = new MemoryStream(utf8Bytes))
         {
             using (var reader = new XmlTextReader(buffer))
             {
                 var parsed = XDocument.Load(reader);
                 Assert.NotNull(parsed);
             }
         }
     }
 }