/// <summary>
/// Smoke test over page 3 of the ICDAR 2013 "eu-004" document: detects table
/// regions with <see cref="SimpleNurminenDetectionAlgorithm"/> and runs
/// <see cref="SpreadsheetExtractionAlgorithm"/> on the first detected region.
/// </summary>
/// <remarks>
/// No assertions are made; the extracted tables are not inspected, so the test
/// only verifies that the pipeline completes without throwing.
/// </remarks>
public void Eu004()
{
    var parsingOptions = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("Resources/icdar2013-dataset/competition-dataset-eu/eu-004.pdf", parsingOptions))
    {
        var extractor = new ObjectExtractor(pdf);
        var pageArea = extractor.Extract(3);

        // Locate candidate table regions on the page.
        var detectionAlgorithm = new SimpleNurminenDetectionAlgorithm();
        var detectedRegions = detectionAlgorithm.Detect(pageArea);

        // Only the first detected region is processed.
        var firstRegionArea = pageArea.GetArea(detectedRegions[0].BoundingBox);

        var spreadsheetAlgorithm = new SpreadsheetExtractionAlgorithm();
        var extractedTables = spreadsheetAlgorithm.Extract(firstRegionArea);

        // Alternative previously sketched here: iterate over every detected
        // region and run BasicExtractionAlgorithm on each region's area
        // instead of running the spreadsheet algorithm on the first one only.
    }
}
/// <summary>
/// Smoke test over page 1 of "test3.pdf": detects table regions with
/// <see cref="SimpleNurminenDetectionAlgorithm"/> and runs
/// <see cref="BasicExtractionAlgorithm"/> on every detected region.
/// </summary>
/// <remarks>
/// No assertions are made; the extracted tables are discarded, so the test
/// only verifies that detection and extraction complete without throwing.
/// </remarks>
public void TestLinesToCells()
{
    var parsingOptions = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("test3.pdf", parsingOptions))
    {
        var extractor = new ObjectExtractor(pdf);
        var pageArea = extractor.Extract(1);

        var detectionAlgorithm = new SimpleNurminenDetectionAlgorithm();
        var detectedRegions = detectionAlgorithm.Detect(pageArea);

        foreach (var region in detectedRegions)
        {
            // A fresh extraction algorithm instance is created for each region.
            IExtractionAlgorithm extractionAlgorithm = new BasicExtractionAlgorithm();
            var regionArea = pageArea.GetArea(region.BoundingBox);
            List<Table> extractedTables = extractionAlgorithm.Extract(regionArea);
        }
    }
}