コード例 #1
0
        public void Eu004()
        {
            using (PdfDocument document = PdfDocument.Open("Resources/icdar2013-dataset/competition-dataset-eu/eu-004.pdf", new ParsingOptions()
            {
                ClipPaths = true
            }))
            {
                ObjectExtractor oe   = new ObjectExtractor(document);
                PageArea        page = oe.Extract(3);

                var detector = new SimpleNurminenDetectionAlgorithm();
                var regions  = detector.Detect(page);

                var newArea = page.GetArea(regions[0].BoundingBox);

                var sea    = new SpreadsheetExtractionAlgorithm();
                var tables = sea.Extract(newArea);

                /*
                 * var detector = new SimpleNurminenDetectionAlgorithm();
                 * var regions = detector.Detect(page);
                 *
                 * foreach (var a in regions)
                 * {
                 *  IExtractionAlgorithm ea = new BasicExtractionAlgorithm();
                 *  var newArea = page.GetArea(a.BoundingBox);
                 *  List<Table> tables = ea.Extract(newArea);
                 * }
                 */
            }
        }
コード例 #2
0
        public void TestLinesToCells()
        {
            using (PdfDocument document = PdfDocument.Open("test3.pdf", new ParsingOptions()
            {
                ClipPaths = true
            }))
            {
                ObjectExtractor oe   = new ObjectExtractor(document);
                PageArea        page = oe.Extract(1);

                SimpleNurminenDetectionAlgorithm detector = new SimpleNurminenDetectionAlgorithm();
                var regions = detector.Detect(page);

                foreach (var a in regions)
                {
                    IExtractionAlgorithm ea = new BasicExtractionAlgorithm();
                    var          newArea    = page.GetArea(a.BoundingBox);
                    List <Table> tables     = ea.Extract(newArea);
                }
            }
        }