/// <summary>
        /// Exports one page of a PDF as PAGE XML text and writes two companion PNG images next to the PDF.
        /// </summary>
        /// <remarks>
        /// <c>pdfPath</c>, <c>pageNo</c> and <c>zoom</c> are not parameters; they are read from members declared
        /// outside this method. Three files are written, each produced by replacing the extension of <c>pdfPath</c>:
        /// <c>{pageNo}.xml</c> (exporter output), <c>{pageNo}_raw.png</c> (the rendered page) and
        /// <c>{pageNo}_bw_raw.png</c> (a freshly created 8bpp indexed bitmap of the same size).
        /// </remarks>
        public static void Run()
        {
            var exporter = new PageXmlTextExporter(NearestNeighbourWordExtractor.Instance,
                                                   RecursiveXYCut.Instance,
                                                   UnsupervisedReadingOrderDetector.Instance,
                                                   scale: zoom);

            using (var converter = new PdfImageConverter(pdfPath))
            using (PdfDocument document = PdfDocument.Open(pdfPath))
            {
                var page = document.GetPage(pageNo);

                var xml = exporter.Get(page);
                File.WriteAllText(Path.ChangeExtension(pdfPath, pageNo + ".xml"), xml);

                // No Graphics object is needed here: nothing is drawn onto the rendered page.
                using (var bitmap = converter.GetPage(page.Number, zoom))
                {
                    // save pdf page as image
                    bitmap.Save(Path.ChangeExtension(pdfPath, pageNo + "_raw.png"));

                    // save empty image for LayoutEvalGUI; disposed explicitly so the GDI+ handle is released promptly
                    using (var blackAndWhite = new Bitmap(bitmap.Width, bitmap.Height, PixelFormat.Format8bppIndexed))
                    {
                        blackAndWhite.Save(Path.ChangeExtension(pdfPath, pageNo + "_bw_raw.png"));
                    }
                }
            }
        }
示例#2
0
        /// <summary>
        /// Renders every page of a PDF at 10x zoom, outlines each letter's glyph rectangle in red and saves the
        /// result as a PNG per page. The text direction of every letter is also written to the console.
        /// </summary>
        /// <param name="path">
        /// Path of the PDF to open. Page <c>n</c> is saved by replacing the extension of this path with
        /// <c>{n}_imageTest.png</c>.
        /// </param>
        public static void Run(string path)
        {
            float zoom = 10;
            var scale = (decimal)zoom;

            // The pen wraps a GDI+ handle, so it is disposed together with the converter and the document.
            using (var redPen = new Pen(Color.Red, zoom * 0.4f))
            using (var converter = new PdfImageConverter(path))
            using (var document = PdfDocument.Open(path))
            {
                for (var i = 0; i < document.NumberOfPages; i++)
                {
                    // Pages are requested with 1-based numbers.
                    var pageNumber = i + 1;
                    var page = document.GetPage(pageNumber);

                    using (var bitmap = converter.GetPage(pageNumber, zoom))
                    using (var graphics = Graphics.FromImage(bitmap))
                    {
                        var imageHeight = bitmap.Height;

                        foreach (var letter in page.Letters)
                        {
                            Console.WriteLine(letter.TextDirection);

                            var glyph = letter.GlyphRectangle;

                            // Y is measured from the bottom of the bitmap (presumably because PDF coordinates are
                            // bottom-up - TODO confirm). Zero-sized glyphs are drawn as if they were 1 unit wide/high.
                            var rect = new Rectangle(
                                (int)(glyph.Left * scale),
                                imageHeight - (int)(glyph.Top * scale),
                                (int)((glyph.Width == 0 ? 1 : glyph.Width) * scale),
                                (int)((glyph.Height == 0 ? 1 : glyph.Height) * scale));
                            graphics.DrawRectangle(redPen, rect);
                        }

                        bitmap.Save(Path.ChangeExtension(path, pageNumber + "_imageTest.png"));
                    }
                }
            }
        }
示例#3
0
        /// <summary>
        /// Produces the hOCR text for a PDF and saves an image of every page.
        /// </summary>
        /// <param name="documentPath">Path of the PDF to open. It is also stored in <c>_documentPath</c>.</param>
        /// <returns>The value returned by the <c>GetCode</c> overload that takes the opened document.</returns>
        /// <remarks>
        /// The document is closed before the pages are rendered. Each page is rendered at <c>_scale</c> and saved to
        /// the path returned by <c>GetPageImagePath</c> for that 1-based page number.
        /// </remarks>
        public string GetCode(string documentPath)
        {
            _documentPath = documentPath;

            int pageCount;
            string hocr;

            using (var document = PdfDocument.Open(documentPath))
            {
                pageCount = document.NumberOfPages;
                hocr = GetCode(document);
            }

            using (var converter = new PdfImageConverter(documentPath))
            {
                for (var pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    // The page is only saved, never drawn on, so no Graphics object is created for it.
                    using (var bitmap = converter.GetPage(pageNumber, (float)_scale))
                    {
                        bitmap.Save(GetPageImagePath(documentPath, pageNumber));
                    }
                }
            }

            return hocr;
        }
        /// <summary>
        /// Renders every page of a PDF at 3x zoom, outlines the blocks found by <c>RecursiveXYCutLocal</c> and saves
        /// the result as a PNG per page.
        /// </summary>
        /// <param name="path">
        /// Path of the PDF to open. Page <c>n</c> is saved by replacing the extension of this path with
        /// <c>{n}_final.png</c>.
        /// </param>
        /// <remarks>
        /// Only words whose text direction is <c>Horizontal</c> or <c>Rotate180</c> are passed to the block detector.
        /// </remarks>
        public static void Run(string path)
        {
            float zoom = 3;
            var scale = (decimal)zoom;

            // Only one pen is actually drawn with; it is disposed with the other resources.
            using (var blockPen = new Pen(Color.Fuchsia, zoom * 2.0f))
            using (var converter = new PdfImageConverter(path))
            using (PdfDocument document = PdfDocument.Open(path))
            {
                for (var i = 0; i < document.NumberOfPages; i++)
                {
                    // Pages are requested with 1-based numbers.
                    var pageNumber = i + 1;
                    var page = document.GetPage(pageNumber);

                    using (var bitmap = converter.GetPage(pageNumber, zoom))
                    using (var graphics = Graphics.FromImage(bitmap))
                    {
                        var imageHeight = bitmap.Height;

                        var words      = page.GetWords(NearestNeighbourWordExtractor.Instance);
                        var pageWordsH = words.Where(x => x.TextDirection == TextDirection.Horizontal || x.TextDirection == TextDirection.Rotate180).ToArray();
                        var blocks     = new RecursiveXYCutLocal(path, pageNumber).GetBlocks(pageWordsH);

                        foreach (var block in blocks)
                        {
                            var box = block.BoundingBox;

                            // Y is measured from the bottom of the bitmap (presumably because PDF coordinates are
                            // bottom-up - TODO confirm).
                            var rect = new Rectangle(
                                (int)(box.Left * scale),
                                imageHeight - (int)(box.Top * scale),
                                (int)(box.Width * scale),
                                (int)(box.Height * scale));

                            graphics.DrawRectangle(blockPen, rect);
                        }

                        bitmap.Save(Path.ChangeExtension(path, pageNumber + "_final.png"));
                    }
                }
            }
        }
        /// <summary>
        /// Renders every page of a PDF at 20x zoom and overlays bounding boxes for letters, whole paths, individual
        /// path commands and the blocks returned by <c>RecursiveXYCutPath</c>, then saves each page as a PNG.
        /// </summary>
        /// <param name="path">
        /// Path of the PDF to open. Page <c>n</c> is saved by replacing the extension of this path with
        /// <c>{n}_pathsTest.png</c>.
        /// </param>
        /// <exception cref="NotImplementedException">
        /// A path contains a command that is not a line, a Bezier curve, a close or a move.
        /// </exception>
        /// <remarks>
        /// Colours used: letters pink; whole-path boxes green (quadrilateral), aqua (circle) or black (anything else);
        /// line commands blue; Bezier curves red; close and move commands green; XY-cut blocks aqua.
        /// </remarks>
        public static void Run(string path)
        {
            // check shape, see http://www.aforgenet.com/articles/shape_checker/
            var shapeChecker = new SimpleShapeChecker();

            float zoom = 20;
            var scale = (decimal)zoom;

            // Pens wrap GDI+ handles, so they are disposed together with the converter and the document.
            using (var pinkPen = new Pen(Color.HotPink, zoom * 0.4f))
            using (var greenPen = new Pen(Color.GreenYellow, zoom * 0.7f))
            using (var aquaPen = new Pen(Color.Aqua, zoom * 0.7f))
            using (var redPen = new Pen(Color.Red, zoom * 0.4f))
            using (var bluePen = new Pen(Color.Blue, zoom * 0.4f))
            using (var blackPen = new Pen(Color.Black, zoom * 0.7f))
            using (var converter = new PdfImageConverter(path))
            using (PdfDocument document = PdfDocument.Open(path))
            {
                for (var i = 0; i < document.NumberOfPages; i++)
                {
                    // Pages are requested with 1-based numbers.
                    var pageNumber = i + 1;
                    var page  = document.GetPage(pageNumber);
                    var paths = page.ExperimentalAccess.Paths;

                    using (var bitmap = converter.GetPage(pageNumber, zoom))
                    using (var graphics = Graphics.FromImage(bitmap))
                    {
                        var imageHeight = bitmap.Height;

                        // Scales a box to image pixels, measuring Y from the bottom of the bitmap.
                        // A zero width or height is replaced by exactly 1 pixel (not scaled).
                        Rectangle ToPaddedRectangle(decimal left, decimal top, decimal width, decimal height)
                        {
                            return new Rectangle(
                                (int)(left * scale),
                                imageHeight - (int)(top * scale),
                                (int)(width == 0 ? 1 : width * scale),
                                (int)(height == 0 ? 1 : height * scale));
                        }

                        foreach (var letter in page.Letters)
                        {
                            var glyph = letter.GlyphRectangle;
                            var rect = new Rectangle(
                                (int)(glyph.Left * scale),
                                imageHeight - (int)(glyph.Top * scale),
                                (int)(glyph.Width * scale),
                                (int)(glyph.Height * scale));
                            graphics.DrawRectangle(pinkPen, rect);
                        }

                        foreach (var p in paths)
                        {
                            if (p == null)
                            {
                                continue;
                            }

                            var commands = p.Commands;
                            var points   = ToPoints(commands);

                            var shape   = shapeChecker.CheckShapeType(points);
                            // NOTE(review): the sub-type is not used below; the call is kept so that any
                            // validation it performs on the points still happens - confirm whether it can be dropped.
                            var subType = shapeChecker.CheckPolygonSubType(points);

                            var bboxF = GetBoundingRectangle(commands);
                            if (bboxF.HasValue)
                            {
                                var pen = shape == ShapeType.Quadrilateral ? greenPen : (shape == ShapeType.Circle ? aquaPen : blackPen);

                                graphics.DrawRectangle(
                                    pen,
                                    ToPaddedRectangle(bboxF.Value.Left, bboxF.Value.Top, bboxF.Value.Width, bboxF.Value.Height));
                            }

                            foreach (var command in commands)
                            {
                                if (command is PdfPath.Line line)
                                {
                                    var bbox = line.GetBoundingRectangle();
                                    if (bbox.HasValue)
                                    {
                                        graphics.DrawRectangle(
                                            bluePen,
                                            ToPaddedRectangle(bbox.Value.Left, bbox.Value.Top, bbox.Value.Width, bbox.Value.Height));
                                    }
                                }
                                else if (command is BezierCurve curve)
                                {
                                    var bbox = curve.GetBoundingRectangle();
                                    if (bbox.HasValue)
                                    {
                                        graphics.DrawRectangle(
                                            redPen,
                                            ToPaddedRectangle(bbox.Value.Left, bbox.Value.Top, bbox.Value.Width, bbox.Value.Height));
                                    }
                                }
                                else if (command is Close close)
                                {
                                    var bbox = close.GetBoundingRectangle();
                                    if (bbox.HasValue)
                                    {
                                        graphics.DrawRectangle(
                                            greenPen,
                                            ToPaddedRectangle(bbox.Value.Left, bbox.Value.Top, bbox.Value.Width, bbox.Value.Height));
                                    }
                                }
                                else if (command is Move move)
                                {
                                    var bbox = move.GetBoundingRectangle();
                                    if (bbox.HasValue)
                                    {
                                        graphics.DrawRectangle(
                                            greenPen,
                                            ToPaddedRectangle(bbox.Value.Left, bbox.Value.Top, bbox.Value.Width, bbox.Value.Height));
                                    }
                                }
                                else
                                {
                                    throw new NotImplementedException(command.GetType().ToString());
                                }
                            }
                        }

                        var rectsPaths = RecursiveXYCutPath.Instance.GetBlocks(paths, 0, 10, 10);
                        foreach (var rectPath in rectsPaths)
                        {
                            var rect = new Rectangle(
                                (int)(rectPath.Left * scale),
                                imageHeight - (int)(rectPath.Top * scale),
                                (int)(rectPath.Width * scale),
                                (int)(rectPath.Height * scale));
                            graphics.DrawRectangle(aquaPen, rect);
                        }

                        bitmap.Save(Path.ChangeExtension(path, pageNumber + "_pathsTest.png"));
                    }
                }
            }
        }
示例#6
0
        /// <summary>
        /// Renders every page of a PDF at 20x zoom, overlays bounding boxes for letters, whole paths and the blocks
        /// returned by <c>RecursiveXYCutPath</c>, and saves each page as a PNG. For every closed path a modal window
        /// is shown first that plots the path's segments as arrows.
        /// </summary>
        /// <param name="path">
        /// Path of the PDF to open. Page <c>n</c> is saved by replacing the extension of this path with
        /// <c>{n}_pathsTest.png</c>.
        /// </param>
        /// <remarks>
        /// This method blocks on <c>ShowDialog</c> once per closed path, so it needs an interactive session.
        /// The window title is "CW" or "CCW" depending on <c>PdfGeometry.IsClockwise</c>.
        /// Per-command bounding boxes are not drawn in this variant.
        /// </remarks>
        public static void Run(string path)
        {
            // check shape, see http://www.aforgenet.com/articles/shape_checker/
            var shapeChecker = new SimpleShapeChecker();

            float zoom = 20;
            var scale = (decimal)zoom;

            // Only the pens that are actually drawn with are created; they are disposed with the other resources.
            using (var pinkPen = new Pen(Color.HotPink, zoom * 0.4f))
            using (var greenPen = new Pen(Color.GreenYellow, zoom * 0.7f))
            using (var aquaPen = new Pen(Color.Aqua, zoom * 0.7f))
            using (var converter = new PdfImageConverter(path))
            using (PdfDocument document = PdfDocument.Open(path))
            {
                for (var i = 0; i < document.NumberOfPages; i++)
                {
                    // Pages are requested with 1-based numbers.
                    var pageNumber = i + 1;
                    var page       = document.GetPage(pageNumber);
                    var paths      = page.ExperimentalAccess.Paths;

                    // NOTE(review): these three lists are not read anywhere below. They are still computed so
                    // that any exception raised while building or classifying the geometries surfaces exactly
                    // as before - confirm whether they can be removed.
                    var geometries  = paths.Select(p => new PdfGeometry(p)).ToList();
                    var verticals   = geometries.Where(g => g.IsVerticalLine()).ToList();
                    var horizontals = geometries.Where(g => g.IsHorizontalLine()).ToList();

                    using (var bitmap = converter.GetPage(pageNumber, zoom))
                    using (var graphics = Graphics.FromImage(bitmap))
                    {
                        var imageHeight = bitmap.Height;

                        foreach (var letter in page.Letters)
                        {
                            var glyph = letter.GlyphRectangle;
                            var rect = new Rectangle(
                                (int)(glyph.Left * scale),
                                imageHeight - (int)(glyph.Top * scale),
                                (int)(glyph.Width * scale),
                                (int)(glyph.Height * scale));
                            graphics.DrawRectangle(pinkPen, rect);
                        }

                        foreach (var p in paths)
                        {
                            if (p == null)
                            {
                                continue;
                            }

                            PdfGeometry geometry = new PdfGeometry(p);

                            var isClosed    = geometry.IsClosed;
                            var isClockwise = geometry.IsClockwise;

                            var commands = p.Commands;
                            var points   = ToOrderedPoints(commands);

                            if (isClosed)
                            {
                                ScatterplotView view = new ScatterplotView();
                                view.Dock         = System.Windows.Forms.DockStyle.Fill;
                                view.LinesVisible = true;
                                view.Graph.GraphPane.Title.Text = isClockwise ? "CW" : "CCW";

                                // Plots one segment as a blue arrow (showing direction) plus a red unnamed curve.
                                void AddSegment(double fromX, double fromY, double toX, double toY)
                                {
                                    view.Graph.GraphPane.GraphObjList.Add(
                                        new ZedGraph.ArrowObj(Color.Blue, 10.0f, fromX, fromY, toX, toY));

                                    view.Graph.GraphPane.AddCurve("",
                                                                  new[] { fromX, toX },
                                                                  new[] { fromY, toY },
                                                                  Color.Red);
                                }

                                foreach (var command in commands)
                                {
                                    if (command is PdfPath.Line line)
                                    {
                                        AddSegment((double)line.From.X, (double)line.From.Y,
                                                   (double)line.To.X, (double)line.To.Y);
                                    }
                                    else if (command is BezierCurve curve)
                                    {
                                        // Curves are plotted as the line segments returned by BezierCurveToPaths.
                                        foreach (var lineB in BezierCurveToPaths(curve))
                                        {
                                            AddSegment((double)lineB.From.X, (double)lineB.From.Y,
                                                       (double)lineB.To.X, (double)lineB.To.Y);
                                        }
                                    }
                                }

                                view.Graph.GraphPane.AxisChange();

                                // A form shown with ShowDialog is not disposed when it is closed, so dispose it
                                // here; this also disposes the view that was added to its Controls collection.
                                using (var f1 = new System.Windows.Forms.Form())
                                {
                                    f1.Width  = 1000;
                                    f1.Height = 1000;
                                    f1.Controls.Add(view);
                                    f1.ShowDialog();
                                }
                            }

                            // NOTE(review): neither result is used below; the calls are kept so that any
                            // validation they perform on the points still happens - confirm whether they can go.
                            var shape   = shapeChecker.CheckShapeType(points);
                            var subType = shapeChecker.CheckPolygonSubType(points);

                            var bboxF = GetBoundingRectangle(commands);
                            if (bboxF.HasValue)
                            {
                                // A zero width or height is replaced by exactly 1 pixel (not scaled).
                                var rect = new Rectangle(
                                    (int)(bboxF.Value.Left * scale),
                                    imageHeight - (int)(bboxF.Value.Top * scale),
                                    (int)(bboxF.Value.Width == 0 ? 1 : bboxF.Value.Width * scale),
                                    (int)(bboxF.Value.Height == 0 ? 1 : bboxF.Value.Height * scale));

                                graphics.DrawRectangle(greenPen, rect);
                            }
                        }

                        var rectsPaths = RecursiveXYCutPath.Instance.GetBlocks(paths, 0, 10, 10);
                        foreach (var rectPath in rectsPaths)
                        {
                            var rect = new Rectangle(
                                (int)(rectPath.Left * scale),
                                imageHeight - (int)(rectPath.Top * scale),
                                (int)(rectPath.Width * scale),
                                (int)(rectPath.Height * scale));
                            graphics.DrawRectangle(aquaPen, rect);
                        }

                        bitmap.Save(Path.ChangeExtension(path, pageNumber + "_pathsTest.png"));
                    }
                }
            }
        }