/// <summary>
/// Exports a single page of the configured PDF: the text produced by
/// <c>PageXmlTextExporter</c>, a raw render of the page, and an empty
/// 8bpp indexed image of the same size.
/// </summary>
/// <remarks>
/// Reads <c>pdfPath</c>, <c>pageNo</c> and <c>zoom</c>, which are declared outside this method.
/// Output files are written next to the PDF, with the extension replaced by
/// <c>.{pageNo}.xml</c>, <c>.{pageNo}_raw.png</c> and <c>.{pageNo}_bw_raw.png</c>.
/// </remarks>
public static void Run()
{
    var exporter = new PageXmlTextExporter(
        NearestNeighbourWordExtractor.Instance,
        RecursiveXYCut.Instance,
        UnsupervisedReadingOrderDetector.Instance,
        scale: zoom);

    using (var converter = new PdfImageConverter(pdfPath))
    using (PdfDocument document = PdfDocument.Open(pdfPath))
    {
        var page = document.GetPage(pageNo);
        var xml = exporter.Get(page);
        File.WriteAllText(Path.ChangeExtension(pdfPath, pageNo + ".xml"), xml);

        // Nothing is drawn on the render, so no Graphics object is created for it.
        using (var bitmap = converter.GetPage(page.Number, zoom))
        {
            // save pdf page as image
            bitmap.Save(Path.ChangeExtension(pdfPath, pageNo + "_raw.png"));

            // save empty image for LayoutEvalGUI
            // The placeholder owns a GDI+ handle, so it is disposed as soon as it has been written.
            using (var blackAndWhite = new Bitmap(bitmap.Width, bitmap.Height, PixelFormat.Format8bppIndexed))
            {
                blackAndWhite.Save(Path.ChangeExtension(pdfPath, pageNo + "_bw_raw.png"));
            }
        }
    }
}
/// <summary>
/// Renders every page of the PDF at <paramref name="path"/>, outlines each letter's
/// glyph rectangle in red and saves the result as a PNG per page.
/// </summary>
/// <param name="path">
/// Path of the PDF to open. Each page image is written next to it, with the extension
/// replaced by <c>.{pageNumber}_imageTest.png</c>.
/// </param>
/// <remarks>The text direction of every letter is also written to the console.</remarks>
public static void Run(string path)
{
    float zoom = 10;

    // Pen wraps a GDI+ handle; dispose it together with the other resources.
    using (var redPen = new Pen(Color.Red, zoom * 0.4f))
    using (var converter = new PdfImageConverter(path))
    using (var document = PdfDocument.Open(path))
    {
        // Pages are addressed starting at 1.
        for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
        {
            var page = document.GetPage(pageNumber);

            using (var bitmap = converter.GetPage(pageNumber, zoom))
            using (var graphics = Graphics.FromImage(bitmap))
            {
                var imageHeight = bitmap.Height;

                foreach (var letter in page.Letters)
                {
                    Console.WriteLine(letter.TextDirection);

                    var glyph = letter.GlyphRectangle;

                    // The scaled Top is subtracted from the image height to get the pixel row
                    // (presumably because page coordinates grow upwards - confirm).
                    // Zero-sized glyph boxes are widened to one page unit so they stay visible.
                    var rect = new Rectangle(
                        (int)(glyph.Left * (decimal)zoom),
                        imageHeight - (int)(glyph.Top * (decimal)zoom),
                        (int)((glyph.Width == 0 ? 1 : glyph.Width) * (decimal)zoom),
                        (int)((glyph.Height == 0 ? 1 : glyph.Height) * (decimal)zoom));
                    graphics.DrawRectangle(redPen, rect);
                }

                bitmap.Save(Path.ChangeExtension(path, pageNumber + "_imageTest.png"));
            }
        }
    }
}
/// <summary>
/// Opens the PDF at <paramref name="documentPath"/>, builds the markup for it through the
/// <c>GetCode</c> overload that takes the opened document, and saves a render of every page.
/// </summary>
/// <param name="documentPath">
/// Path of the PDF to process. The value is also stored in <c>_documentPath</c>.
/// </param>
/// <returns>The string returned by the document-based <c>GetCode</c> overload.</returns>
/// <remarks>
/// Page images are rendered at <c>_scale</c> and written to the location returned by
/// <c>GetPageImagePath</c> for each 1-based page number.
/// </remarks>
public string GetCode(string documentPath)
{
    _documentPath = documentPath;

    int pageCount;
    string hocr;
    using (var document = PdfDocument.Open(documentPath))
    {
        pageCount = document.NumberOfPages;
        hocr = GetCode(document);
    }

    // The document is closed before the converter opens the same file for rendering.
    using (var converter = new PdfImageConverter(documentPath))
    {
        for (var pageNumber = 1; pageNumber <= pageCount; pageNumber++)
        {
            // The render is saved untouched, so no Graphics object is needed.
            using (var bitmap = converter.GetPage(pageNumber, (float)_scale))
            {
                bitmap.Save(GetPageImagePath(documentPath, pageNumber));
            }
        }
    }

    return hocr;
}
/// <summary>
/// Renders every page of the PDF at <paramref name="path"/>, outlines the blocks returned by
/// <c>RecursiveXYCutLocal</c> for the page's horizontal and 180-degree-rotated words, and
/// saves the result as a PNG per page.
/// </summary>
/// <param name="path">
/// Path of the PDF to open. Each page image is written next to it, with the extension
/// replaced by <c>.{pageNumber}_final.png</c>. The path is also passed to
/// <c>RecursiveXYCutLocal</c> together with the page number.
/// </param>
public static void Run(string path)
{
    float zoom = 3;

    // Only one pen is actually drawn with. It is disposed with the other resources
    // because Pen wraps a GDI+ handle.
    using (var blockPen = new Pen(Color.Fuchsia, zoom * 2.0f))
    using (var converter = new PdfImageConverter(path))
    using (PdfDocument document = PdfDocument.Open(path))
    {
        // Pages are addressed starting at 1.
        for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
        {
            var page = document.GetPage(pageNumber);

            using (var bitmap = converter.GetPage(pageNumber, zoom))
            using (var graphics = Graphics.FromImage(bitmap))
            {
                var imageHeight = bitmap.Height;

                var words = page.GetWords(NearestNeighbourWordExtractor.Instance);

                // Words in any other text direction are left out of the segmentation.
                var pageWordsH = words
                    .Where(x => x.TextDirection == TextDirection.Horizontal
                             || x.TextDirection == TextDirection.Rotate180)
                    .ToArray();

                var blocks = new RecursiveXYCutLocal(path, pageNumber).GetBlocks(pageWordsH);

                foreach (var block in blocks)
                {
                    var box = block.BoundingBox;

                    // The scaled Top is subtracted from the image height to get the pixel row
                    // (presumably because page coordinates grow upwards - confirm).
                    var rect = new Rectangle(
                        (int)(box.Left * (decimal)zoom),
                        imageHeight - (int)(box.Top * (decimal)zoom),
                        (int)(box.Width * (decimal)zoom),
                        (int)(box.Height * (decimal)zoom));
                    graphics.DrawRectangle(blockPen, rect);
                }

                bitmap.Save(Path.ChangeExtension(path, pageNumber + "_final.png"));
            }
        }
    }
}
/// <summary>
/// Renders every page of the PDF at <paramref name="path"/> and overlays debug rectangles for
/// letters, paths, individual path commands and the blocks found by <c>RecursiveXYCutPath</c>,
/// then saves the result as a PNG per page.
/// </summary>
/// <param name="path">
/// Path of the PDF to open. Each page image is written next to it, with the extension
/// replaced by <c>.{pageNumber}_pathsTest.png</c>.
/// </param>
/// <remarks>
/// Colours used: pink for letters; green, aqua or black for a whole path (quadrilateral, circle,
/// anything else); blue for line commands; red for Bezier curves; green for close and move
/// commands; aqua for XY-cut blocks.
/// </remarks>
/// <exception cref="NotImplementedException">
/// A path contains a command that is not a line, Bezier curve, close or move.
/// </exception>
public static void Run(string path)
{
    // check shape, see http://www.aforgenet.com/articles/shape_checker/
    SimpleShapeChecker shapeChecker = new SimpleShapeChecker() { };

    float zoom = 20;
    decimal scale = (decimal)zoom;

    // Converts a box in page units to image pixels. The scaled top is subtracted from the
    // image height to get the pixel row (presumably because page coordinates grow upwards - confirm).
    // With padEmpty, a dimension that is exactly zero becomes 1 pixel so that degenerate
    // boxes (e.g. straight lines) remain visible.
    Rectangle ToImageRect(decimal left, decimal top, decimal width, decimal height, int canvasHeight, bool padEmpty)
    {
        return new Rectangle(
            (int)(left * scale),
            canvasHeight - (int)(top * scale),
            (int)(padEmpty && width == 0 ? 1 : width * scale),
            (int)(padEmpty && height == 0 ? 1 : height * scale));
    }

    // Pens wrap GDI+ handles; dispose them together with the other resources.
    using (var pinkPen = new Pen(Color.HotPink, zoom * 0.4f))
    using (var greenPen = new Pen(Color.GreenYellow, zoom * 0.7f))
    using (var aquaPen = new Pen(Color.Aqua, zoom * 0.7f))
    using (var redPen = new Pen(Color.Red, zoom * 0.4f))
    using (var bluePen = new Pen(Color.Blue, zoom * 0.4f))
    using (var blackPen = new Pen(Color.Black, zoom * 0.7f))
    using (var converter = new PdfImageConverter(path))
    using (PdfDocument document = PdfDocument.Open(path))
    {
        for (var i = 0; i < document.NumberOfPages; i++)
        {
            var page = document.GetPage(i + 1);
            var paths = page.ExperimentalAccess.Paths;

            using (var bitmap = converter.GetPage(i + 1, zoom))
            using (var graphics = Graphics.FromImage(bitmap))
            {
                var imageHeight = bitmap.Height;

                foreach (var letter in page.Letters)
                {
                    var glyph = letter.GlyphRectangle;
                    graphics.DrawRectangle(
                        pinkPen,
                        ToImageRect(glyph.Left, glyph.Top, glyph.Width, glyph.Height, imageHeight, false));
                }

                foreach (var p in paths)
                {
                    if (p == null)
                    {
                        continue;
                    }

                    var commands = p.Commands;
                    var points = ToPoints(commands);

                    var shape = shapeChecker.CheckShapeType(points);

                    // NOTE(review): the sub-type is not used below. The call is kept because its
                    // behaviour on arbitrary point sets is not visible from here - confirm before removing.
                    var subType = shapeChecker.CheckPolygonSubType(points);

                    var bboxF = GetBoundingRectangle(commands);
                    if (bboxF.HasValue)
                    {
                        var pen = shape == ShapeType.Quadrilateral
                            ? greenPen
                            : (shape == ShapeType.Circle ? aquaPen : blackPen);
                        graphics.DrawRectangle(
                            pen,
                            ToImageRect(bboxF.Value.Left, bboxF.Value.Top, bboxF.Value.Width, bboxF.Value.Height, imageHeight, true));
                    }

                    foreach (var command in commands)
                    {
                        if (command is PdfPath.Line line)
                        {
                            var bbox = line.GetBoundingRectangle();
                            if (bbox.HasValue)
                            {
                                graphics.DrawRectangle(
                                    bluePen,
                                    ToImageRect(bbox.Value.Left, bbox.Value.Top, bbox.Value.Width, bbox.Value.Height, imageHeight, true));
                            }
                        }
                        else if (command is BezierCurve curve)
                        {
                            var bbox = curve.GetBoundingRectangle();
                            if (bbox.HasValue)
                            {
                                graphics.DrawRectangle(
                                    redPen,
                                    ToImageRect(bbox.Value.Left, bbox.Value.Top, bbox.Value.Width, bbox.Value.Height, imageHeight, true));
                            }
                        }
                        else if (command is Close close)
                        {
                            var bbox = close.GetBoundingRectangle();
                            if (bbox.HasValue)
                            {
                                graphics.DrawRectangle(
                                    greenPen,
                                    ToImageRect(bbox.Value.Left, bbox.Value.Top, bbox.Value.Width, bbox.Value.Height, imageHeight, true));
                            }
                        }
                        else if (command is Move move)
                        {
                            var bbox = move.GetBoundingRectangle();
                            if (bbox.HasValue)
                            {
                                graphics.DrawRectangle(
                                    greenPen,
                                    ToImageRect(bbox.Value.Left, bbox.Value.Top, bbox.Value.Width, bbox.Value.Height, imageHeight, true));
                            }
                        }
                        else
                        {
                            throw new NotImplementedException(command.GetType().ToString());
                        }
                    }
                }

                var rectsPaths = RecursiveXYCutPath.Instance.GetBlocks(paths, 0, 10, 10);
                foreach (var rectPath in rectsPaths)
                {
                    graphics.DrawRectangle(
                        aquaPen,
                        ToImageRect(rectPath.Left, rectPath.Top, rectPath.Width, rectPath.Height, imageHeight, false));
                }

                bitmap.Save(Path.ChangeExtension(path, (i + 1) + "_pathsTest.png"));
            }
        }
    }
}
/// <summary>
/// Renders every page of the PDF at <paramref name="path"/>, overlays debug rectangles for
/// letters, whole paths and the blocks found by <c>RecursiveXYCutPath</c>, and saves the
/// result as a PNG per page. For every closed path a modal window plotting its segments
/// is shown first.
/// </summary>
/// <param name="path">
/// Path of the PDF to open. Each page image is written next to it, with the extension
/// replaced by <c>.{pageNumber}_pathsTest.png</c>.
/// </param>
/// <remarks>
/// The viewer window is titled "CW" or "CCW" from <c>PdfGeometry.IsClockwise</c>. Because it is
/// opened with <c>ShowDialog</c>, processing waits until each window is closed.
/// Bounding boxes of individual path commands are not drawn by this method.
/// </remarks>
public static void Run(string path)
{
    // check shape, see http://www.aforgenet.com/articles/shape_checker/
    SimpleShapeChecker shapeChecker = new SimpleShapeChecker() { };

    float zoom = 20;
    decimal scale = (decimal)zoom;

    // Converts a box in page units to image pixels. The scaled top is subtracted from the
    // image height to get the pixel row (presumably because page coordinates grow upwards - confirm).
    // With padEmpty, a dimension that is exactly zero becomes 1 pixel so that degenerate
    // boxes remain visible.
    Rectangle ToImageRect(decimal left, decimal top, decimal width, decimal height, int canvasHeight, bool padEmpty)
    {
        return new Rectangle(
            (int)(left * scale),
            canvasHeight - (int)(top * scale),
            (int)(padEmpty && width == 0 ? 1 : width * scale),
            (int)(padEmpty && height == 0 ? 1 : height * scale));
    }

    // Adds one directed segment to the viewer: a blue arrow plus a red line between the end points.
    void AddSegment(ScatterplotView target, double fromX, double fromY, double toX, double toY)
    {
        target.Graph.GraphPane.GraphObjList.Add(new ZedGraph.ArrowObj(
            Color.Blue, 10.0f, fromX, fromY, toX, toY));
        target.Graph.GraphPane.AddCurve("", new[] { fromX, toX }, new[] { fromY, toY }, Color.Red);
    }

    // Only the pens that are drawn with are created. They are disposed with the other
    // resources because Pen wraps a GDI+ handle.
    using (var pinkPen = new Pen(Color.HotPink, zoom * 0.4f))
    using (var greenPen = new Pen(Color.GreenYellow, zoom * 0.7f))
    using (var aquaPen = new Pen(Color.Aqua, zoom * 0.7f))
    using (var converter = new PdfImageConverter(path))
    using (PdfDocument document = PdfDocument.Open(path))
    {
        for (var i = 0; i < document.NumberOfPages; i++)
        {
            var page = document.GetPage(i + 1);
            var paths = page.ExperimentalAccess.Paths;

            using (var bitmap = converter.GetPage(i + 1, zoom))
            using (var graphics = Graphics.FromImage(bitmap))
            {
                var imageHeight = bitmap.Height;

                foreach (var letter in page.Letters)
                {
                    var glyph = letter.GlyphRectangle;
                    graphics.DrawRectangle(
                        pinkPen,
                        ToImageRect(glyph.Left, glyph.Top, glyph.Width, glyph.Height, imageHeight, false));
                }

                foreach (var p in paths)
                {
                    if (p == null)
                    {
                        continue;
                    }

                    // The geometry is only built after the null check above.
                    PdfGeometry geometry = new PdfGeometry(p);
                    var isClosed = geometry.IsClosed;
                    var isClockwise = geometry.IsClockwise;

                    var commands = p.Commands;
                    var points = ToOrderedPoints(commands);

                    if (isClosed)
                    {
                        ScatterplotView view = new ScatterplotView();
                        view.Dock = System.Windows.Forms.DockStyle.Fill;
                        view.LinesVisible = true;
                        view.Graph.GraphPane.Title.Text = isClockwise ? "CW" : "CCW";

                        foreach (var command in commands)
                        {
                            if (command is PdfPath.Line line)
                            {
                                AddSegment(
                                    view,
                                    (double)line.From.X, (double)line.From.Y,
                                    (double)line.To.X, (double)line.To.Y);
                            }
                            else if (command is BezierCurve curve)
                            {
                                // Curves are plotted as the segments returned by BezierCurveToPaths.
                                foreach (var lineB in BezierCurveToPaths(curve))
                                {
                                    AddSegment(
                                        view,
                                        (double)lineB.From.X, (double)lineB.From.Y,
                                        (double)lineB.To.X, (double)lineB.To.Y);
                                }
                            }
                        }

                        view.Graph.GraphPane.AxisChange();

                        // A form shown with ShowDialog is not disposed when it is closed, so
                        // dispose it explicitly; this also disposes the hosted view.
                        using (var f1 = new System.Windows.Forms.Form())
                        {
                            f1.Width = 1000;
                            f1.Height = 1000;
                            f1.Controls.Add(view);
                            f1.ShowDialog();
                        }
                    }

                    // NOTE(review): neither result is used below. The calls are kept because their
                    // behaviour on arbitrary point sets is not visible from here - confirm before removing.
                    var shape = shapeChecker.CheckShapeType(points);
                    var subType = shapeChecker.CheckPolygonSubType(points);

                    var bboxF = GetBoundingRectangle(commands);
                    if (bboxF.HasValue)
                    {
                        graphics.DrawRectangle(
                            greenPen,
                            ToImageRect(bboxF.Value.Left, bboxF.Value.Top, bboxF.Value.Width, bboxF.Value.Height, imageHeight, true));
                    }
                }

                var rectsPaths = RecursiveXYCutPath.Instance.GetBlocks(paths, 0, 10, 10);
                foreach (var rectPath in rectsPaths)
                {
                    graphics.DrawRectangle(
                        aquaPen,
                        ToImageRect(rectPath.Left, rectPath.Top, rectPath.Width, rectPath.Height, imageHeight, false));
                }

                bitmap.Save(Path.ChangeExtension(path, (i + 1) + "_pathsTest.png"));
            }
        }
    }
}