Exemplo n.º 1
0
        /// <summary>
        /// Builds the hOCR markup for a path.
        /// <para>http://kba.cloud/hocr-spec/1.2/#elementdef-ocr_linedrawing</para>
        /// </summary>
        /// <param name="path">The path to export. A null path yields an empty string.</param>
        /// <param name="pageHeight">The page height, forwarded to the bounding-box overload of <c>GetCode</c>.</param>
        /// <param name="subPaths">
        /// When true, an <c>ocr_carea</c> block is emitted that wraps one <c>ocr_linedrawing</c> per command
        /// having a bounding rectangle; when false, a single <c>ocr_linedrawing</c> is emitted for the whole path.
        /// </param>
        /// <param name="level">The indent level.</param>
        /// <returns>The hOCR fragment, or an empty string when the path is null or has no bounding rectangle.</returns>
        private string GetCode(PdfPath path, double pageHeight, bool subPaths, int level)
        {
            if (path == null)
            {
                return string.Empty;
            }

            // Both output modes need the bounding box of the whole path; nothing is emitted without it.
            var bounds = path.GetBoundingRectangle();
            if (!bounds.HasValue)
            {
                return string.Empty;
            }

            if (!subPaths)
            {
                // Whole path as one line drawing (no trailing newline).
                pathCount++;
                return $"{GetIndent(level)}<span class='ocr_linedrawing' id='drawing_{pageCount}_{pathCount}' title='{GetCode(bounds.Value, pageHeight)}' />";
            }

            // Content area wrapping one line drawing per command.
            areaCount++;
            string markup = $"{GetIndent(level)}<div class='ocr_carea' id='block_{pageCount}_{areaCount}' title='{GetCode(bounds.Value, pageHeight)}'>\n";

            foreach (var command in path.Commands)
            {
                var commandBounds = command.GetBoundingRectangle();
                if (!commandBounds.HasValue)
                {
                    // Commands without a bounding rectangle are skipped and do not consume a drawing id.
                    continue;
                }

                pathCount++;
                markup += $"{GetIndent(level + 1)}<span class='ocr_linedrawing' id='drawing_{pageCount}_{pathCount}' title='{GetCode(commandBounds.Value, pageHeight)}' />\n";
            }

            return markup + GetIndent(level) + "</div>";
        }
Exemplo n.º 2
0
        /// <summary>
        /// Converts a path into a PAGE XML line drawing region.
        /// </summary>
        /// <param name="pdfPath">The path to convert.</param>
        /// <param name="pageWidth">The page width, forwarded to <c>ToCoords</c>.</param>
        /// <param name="pageHeight">The page height, forwarded to <c>ToCoords</c>.</param>
        /// <returns>
        /// A region whose id is <c>"r"</c> followed by the incremented region counter,
        /// or null when the path has no bounding rectangle.
        /// </returns>
        private PageXmlDocument.PageXmlLineDrawingRegion ToPageXmlLineDrawingRegion(PdfPath pdfPath, double pageWidth, double pageHeight)
        {
            var bounds = pdfPath.GetBoundingRectangle();
            if (!bounds.HasValue)
            {
                // No bounding rectangle: no region is produced and no id is consumed.
                return null;
            }

            regionCount++;
            return new PageXmlDocument.PageXmlLineDrawingRegion()
            {
                Coords = ToCoords(bounds.Value, pageWidth, pageHeight),
                Id = $"r{regionCount}"
            };
        }
Exemplo n.º 3
0
        /// <summary>
        /// Converts a path into an ALTO graphical element positioned from the path's bounding rectangle.
        /// </summary>
        /// <param name="pdfPath">The path to convert.</param>
        /// <param name="height">The height from which the rectangle's top is subtracted to obtain the vertical position.</param>
        /// <returns>
        /// The graphical element, with position and size multiplied by <c>scale</c> and rounded,
        /// or null when the path has no bounding rectangle.
        /// </returns>
        private AltoDocument.AltoGraphicalElement ToAltoGraphicalElement(PdfPath pdfPath, decimal height)
        {
            var rectangle = pdfPath.GetBoundingRectangle();
            if (!rectangle.HasValue)
            {
                return null;
            }

            // Consume an id only when an element is actually emitted, so that paths without a
            // bounding rectangle do not leave gaps in the "P{page}_GE{n}" sequence.
            graphicalElementCount++;

            return new AltoDocument.AltoGraphicalElement
            {
                // Vertical position is measured from the top: flip the y axis using the supplied height.
                VerticalPosition = (float)Math.Round((height - rectangle.Value.Top) * scale),
                HorizontalPosition = (float)Math.Round(rectangle.Value.Left * scale),
                Height = (float)Math.Round(rectangle.Value.Height * scale),
                Width = (float)Math.Round(rectangle.Value.Width * scale),
                Rotation = 0,
                StyleRefs = null,
                TagRefs = null,
                Title = null,
                Type = null,
                Id = "P" + pageCount + "_GE" + graphicalElementCount.ToString("#00000")
            };
        }