public static string ToHtml(this TextGeometryModel model, string imageFileName = null)
        {
            var result = new StringBuilder(100000);

            result.Append(HTMLDebugHeader).Append("\n");
            if (imageFileName != null)
            {
                result.Append(string.Format(HTMLDebugImageBoxTemplate, model.PageBox.Width, model.PageBox.Height, imageFileName));
            }
            foreach (var textBlock in model.TextBlocks())
            {
                AppendTextBlock(textBlock, result);
            }
            foreach (var gmTable in model.Tables())
            {
                var bbox = gmTable.BoundingBox;
                result.Append(string.Format(HTMLDebugTableBoxTemplate, bbox.XMin, bbox.YMin, bbox.Width, bbox.Height));

                foreach (var textBlock in gmTable.Cells().Select(c => c.TextBlock))
                {
                    AppendTextBlock(textBlock, result);
                }
            }
            foreach (var word in model.Words())
            {
                var bbox = word.BoundingBox;
                result.Append(string.Format(HTMLDebugWordBoxTemplate, bbox.XMin, bbox.YMin, bbox.Width, bbox.Height, word.Text, GetBackgroundStyle(word.Accuracy)));
            }
            result.Append(HTMLDebugFooter).Append("\n");
            return(result.ToString().Replace("&", "&"));
        }
// Example #2
// 0
        /// <summary>
        /// Builds a new model whose page box and element bounding boxes are the source ones
        /// passed through <c>ScaleBoundingBox</c> with the given factor. The grid unit of the new
        /// model is obtained from the rounded product of the source divisor and the factor.
        /// </summary>
        /// <param name="model">The model to scale.</param>
        /// <param name="scaleFactor">The factor applied to the page box, the grid divisor and every bounding box.</param>
        /// <returns>The newly created, scaled model.</returns>
        public static TextGeometryModel ScaleModel(this TextGeometryModel model, double scaleFactor)
        {
            var scaledPageBox  = ScaleBoundingBox(model.PageBox, scaleFactor);
            var scaledGridUnit = GridUnit.ByResolution(Round(model.GridUnit.Divisor * scaleFactor));
            var scaledModel    = new TextGeometryModel(scaledPageBox, scaledGridUnit);

            ModelGeometryTransformer.TransformModelGeometry(
                model,
                scaledModel,
                sourceBox => ScaleBoundingBox(sourceBox, scaleFactor));

            return scaledModel;
        }
// Example #3
// 0
        /// <summary>
        /// Rotates the given model by the given isometric transform.
        /// Each bounding box of the model is converted with <c>RotateBoundingBox</c>, so that the result is
        /// still the minimal rectangle with sides parallel to the coordinate axes containing the rotated original box.
        /// </summary>
        /// <param name="model">The model to process</param>
        /// <param name="transform">The transform to apply</param>
        /// <param name="targetWidth">Target model width</param>
        /// <param name="targetHeight">Target model height</param>
        /// <returns>A new model with a page box of (0, 0, targetWidth, targetHeight) and the source model's grid unit</returns>
        public static TextGeometryModel RotateModel(this TextGeometryModel model, IsometricTransform transform, int targetWidth, int targetHeight)
        {
            var rotatedPageBox = new BoundingBox(0, 0, targetWidth, targetHeight);
            var rotatedModel   = new TextGeometryModel(rotatedPageBox, model.GridUnit);

            ModelGeometryTransformer.TransformModelGeometry(
                model,
                rotatedModel,
                sourceBox => RotateBoundingBox(sourceBox, transform));

            return rotatedModel;
        }
// Example #4
// 0
 /// <summary>
 /// Copies the text blocks and tables of <paramref name="sourceModel"/> into <paramref name="targetModel"/>,
 /// passing each one through <c>TransformTextBlock</c> / <c>TransformTable</c> with the given box converter.
 /// Text blocks are added first, then tables.
 /// </summary>
 /// <param name="sourceModel">The model whose elements are read.</param>
 /// <param name="targetModel">The model that receives the transformed elements.</param>
 /// <param name="boxConverter">The function handed to the element transformers to convert bounding boxes.</param>
 /// <returns>The same <paramref name="targetModel"/> instance that was passed in.</returns>
 public static TextGeometryModel TransformModelGeometry(TextGeometryModel sourceModel, TextGeometryModel targetModel, Func <BoundingBox, BoundingBox> boxConverter)
 {
     foreach (var sourceBlock in sourceModel.TextBlocks())
     {
         var convertedBlock = TransformTextBlock(sourceBlock, boxConverter);
         targetModel.AddTextBlock(convertedBlock);
     }

     foreach (var sourceTable in sourceModel.Tables())
     {
         var convertedTable = TransformTable(sourceTable, boxConverter);
         targetModel.AddTable(convertedTable);
     }

     return targetModel;
 }
// Example #5
// 0
        /// <summary>
        /// Produces a deskewed copy of the model. When no parameters are supplied (null), they are obtained
        /// from <c>DetectDeskewParameters</c>, which is intended to yield a model whose text lines are
        /// aligned strictly horizontally.
        /// </summary>
        /// <param name="model">The model to transform</param>
        /// <param name="deskewParameters">Transformation parameters, or null to have them detected from the model</param>
        /// <returns>
        /// A new model sized by the parameters' target width and height, sharing the source grid unit,
        /// whose bounding boxes were converted with <c>DeskewBoundingBox</c>
        /// </returns>
        public static TextGeometryModel DeskewModel(this TextGeometryModel model, DeskewParameters deskewParameters = null)
        {
            var effectiveParameters = deskewParameters ?? DetectDeskewParameters(model);
            var deskewTransform     = effectiveParameters.Transform;

            var deskewedPageBox = new BoundingBox(0, 0, effectiveParameters.TargetWidth, effectiveParameters.TargetHeight);
            var deskewedModel   = new TextGeometryModel(deskewedPageBox, model.GridUnit);

            ModelGeometryTransformer.TransformModelGeometry(
                model,
                deskewedModel,
                sourceBox => DeskewBoundingBox(sourceBox, deskewTransform));

            return deskewedModel;
        }
// Example #6
// 0
        /// <summary>
        /// Derives deskew parameters for the model: every line returned by <c>Lines()</c> is handed to
        /// <c>ProcessLine</c> together with a shared list of angles (presumably filled in by that helper),
        /// and the list is then reduced to a single angle by <c>CalculateDeskewAngle</c>.
        /// </summary>
        /// <param name="textGeometryModel">The model to analyse.</param>
        /// <returns>
        /// Parameters built from the page box's <c>XMax</c> and <c>YMax</c> and the calculated deskew angle.
        /// </returns>
        public static DeskewParameters DetectDeskewParameters(this TextGeometryModel textGeometryModel)
        {
            var angles = new List <double>();

            foreach (var textLine in textGeometryModel.Lines())
            {
                ProcessLine(textLine, angles);
            }

            var page  = textGeometryModel.PageBox;
            var angle = CalculateDeskewAngle(angles);
            var parameters = new DeskewParameters(page.XMax, page.YMax, angle);

            return parameters;
        }
        /// <summary>
        /// Creates a copy of the model without empty elements. Each text block is cleaned with its own
        /// <c>RemoveEmptyElements()</c> and kept only if the cleaned block still has at least one paragraph
        /// or standalone word; every table is passed through <c>RemoveEmptyElementsInTable</c> and always kept.
        /// </summary>
        /// <param name="model">The model to clean.</param>
        /// <returns>A new model with the same page box and grid unit as <paramref name="model"/>.</returns>
        public static TextGeometryModel RemoveEmptyElements(this TextGeometryModel model)
        {
            var cleanedModel = new TextGeometryModel(model.PageBox, model.GridUnit);

            foreach (var sourceBlock in model.TextBlocks())
            {
                var cleanedBlock = sourceBlock.RemoveEmptyElements();

                // Skip blocks that ended up with neither paragraphs nor standalone words.
                if (!cleanedBlock.Paragraphs().Any() && !cleanedBlock.StandaloneWords().Any())
                {
                    continue;
                }

                cleanedModel.AddTextBlock(cleanedBlock);
            }

            foreach (var sourceTable in model.Tables())
            {
                cleanedModel.AddTable(RemoveEmptyElementsInTable(sourceTable));
            }

            return cleanedModel;
        }
        /// <summary>
        /// Renders the model as a debug HTML page at paragraph level: the debug header, an optional
        /// page-image box, then for every text block returned by <c>AllTextBlocks()</c> its bounding box
        /// followed by one box per paragraph carrying the paragraph text, and finally the debug footer.
        /// </summary>
        /// <param name="model">The model to render.</param>
        /// <param name="imageFileName">
        /// File name of the page image to reference from the page; when null no image box is emitted.
        /// </param>
        /// <returns>The generated HTML text.</returns>
        public static string ToHtmlParagraphs(this TextGeometryModel model, string imageFileName = null)
        {
            var html = new StringBuilder(100000);

            html.Append(HTMLDebugHeader).Append("\n");

            if (imageFileName != null)
            {
                var pageBox = model.PageBox;
                html.AppendFormat(HTMLDebugImageBoxTemplate, pageBox.Width, pageBox.Height, imageFileName);
            }

            foreach (var block in model.AllTextBlocks())
            {
                var blockBox = block.BoundingBox;
                html.AppendFormat(HTMLDebugTextBoxTemplate, blockBox.XMin, blockBox.YMin, blockBox.Width, blockBox.Height);

                foreach (var paragraph in block.Paragraphs())
                {
                    var paragraphBox  = paragraph.BoundingBox;
                    var paragraphText = paragraph.AsText(true);
                    html.AppendFormat(HTMLDebugParagraphBoxWithTextTemplate, paragraphBox.XMin, paragraphBox.YMin, paragraphBox.Width, paragraphBox.Height, paragraphText, "");
                }
            }

            html.Append(HTMLDebugFooter).Append("\n");
            return html.ToString();
        }
 /// <summary>
 /// Enumerates every line of text in the given model by walking all text blocks
 /// (as returned by <c>AllTextBlocks()</c>), their paragraphs, and each paragraph's lines, in that order.
 /// </summary>
 /// <param name="model">The model to read lines from</param>
 /// <returns>A lazily evaluated sequence of the model's lines</returns>
 public static IEnumerable <GMLine> Lines(this TextGeometryModel model)
 {
     return from block in model.AllTextBlocks()
            from paragraph in block.Paragraphs()
            from line in paragraph.Lines()
            select line;
 }