/// <summary>
/// Builds an HTML debug rendering of the model from the HTMLDebug* templates:
/// the model's text blocks, then every table box with the text blocks of its cells,
/// then one box per word.
/// </summary>
/// <param name="model">The model to render.</param>
/// <param name="imageFileName">
/// Optional image file name. When not null, an image box formatted with the page box
/// width and height is emitted right after the header.
/// </param>
/// <returns>The generated markup as a single string.</returns>
public static string ToHtml(this TextGeometryModel model, string imageFileName = null)
{
    var html = new StringBuilder(100000);
    html.Append(HTMLDebugHeader);
    html.Append("\n");

    if (imageFileName != null)
    {
        html.AppendFormat(HTMLDebugImageBoxTemplate, model.PageBox.Width, model.PageBox.Height, imageFileName);
    }

    // Text blocks owned directly by the model.
    foreach (var block in model.TextBlocks())
    {
        AppendTextBlock(block, html);
    }

    // Each table contributes its own box followed by the text block of every cell.
    foreach (var table in model.Tables())
    {
        var tableBox = table.BoundingBox;
        html.AppendFormat(HTMLDebugTableBoxTemplate, tableBox.XMin, tableBox.YMin, tableBox.Width, tableBox.Height);

        foreach (var cell in table.Cells())
        {
            AppendTextBlock(cell.TextBlock, html);
        }
    }

    // One box per word, styled by GetBackgroundStyle according to the word's accuracy.
    foreach (var word in model.Words())
    {
        var wordBox = word.BoundingBox;
        html.AppendFormat(
            HTMLDebugWordBoxTemplate,
            wordBox.XMin,
            wordBox.YMin,
            wordBox.Width,
            wordBox.Height,
            word.Text,
            GetBackgroundStyle(word.Accuracy));
    }

    html.Append(HTMLDebugFooter);
    html.Append("\n");

    // NOTE(review): replacing "&" with "&" leaves the text unchanged - confirm whether
    // an HTML entity was intended as the replacement here.
    return html.ToString().Replace("&", "&");
}
/// <summary>
/// Builds a new model whose page box and element bounding boxes are the source ones
/// passed through ScaleBoundingBox with the given factor.
/// </summary>
/// <param name="model">The model to scale; its content is read and transformed into the new model.</param>
/// <param name="scaleFactor">The factor handed to ScaleBoundingBox and applied to the grid unit divisor.</param>
/// <returns>The newly created, scaled model.</returns>
public static TextGeometryModel ScaleModel(this TextGeometryModel model, double scaleFactor)
{
    var scaledPageBox = ScaleBoundingBox(model.PageBox, scaleFactor);

    // The grid resolution follows the scale: the divisor is multiplied by the factor and rounded.
    var scaledGridUnit = GridUnit.ByResolution(Round(model.GridUnit.Divisor * scaleFactor));

    var scaledModel = new TextGeometryModel(scaledPageBox, scaledGridUnit);
    ModelGeometryTransformer.TransformModelGeometry(
        model,
        scaledModel,
        sourceBox => ScaleBoundingBox(sourceBox, scaleFactor));

    return scaledModel;
}
/// <summary>
/// Rotates the given model by the given isometric transform.
/// The bounding box of each object of the model is transformed so that it remains the minimal
/// rectangle with sides parallel to the coordinate axes that contains the rotated original box.
/// </summary>
/// <param name="model">The model to process.</param>
/// <param name="transform">The transform to apply to every bounding box.</param>
/// <param name="targetWidth">Width of the page box of the resulting model.</param>
/// <param name="targetHeight">Height of the page box of the resulting model.</param>
/// <returns>A new model with a (0, 0, targetWidth, targetHeight) page box holding the transformed geometry.</returns>
public static TextGeometryModel RotateModel(this TextGeometryModel model, IsometricTransform transform, int targetWidth, int targetHeight)
{
    var rotatedPageBox = new BoundingBox(0, 0, targetWidth, targetHeight);

    // The grid unit is carried over unchanged from the source model.
    var rotatedModel = new TextGeometryModel(rotatedPageBox, model.GridUnit);

    ModelGeometryTransformer.TransformModelGeometry(
        model,
        rotatedModel,
        sourceBox => RotateBoundingBox(sourceBox, transform));

    return rotatedModel;
}
/// <summary>
/// Transforms every text block and table of the source model with the given bounding box
/// converter and adds the results to the target model.
/// </summary>
/// <param name="sourceModel">The model whose text blocks and tables are read.</param>
/// <param name="targetModel">The model that receives the transformed text blocks and tables.</param>
/// <param name="boxConverter">Conversion passed to TransformTextBlock and TransformTable.</param>
/// <returns>The same <paramref name="targetModel"/> instance that was passed in.</returns>
public static TextGeometryModel TransformModelGeometry(TextGeometryModel sourceModel, TextGeometryModel targetModel, Func<BoundingBox, BoundingBox> boxConverter)
{
    // Text blocks first, then tables; each element is transformed right before it is added.
    foreach (var sourceBlock in sourceModel.TextBlocks())
    {
        var transformedBlock = TransformTextBlock(sourceBlock, boxConverter);
        targetModel.AddTextBlock(transformedBlock);
    }

    foreach (var sourceTable in sourceModel.Tables())
    {
        var transformedTable = TransformTable(sourceTable, boxConverter);
        targetModel.AddTable(transformedTable);
    }

    return targetModel;
}
/// <summary>
/// Transforms the given model by rotating it according to the specified set of parameters.
/// When no parameters are supplied (null), they are obtained from DetectDeskewParameters so that
/// the resulting model contains text lines aligned strictly horizontally.
/// </summary>
/// <param name="model">The model to transform.</param>
/// <param name="deskewParameters">The transformation parameters, or null to determine them automatically.</param>
/// <returns>A new model sized by the parameters' target width and height, holding the deskewed geometry.</returns>
public static TextGeometryModel DeskewModel(this TextGeometryModel model, DeskewParameters deskewParameters = null)
{
    // Fall back to automatic detection only when the caller supplied nothing.
    var effectiveParameters = deskewParameters ?? DetectDeskewParameters(model);
    var deskewTransform = effectiveParameters.Transform;

    var deskewedPageBox = new BoundingBox(0, 0, effectiveParameters.TargetWidth, effectiveParameters.TargetHeight);
    var deskewedModel = new TextGeometryModel(deskewedPageBox, model.GridUnit);

    ModelGeometryTransformer.TransformModelGeometry(
        model,
        deskewedModel,
        sourceBox => DeskewBoundingBox(sourceBox, deskewTransform));

    return deskewedModel;
}
/// <summary>
/// Determines deskew parameters for the given model: every line of the model is handed to
/// ProcessLine together with a shared list of angles, the list is reduced to a single angle by
/// CalculateDeskewAngle, and that angle is combined with the page box extents.
/// </summary>
/// <param name="textGeometryModel">The model whose lines and page box are examined.</param>
/// <returns>Parameters constructed from the page box XMax, the page box YMax and the calculated angle.</returns>
public static DeskewParameters DetectDeskewParameters(this TextGeometryModel textGeometryModel)
{
    // NOTE(review): ProcessLine presumably appends the angle(s) it detects for the line
    // to this list - confirm against its implementation.
    var lineAngles = new List<double>();
    foreach (var textLine in textGeometryModel.Lines())
    {
        ProcessLine(textLine, lineAngles);
    }

    var page = textGeometryModel.PageBox;
    var angle = CalculateDeskewAngle(lineAngles);

    return new DeskewParameters(page.XMax, page.YMax, angle);
}
/// <summary>
/// Builds a new model with the same page box and grid unit that contains the cleaned-up
/// elements of the given model. A text block is kept only if, after its own
/// RemoveEmptyElements call, it still has at least one paragraph or standalone word.
/// Every table is kept, after being processed by RemoveEmptyElementsInTable.
/// </summary>
/// <param name="model">The model to clean up.</param>
/// <returns>The newly created model.</returns>
public static TextGeometryModel RemoveEmptyElements(this TextGeometryModel model)
{
    var cleanedModel = new TextGeometryModel(model.PageBox, model.GridUnit);

    foreach (var sourceBlock in model.TextBlocks())
    {
        var cleanedBlock = sourceBlock.RemoveEmptyElements();

        // Skip blocks that have neither paragraphs nor standalone words left.
        if (!cleanedBlock.Paragraphs().Any() && !cleanedBlock.StandaloneWords().Any())
        {
            continue;
        }

        cleanedModel.AddTextBlock(cleanedBlock);
    }

    foreach (var sourceTable in model.Tables())
    {
        cleanedModel.AddTable(RemoveEmptyElementsInTable(sourceTable));
    }

    return cleanedModel;
}
/// <summary>
/// Builds an HTML debug rendering of the model at paragraph level from the HTMLDebug* templates:
/// one box per text block returned by AllTextBlocks(), each followed by one box with text
/// per paragraph of that block.
/// </summary>
/// <param name="model">The model to render.</param>
/// <param name="imageFileName">
/// Optional image file name. When not null, an image box formatted with the page box
/// width and height is emitted right after the header.
/// </param>
/// <returns>The generated markup as a single string.</returns>
public static string ToHtmlParagraphs(this TextGeometryModel model, string imageFileName = null)
{
    var html = new StringBuilder(100000);
    html.Append(HTMLDebugHeader);
    html.Append("\n");

    if (imageFileName != null)
    {
        html.AppendFormat(HTMLDebugImageBoxTemplate, model.PageBox.Width, model.PageBox.Height, imageFileName);
    }

    foreach (var block in model.AllTextBlocks())
    {
        var blockBox = block.BoundingBox;
        html.AppendFormat(HTMLDebugTextBoxTemplate, blockBox.XMin, blockBox.YMin, blockBox.Width, blockBox.Height);

        foreach (var paragraph in block.Paragraphs())
        {
            var paragraphBox = paragraph.BoundingBox;
            var paragraphText = paragraph.AsText(true);

            // The last template argument is deliberately passed as an empty string.
            html.AppendFormat(
                HTMLDebugParagraphBoxWithTextTemplate,
                paragraphBox.XMin,
                paragraphBox.YMin,
                paragraphBox.Width,
                paragraphBox.Height,
                paragraphText,
                "");
        }
    }

    html.Append(HTMLDebugFooter);
    html.Append("\n");

    return html.ToString();
}
/// <summary>
/// Returns all lines of text in the given model, i.e. the lines of every paragraph
/// of every text block returned by AllTextBlocks().
/// </summary>
/// <param name="model">The model whose lines are enumerated.</param>
/// <returns>A sequence of the lines, in text block order and then paragraph order.</returns>
public static IEnumerable<GMLine> Lines(this TextGeometryModel model)
{
    return model
        .AllTextBlocks()
        .SelectMany(block => block.Paragraphs().SelectMany(paragraph => paragraph.Lines()));
}