/// <summary>
/// Builds an HTML string from <paramref name="model"/> using the HTMLDebug templates:
/// the header, an optional page image box, every text block, a box for every table
/// followed by the text blocks of its cells, a box for every word, and the footer.
/// </summary>
/// <param name="model">Model whose page box, text blocks, tables and words are rendered.</param>
/// <param name="imageFileName">
/// Optional image reference passed to the image box template together with the page
/// width and height. When <c>null</c>, no image box is emitted.
/// </param>
/// <returns>The assembled HTML text.</returns>
/// <remarks>
/// Word text is HTML-encoded before it is placed in the word box template. Markup
/// produced by <c>AppendTextBlock</c> is appended exactly as that helper writes it.
/// </remarks>
public static string ToHtml(this TextGeometryModel model, string imageFileName = null)
{
    // Template arguments are formatted with the invariant culture so that any
    // non-integer coordinate keeps a '.' decimal separator regardless of the
    // culture of the current thread.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var result = new StringBuilder(100000);
    result.Append(HTMLDebugHeader).Append("\n");

    if (imageFileName != null)
    {
        result.AppendFormat(invariant, HTMLDebugImageBoxTemplate, model.PageBox.Width, model.PageBox.Height, imageFileName);
    }

    foreach (var textBlock in model.TextBlocks())
    {
        AppendTextBlock(textBlock, result);
    }

    foreach (var gmTable in model.Tables())
    {
        var bbox = gmTable.BoundingBox;
        result.AppendFormat(invariant, HTMLDebugTableBoxTemplate, bbox.XMin, bbox.YMin, bbox.Width, bbox.Height);

        foreach (var textBlock in gmTable.Cells().Select(c => c.TextBlock))
        {
            AppendTextBlock(textBlock, result);
        }
    }

    foreach (var word in model.Words())
    {
        var bbox = word.BoundingBox;

        // The word text comes from the processed document, so it is encoded here
        // ('&', '<', '>', quotes) instead of being spliced into the markup raw.
        var encodedText = System.Net.WebUtility.HtmlEncode(word.Text);

        result.AppendFormat(
            invariant,
            HTMLDebugWordBoxTemplate,
            bbox.XMin,
            bbox.YMin,
            bbox.Width,
            bbox.Height,
            encodedText,
            GetBackgroundStyle(word.Accuracy));
    }

    result.Append(HTMLDebugFooter).Append("\n");

    // The previous trailing Replace("&", "&") substituted a string with itself and
    // therefore escaped nothing; escaping is now done where the text is inserted.
    return result.ToString();
}
/// <summary>
/// Fills <paramref name="targetModel"/> with the text blocks and tables of
/// <paramref name="sourceModel"/>, each one first passed through
/// <c>TransformTextBlock</c> or <c>TransformTable</c> together with
/// <paramref name="boxConverter"/>.
/// </summary>
/// <param name="sourceModel">Model whose text blocks and tables are read.</param>
/// <param name="targetModel">Model that receives the transformed elements.</param>
/// <param name="boxConverter">Bounding box mapping handed to the transform helpers.</param>
/// <returns>The same <paramref name="targetModel"/> instance that was passed in.</returns>
public static TextGeometryModel TransformModelGeometry(TextGeometryModel sourceModel, TextGeometryModel targetModel, Func <BoundingBox, BoundingBox> boxConverter)
{
    // Text blocks first, then tables; each element is converted right before it is added.
    foreach (var sourceBlock in sourceModel.TextBlocks())
    {
        var convertedBlock = TransformTextBlock(sourceBlock, boxConverter);
        targetModel.AddTextBlock(convertedBlock);
    }

    foreach (var sourceTable in sourceModel.Tables())
    {
        var convertedTable = TransformTable(sourceTable, boxConverter);
        targetModel.AddTable(convertedTable);
    }

    return targetModel;
}
/// <summary>
/// Produces a new model with the same page box and grid unit as
/// <paramref name="model"/>, containing its text blocks and tables after
/// empty-element removal.
/// </summary>
/// <param name="model">Model to copy from; this method does not add anything to it.</param>
/// <returns>
/// A new <c>TextGeometryModel</c>. A text block is kept only if, after its own
/// <c>RemoveEmptyElements</c> call, it still has at least one paragraph or one
/// standalone word. Every table is kept, after being passed through
/// <c>RemoveEmptyElementsInTable</c>.
/// </returns>
public static TextGeometryModel RemoveEmptyElements(this TextGeometryModel model)
{
    var cleanedModel = new TextGeometryModel(model.PageBox, model.GridUnit);

    foreach (var originalBlock in model.TextBlocks())
    {
        var cleanedBlock = originalBlock.RemoveEmptyElements();

        // Skip blocks that have neither paragraphs nor standalone words left.
        if (!cleanedBlock.Paragraphs().Any() && !cleanedBlock.StandaloneWords().Any())
        {
            continue;
        }

        cleanedModel.AddTextBlock(cleanedBlock);
    }

    foreach (var originalTable in model.Tables())
    {
        cleanedModel.AddTable(RemoveEmptyElementsInTable(originalTable));
    }

    return cleanedModel;
}