/// <summary>
/// Creates a new text block with the same paragraph / line / word structure as
/// <paramref name="textBlock"/>, passing every bounding box through
/// <paramref name="boxConverter"/>.
/// </summary>
/// <param name="textBlock">The text block to copy.</param>
/// <param name="boxConverter">
/// Function applied to the bounding box of the block and of each paragraph, line,
/// word and standalone word.
/// </param>
/// <returns>
/// A new <c>GMTextBlock</c>; word text and accuracy are carried over unchanged.
/// </returns>
private static GMTextBlock TransformTextBlock(GMTextBlock textBlock, Func<BoundingBox, BoundingBox> boxConverter)
{
    var converted = new GMTextBlock(boxConverter(textBlock.BoundingBox));

    // Rebuild the paragraph -> line -> word hierarchy on the new block.
    foreach (var sourceParagraph in textBlock.Paragraphs())
    {
        var convertedParagraph = converted.AddParagraph(boxConverter(sourceParagraph.BoundingBox));

        foreach (var sourceLine in sourceParagraph.Lines())
        {
            var convertedLine = convertedParagraph.AddLine(boxConverter(sourceLine.BoundingBox));

            foreach (var sourceWord in sourceLine.Words())
            {
                convertedLine.AddWord(
                    new GMWord(boxConverter(sourceWord.BoundingBox), sourceWord.Text, sourceWord.Accuracy));
            }
        }
    }

    // Standalone words hang directly off the block, outside any paragraph.
    foreach (var sourceWord in textBlock.StandaloneWords())
    {
        converted.AddStandaloneWord(
            new GMWord(boxConverter(sourceWord.BoundingBox), sourceWord.Text, sourceWord.Accuracy));
    }

    return converted;
}
/// <summary>
/// Creates a new text block with the given bounding box, adds it to
/// <c>textBlocks</c> and returns it.
/// </summary>
/// <param name="boundingBox">Bounding box passed to the new block's constructor.</param>
/// <returns>The newly created text block.</returns>
public GMTextBlock AddTextBlock([NotNull] BoundingBox boundingBox)
{
    var created = new GMTextBlock(boundingBox);
    textBlocks.Add(created);
    return created;
}
/// <summary>
/// Initializes a table cell from its text block and its grid position.
/// </summary>
/// <param name="textBlock">Text block stored in <c>TextBlock</c>.</param>
/// <param name="rowIndex">Value stored in <c>RowIndex</c>.</param>
/// <param name="colIndex">Value stored in <c>ColIndex</c>.</param>
/// <param name="rowSpan">Value stored in <c>RowSpan</c>.</param>
/// <param name="colSpan">Value stored in <c>ColSpan</c>.</param>
public GMTableCell(
    [NotNull] GMTextBlock textBlock,
    int rowIndex,
    int colIndex,
    int rowSpan,
    int colSpan)
{
    this.TextBlock = textBlock;
    this.RowIndex = rowIndex;
    this.ColIndex = colIndex;
    this.RowSpan = rowSpan;
    this.ColSpan = colSpan;
}
/// <summary>
/// Wraps <paramref name="textBlock"/> in a table cell whose row/column index and span
/// are derived from where the block's bounding box falls among the separators.
/// </summary>
/// <param name="textBlock">The text block that becomes the cell content.</param>
/// <param name="hSeparators">
/// Separator positions compared against the box's <c>YMin</c>/<c>YMax</c>
/// (presumably y-coordinates of horizontal separators; confirm against the caller).
/// </param>
/// <param name="vSeparators">
/// Separator positions compared against the box's <c>XMin</c>/<c>XMax</c>
/// (presumably x-coordinates of vertical separators; confirm against the caller).
/// </param>
/// <returns>A new <c>GMTableCell</c> for the text block.</returns>
private static GMTableCell CreateTableCell(GMTextBlock textBlock, int[] hSeparators, int[] vSeparators)
{
    var bounds = textBlock.BoundingBox;

    // How many separators lie at or before each edge of the box.
    int separatorsUpToTop = hSeparators.Count(separator => separator <= bounds.YMin);
    int separatorsUpToBottom = hSeparators.Count(separator => separator <= bounds.YMax);
    int separatorsUpToLeft = vSeparators.Count(separator => separator <= bounds.XMin);
    int separatorsUpToRight = vSeparators.Count(separator => separator <= bounds.XMax);

    // The index is the count at the leading edge minus one; the span is the
    // trailing-edge count minus the index minus one.
    int firstRow = separatorsUpToTop - 1;
    int firstCol = separatorsUpToLeft - 1;
    int spannedRows = separatorsUpToBottom - firstRow - 1;
    int spannedCols = separatorsUpToRight - firstCol - 1;

    return new GMTableCell(textBlock, firstRow, firstCol, spannedRows, spannedCols);
}
/// <summary>
/// Appends debug markup for the bounding boxes of a text block, its paragraphs and
/// their lines to <paramref name="result"/>.
/// </summary>
/// <remarks>
/// Each box is formatted with the matching <c>HTMLDebug*BoxTemplate</c> using the
/// arguments XMin, YMin, Width, Height (in that order). Words and standalone words
/// are not emitted.
/// </remarks>
/// <param name="textBlock">The text block whose boxes are rendered.</param>
/// <param name="result">The builder that receives the formatted markup.</param>
private static void AppendTextBlock(GMTextBlock textBlock, StringBuilder result)
{
    // AppendFormat writes straight into the builder instead of allocating an
    // intermediate string per box via string.Format.
    var blockBox = textBlock.BoundingBox;
    result.AppendFormat(HTMLDebugTextBoxTemplate, blockBox.XMin, blockBox.YMin, blockBox.Width, blockBox.Height);

    foreach (var paragraph in textBlock.Paragraphs())
    {
        var paragraphBox = paragraph.BoundingBox;
        result.AppendFormat(HTMLDebugParagraphBoxTemplate, paragraphBox.XMin, paragraphBox.YMin, paragraphBox.Width, paragraphBox.Height);

        foreach (var line in paragraph.Lines())
        {
            var lineBox = line.BoundingBox;
            result.AppendFormat(HTMLDebugLineBoxTemplate, lineBox.XMin, lineBox.YMin, lineBox.Width, lineBox.Height);
        }
    }
}
/// <summary>
/// Returns a copy of <paramref name="textBlock"/> without empty elements: words whose
/// text is empty or whitespace-only are dropped, lines left without words are dropped,
/// and paragraphs left without lines are dropped.
/// </summary>
/// <remarks>
/// Bounding boxes of the surviving elements are passed through as-is; they are not
/// recomputed after removal. Standalone words are filtered by the same
/// whitespace rule and copied exactly once.
/// </remarks>
/// <param name="textBlock">The text block to clean up; it is not modified.</param>
/// <returns>A new <c>GMTextBlock</c> containing only non-empty elements.</returns>
private static GMTextBlock RemoveEmptyElements(this GMTextBlock textBlock)
{
    var textBlockNew = new GMTextBlock(textBlock.BoundingBox);

    foreach (var paragraph in textBlock.Paragraphs())
    {
        var paragraphNew = new GMParagraph(paragraph.BoundingBox);

        foreach (var line in paragraph.Lines())
        {
            var lineNew = new GMLine(line.BoundingBox);

            foreach (var word in line.Words())
            {
                if (word.Text.Trim().Length > 0)
                {
                    lineNew.AddWord(new GMWord(word.BoundingBox, word.Text, word.Accuracy));
                }
            }

            // Keep the line only if at least one word survived.
            if (lineNew.Words().Any())
            {
                paragraphNew.AddLine(lineNew);
            }
        }

        // Keep the paragraph only if at least one line survived.
        if (paragraphNew.Lines().Any())
        {
            textBlockNew.AddParagraph(paragraphNew);
        }
    }

    // Standalone words belong to the block, not to a paragraph, so they must be
    // copied outside the paragraph loop. Copying them inside it duplicated them once
    // per paragraph and lost them entirely when the block had no paragraphs.
    foreach (var word in textBlock.StandaloneWords())
    {
        if (word.Text.Trim().Length > 0)
        {
            textBlockNew.AddStandaloneWord(new GMWord(word.BoundingBox, word.Text, word.Accuracy));
        }
    }

    return textBlockNew;
}
/// <summary>
/// Adds an existing text block to <c>textBlocks</c>.
/// </summary>
/// <param name="textBlock">The text block to add.</param>
public void AddTextBlock(GMTextBlock textBlock)
{
    textBlocks.Add(textBlock);
}