/// <summary>
/// Builds a new text block that mirrors the paragraph / line / word structure and the
/// standalone words of <paramref name="textBlock"/>, with every bounding box passed
/// through <paramref name="boxConverter"/>.
/// </summary>
/// <param name="textBlock">The text block to copy; it is only read.</param>
/// <param name="boxConverter">Maps each source bounding box to the box stored in the copy.</param>
/// <returns>
/// A newly created <see cref="GMTextBlock"/>. Word text and accuracy are carried over
/// unchanged; only the bounding boxes are converted.
/// </returns>
private static GMTextBlock TransformTextBlock(GMTextBlock textBlock, Func<BoundingBox, BoundingBox> boxConverter)
{
    var blockBox = boxConverter(textBlock.BoundingBox);
    var result = new GMTextBlock(blockBox);

    // Rebuild the nested paragraph -> line -> word hierarchy, converting each box on the way.
    foreach (var sourceParagraph in textBlock.Paragraphs())
    {
        var paragraphBox = boxConverter(sourceParagraph.BoundingBox);
        var copiedParagraph = result.AddParagraph(paragraphBox);

        foreach (var sourceLine in sourceParagraph.Lines())
        {
            var lineBox = boxConverter(sourceLine.BoundingBox);
            var copiedLine = copiedParagraph.AddLine(lineBox);

            foreach (var sourceWord in sourceLine.Words())
            {
                var wordBox = boxConverter(sourceWord.BoundingBox);
                copiedLine.AddWord(new GMWord(wordBox, sourceWord.Text, sourceWord.Accuracy));
            }
        }
    }

    // Standalone words are attached directly to the block, outside any paragraph.
    foreach (var sourceWord in textBlock.StandaloneWords())
    {
        var wordBox = boxConverter(sourceWord.BoundingBox);
        result.AddStandaloneWord(new GMWord(wordBox, sourceWord.Text, sourceWord.Accuracy));
    }

    return result;
}
/// <summary>
/// Creates a copy of <paramref name="textBlock"/> without empty elements: words whose text
/// is null, empty or whitespace-only are dropped, as are lines left without words and
/// paragraphs left without lines. Bounding boxes, text and accuracy of the kept elements
/// are copied unchanged.
/// </summary>
/// <param name="textBlock">The text block to filter; it is only read.</param>
/// <returns>A newly created <see cref="GMTextBlock"/> holding only the non-empty elements.</returns>
private static GMTextBlock RemoveEmptyElements(this GMTextBlock textBlock)
{
    var textBlockNew = new GMTextBlock(textBlock.BoundingBox);

    foreach (var paragraph in textBlock.Paragraphs())
    {
        var paragraphNew = new GMParagraph(paragraph.BoundingBox);

        foreach (var line in paragraph.Lines())
        {
            var lineNew = new GMLine(line.BoundingBox);

            foreach (var word in line.Words())
            {
                if (!string.IsNullOrWhiteSpace(word.Text))
                {
                    lineNew.AddWord(new GMWord(word.BoundingBox, word.Text, word.Accuracy));
                }
            }

            // Keep the line only if at least one word survived the filter.
            if (lineNew.Words().Any())
            {
                paragraphNew.AddLine(lineNew);
            }
        }

        // Keep the paragraph only if at least one line survived.
        if (paragraphNew.Lines().Any())
        {
            textBlockNew.AddParagraph(paragraphNew);
        }
    }

    // Standalone words belong to the block, not to a paragraph, so they are copied exactly
    // once here, after the paragraph loop. Copying them inside that loop would add them once
    // per paragraph and skip them entirely for a block without paragraphs.
    foreach (var word in textBlock.StandaloneWords())
    {
        if (!string.IsNullOrWhiteSpace(word.Text))
        {
            textBlockNew.AddStandaloneWord(new GMWord(word.BoundingBox, word.Text, word.Accuracy));
        }
    }

    return textBlockNew;
}