/// <summary>
/// Creates a new paragraph for the given bounding box, hands it to the
/// <c>AddParagraph</c> overload that accepts a paragraph, and returns it.
/// </summary>
/// <param name="boundingBox">Bounding box passed to the new paragraph's constructor.</param>
/// <returns>The newly created paragraph.</returns>
public GMParagraph AddParagraph(BoundingBox boundingBox)
{
    var created = new GMParagraph(boundingBox);
    AddParagraph(created);
    return created;
}
/// <summary>
/// Renders the paragraph as plain text. Words on a line are joined with runs of
/// spaces whose length approximates the horizontal gap between the words,
/// measured in average character widths of that line.
/// </summary>
/// <param name="paragraph">Paragraph whose lines and words are rendered.</param>
/// <param name="useLF">
/// When <c>true</c>, lines are separated by '\n'; otherwise by a single space.
/// </param>
/// <returns>The assembled text.</returns>
public static string AsText(this GMParagraph paragraph, bool useLF = true)
{
    var buffer = new StringBuilder();

    foreach (var line in paragraph.Lines())
    {
        // A separator is only written once the buffer already holds some text.
        if (buffer.Length > 0)
        {
            buffer.Append(useLF ? '\n' : ' ');
        }

        var words = line.Words().ToList();
        if (words.Count == 0)
        {
            continue;
        }

        double sumWidth = 0;
        var charCount = 0;
        foreach (var word in words)
        {
            sumWidth += word.BoundingBox.Width;
            charCount += word.Text.Length;
        }

        // Average width of one character on this line. Left at 0 when the line
        // has no characters, so the division can never produce NaN/Infinity.
        var averageWidth = charCount > 0 ? sumWidth / charCount : 0.0;

        buffer.Append(words[0].Text);
        for (var iIdx = 1; iIdx < words.Count; iIdx++)
        {
            var curWord = words[iIdx];
            var distance = HDistance(words[iIdx - 1].BoundingBox, curWord.BoundingBox);

            // Always emit at least one space. The estimate is only trusted when
            // it is a real number greater than one: a non-positive or NaN average
            // width, or a negative distance (e.g. overlapping boxes), would
            // otherwise yield a negative or undefined count and make the
            // space-run construction throw.
            var spacesCount = 1;
            if (averageWidth > 0)
            {
                var estimated = Math.Round(distance / averageWidth);
                if (estimated > 1)
                {
                    spacesCount = (int)estimated;
                }
            }

            buffer.Append(' ', spacesCount);
            buffer.Append(curWord.Text);
        }
    }

    return buffer.ToString();
}
/// <summary>
/// Builds a copy of the text block that omits words whose text is null, empty
/// or whitespace-only, lines left without words, and paragraphs left without lines.
/// Standalone words of the block are copied once, under the same blank-text filter.
/// </summary>
/// <param name="textBlock">Source text block; it is not modified.</param>
/// <returns>A new text block with the same bounding box and only non-empty content.</returns>
private static GMTextBlock RemoveEmptyElements(this GMTextBlock textBlock)
{
    var textBlockNew = new GMTextBlock(textBlock.BoundingBox);

    foreach (var paragraph in textBlock.Paragraphs())
    {
        var paragraphNew = new GMParagraph(paragraph.BoundingBox);

        foreach (var line in paragraph.Lines())
        {
            var lineNew = new GMLine(line.BoundingBox);

            foreach (var word in line.Words())
            {
                if (!string.IsNullOrWhiteSpace(word.Text))
                {
                    lineNew.AddWord(new GMWord(word.BoundingBox, word.Text, word.Accuracy));
                }
            }

            // Keep the line only if at least one word survived the filter.
            if (lineNew.Words().Any())
            {
                paragraphNew.AddLine(lineNew);
            }
        }

        // Keep the paragraph only if at least one line survived.
        if (paragraphNew.Lines().Any())
        {
            textBlockNew.AddParagraph(paragraphNew);
        }
    }

    // Standalone words are read from the block itself, not from a paragraph, so
    // they are copied exactly once here. Doing this inside the paragraph loop
    // would duplicate them per paragraph and lose them when there are no paragraphs.
    foreach (var word in textBlock.StandaloneWords())
    {
        if (!string.IsNullOrWhiteSpace(word.Text))
        {
            textBlockNew.AddStandaloneWord(new GMWord(word.BoundingBox, word.Text, word.Accuracy));
        }
    }

    return textBlockNew;
}
/// <summary>
/// Adds the given paragraph to the <c>paragraphs</c> collection.
/// </summary>
/// <param name="para">Paragraph to add; it is stored as-is, without copying.</param>
public void AddParagraph(GMParagraph para)
{
    paragraphs.Add(para);
}
/// <summary>
/// Finds the lowest <c>Accuracy</c> among all words in all lines of the paragraph.
/// The search starts from 100, so the result never exceeds 100 and is exactly
/// 100 when the paragraph contains no words.
/// </summary>
/// <param name="paragraph">Paragraph whose words are inspected.</param>
/// <returns>The smallest word accuracy found, capped at 100.</returns>
public static int MinAccuracy(this GMParagraph paragraph)
{
    var lowest = 100;

    foreach (var line in paragraph.Lines())
    {
        foreach (var word in line.Words())
        {
            if (word.Accuracy < lowest)
            {
                lowest = word.Accuracy;
            }
        }
    }

    return lowest;
}