예제 #1
0
        /// <summary>
        /// Creates a new <see cref="GMParagraph"/> for the given bounding box,
        /// registers it through the <c>AddParagraph(GMParagraph)</c> overload
        /// and hands the new instance back to the caller.
        /// </summary>
        /// <param name="boundingBox">Bounding box passed to the paragraph constructor.</param>
        /// <returns>The paragraph that was just created and added.</returns>
        public GMParagraph AddParagraph(BoundingBox boundingBox)
        {
            var created = new GMParagraph(boundingBox);
            AddParagraph(created);

            return created;
        }
        /// <summary>
        /// Renders the paragraph as plain text.
        /// </summary>
        /// <remarks>
        /// Words on a line are separated by a run of spaces whose length is
        /// estimated as the <c>HDistance</c> between the two neighbouring word
        /// boxes divided by the line's average width per character. At least
        /// one space is always emitted between two words.
        /// </remarks>
        /// <param name="paragraph">Paragraph whose lines and words are rendered.</param>
        /// <param name="useLF">
        /// When <c>true</c> lines are separated by <c>'\n'</c>; otherwise by a single space.
        /// </param>
        /// <returns>The concatenated text of all lines of the paragraph.</returns>
        public static string AsText(this GMParagraph paragraph, bool useLF = true)
        {
            var buffer = new StringBuilder();

            foreach (var line in paragraph.Lines())
            {
                // Separator goes in front of every line except the very first output.
                if (buffer.Length > 0)
                {
                    buffer.Append(useLF ? '\n' : ' ');
                }

                var words = line.Words().ToList();
                if (words.Count == 0)
                {
                    continue;
                }

                double sumWidth  = 0;
                var    charCount = 0;
                foreach (var word in words)
                {
                    sumWidth  += word.BoundingBox.Width;
                    charCount += word.Text.Length;
                }
                // May be NaN/Infinity (no characters) or 0 (zero-width boxes);
                // both cases are handled where the value is used below.
                var averageWidth = sumWidth / charCount;

                buffer.Append(words[0].Text);
                for (var iIdx = 1; iIdx < words.Count; iIdx++)
                {
                    var curWord  = words[iIdx];
                    var distance = HDistance(words[iIdx - 1].BoundingBox, curWord.BoundingBox);

                    // Only divide by a usable (positive, non-NaN) average width;
                    // otherwise fall back to the minimum of one space.
                    var ratio = averageWidth > 0 ? distance / averageWidth : 0;

                    // Clamp to >= 1: a negative distance (e.g. overlapping boxes) would
                    // otherwise produce a negative count and make new string(' ', n) throw.
                    var spacesCount = Math.Max(1, (int)Math.Round(ratio));

                    buffer.Append(new string(' ', spacesCount));
                    buffer.Append(curWord.Text);
                }
            }
            return(buffer.ToString());
        }
        /// <summary>
        /// Builds a copy of <paramref name="textBlock"/> that omits empty content:
        /// words whose text is null or whitespace-only, lines left without words,
        /// and paragraphs left without lines. Standalone words of the block are
        /// copied once, with the same empty-text filter applied.
        /// </summary>
        /// <param name="textBlock">Source text block; it is not modified.</param>
        /// <returns>A new <see cref="GMTextBlock"/> with the same bounding box and only non-empty elements.</returns>
        private static GMTextBlock RemoveEmptyElements(this GMTextBlock textBlock)
        {
            var textBlockNew = new GMTextBlock(textBlock.BoundingBox);

            foreach (var paragraph in textBlock.Paragraphs())
            {
                var paragraphNew = new GMParagraph(paragraph.BoundingBox);
                foreach (var line in paragraph.Lines())
                {
                    var lineNew = new GMLine(line.BoundingBox);
                    foreach (var word in line.Words())
                    {
                        if (!string.IsNullOrWhiteSpace(word.Text))
                        {
                            lineNew.AddWord(new GMWord(word.BoundingBox, word.Text, word.Accuracy));
                        }
                    }
                    // Keep the line only if at least one word survived the filter.
                    if (lineNew.Words().Any())
                    {
                        paragraphNew.AddLine(lineNew);
                    }
                }
                // Keep the paragraph only if at least one line survived.
                if (paragraphNew.Lines().Any())
                {
                    textBlockNew.AddParagraph(paragraphNew);
                }
            }

            // Standalone words belong to the block, not to a paragraph, so they are
            // copied exactly once here. Doing this inside the paragraph loop would
            // duplicate them per paragraph and drop them when there are no paragraphs.
            foreach (var word in textBlock.StandaloneWords())
            {
                if (!string.IsNullOrWhiteSpace(word.Text))
                {
                    textBlockNew.AddStandaloneWord(new GMWord(word.BoundingBox, word.Text, word.Accuracy));
                }
            }

            return(textBlockNew);
        }
예제 #4
0
 /// <summary>
 /// Appends the given paragraph to this object's <c>paragraphs</c> collection.
 /// </summary>
 /// <param name="para">Paragraph to add.</param>
 public void AddParagraph(GMParagraph para) => paragraphs.Add(para);
        /// <summary>
        /// Finds the lowest <c>Accuracy</c> among all words of all lines in the paragraph.
        /// </summary>
        /// <param name="paragraph">Paragraph whose words are inspected.</param>
        /// <returns>
        /// The smallest word accuracy, capped at 100; 100 is also returned when
        /// the paragraph contains no words.
        /// </returns>
        public static int MinAccuracy(this GMParagraph paragraph)
        {
            // Start from the cap so an empty paragraph yields 100.
            var lowest = 100;

            foreach (var line in paragraph.Lines())
            {
                foreach (var word in line.Words())
                {
                    if (word.Accuracy < lowest)
                    {
                        lowest = word.Accuracy;
                    }
                }
            }

            return lowest;
        }