Beispiel #1
0
        /// <summary>
        /// Splits every element of <c>_textElements</c> wherever the horizontal gap between two
        /// consecutive characters is larger than the element's <c>MaxWidth()</c>, then replaces
        /// <c>_textElements</c> with the resulting list.
        /// </summary>
        /// <remarks>
        /// The gap between character <c>i - 1</c> and character <c>i</c> is computed as
        /// <c>Characters[i].Displacement - (Characters[i - 1].Displacement + Characters[i - 1].Width)</c>.
        /// An element in which no gap exceeds the threshold (including elements with fewer than two
        /// characters) is carried over as the same instance; otherwise it is replaced by the pieces
        /// returned from <c>SubPart</c>, in left-to-right order.
        /// NOTE(review): <c>SubPart(start, end)</c> is used here as if <c>end</c> were exclusive —
        /// confirm against its implementation.
        /// </remarks>
        private void SplitTextElements()
        {
            var textElementsSplitted = new List<PdfTextElement>();

            // The source list is replaced wholesale at the end, so a plain enumeration is enough;
            // repeatedly removing the head element would shift the whole list on every iteration.
            foreach (PdfTextElement elem in _textElements)
            {
                // Gap threshold for this element.
                // NOTE(review): presumably the width of the widest character — confirm in MaxWidth().
                double maxWidth = elem.MaxWidth();

                // Index of the first character of the piece currently being accumulated.
                int prevBreak = 0;
                for (int i = 1; i < elem.Characters.Count; i++)
                {
                    var previousChar = elem.Characters[i - 1];
                    double prevCharEnd = previousChar.Displacement + previousChar.Width;
                    double charSeparation = elem.Characters[i].Displacement - prevCharEnd;
                    if (charSeparation <= maxWidth)
                    {
                        continue;
                    }

                    // Gap too wide: close the piece [prevBreak, i) and start a new one at i.
                    textElementsSplitted.Add(elem.SubPart(prevBreak, i));
                    prevBreak = i;
                }

                if (prevBreak == 0)
                {
                    // No break was found (a break always sets prevBreak >= 1): keep the element as is.
                    textElementsSplitted.Add(elem);
                }
                else
                {
                    // Emit the trailing piece that follows the last break.
                    textElementsSplitted.Add(elem.SubPart(prevBreak, elem.Characters.Count));
                }
            }

            _textElements = textElementsSplitted;
        }
Beispiel #2
0
        /// <summary>
        /// Merges text elements that lie on the same line and are horizontally close to each other
        /// into single block elements, then replaces <c>_textElements</c> with the merged list.
        /// </summary>
        /// <remarks>
        /// For each seed element (taken from the front of <c>_textElements</c>) the remaining
        /// elements are scanned for neighbours that:
        /// have the same <c>Font</c> and <c>FontSize</c> as the seed,
        /// have a <c>GetY()</c> within 0.001 of the seed's, and
        /// whose horizontal extent intersects the current block extent widened on both sides by the
        /// neighbour's <c>MaxWidth()</c>.
        /// Every accepted neighbour widens the block and restarts the scan, because the larger
        /// extent may now reach elements that were rejected earlier. A seed without neighbours is
        /// carried over as the same instance; otherwise a new <c>PdfTextElement</c> is built whose
        /// characters are the neighbours' characters re-based to the block's left edge and ordered
        /// by displacement, and whose <c>Childs</c> holds the merged elements.
        /// </remarks>
        private void JoinTextElements()
        {
            var textElementsCondensed = new List<PdfTextElement>();

            while (_textElements.Count > 0)
            {
                // Take the head element as the seed of a new block. RemoveAt drops exactly that
                // entry; Remove(item) would run an equality search for an index we already know.
                PdfTextElement elem = _textElements[0];
                _textElements.RemoveAt(0);

                double blockY    = elem.GetY();
                double blockXMin = elem.GetX();
                double blockXMax = blockXMin + elem.VisibleWidth;

                // The seed is always the first member of its own block.
                var textElementNeighbours = new List<PdfTextElement>();
                textElementNeighbours.Add(elem);

                // Search neighbours among the elements that have not been consumed yet.
                int i = 0;
                while (i < _textElements.Count)
                {
                    PdfTextElement neighbour = _textElements[i];

                    // Only elements with the same font and size as the seed may be merged.
                    if (neighbour.Font != elem.Font || neighbour.FontSize != elem.FontSize)
                    {
                        i++;
                        continue;
                    }

                    // Only elements on (practically) the same Y position may be merged.
                    double neighbourY = neighbour.GetY();
                    if (Math.Abs(neighbourY - blockY) > 0.001)
                    {
                        i++;
                        continue;
                    }

                    // Horizontal tolerance used to bridge small gaps between block and neighbour.
                    double maxWidth = neighbour.MaxWidth();

                    double neighbourXMin = neighbour.GetX();
                    double neighbourXMax = neighbourXMin + neighbour.VisibleWidth;
                    double auxBlockXMin  = blockXMin - maxWidth;
                    double auxBlockXMax  = blockXMax + maxWidth;

                    // Interval intersection test between the widened block and the neighbour.
                    if (auxBlockXMax >= neighbourXMin && neighbourXMax >= auxBlockXMin)
                    {
                        // Remove precisely the entry that was just examined.
                        _textElements.RemoveAt(i);
                        textElementNeighbours.Add(neighbour);

                        if (blockXMax < neighbourXMax)
                        {
                            blockXMax = neighbourXMax;
                        }
                        if (blockXMin > neighbourXMin)
                        {
                            blockXMin = neighbourXMin;
                        }

                        // The block grew, so previously rejected elements may now be in reach.
                        i = 0;
                        continue;
                    }

                    i++;
                }

                if (textElementNeighbours.Count == 1)
                {
                    // Nothing to merge: keep the seed element unchanged.
                    textElementsCondensed.Add(elem);
                    continue;
                }

                // Join neighbours: copy every character, re-basing its displacement from the
                // neighbour's own X origin to the left edge of the merged block.
                var chars = new List<PdfCharElement>();
                foreach (PdfTextElement neighbour in textElementNeighbours)
                {
                    double neighbourXMin = neighbour.GetX();
                    foreach (PdfCharElement c in neighbour.Characters)
                    {
                        chars.Add(new PdfCharElement
                        {
                            Char         = c.Char,
                            Displacement = (c.Displacement + neighbourXMin) - blockXMin,
                            Width        = c.Width,
                        });
                    }
                }

                // OrderBy is a stable sort, so characters with equal displacement keep the order
                // in which their elements were collected.
                chars = chars.OrderBy(c => c.Displacement).ToList();

                var sbText = new StringBuilder();
                foreach (PdfCharElement c in chars)
                {
                    sbText.Append(c.Char);
                }
                string joinedText = sbText.ToString();

                PdfTextElement blockElem = new PdfTextElement
                {
                    // NOTE(review): all merged elements share elem.Font, yet the block gets no
                    // font. Kept as in the original — confirm that null is intended here.
                    Font          = null,
                    FontSize      = elem.FontSize,
                    Matrix        = elem.Matrix.Copy(),
                    RawText       = joinedText,
                    VisibleText   = joinedText,
                    VisibleWidth  = blockXMax - blockXMin,
                    VisibleHeight = elem.VisibleHeight,
                    Characters    = chars,
                    Childs        = textElementNeighbours,
                };

                // Move the copied matrix to the block's left edge.
                // NOTE(review): presumably [0, 2] is the X translation that GetX() reads — confirm.
                blockElem.Matrix.Matrix[0, 2] = blockXMin;
                textElementsCondensed.Add(blockElem);
            }

            _textElements = textElementsCondensed;
        }