Пример #1
0
        /// <summary>
        /// Creates a <see cref="PdfTextElement"/> from the currently buffered text
        /// (<c>_sbText</c>) and characters (<c>_listCharacters</c>), using the product of the
        /// current text matrix and the graphics matrix as the element's matrix.
        /// </summary>
        /// <returns>
        /// A new element whose width, height and per-character metrics have been multiplied by
        /// the diagonal entries of its matrix.
        /// </returns>
        private PdfTextElement BuildTextElement()
        {
            var element = new PdfTextElement();

            element.Font     = _font;
            element.FontSize = _fontSize;
            element.Matrix   = _textMatrixCurrent.Multiply(_graphicsMatrix);

            string raw = _sbText.ToString();
            element.RawText     = raw;
            element.VisibleText = PdfString_ToUnicode(raw, _font);

            // The last buffered character determines where the run ends.
            PdfCharElement tail = _listCharacters[_listCharacters.Count - 1];

            // Diagonal matrix entries, applied below as horizontal / vertical factors.
            var scaleX = element.Matrix.Matrix[0, 0];
            var scaleY = element.Matrix.Matrix[1, 1];

            element.VisibleWidth  = (tail.Displacement + tail.Width) * scaleX;
            element.VisibleHeight = (_font.Height * _fontSize) * scaleY;

            // Copy every buffered character with its metrics multiplied by the horizontal factor.
            var scaledChars = new List<PdfCharElement>();
            for (int i = 0; i < _listCharacters.Count; i++)
            {
                PdfCharElement source = _listCharacters[i];
                var scaled = new PdfCharElement();
                scaled.Char         = source.Char;
                scaled.Displacement = source.Displacement * scaleX;
                scaled.Width        = source.Width * scaleX;
                scaledChars.Add(scaled);
            }
            element.Characters = scaledChars;
            element.Childs     = new List<PdfTextElement>();

            return element;
        }
Пример #2
0
        /// <summary>
        /// Finds the element whose text matches <paramref name="field"/> and returns the visible
        /// text of the closest element located to its right among those for which
        /// <c>TextElementHorizontalIntersection</c> with the title is true.
        /// </summary>
        /// <param name="field">Text of the field title to look for.</param>
        /// <param name="fuzzy">Forwarded to <c>FindElementByText</c>.</param>
        /// <returns>
        /// The visible text of the matching element with the smallest X, or <c>null</c> when the
        /// title is not found or no element qualifies.
        /// </returns>
        public string GetFieldAsString(string field, bool fuzzy = true)
        {
            PdfTextElement title = FindElementByText(field, fuzzy);
            if (title == null)
            {
                return null;
            }

            double titleX = title.GetX();

            // Keep candidates on the title's band that are not at or left of the title,
            // then pick the one with the smallest X.
            PdfTextElement nearest = _textElements
                .Where(candidate => TextElementHorizontalIntersection(title, candidate))
                .Where(candidate => !(candidate.GetX() <= titleX))
                .OrderBy(candidate => candidate.GetX())
                .FirstOrDefault();

            if (nearest == null)
            {
                return null;
            }
            return nearest.VisibleText;
        }
Пример #3
0
        /// <summary>
        /// Tests whether the X range of <paramref name="elem1"/> (from <c>GetX()</c> to
        /// <c>GetX() + VisibleWidth</c>) overlaps the range
        /// [<paramref name="elem2X1"/>, <paramref name="elem2X2"/>]. Touching ranges count as overlapping.
        /// </summary>
        private bool TextElementVerticalIntersection(PdfTextElement elem1, double elem2X1, double elem2X2)
        {
            double left  = elem1.GetX();
            double right = elem1.GetX() + elem1.VisibleWidth;

            if (right >= elem2X1)
            {
                return elem2X2 >= left;
            }
            return false;
        }
Пример #4
0
        /// <summary>
        /// Replaces <c>_textElements</c> with a list in which every element containing a gap
        /// between consecutive characters wider than the element's <c>MaxWidth()</c> has been cut
        /// into separate sub-elements at those gaps. Elements without such a gap are kept as-is.
        /// </summary>
        private void SplitTextElements()
        {
            var textElementsSplitted = new List<PdfTextElement>(_textElements.Count);

            // Iterate once instead of repeatedly removing the head of the list: each
            // Remove() performed a linear search plus a shift, making the loop quadratic.
            foreach (PdfTextElement elem in _textElements)
            {
                double maxWidth = elem.MaxWidth();

                // Index of the first character of the part currently being accumulated.
                int prevBreak = 0;
                for (int i = 1; i < elem.Characters.Count; i++)
                {
                    PdfCharElement previous = elem.Characters[i - 1];
                    double prevCharEnd    = previous.Displacement + previous.Width;
                    double charSeparation = elem.Characters[i].Displacement - prevCharEnd;
                    if (charSeparation > maxWidth)
                    {
                        // Gap too wide: close the current part just before character i.
                        textElementsSplitted.Add(elem.SubPart(prevBreak, i));
                        prevBreak = i;
                    }
                }

                if (prevBreak == 0)
                {
                    // No split happened; keep the original element untouched.
                    textElementsSplitted.Add(elem);
                    continue;
                }

                // Trailing part after the last break.
                textElementsSplitted.Add(elem.SubPart(prevBreak, elem.Characters.Count));
            }

            _textElements = textElementsSplitted;
        }
Пример #5
0
 /// <summary>
 /// Appends <paramref name="textElement"/> to <c>_textElements</c> unless its visible text
 /// is null, empty or consists only of white-space characters.
 /// </summary>
 /// <param name="textElement">Element to add.</param>
 private void AddTextElement(PdfTextElement textElement)
 {
     // IsNullOrWhiteSpace also covers a null VisibleText; the previous
     // VisibleText.Trim() call threw before the null check could run.
     if (string.IsNullOrWhiteSpace(textElement.VisibleText))
     {
         return;
     }
     _textElements.Add(textElement);
 }
Пример #6
0
 /// <summary>
 /// Initializes a column with its head element, its content elements and its placement values.
 /// </summary>
 /// <param name="head">Stored in <c>HeadTextElement</c>.</param>
 /// <param name="elements">Stored in <c>Elements</c> (the sequence is kept by reference, not copied).</param>
 /// <param name="y">Stored in <c>Y</c>.</param>
 /// <param name="x1">Stored in <c>X1</c>.</param>
 /// <param name="x2">Stored in <c>X2</c>.</param>
 public PdfTextElementColumn(PdfTextElement head, IEnumerable <PdfTextElement> elements, double y, double x1, double x2)
 {
     this.HeadTextElement = head;
     this.Elements = elements;

     this.Y = y;
     this.X1 = x1;
     this.X2 = x2;
 }
Пример #7
0
        /// <summary>
        /// Tests whether the Y ranges of two elements (from <c>GetY()</c> to
        /// <c>GetY() + VisibleHeight</c>) overlap. Touching ranges count as overlapping.
        /// </summary>
        private bool TextElementHorizontalIntersection(PdfTextElement elem1, PdfTextElement elem2)
        {
            double firstLow   = elem1.GetY();
            double firstHigh  = elem1.GetY() + elem1.VisibleHeight;
            double secondLow  = elem2.GetY();
            double secondHigh = elem2.GetY() + elem2.VisibleHeight;

            if (firstHigh >= secondLow)
            {
                return secondHigh >= firstLow;
            }
            return false;
        }
Пример #8
0
        /// <summary>
        /// Draws a text element on <paramref name="gc"/>: an optional bounding rectangle for the
        /// whole element, then every character as text with an optional per-character rectangle.
        /// </summary>
        /// <param name="textElement">Element to draw; nothing is drawn when null.</param>
        /// <param name="gc">Target graphics surface.</param>
        /// <param name="penTextElem">Pen for the element rectangle, or null to skip it.</param>
        /// <param name="penCharElem">Pen for the per-character rectangles, or null to skip them.</param>
        /// <param name="scale">Factor applied to all coordinates and sizes before drawing.</param>
        /// <param name="pageHeight">Value the element's Y is subtracted from to obtain the drawing Y.</param>
        /// <param name="pageXMin">Offset subtracted from the element's X.</param>
        /// <param name="pageYMin">Offset subtracted from the element's Y.</param>
        /// <param name="brushText">Brush used to draw the characters.</param>
        private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, Pen penCharElem, int scale, int pageHeight, double pageXMin, double pageYMin, Brush brushText)
        {
            if (textElement == null)
            {
                return;
            }
            double textElementX      = textElement.GetX() - pageXMin;
            double textElementY      = textElement.GetY() - pageYMin;
            double textElementWidth  = textElement.VisibleWidth;
            double textElementHeight = textElement.VisibleHeight;

            // Elements with (almost) no height are not drawn at all.
            if (textElementHeight < 0.0001)
            {
                return;
            }

            double textElementPageX = textElementX;
            // NOTE(review): presumably converts a bottom-up page Y into a top-down drawing Y - confirm.
            double textElementPageY = pageHeight - textElementY;

            // Values shared by the element rectangle and every character.
            int top         = (int)(textElementPageY * scale);
            int pixelHeight = (int)(textElementHeight * scale);

            if (penTextElem != null)
            {
                DrawRoundedRectangle(gc, penTextElem,
                                     (int)(textElementPageX * scale),
                                     top,
                                     (int)(textElementWidth * scale),
                                     pixelHeight,
                                     5);
            }

            // The Font constructor throws ArgumentException for an em-size <= 0, which the
            // integer truncation produces for small elements; use at least one pixel.
            int fontPixelSize = pixelHeight;
            if (fontPixelSize < 1)
            {
                fontPixelSize = 1;
            }

            using (Font font = new Font("Arial", fontPixelSize, GraphicsUnit.Pixel))
            {
                foreach (PdfCharElement c in textElement.Characters)
                {
                    int left = (int)((textElementPageX + c.Displacement) * scale);

                    gc.DrawString(c.Char, font, brushText, left, top);
                    if (penCharElem != null)
                    {
                        DrawRoundedRectangle(gc, penCharElem,
                                             left,
                                             top,
                                             (int)(c.Width * scale),
                                             pixelHeight,
                                             5);
                    }
                }
            }
        }
Пример #9
0
        /// <summary>
        /// Locates the column head matching <paramref name="column"/>, determines how far the
        /// column may extend horizontally before hitting a neighbouring element on the head's
        /// band, and delegates to the detailed <c>GetColumn</c> overload.
        /// </summary>
        /// <param name="column">Text of the column head to look for.</param>
        /// <param name="fuzzy">Forwarded to <c>FindElementByText</c>.</param>
        /// <returns>
        /// The column data, or <c>PdfTextElementColumn.Empty</c> when the head is not found.
        /// </returns>
        public PdfTextElementColumn GetColumn(string column, bool fuzzy = true)
        {
            PdfTextElement head = FindElementByText(column, fuzzy);
            if (head == null)
            {
                return PdfTextElementColumn.Empty;
            }

            double headY  = head.GetY() - head.VisibleHeight;
            double headX1 = head.GetX();
            double headX2 = headX1 + head.VisibleWidth;

            // Horizontal limits: unbounded until a neighbour on the head's band narrows them.
            double leftLimit  = double.MinValue;
            double rightLimit = double.MaxValue;

            foreach (PdfTextElement other in _textElements)
            {
                if (other == head || !TextElementHorizontalIntersection(head, other))
                {
                    continue;
                }

                double otherLeft  = other.GetX();
                double otherRight = otherLeft + other.VisibleWidth;

                // Neighbour entirely to the left of the head: keep the right-most right edge.
                if (otherRight < headX1 && otherRight > leftLimit)
                {
                    leftLimit = otherRight;
                }

                // Neighbour entirely to the right of the head: keep the left-most left edge.
                if (otherLeft > headX2 && otherLeft < rightLimit)
                {
                    rightLimit = otherLeft;
                }
            }

            return GetColumn(head, headY, headX1, headX2, leftLimit, rightLimit);
        }
Пример #10
0
        /// <summary>
        /// Emits the buffered text as a text element (when the buffer is non-empty), advances
        /// the current text matrix by the accumulated text width, and resets the buffers.
        /// The accumulated width is reset to zero in every case.
        /// </summary>
        private void FlushTextElement()
        {
            if (_sbText.Length > 0)
            {
                AddTextElement(BuildTextElement());

                // Shift the X translation of the text matrix past the text just flushed.
                _textMatrixCurrent.Matrix[0, 2] += _textWidth;

                _sbText = new StringBuilder();
                _listCharacters.Clear();
            }

            _textWidth = 0;
        }
Пример #11
0
        /// <summary>
        /// Builds a new element covering the characters in the half-open index range
        /// [<paramref name="startIndex"/>, <paramref name="endIndex"/>).
        /// </summary>
        /// <param name="startIndex">Index of the first character to include.</param>
        /// <param name="endIndex">Index one past the last character to include.</param>
        /// <returns>
        /// A new element with a copied matrix translated by the first character's displacement,
        /// character displacements re-based to zero, no font, and the same child elements.
        /// </returns>
        public PdfTextElement SubPart(int startIndex, int endIndex)
        {
            PdfTextElement part = new PdfTextElement();

            part.Font          = null;
            part.FontSize      = FontSize;
            part.Matrix        = Matrix.Copy();
            part.RawText       = RawText.Substring(startIndex, endIndex - startIndex);
            part.VisibleText   = VisibleText.Substring(startIndex, endIndex - startIndex);
            part.VisibleWidth  = 0;
            part.VisibleHeight = VisibleHeight;
            part.Characters    = new List<PdfCharElement>();
            part.Childs        = new List<PdfTextElement>();

            // The first included character becomes the new origin of the part.
            double origin = Characters[startIndex].Displacement;
            part.Matrix.Matrix[0, 2] += origin;

            int index = startIndex;
            while (index < endIndex)
            {
                PdfCharElement source = Characters[index];
                PdfCharElement copy   = new PdfCharElement();
                copy.Char         = source.Char;
                copy.Displacement = source.Displacement - origin;
                copy.Width        = source.Width;
                part.Characters.Add(copy);
                index++;
            }

            // Width spans from the origin to the end of the last copied character.
            PdfCharElement tail = part.Characters[part.Characters.Count - 1];
            part.VisibleWidth = tail.Displacement + tail.Width;

            foreach (PdfTextElement child in Childs)
            {
                part.Childs.Add(child);
            }

            return part;
        }
Пример #12
0
        /// <summary>
        /// Collects the text elements that belong to the column under <paramref name="columnHead"/>.
        /// Candidates are the elements whose X range overlaps [<paramref name="headX1"/>, <paramref name="headX2"/>]
        /// and whose Y is lower than <paramref name="headY"/>, processed in descending Y order.
        /// Elements strictly inside the extents are taken whole; elements crossing an extent are
        /// trimmed to a sub-part when a suitable gap between characters is found.
        /// </summary>
        /// <param name="columnHead">Head element stored in the resulting column.</param>
        /// <param name="headY">Y threshold; only elements with a smaller Y are considered.</param>
        /// <param name="headX1">Left X of the head, used for the overlap test.</param>
        /// <param name="headX2">Right X of the head, used for the overlap test.</param>
        /// <param name="extentX1">Left limit of the column.</param>
        /// <param name="extentX2">Right limit of the column.</param>
        /// <returns>A new <see cref="PdfTextElementColumn"/> built from the collected elements.</returns>
        public PdfTextElementColumn GetColumn(PdfTextElement columnHead, double headY, double headX1, double headX2, double extentX1, double extentX2)
        {
            // Get all the elements that overlap the head's X range, lie below it, and sort the results
            var columnDataRaw = new List <PdfTextElement>();

            foreach (PdfTextElement elem in _textElements)
            {
                if (TextElementVerticalIntersection(elem, headX1, headX2) == false)
                {
                    continue;
                }

                // Only items below the column head
                // NOTE(review): assumes a smaller Y means lower on the page - confirm.
                double elemY = elem.GetY();
                if (elemY >= headY)
                {
                    continue;
                }

                columnDataRaw.Add(elem);
            }
            columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList();

            // Only items completely inside the extents; try splitting big elements and stop at big elements that can't be split
            var columnElements = new List <PdfTextElement>();

            foreach (PdfTextElement elem in columnDataRaw)
            {
                double elemX1 = elem.GetX();
                double elemX2 = elemX1 + elem.VisibleWidth;

                // Add elements completely inside
                if (elemX1 > extentX1 && elemX2 < extentX2)
                {
                    columnElements.Add(elem);
                    continue;
                }

                // Try to split elements intersecting the extents of the column.
                // A gap counts as a split point when it is at least a tenth of the average character width.
                // NOTE(review): Average() throws on an element with no characters - confirm that cannot occur here.
                double maxSpacing      = elem.Characters.Average(c => c.Width) / 10;
                int    indexStart      = 0;
                int    indexEnd        = elem.Characters.Count - 1;
                bool   indexStartValid = true;
                bool   indexEndValid   = true;
                if (elemX1 < extentX1)
                {
                    // Search best start: first skip the characters left of the extent...
                    int    index             = 0;
                    double characterPosition = elemX1 + elem.Characters[index].Displacement;
                    while (characterPosition < extentX1 && index < (elem.Characters.Count - 1))
                    {
                        index++;
                        characterPosition = elemX1 + elem.Characters[index].Displacement;
                    }
                    // ...then keep advancing until a character with a large enough gap is found.
                    double spacing = elem.GetCharacterPreviousSpacing(index);
                    while (spacing < maxSpacing && index < (elem.Characters.Count - 1))
                    {
                        index++;
                        spacing = elem.GetCharacterPreviousSpacing(index);
                    }
                    if (spacing < maxSpacing)
                    {
                        // Ran out of characters without finding a usable gap.
                        indexStartValid = false;
                    }
                    indexStart = index;
                }

                if (elemX2 > extentX2)
                {
                    // Search best end: first skip the characters whose right edge is beyond the extent...
                    int    index             = elem.Characters.Count - 1;
                    double characterPosition = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
                    while (characterPosition > extentX2 && index > 0)
                    {
                        index--;
                        characterPosition = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
                    }
                    // ...then keep moving back until a character with a large enough gap is found.
                    // NOTE(review): this uses GetCharacterPrecedingSpacing while the start search uses
                    // GetCharacterPreviousSpacing; neither helper is visible here - confirm which side each measures.
                    double spacing = elem.GetCharacterPrecedingSpacing(index);
                    while (spacing < maxSpacing && index > 0)
                    {
                        index--;
                        spacing = elem.GetCharacterPrecedingSpacing(index);
                    }
                    if (spacing < maxSpacing)
                    {
                        // Reached the first character without finding a usable gap.
                        indexEndValid = false;
                    }
                    indexEnd = index;
                }

                // Stop processing when there is no good split on either side (element spans the whole extent)
                if (indexStartValid == false && indexEndValid == false)
                {
                    break;
                }

                // Skip the element when only one of the sides is invalid (outside elements intersecting the extents of the column)
                if (indexStartValid == false || indexEndValid == false)
                {
                    continue;
                }

                // Add the trimmed element (indexEnd is inclusive, SubPart's end index is exclusive)
                columnElements.Add(elem.SubPart(indexStart, indexEnd + 1));
            }

            var columnData = new PdfTextElementColumn(columnHead, columnElements, headY, extentX1, extentX2);

            return(columnData);
        }
Пример #13
0
        /// <summary>
        /// Replaces <c>_textElements</c> with a condensed list in which elements that share the
        /// same font, font size and Y (within 0.001) and lie within one <c>MaxWidth()</c> of each
        /// other horizontally are merged into a single block element. Elements without any
        /// neighbour are kept unchanged.
        /// </summary>
        private void JoinTextElements()
        {
            var textElementsCondensed = new List<PdfTextElement>();

            while (_textElements.Count > 0)
            {
                // Take the head by index; Remove(elem) would search the list for it again.
                PdfTextElement elem = _textElements[0];
                _textElements.RemoveAt(0);

                double blockY    = elem.GetY();
                double blockXMin = elem.GetX();
                double blockXMax = blockXMin + elem.VisibleWidth;

                // Prepare first neighbour
                var textElementNeighbours = new List<PdfTextElement>();
                textElementNeighbours.Add(elem);

                // Search neighbours
                int i = 0;
                while (i < _textElements.Count)
                {
                    PdfTextElement neighbour = _textElements[i];

                    if (neighbour.Font != elem.Font || neighbour.FontSize != elem.FontSize)
                    {
                        i++;
                        continue;
                    }

                    double neighbourY = neighbour.GetY();
                    if (Math.Abs(neighbourY - blockY) > 0.001)
                    {
                        i++;
                        continue;
                    }

                    double maxWidth = neighbour.MaxWidth();

                    double neighbourXMin = neighbour.GetX();
                    double neighbourXMax = neighbourXMin + neighbour.VisibleWidth;

                    // Block extents widened by the neighbour's tolerance on both sides.
                    double auxBlockXMin = blockXMin - maxWidth;
                    double auxBlockXMax = blockXMax + maxWidth;
                    if (auxBlockXMax >= neighbourXMin && neighbourXMax >= auxBlockXMin)
                    {
                        // Remove exactly the inspected entry (no equality search needed).
                        _textElements.RemoveAt(i);
                        textElementNeighbours.Add(neighbour);
                        if (blockXMax < neighbourXMax)
                        {
                            blockXMax = neighbourXMax;
                        }
                        if (blockXMin > neighbourXMin)
                        {
                            blockXMin = neighbourXMin;
                        }

                        // The block grew, so elements rejected earlier may now be in reach: rescan.
                        i = 0;
                        continue;
                    }
                    i++;
                }

                if (textElementNeighbours.Count == 1)
                {
                    textElementsCondensed.Add(elem);
                    continue;
                }

                // Join neighbours: re-base every character to the block's left edge
                var chars = new List<PdfCharElement>();
                foreach (PdfTextElement neighbour in textElementNeighbours)
                {
                    double neighbourXMin = neighbour.GetX();
                    foreach (PdfCharElement c in neighbour.Characters)
                    {
                        chars.Add(new PdfCharElement
                        {
                            Char         = c.Char,
                            Displacement = (c.Displacement + neighbourXMin) - blockXMin,
                            Width        = c.Width,
                        });
                    }
                }

                // Left-to-right order defines the text of the joined block.
                chars = chars.OrderBy(c => c.Displacement).ToList();
                var sbText = new StringBuilder();
                foreach (PdfCharElement c in chars)
                {
                    sbText.Append(c.Char);
                }

                PdfTextElement blockElem = new PdfTextElement
                {
                    Font          = null,
                    FontSize      = elem.FontSize,
                    Matrix        = elem.Matrix.Copy(),
                    RawText       = sbText.ToString(),
                    VisibleText   = sbText.ToString(),
                    VisibleWidth  = blockXMax - blockXMin,
                    VisibleHeight = elem.VisibleHeight,
                    Characters    = chars,
                    Childs        = textElementNeighbours,
                };

                // Place the block at the left-most X of all joined elements.
                blockElem.Matrix.Matrix[0, 2] = blockXMin;
                textElementsCondensed.Add(blockElem);
            }

            _textElements = textElementsCondensed;
        }