Exemple #1
0
        /// <summary>
        /// Returns the visible text of every element in the column resolved by
        /// <c>GetColumn(column, fuzzy)</c>, in the order the column exposes them.
        /// </summary>
        /// <param name="column">Header text identifying the column; forwarded to <c>GetColumn</c>.</param>
        /// <param name="fuzzy">Matching flag forwarded unchanged to <c>GetColumn</c>.</param>
        /// <returns>A new list with one <c>VisibleText</c> entry per column element.</returns>
        public List <string> GetColumnAsStrings(string column, bool fuzzy = true)
        {
            PdfTextElementColumn located = GetColumn(column, fuzzy);
            var texts = new List <string>();

            // Project each element of the column onto its visible text
            foreach (PdfTextElement element in located.Elements)
            {
                string text = element.VisibleText;
                texts.Add(text);
            }

            return texts;
        }
Exemple #2
0
        /// <summary>
        /// Finds the text element matching <paramref name="column"/> and builds the column
        /// of elements below it, bounded horizontally by the nearest neighbours of the header.
        /// </summary>
        /// <param name="column">Text used to look up the header element via <c>FindElementByText</c>.</param>
        /// <param name="fuzzy">Matching flag forwarded unchanged to <c>FindElementByText</c>.</param>
        /// <returns>
        /// <c>PdfTextElementColumn.Empty</c> when no header element is found; otherwise the
        /// column produced by the coordinate-based <c>GetColumn</c> overload.
        /// </returns>
        public PdfTextElementColumn GetColumn(string column, bool fuzzy = true)
        {
            PdfTextElement header = FindElementByText(column, fuzzy);
            if (header == null)
            {
                return PdfTextElementColumn.Empty;
            }

            // Header geometry: Y is taken at GetY() minus the visible height
            double headerY     = header.GetY() - header.VisibleHeight;
            double headerLeft  = header.GetX();
            double headerRight = headerLeft + header.VisibleWidth;

            // Start unbounded and tighten with every neighbour that
            // TextElementHorizontalIntersection reports for the header
            double leftLimit  = double.MinValue;
            double rightLimit = double.MaxValue;

            foreach (PdfTextElement other in _textElements)
            {
                if (other == header || !TextElementHorizontalIntersection(header, other))
                {
                    continue;
                }

                double otherLeft  = other.GetX();
                double otherRight = otherLeft + other.VisibleWidth;

                // Neighbour ends before the header starts: keep the right-most such end
                if (otherRight < headerLeft && otherRight > leftLimit)
                {
                    leftLimit = otherRight;
                }

                // Neighbour starts after the header ends: keep the left-most such start
                if (otherLeft > headerRight && otherLeft < rightLimit)
                {
                    rightLimit = otherLeft;
                }
            }

            return GetColumn(header, headerY, headerLeft, headerRight, leftLimit, rightLimit);
        }
Exemple #3
0
        /// <summary>
        /// Draws a column (its header element, its elements and its extents) onto a bitmap.
        /// </summary>
        /// <param name="columnData">Column to render.</param>
        /// <param name="bmp">
        /// Bitmap to draw on. When <c>null</c>, a new 32bpp ARGB bitmap of
        /// <c>_pageWidth * _scale</c> by <c>_pageHeight * _scale</c> pixels is created and cleared to white.
        /// </param>
        /// <returns>The bitmap that was drawn on (the supplied one, or the newly created one).</returns>
        public Bitmap RenderColumn(PdfTextElementColumn columnData, Bitmap bmp = null)
        {
            bool createdBitmap = (bmp == null);

            if (createdBitmap)
            {
                bmp = new Bitmap(_pageWidth * _scale, _pageHeight * _scale, PixelFormat.Format32bppArgb);
            }

            try
            {
                // The using block guarantees the Graphics object is released even when drawing throws
                using (Graphics gc = Graphics.FromImage(bmp))
                {
                    if (createdBitmap)
                    {
                        gc.Clear(Color.White);
                    }

                    // Draw text elements of the column header
                    using (Pen penTextElem = new Pen(Color.Green))
                    using (Pen penCharElem = new Pen(Color.DarkGreen))
                    {
                        DrawTextElement(columnData.HeadTextElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.Olive);
                    }

                    // Draw text elements of the column
                    using (Pen penTextElem = new Pen(Color.Red))
                    using (Pen penCharElem = new Pen(Color.DarkRed))
                    {
                        foreach (PdfTextElement textElement in columnData.Elements)
                        {
                            DrawTextElement(textElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.OrangeRed);
                        }
                    }

                    // Draw column extents: a horizontal line at the column's Y and two vertical
                    // lines at X1 and X2 running from that Y to _pageHeight * _scale
                    using (Pen penColumn = new Pen(Color.Red))
                    {
                        // Y is measured from _pageRect.YMax, X from _pageRect.XMin
                        float y  = (float)(_pageRect.YMax - columnData.Y);
                        float x1 = (float)(columnData.X1 - _pageRect.XMin);
                        float x2 = (float)(columnData.X2 - _pageRect.XMin);

                        gc.DrawLine(penColumn, x1 * _scale, y * _scale, x2 * _scale, y * _scale);
                        gc.DrawLine(penColumn, x1 * _scale, y * _scale, x1 * _scale, _pageHeight * _scale);
                        gc.DrawLine(penColumn, x2 * _scale, y * _scale, x2 * _scale, _pageHeight * _scale);
                    }
                }
            }
            catch
            {
                // A bitmap allocated here never reaches the caller on failure, so release it;
                // a caller-supplied bitmap stays under the caller's ownership.
                if (createdBitmap)
                {
                    bmp.Dispose();
                }
                throw;
            }

            return bmp;
        }
Exemple #4
0
        /// <summary>
        /// Builds the column below a header element from explicit geometry.
        /// </summary>
        /// <remarks>
        /// Elements strictly inside the extents are taken whole. Elements that cross an extent
        /// are cut at a character gap of at least one tenth of their average character width.
        /// The scan stops at the first element that cannot be cut on either side.
        /// </remarks>
        /// <param name="columnHead">Header element stored in the resulting column.</param>
        /// <param name="headY">Only elements whose <c>GetY()</c> is below this value are considered.</param>
        /// <param name="headX1">Left edge of the header span passed to <c>TextElementVerticalIntersection</c>.</param>
        /// <param name="headX2">Right edge of the header span passed to <c>TextElementVerticalIntersection</c>.</param>
        /// <param name="extentX1">Left horizontal limit of the column.</param>
        /// <param name="extentX2">Right horizontal limit of the column.</param>
        /// <returns>A <c>PdfTextElementColumn</c> built from the header, the selected elements, <paramref name="headY"/> and both extents.</returns>
        public PdfTextElementColumn GetColumn(PdfTextElement columnHead, double headY, double headX1, double headX2, double extentX1, double extentX2)
        {
            // Candidates: elements accepted by TextElementVerticalIntersection for the header
            // span whose Y is not at or above headY
            var candidates = new List <PdfTextElement>();

            foreach (PdfTextElement item in _textElements)
            {
                if (TextElementVerticalIntersection(item, headX1, headX2) && !(item.GetY() >= headY))
                {
                    candidates.Add(item);
                }
            }

            // Process candidates by descending Y
            List <PdfTextElement> ordered = candidates.OrderByDescending(item => item.GetY()).ToList();

            var accepted = new List <PdfTextElement>();

            foreach (PdfTextElement item in ordered)
            {
                double left  = item.GetX();
                double right = left + item.VisibleWidth;

                // Completely inside the extents: take the element as-is
                if (left > extentX1 && right < extentX2)
                {
                    accepted.Add(item);
                    continue;
                }

                // The element touches or crosses an extent: look for character indexes to cut at
                double minGap    = item.Characters.Average(c => c.Width) / 10;
                int    lastIndex = item.Characters.Count - 1;
                int    firstChar = 0;
                int    lastChar  = lastIndex;
                bool   firstOk   = true;
                bool   lastOk    = true;

                if (left < extentX1)
                {
                    // Advance to the first character positioned at or past the left extent
                    while (left + item.Characters[firstChar].Displacement < extentX1 && firstChar < lastIndex)
                    {
                        firstChar++;
                    }

                    // Keep advancing until the spacing reported for the character is wide enough
                    double gap = item.GetCharacterPreviousSpacing(firstChar);
                    while (gap < minGap && firstChar < lastIndex)
                    {
                        firstChar++;
                        gap = item.GetCharacterPreviousSpacing(firstChar);
                    }

                    firstOk = !(gap < minGap);
                }

                if (right > extentX2)
                {
                    // Walk back to the last character ending at or before the right extent
                    lastChar = lastIndex;
                    while (left + item.Characters[lastChar].Displacement + item.Characters[lastChar].Width > extentX2 && lastChar > 0)
                    {
                        lastChar--;
                    }

                    // Keep walking back until the spacing reported for the character is wide enough
                    double gap = item.GetCharacterPrecedingSpacing(lastChar);
                    while (gap < minGap && lastChar > 0)
                    {
                        lastChar--;
                        gap = item.GetCharacterPrecedingSpacing(lastChar);
                    }

                    lastOk = !(gap < minGap);
                }

                // No usable cut on either side: the element spans the whole extent, stop scanning
                if (!firstOk && !lastOk)
                {
                    break;
                }

                // Exactly one side without a usable cut: skip this element (an outside element
                // intersecting the extents); otherwise keep the part between the two cuts
                if (firstOk && lastOk)
                {
                    accepted.Add(item.SubPart(firstChar, lastChar + 1));
                }
            }

            return new PdfTextElementColumn(columnHead, accepted, headY, extentX1, extentX2);
        }