/// <summary>
/// Looks up a column by its header text and returns the visible text of every element in it.
/// </summary>
/// <param name="column">Header text used to locate the column.</param>
/// <param name="fuzzy">Forwarded unchanged to the column lookup.</param>
/// <returns>
/// A new list holding the <c>VisibleText</c> of each element of the located column,
/// in the order the column exposes them.
/// </returns>
public List<string> GetColumnAsStrings(string column, bool fuzzy = true)
{
    var texts = new List<string>();

    // Walk the elements of the resolved column and keep only their visible text.
    foreach (PdfTextElement element in GetColumn(column, fuzzy).Elements)
    {
        texts.Add(element.VisibleText);
    }

    return texts;
}
/// <summary>
/// Locates the header element matching <paramref name="column"/> and builds the column found under it.
/// </summary>
/// <param name="column">Header text to search for.</param>
/// <param name="fuzzy">Forwarded unchanged to <c>FindElementByText</c>.</param>
/// <returns>
/// <c>PdfTextElementColumn.Empty</c> when no header element is found; otherwise the column
/// produced by the coordinate-based <c>GetColumn</c> overload.
/// </returns>
public PdfTextElementColumn GetColumn(string column, bool fuzzy = true)
{
    PdfTextElement header = FindElementByText(column, fuzzy);
    if (header == null)
    {
        return PdfTextElementColumn.Empty;
    }

    double thresholdY = header.GetY() - header.VisibleHeight;
    double headerLeft = header.GetX();
    double headerRight = headerLeft + header.VisibleWidth;

    // Horizontal extent of the column: start unbounded and tighten it with the
    // nearest neighbours of the header (those accepted by TextElementHorizontalIntersection).
    double leftLimit = double.MinValue;
    double rightLimit = double.MaxValue;
    foreach (PdfTextElement candidate in _textElements)
    {
        if (candidate == header || !TextElementHorizontalIntersection(header, candidate))
        {
            continue;
        }

        double candidateLeft = candidate.GetX();
        double candidateRight = candidateLeft + candidate.VisibleWidth;

        // Neighbour ending before the header: its right edge may raise the left limit.
        if (candidateRight < headerLeft && candidateRight > leftLimit)
        {
            leftLimit = candidateRight;
        }

        // Neighbour starting after the header: its left edge may lower the right limit.
        if (candidateLeft > headerRight && candidateLeft < rightLimit)
        {
            rightLimit = candidateLeft;
        }
    }

    return GetColumn(header, thresholdY, headerLeft, headerRight, leftLimit, rightLimit);
}
/// <summary>
/// Draws a column (header element, column elements and column extents) onto a bitmap.
/// </summary>
/// <param name="columnData">Column to draw.</param>
/// <param name="bmp">
/// Bitmap to draw on. When <c>null</c>, a new 32bpp ARGB bitmap of
/// <c>_pageWidth * _scale</c> by <c>_pageHeight * _scale</c> is created and cleared to white.
/// </param>
/// <returns>The bitmap that was drawn on (either <paramref name="bmp"/> or the newly created one).</returns>
public Bitmap RenderColumn(PdfTextElementColumn columnData, Bitmap bmp = null)
{
    bool createdHere = (bmp == null);
    if (createdHere)
    {
        bmp = new Bitmap(_pageWidth * _scale, _pageHeight * _scale, PixelFormat.Format32bppArgb);
    }

    try
    {
        // The using block guarantees the Graphics object is released even if drawing throws.
        using (Graphics gc = Graphics.FromImage(bmp))
        {
            if (createdHere)
            {
                gc.Clear(Color.White);
            }

            // Draw text elements of the column header
            using (Pen penTextElem = new Pen(Color.Green))
            using (Pen penCharElem = new Pen(Color.DarkGreen))
            {
                DrawTextElement(columnData.HeadTextElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.Olive);
            }

            // Draw text elements of the column
            using (Pen penTextElem = new Pen(Color.Red))
            using (Pen penCharElem = new Pen(Color.DarkRed))
            {
                foreach (PdfTextElement textElement in columnData.Elements)
                {
                    DrawTextElement(textElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.OrangeRed);
                }
            }

            // Draw column extents: one horizontal line at the column's Y and two
            // vertical lines from there to _pageHeight at X1 and X2.
            using (Pen penColumn = new Pen(Color.Red))
            {
                float y = (float)(_pageRect.YMax - columnData.Y);
                float x1 = (float)(columnData.X1 - _pageRect.XMin);
                float x2 = (float)(columnData.X2 - _pageRect.XMin);
                gc.DrawLine(penColumn, x1 * _scale, y * _scale, x2 * _scale, y * _scale);
                gc.DrawLine(penColumn, x1 * _scale, y * _scale, x1 * _scale, _pageHeight * _scale);
                gc.DrawLine(penColumn, x2 * _scale, y * _scale, x2 * _scale, _pageHeight * _scale);
            }
        }
    }
    catch
    {
        // A bitmap allocated by this method is unreachable for the caller when
        // rendering fails, so release it here; a caller-supplied bitmap is left alone.
        if (createdHere)
        {
            bmp.Dispose();
        }
        throw;
    }

    return bmp;
}
/// <summary>
/// Builds the column associated with a header element, keeping the text that lies
/// between the given horizontal extents.
/// </summary>
/// <param name="columnHead">Header element; passed through to the resulting column.</param>
/// <param name="headY">Elements whose <c>GetY()</c> is greater than or equal to this value are discarded.</param>
/// <param name="headX1">First X bound handed to <c>TextElementVerticalIntersection</c> to select candidates.</param>
/// <param name="headX2">Second X bound handed to <c>TextElementVerticalIntersection</c> to select candidates.</param>
/// <param name="extentX1">Lower X limit of the column.</param>
/// <param name="extentX2">Upper X limit of the column.</param>
/// <returns>
/// A new <c>PdfTextElementColumn</c> built from the header, the accepted (whole or split)
/// elements, <paramref name="headY"/> and both extents.
/// </returns>
public PdfTextElementColumn GetColumn(PdfTextElement columnHead, double headY, double headX1, double headX2, double extentX1, double extentX2)
{
    // Collect every element accepted by the vertical-intersection test whose Y is
    // not at or beyond headY, then process them by descending Y.
    var candidates = new List<PdfTextElement>();
    foreach (PdfTextElement candidate in _textElements)
    {
        if (TextElementVerticalIntersection(candidate, headX1, headX2) && !(candidate.GetY() >= headY))
        {
            candidates.Add(candidate);
        }
    }
    List<PdfTextElement> ordered = candidates.OrderByDescending(item => item.GetY()).ToList();

    // Keep elements strictly inside the extents; try to split the ones that cross an
    // extent, and stop at the first element that crosses both extents and cannot be split.
    var accepted = new List<PdfTextElement>();
    foreach (PdfTextElement elem in ordered)
    {
        double left = elem.GetX();
        double right = left + elem.VisibleWidth;

        // Completely inside: take it as is.
        if (left > extentX1 && right < extentX2)
        {
            accepted.Add(elem);
            continue;
        }

        // A gap between characters must reach a tenth of the average character
        // width to be considered a valid split point.
        double gapThreshold = elem.Characters.Average(c => c.Width) / 10;

        int firstIndex = 0;
        bool firstOk = true;
        if (left < extentX1)
        {
            firstOk = FindSplitStart(elem, left, extentX1, gapThreshold, out firstIndex);
        }

        int lastIndex = elem.Characters.Count - 1;
        bool lastOk = true;
        if (right > extentX2)
        {
            lastOk = FindSplitEnd(elem, left, extentX2, gapThreshold, out lastIndex);
        }

        if (firstOk && lastOk)
        {
            // Both sides have a usable split point: keep only the inner part.
            accepted.Add(elem.SubPart(firstIndex, lastIndex + 1));
            continue;
        }

        if (!firstOk && !lastOk)
        {
            // The element spans the whole extent with no good split: the column ends here.
            break;
        }

        // Only one side is invalid (outside element intersecting an extent): skip it.
    }

    return new PdfTextElementColumn(columnHead, accepted, headY, extentX1, extentX2);
}

// Finds the index of the first character to keep when an element starts before extentX1.
// Returns false when no inter-character gap of at least maxSpacing was found.
private static bool FindSplitStart(PdfTextElement elem, double elemX1, double extentX1, double maxSpacing, out int indexStart)
{
    int lastIndex = elem.Characters.Count - 1;

    // Advance to the first character that does not start before the extent.
    int index = 0;
    double position = elemX1 + elem.Characters[index].Displacement;
    while (position < extentX1 && index < lastIndex)
    {
        index++;
        position = elemX1 + elem.Characters[index].Displacement;
    }

    // From there, keep advancing until a wide enough gap is reported for the character.
    double spacing = elem.GetCharacterPreviousSpacing(index);
    while (spacing < maxSpacing && index < lastIndex)
    {
        index++;
        spacing = elem.GetCharacterPreviousSpacing(index);
    }

    indexStart = index;
    return !(spacing < maxSpacing);
}

// Finds the index of the last character to keep when an element ends after extentX2.
// Returns false when no inter-character gap of at least maxSpacing was found.
private static bool FindSplitEnd(PdfTextElement elem, double elemX1, double extentX2, double maxSpacing, out int indexEnd)
{
    // Move back to the last character that does not end after the extent.
    int index = elem.Characters.Count - 1;
    double position = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
    while (position > extentX2 && index > 0)
    {
        index--;
        position = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
    }

    // From there, keep moving back until a wide enough gap is reported for the character.
    double spacing = elem.GetCharacterPrecedingSpacing(index);
    while (spacing < maxSpacing && index > 0)
    {
        index--;
        spacing = elem.GetCharacterPrecedingSpacing(index);
    }

    indexEnd = index;
    return !(spacing < maxSpacing);
}