/// <summary>
/// Tells whether two text elements share a horizontal band, i.e. whether their
/// Y ranges [GetY(), GetY() + VisibleHeight] overlap. Touching ranges count as overlapping.
/// </summary>
/// <param name="elem1">First text element.</param>
/// <param name="elem2">Second text element.</param>
/// <returns><c>true</c> when the two Y ranges overlap (inclusive); otherwise <c>false</c>.</returns>
private bool TextElementHorizontalIntersection(PdfTextElement elem1, PdfTextElement elem2)
{
    double firstStart = elem1.GetY();
    double firstEnd = firstStart + elem1.VisibleHeight;

    double secondStart = elem2.GetY();
    double secondEnd = secondStart + elem2.VisibleHeight;

    // Two closed intervals overlap when each one ends at or after the start of the other.
    bool firstReachesSecond = firstEnd >= secondStart;
    bool secondReachesFirst = secondEnd >= firstStart;
    return firstReachesSecond && secondReachesFirst;
}
/// <summary>
/// Draws a text element onto <paramref name="gc"/>: an optional outline around the whole
/// element, then every character at its own displacement, each with an optional outline.
/// </summary>
/// <param name="textElement">Element to draw; nothing is drawn when it is <c>null</c>.</param>
/// <param name="gc">Graphics surface to draw on.</param>
/// <param name="penTextElem">Pen for the outline of the whole element, or <c>null</c> to skip it.</param>
/// <param name="penCharElem">Pen for the outline of each character, or <c>null</c> to skip it.</param>
/// <param name="scale">Factor applied to every coordinate and size before drawing.</param>
/// <param name="pageHeight">Page height; the element's Y coordinate is mirrored against it.</param>
/// <param name="pageXMin">X origin of the page, subtracted from the element's X.</param>
/// <param name="pageYMin">Y origin of the page, subtracted from the element's Y.</param>
/// <param name="brushText">Brush used to draw the characters.</param>
private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, Pen penCharElem, int scale, int pageHeight, double pageXMin, double pageYMin, Brush brushText)
{
    if (textElement == null)
    {
        return;
    }

    // Elements with (practically) no height are not drawn at all.
    double textElementHeight = textElement.VisibleHeight;
    if (textElementHeight < 0.0001)
    {
        return;
    }

    double textElementWidth = textElement.VisibleWidth;
    double textElementPageX = textElement.GetX() - pageXMin;

    // Mirror Y against the page height (presumably converting a bottom-up
    // PDF axis into the top-down drawing axis; confirm against callers).
    double textElementPageY = pageHeight - (textElement.GetY() - pageYMin);

    int pixelTop = (int)(textElementPageY * scale);
    int pixelHeight = (int)(textElementHeight * scale);

    if (penTextElem != null)
    {
        DrawRoundedRectangle(gc, penTextElem, (int)(textElementPageX * scale), pixelTop, (int)(textElementWidth * scale), pixelHeight, 5);
    }

    // The Font constructor throws ArgumentException for an em-size <= 0, which the
    // integer truncation produces for elements less than one pixel tall after
    // scaling. Clamp to one pixel so such elements are still rendered.
    int fontPixelSize = Math.Max(1, pixelHeight);

    using (Font font = new Font("Arial", fontPixelSize, GraphicsUnit.Pixel))
    {
        foreach (PdfCharElement c in textElement.Characters)
        {
            int charLeft = (int)((textElementPageX + c.Displacement) * scale);

            gc.DrawString(c.Char, font, brushText, charLeft, pixelTop);
            if (penCharElem != null)
            {
                DrawRoundedRectangle(gc, penCharElem, charLeft, pixelTop, (int)(c.Width * scale), pixelHeight, 5);
            }
        }
    }
}
/// <summary>
/// Finds the text element matching <paramref name="column"/> and returns the column
/// headed by it.
/// </summary>
/// <param name="column">Header text to search for.</param>
/// <param name="fuzzy">Passed through to <c>FindElementByText</c> to select fuzzy matching.</param>
/// <returns>
/// The column produced by the detailed <c>GetColumn</c> overload, or
/// <see cref="PdfTextElementColumn.Empty"/> when no header element is found.
/// </returns>
public PdfTextElementColumn GetColumn(string column, bool fuzzy = true)
{
    PdfTextElement header = FindElementByText(column, fuzzy);
    if (header == null)
    {
        return PdfTextElementColumn.Empty;
    }

    double headerLeft = header.GetX();
    double headerRight = headerLeft + header.VisibleWidth;
    double headerY = header.GetY() - header.VisibleHeight;

    // Horizontal extent: bounded on each side by the closest element that shares
    // the header's horizontal band. Unbounded sides keep the double limits.
    double leftLimit = double.MinValue;
    double rightLimit = double.MaxValue;
    foreach (PdfTextElement candidate in _textElements)
    {
        if (candidate == header || !TextElementHorizontalIntersection(header, candidate))
        {
            continue;
        }

        double candidateLeft = candidate.GetX();
        double candidateRight = candidateLeft + candidate.VisibleWidth;

        // Element entirely to the left of the header: keep the right-most right edge.
        if (candidateRight < headerLeft && candidateRight > leftLimit)
        {
            leftLimit = candidateRight;
        }

        // Element entirely to the right of the header: keep the left-most left edge.
        if (candidateLeft > headerRight && candidateLeft < rightLimit)
        {
            rightLimit = candidateLeft;
        }
    }

    return GetColumn(header, headerY, headerLeft, headerRight, leftLimit, rightLimit);
}
/// <summary>
/// Condenses <c>_textElements</c> by merging elements that have the same font and font
/// size, the same Y position (within 0.001) and lie horizontally within one
/// <c>MaxWidth()</c> of each other into a single block element. Elements without any
/// such neighbour are kept unchanged. <c>_textElements</c> is replaced by the result.
/// </summary>
private void JoinTextElements()
{
    var textElementsCondensed = new List<PdfTextElement>();
    while (_textElements.Count > 0)
    {
        // Seed a new block with the first remaining element. RemoveAt removes exactly
        // the element just read, without the equality search List.Remove performs.
        PdfTextElement elem = _textElements[0];
        _textElements.RemoveAt(0);

        double blockY = elem.GetY();
        double blockXMin = elem.GetX();
        double blockXMax = blockXMin + elem.VisibleWidth;

        // The seed is always the first member of its own block.
        var textElementNeighbours = new List<PdfTextElement> { elem };

        // Search neighbours among the elements that are still unassigned.
        int i = 0;
        while (i < _textElements.Count)
        {
            PdfTextElement neighbour = _textElements[i];

            if (neighbour.Font != elem.Font || neighbour.FontSize != elem.FontSize)
            {
                i++;
                continue;
            }

            if (Math.Abs(neighbour.GetY() - blockY) > 0.001)
            {
                i++;
                continue;
            }

            // Widen the block by the neighbour's MaxWidth() on both sides before
            // testing for overlap, so elements separated by a small gap still join.
            double maxWidth = neighbour.MaxWidth();
            double neighbourXMin = neighbour.GetX();
            double neighbourXMax = neighbourXMin + neighbour.VisibleWidth;
            double auxBlockXMin = blockXMin - maxWidth;
            double auxBlockXMax = blockXMax + maxWidth;
            if (auxBlockXMax < neighbourXMin || neighbourXMax < auxBlockXMin)
            {
                i++;
                continue;
            }

            _textElements.RemoveAt(i);
            textElementNeighbours.Add(neighbour);
            if (blockXMax < neighbourXMax)
            {
                blockXMax = neighbourXMax;
            }
            if (blockXMin > neighbourXMin)
            {
                blockXMin = neighbourXMin;
            }

            // The block may have grown, so elements skipped earlier can now be in
            // reach: rescan the remaining elements from the start.
            i = 0;
        }

        if (textElementNeighbours.Count == 1)
        {
            textElementsCondensed.Add(elem);
            continue;
        }

        // Join neighbours: re-base every character's displacement onto the block's left edge.
        var chars = new List<PdfCharElement>();
        foreach (PdfTextElement member in textElementNeighbours)
        {
            double memberXMin = member.GetX();
            foreach (PdfCharElement source in member.Characters)
            {
                chars.Add(new PdfCharElement
                {
                    Char = source.Char,
                    Displacement = (source.Displacement + memberXMin) - blockXMin,
                    Width = source.Width,
                });
            }
        }

        // Left-to-right order; OrderBy is stable, so ties keep insertion order.
        chars = chars.OrderBy(ch => ch.Displacement).ToList();

        var sbText = new StringBuilder();
        foreach (PdfCharElement ordered in chars)
        {
            sbText.Append(ordered.Char);
        }
        string blockText = sbText.ToString();

        PdfTextElement blockElem = new PdfTextElement
        {
            // NOTE(review): the merged block gets no font although all neighbours
            // share elem.Font; confirm this is intentional.
            Font = null,
            FontSize = elem.FontSize,
            Matrix = elem.Matrix.Copy(),
            RawText = blockText,
            VisibleText = blockText,
            VisibleWidth = blockXMax - blockXMin,
            VisibleHeight = elem.VisibleHeight,
            Characters = chars,
            Childs = textElementNeighbours,
        };

        // Move the copied matrix to the block's left edge (presumably [0, 2] is the
        // X translation that GetX() reads; confirm against PdfTextElement).
        blockElem.Matrix.Matrix[0, 2] = blockXMin;
        textElementsCondensed.Add(blockElem);
    }

    _textElements = textElementsCondensed;
}