/// <summary>
/// Creates a <see cref="PdfTextElement"/> from the text state accumulated so far
/// (<c>_sbText</c>, <c>_listCharacters</c>, the current font and matrices).
/// </summary>
/// <returns>
/// The new element. Its width and per-character metrics are multiplied by matrix
/// element [0,0]; its height is multiplied by matrix element [1,1].
/// </returns>
private PdfTextElement BuildTextElement()
{
    var element = new PdfTextElement
    {
        Font = _font,
        FontSize = _fontSize,
        Matrix = _textMatrixCurrent.Multiply(_graphicsMatrix),
        RawText = _sbText.ToString(),
    };
    element.VisibleText = PdfString_ToUnicode(element.RawText, _font);

    // Factors applied to horizontal and vertical metrics respectively.
    double scaleX = element.Matrix.Matrix[0, 0];
    double scaleY = element.Matrix.Matrix[1, 1];

    // The visible width runs up to the end of the last accumulated character.
    PdfCharElement last = _listCharacters[_listCharacters.Count - 1];
    element.VisibleWidth = (last.Displacement + last.Width) * scaleX;
    element.VisibleHeight = (_font.Height * _fontSize) * scaleY;

    // Copy the characters so the element does not share instances with the accumulator list.
    element.Characters = _listCharacters
        .Select(source => new PdfCharElement
        {
            Char = source.Char,
            Displacement = (source.Displacement * scaleX),
            Width = (source.Width * scaleX),
        })
        .ToList();

    element.Childs = new List<PdfTextElement>();
    return element;
}
/// <summary>
/// Looks up the element whose text matches <paramref name="field"/> and returns the visible
/// text of the closest element located to its right whose Y range overlaps it.
/// </summary>
/// <param name="field">Text of the field title to search for.</param>
/// <param name="fuzzy">Forwarded to <c>FindElementByText</c>.</param>
/// <returns>
/// The visible text of the matching neighbour, or <c>null</c> when the title is not found
/// or no element qualifies.
/// </returns>
public string GetFieldAsString(string field, bool fuzzy = true)
{
    PdfTextElement title = FindElementByText(field, fuzzy);
    if (title == null)
    {
        return null;
    }

    double titleX = title.GetX();

    // Keep elements overlapping the title's Y range that are not at or left of the title's X.
    List<PdfTextElement> candidates = _textElements
        .Where(candidate => TextElementHorizontalIntersection(title, candidate))
        .Where(candidate => !(candidate.GetX() <= titleX))
        .ToList();

    if (candidates.Count == 0)
    {
        return null;
    }

    // The smallest X among the candidates is the nearest one to the right.
    PdfTextElement nearest = candidates.OrderBy(candidate => candidate.GetX()).First();
    return nearest.VisibleText;
}
/// <summary>
/// Tells whether the X range of <paramref name="elem1"/> (from <c>GetX()</c> to
/// <c>GetX() + VisibleWidth</c>) overlaps the range
/// [<paramref name="elem2X1"/>, <paramref name="elem2X2"/>]. Touching ranges count as overlapping.
/// </summary>
private bool TextElementVerticalIntersection(PdfTextElement elem1, double elem2X1, double elem2X2)
{
    double left = elem1.GetX();
    double right = left + elem1.VisibleWidth;

    bool startsBeforeOtherEnds = left <= elem2X2;
    bool endsAfterOtherStarts = elem2X1 <= right;
    return endsAfterOtherStarts && startsBeforeOtherEnds;
}
/// <summary>
/// Splits every element of <c>_textElements</c> wherever the gap between two consecutive
/// characters exceeds the element's <c>MaxWidth()</c>, then replaces <c>_textElements</c>
/// with the resulting list. Elements without such a gap are kept unchanged.
/// </summary>
private void SplitTextElements()
{
    var textElementsSplitted = new List<PdfTextElement>();

    // The source list is replaced wholesale at the end, so it is simply enumerated
    // instead of being drained from the front (which shifts the list on every removal).
    foreach (PdfTextElement elem in _textElements)
    {
        double maxWidth = elem.MaxWidth();
        int prevBreak = 0;
        for (int i = 1; i < elem.Characters.Count; i++)
        {
            double prevCharEnd = elem.Characters[i - 1].Displacement + elem.Characters[i - 1].Width;
            double charSeparation = elem.Characters[i].Displacement - prevCharEnd;
            if (charSeparation > maxWidth)
            {
                // Gap too wide: close the part that ends just before character i.
                textElementsSplitted.Add(elem.SubPart(prevBreak, i));
                prevBreak = i;
            }
        }

        if (prevBreak == 0)
        {
            // No break found (breaks always happen at i >= 1): keep the element as is.
            textElementsSplitted.Add(elem);
            continue;
        }

        // Remaining characters after the last break.
        textElementsSplitted.Add(elem.SubPart(prevBreak, elem.Characters.Count));
    }

    _textElements = textElementsSplitted;
}
/// <summary>
/// Adds <paramref name="textElement"/> to <c>_textElements</c> unless its visible text
/// is null, empty or made only of white space.
/// </summary>
/// <param name="textElement">Element to add.</param>
private void AddTextElement(PdfTextElement textElement)
{
    // IsNullOrWhiteSpace covers blank text and also a null VisibleText,
    // which calling Trim() on the value would not tolerate.
    if (string.IsNullOrWhiteSpace(textElement.VisibleText))
    {
        return;
    }

    _textElements.Add(textElement);
}
/// <summary>
/// Initializes a column from its head element, its member elements and its bounds.
/// </summary>
/// <param name="head">Stored in <c>HeadTextElement</c>.</param>
/// <param name="elements">Stored in <c>Elements</c>.</param>
/// <param name="y">Stored in <c>Y</c>.</param>
/// <param name="x1">Stored in <c>X1</c>.</param>
/// <param name="x2">Stored in <c>X2</c>.</param>
public PdfTextElementColumn(PdfTextElement head, IEnumerable<PdfTextElement> elements, double y, double x1, double x2)
{
    this.HeadTextElement = head;
    this.Elements = elements;

    this.Y = y;
    this.X1 = x1;
    this.X2 = x2;
}
/// <summary>
/// Tells whether the Y ranges of two elements overlap. Each range goes from <c>GetY()</c>
/// to <c>GetY() + VisibleHeight</c>; touching ranges count as overlapping.
/// </summary>
private bool TextElementHorizontalIntersection(PdfTextElement elem1, PdfTextElement elem2)
{
    double firstStart = elem1.GetY();
    double firstEnd = firstStart + elem1.VisibleHeight;

    double secondStart = elem2.GetY();
    double secondEnd = secondStart + elem2.VisibleHeight;

    bool firstReachesSecond = secondStart <= firstEnd;
    bool secondReachesFirst = firstStart <= secondEnd;
    return firstReachesSecond && secondReachesFirst;
}
/// <summary>
/// Draws a text element on <paramref name="gc"/>: an optional rounded box around the
/// element, each character as text, and an optional rounded box around each character.
/// </summary>
/// <param name="textElement">Element to draw; nothing is drawn when null.</param>
/// <param name="gc">Target graphics surface.</param>
/// <param name="penTextElem">Pen for the element box, or null to skip it.</param>
/// <param name="penCharElem">Pen for the per-character boxes, or null to skip them.</param>
/// <param name="scale">Factor applied to every coordinate and size before drawing.</param>
/// <param name="pageHeight">Value the element Y is subtracted from to obtain the drawing Y.</param>
/// <param name="pageXMin">Offset subtracted from the element X.</param>
/// <param name="pageYMin">Offset subtracted from the element Y.</param>
/// <param name="brushText">Brush used to draw the characters.</param>
private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, Pen penCharElem, int scale, int pageHeight, double pageXMin, double pageYMin, Brush brushText)
{
    if (textElement == null)
    {
        return;
    }

    double textElementHeight = textElement.VisibleHeight;
    if (textElementHeight < 0.0001)
    {
        return;
    }

    double textElementX = textElement.GetX() - pageXMin;
    double textElementY = textElement.GetY() - pageYMin;
    double textElementWidth = textElement.VisibleWidth;

    double textElementPageX = textElementX;
    double textElementPageY = pageHeight - textElementY;

    if (penTextElem != null)
    {
        DrawRoundedRectangle(gc, penTextElem,
            (int)(textElementPageX * scale),
            (int)(textElementPageY * scale),
            (int)(textElementWidth * scale),
            (int)(textElementHeight * scale),
            5);
    }

    // The font size is truncated to whole pixels. A small element (or a small scale) can
    // truncate to 0, and the Font constructor rejects sizes <= 0, so in that case the
    // glyphs are skipped while the boxes are still drawn.
    int fontPixelSize = (int)(textElementHeight * scale);
    using (Font font = (fontPixelSize >= 1) ? new Font("Arial", fontPixelSize, GraphicsUnit.Pixel) : null)
    {
        foreach (PdfCharElement c in textElement.Characters)
        {
            int charX = (int)((textElementPageX + c.Displacement) * scale);
            int charY = (int)(textElementPageY * scale);

            if (font != null)
            {
                gc.DrawString(c.Char, font, brushText, charX, charY);
            }

            if (penCharElem != null)
            {
                DrawRoundedRectangle(gc, penCharElem,
                    charX,
                    charY,
                    (int)(c.Width * scale),
                    (int)(textElementHeight * scale),
                    5);
            }
        }
    }
}
/// <summary>
/// Finds the element whose text matches <paramref name="column"/> and builds the column
/// below it, limited horizontally by the closest elements that share the head's Y range.
/// </summary>
/// <param name="column">Text of the column head to search for.</param>
/// <param name="fuzzy">Forwarded to <c>FindElementByText</c>.</param>
/// <returns>
/// The column data, or <c>PdfTextElementColumn.Empty</c> when the head is not found.
/// </returns>
public PdfTextElementColumn GetColumn(string column, bool fuzzy = true)
{
    PdfTextElement head = FindElementByText(column, fuzzy);
    if (head == null)
    {
        return PdfTextElementColumn.Empty;
    }

    double headY = head.GetY() - head.VisibleHeight;
    double headLeft = head.GetX();
    double headRight = headLeft + head.VisibleWidth;

    // Horizontal extent: the nearest right edge on the left side of the head and the
    // nearest left edge on its right side, among elements overlapping the head's Y range.
    double leftLimit = double.MinValue;
    double rightLimit = double.MaxValue;
    foreach (PdfTextElement other in _textElements)
    {
        if (other == head)
        {
            continue;
        }
        if (TextElementHorizontalIntersection(head, other) == false)
        {
            continue;
        }

        double otherLeft = other.GetX();
        double otherRight = otherLeft + other.VisibleWidth;

        if (otherRight < headLeft && otherRight > leftLimit)
        {
            leftLimit = otherRight;
        }
        if (otherLeft > headRight && otherLeft < rightLimit)
        {
            rightLimit = otherLeft;
        }
    }

    return GetColumn(head, headY, headLeft, headRight, leftLimit, rightLimit);
}
/// <summary>
/// Emits the text accumulated in <c>_sbText</c> as a text element, advances the current
/// text matrix by the accumulated width and resets the accumulators. When no text is
/// pending, only <c>_textWidth</c> is reset.
/// </summary>
private void FlushTextElement()
{
    if (_sbText.Length > 0)
    {
        AddTextElement(BuildTextElement());

        // Move the text matrix X translation past the text just emitted.
        _textMatrixCurrent.Matrix[0, 2] += _textWidth;

        _sbText = new StringBuilder();
        _listCharacters.Clear();
    }

    _textWidth = 0;
}
/// <summary>
/// Creates a new element holding the characters in the range
/// [<paramref name="startIndex"/>, <paramref name="endIndex"/>) of this element.
/// </summary>
/// <param name="startIndex">Index of the first character to include.</param>
/// <param name="endIndex">Index one past the last character to include.</param>
/// <returns>
/// A new element whose matrix X translation is advanced by the displacement of the first
/// included character, with character displacements rebased to start at zero. The font is
/// not carried over (<c>Font</c> is null) and the child list is copied.
/// </returns>
public PdfTextElement SubPart(int startIndex, int endIndex)
{
    int length = endIndex - startIndex;

    PdfTextElement part = new PdfTextElement
    {
        Font = null,
        FontSize = FontSize,
        Matrix = Matrix.Copy(),
        RawText = RawText.Substring(startIndex, length),
        VisibleText = VisibleText.Substring(startIndex, length),
        VisibleWidth = 0,
        VisibleHeight = VisibleHeight,
        Characters = new List<PdfCharElement>(),
        Childs = new List<PdfTextElement>(),
    };

    // Shift the new element to where its first character sits inside this element.
    double origin = Characters[startIndex].Displacement;
    part.Matrix.Matrix[0, 2] += origin;

    int position = startIndex;
    while (position < endIndex)
    {
        PdfCharElement source = Characters[position];
        part.Characters.Add(new PdfCharElement
        {
            Char = source.Char,
            Displacement = source.Displacement - origin,
            Width = source.Width,
        });
        position++;
    }

    // The width spans up to the end of the last copied character.
    PdfCharElement tail = part.Characters[part.Characters.Count - 1];
    part.VisibleWidth = tail.Displacement + tail.Width;

    foreach (PdfTextElement child in Childs)
    {
        part.Childs.Add(child);
    }

    return part;
}
/// <summary>
/// Collects the elements located below a column head and inside the column extents.
/// Elements overflowing an extent are trimmed at a character gap when one is found.
/// </summary>
/// <param name="columnHead">Head element, stored in the resulting column.</param>
/// <param name="headY">Only elements whose Y is lower than this value are considered.</param>
/// <param name="headX1">Left X of the head, used to select elements overlapping it in X.</param>
/// <param name="headX2">Right X of the head, used to select elements overlapping it in X.</param>
/// <param name="extentX1">Left limit of the column.</param>
/// <param name="extentX2">Right limit of the column.</param>
/// <returns>The column with the selected (and possibly trimmed) elements, ordered by descending Y.</returns>
public PdfTextElementColumn GetColumn(PdfTextElement columnHead, double headY, double headX1, double headX2, double extentX1, double extentX2)
{
    // Get all the elements that intersect the head's X range, are below it, and sort the results
    var columnDataRaw = new List<PdfTextElement>();
    foreach (PdfTextElement elem in _textElements)
    {
        if (TextElementVerticalIntersection(elem, headX1, headX2) == false)
        {
            continue;
        }

        // Only items down the column
        double elemY = elem.GetY();
        if (elemY >= headY)
        {
            continue;
        }

        columnDataRaw.Add(elem);
    }
    columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList();

    // Only items completely inside the extents; try splitting big elements and
    // stop at big elements that can't be split
    var columnElements = new List<PdfTextElement>();
    foreach (PdfTextElement elem in columnDataRaw)
    {
        double elemX1 = elem.GetX();
        double elemX2 = elemX1 + elem.VisibleWidth;

        // Add elements completely inside
        if (elemX1 > extentX1 && elemX2 < extentX2)
        {
            columnElements.Add(elem);
            continue;
        }

        // Try to split elements intersecting the extents of the column.
        // A gap counts as a split point when it is at least a tenth of the average character width.
        double maxSpacing = elem.Characters.Average(c => c.Width) / 10;
        int indexStart = 0;
        int indexEnd = elem.Characters.Count - 1;
        bool indexStartValid = true;
        bool indexEndValid = true;

        if (elemX1 < extentX1)
        {
            // Search best start: first character at or past the left extent...
            int index = 0;
            double characterPosition = elemX1 + elem.Characters[index].Displacement;
            while (characterPosition < extentX1 && index < (elem.Characters.Count - 1))
            {
                index++;
                characterPosition = elemX1 + elem.Characters[index].Displacement;
            }

            // ...then move forward until a wide enough gap is found.
            double spacing = elem.GetCharacterPreviousSpacing(index);
            while (spacing < maxSpacing && index < (elem.Characters.Count - 1))
            {
                index++;
                spacing = elem.GetCharacterPreviousSpacing(index);
            }
            if (spacing < maxSpacing)
            {
                indexStartValid = false;
            }
            indexStart = index;
        }

        if (elemX2 > extentX2)
        {
            // Search best end: last character ending at or before the right extent...
            int index = elem.Characters.Count - 1;
            double characterPosition = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
            while (characterPosition > extentX2 && index > 0)
            {
                index--;
                characterPosition = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
            }

            // ...then move backward until a wide enough gap is found.
            // NOTE(review): this side uses GetCharacterPrecedingSpacing while the start side uses
            // GetCharacterPreviousSpacing; both are defined elsewhere - confirm which gap each measures.
            double spacing = elem.GetCharacterPrecedingSpacing(index);
            while (spacing < maxSpacing && index > 0)
            {
                index--;
                spacing = elem.GetCharacterPrecedingSpacing(index);
            }
            if (spacing < maxSpacing)
            {
                indexEndValid = false;
            }
            indexEnd = index;
        }

        // Break when there is no good split, spanning all the extent
        if (indexStartValid == false && indexEndValid == false)
        {
            break;
        }

        // Continue when only one of the sides is invalid (outside elements intersecting the extents of the column)
        if (indexStartValid == false || indexEndValid == false)
        {
            continue;
        }

        // When the element overflows both extents the two trims can cross, leaving no
        // character inside the column. SubPart cannot build an element from an empty
        // range, so such elements are skipped.
        if (indexStart > indexEnd)
        {
            continue;
        }

        // Add split element
        columnElements.Add(elem.SubPart(indexStart, indexEnd + 1));
    }

    var columnData = new PdfTextElementColumn(columnHead, columnElements, headY, extentX1, extentX2);
    return columnData;
}
/// <summary>
/// Merges elements of <c>_textElements</c> that have the same font and font size, the same Y
/// (within 0.001) and X ranges closer than the candidate's <c>MaxWidth()</c>, then replaces
/// <c>_textElements</c> with the merged list. Elements without neighbours are kept as they are.
/// </summary>
private void JoinTextElements()
{
    var condensed = new List<PdfTextElement>();

    while (_textElements.Count > 0)
    {
        // The first pending element seeds a new block.
        PdfTextElement seed = _textElements[0];
        _textElements.Remove(seed);

        double blockY = seed.GetY();
        double blockLeft = seed.GetX();
        double blockRight = blockLeft + seed.VisibleWidth;

        var members = new List<PdfTextElement> { seed };

        // Grow the block. Each merge widens the block bounds, so the scan restarts
        // from the beginning to re-test elements that were previously too far away.
        int scan = 0;
        while (scan < _textElements.Count)
        {
            PdfTextElement candidate = _textElements[scan];

            bool differentStyle = candidate.Font != seed.Font || candidate.FontSize != seed.FontSize;
            if (differentStyle || Math.Abs(candidate.GetY() - blockY) > 0.001)
            {
                scan++;
                continue;
            }

            double tolerance = candidate.MaxWidth();
            double candidateLeft = candidate.GetX();
            double candidateRight = candidateLeft + candidate.VisibleWidth;

            bool closeEnough =
                (blockRight + tolerance) >= candidateLeft &&
                candidateRight >= (blockLeft - tolerance);
            if (closeEnough == false)
            {
                scan++;
                continue;
            }

            _textElements.Remove(candidate);
            members.Add(candidate);
            if (blockRight < candidateRight)
            {
                blockRight = candidateRight;
            }
            if (blockLeft > candidateLeft)
            {
                blockLeft = candidateLeft;
            }
            scan = 0;
        }

        if (members.Count == 1)
        {
            condensed.Add(seed);
            continue;
        }

        // Join the members: rebase every character to the block's left edge and sort by position.
        var chars = new List<PdfCharElement>();
        foreach (PdfTextElement member in members)
        {
            double memberLeft = member.GetX();
            chars.AddRange(member.Characters.Select(c => new PdfCharElement
            {
                Char = c.Char,
                Displacement = (c.Displacement + memberLeft) - blockLeft,
                Width = c.Width,
            }));
        }
        chars = chars.OrderBy(c => c.Displacement).ToList();

        var textBuilder = new StringBuilder();
        foreach (PdfCharElement c in chars)
        {
            textBuilder.Append(c.Char);
        }
        string joinedText = textBuilder.ToString();

        PdfTextElement block = new PdfTextElement
        {
            Font = null,
            FontSize = seed.FontSize,
            Matrix = seed.Matrix.Copy(),
            RawText = joinedText,
            VisibleText = joinedText,
            VisibleWidth = blockRight - blockLeft,
            VisibleHeight = seed.VisibleHeight,
            Characters = chars,
            Childs = members,
        };

        // Place the block at the left-most X of its members.
        block.Matrix.Matrix[0, 2] = blockLeft;
        condensed.Add(block);
    }

    _textElements = condensed;
}