/// <summary>
/// Splits each text element wherever the gap between two consecutive characters is wider
/// than the element's <c>MaxWidth()</c>, and replaces <c>_textElements</c> with the result.
/// </summary>
/// <remarks>
/// Elements that contain no such gap are carried over as the same instance. Elements that do
/// are replaced by the pieces returned from <c>SubPart</c>, in left-to-right character order.
/// The relative order of the input elements is preserved.
/// </remarks>
private void SplitTextElements()
{
    var splitElements = new List<PdfTextElement>(_textElements.Count);

    // Enumerate the list instead of repeatedly removing its head: removing the first item
    // of a List<T> shifts every remaining item, which made the old drain loop quadratic.
    // The field is replaced at the end, so there is no need to empty the list first.
    foreach (PdfTextElement elem in _textElements)
    {
        double maxWidth = elem.MaxWidth();
        int prevBreak = 0;

        for (int i = 1; i < elem.Characters.Count; i++)
        {
            // Gap between the end of the previous character and the start of this one.
            double prevCharEnd = elem.Characters[i - 1].Displacement + elem.Characters[i - 1].Width;
            double charSeparation = elem.Characters[i].Displacement - prevCharEnd;
            if (charSeparation > maxWidth)
            {
                // NOTE(review): SubPart(start, end) presumably covers characters [start, end) - confirm.
                splitElements.Add(elem.SubPart(prevBreak, i));
                prevBreak = i;
            }
        }

        if (prevBreak == 0)
        {
            // No oversized gap found: keep the original element untouched.
            splitElements.Add(elem);
            continue;
        }

        // Trailing piece after the last break.
        splitElements.Add(elem.SubPart(prevBreak, elem.Characters.Count));
    }

    _textElements = splitElements;
}
/// <summary>
/// Merges text elements that share <c>Font</c> and <c>FontSize</c>, whose <c>GetY()</c> values
/// differ by at most 0.001, and that lie horizontally within reach of each other, into single
/// block elements. Replaces <c>_textElements</c> with the result.
/// </summary>
/// <remarks>
/// A candidate is within reach when its horizontal extent intersects the current block extent
/// widened on both sides by the candidate's <c>MaxWidth()</c>. Every time a candidate is
/// absorbed the block extent may grow, so the scan restarts from the beginning of the pending
/// list. Elements that attract no neighbour are carried over as the same instance.
/// </remarks>
private void JoinTextElements()
{
    var joined = new List<PdfTextElement>();

    while (_textElements.Count > 0)
    {
        // The head of the pending list seeds a new block.
        PdfTextElement seed = _textElements[0];
        _textElements.RemoveAt(0);

        double rowY = seed.GetY();
        double left = seed.GetX();
        double right = left + seed.VisibleWidth;

        var members = new List<PdfTextElement> { seed };

        // Collect every pending element that belongs to the seed's block.
        int index = 0;
        while (index < _textElements.Count)
        {
            PdfTextElement candidate = _textElements[index];

            bool styleDiffers = candidate.Font != seed.Font || candidate.FontSize != seed.FontSize;
            if (styleDiffers || Math.Abs(candidate.GetY() - rowY) > 0.001)
            {
                index++;
                continue;
            }

            double reach = candidate.MaxWidth();
            double candidateLeft = candidate.GetX();
            double candidateRight = candidateLeft + candidate.VisibleWidth;

            // Interval-overlap test against the block extent padded by the candidate's reach.
            bool withinReach = right + reach >= candidateLeft && candidateRight >= left - reach;
            if (!withinReach)
            {
                index++;
                continue;
            }

            _textElements.Remove(candidate);
            members.Add(candidate);

            if (candidateRight > right)
            {
                right = candidateRight;
            }

            if (candidateLeft < left)
            {
                left = candidateLeft;
            }

            // The block grew, so elements skipped earlier may now be within reach.
            index = 0;
        }

        if (members.Count == 1)
        {
            joined.Add(seed);
            continue;
        }

        // Re-express every character's displacement relative to the block's left edge.
        var blockChars = new List<PdfCharElement>();
        foreach (PdfTextElement member in members)
        {
            double memberLeft = member.GetX();
            foreach (PdfCharElement source in member.Characters)
            {
                var moved = new PdfCharElement
                {
                    Char = source.Char,
                    Displacement = (source.Displacement + memberLeft) - left,
                    Width = source.Width,
                };
                blockChars.Add(moved);
            }
        }

        // Members were gathered in discovery order; sort characters left to right.
        blockChars = blockChars.OrderBy(ch => ch.Displacement).ToList();

        var textBuilder = new StringBuilder();
        foreach (PdfCharElement ch in blockChars)
        {
            textBuilder.Append(ch.Char);
        }

        string blockText = textBuilder.ToString();

        // NOTE(review): the merged block carries no Font even though all members share one -
        // confirm this is intended.
        PdfTextElement block = new PdfTextElement
        {
            Font = null,
            FontSize = seed.FontSize,
            Matrix = seed.Matrix.Copy(),
            RawText = blockText,
            VisibleText = blockText,
            VisibleWidth = right - left,
            VisibleHeight = seed.VisibleHeight,
            Characters = blockChars,
            Childs = members,
        };

        // Move the copied matrix to the block's left edge.
        block.Matrix.Matrix[0, 2] = left;
        joined.Add(block);
    }

    _textElements = joined;
}