/// <summary>
/// Rebuilds <c>textElements</c> and <c>spatialIndex</c> from the letters of the page
/// returned by <c>document.GetPage(pageNumber)</c>.
/// <para>
/// While iterating, the method also updates <c>minCharWidth</c> and <c>minCharHeight</c>
/// (for non-whitespace letters only) and the running <c>countHeight</c> / <c>totalHeight</c>
/// accumulators (for every printable letter).
/// </para>
/// </summary>
public void Process()
{
    var page = document.GetPage(pageNumber);

    textElements = new List<TextElement>();
    spatialIndex = new RectangleSpatialIndex<TextElement>();

    foreach (var glyph in page.Letters)
    {
        string text = glyph.Value;

        // Letters that are not printable are skipped (the loop moves on to the next letter).
        if (!IsPrintable(text))
        {
            continue;
        }

        // A non-breaking space is handled as an ordinary space.
        if (text.Equals(NBSP))
        {
            text = " ";
        }

        double spaceWidth = GetExpectedWhitespaceSize(glyph);

        TextElement element = new TextElement(
            GetBbox(glyph),
            glyph.Font,
            glyph.PointSize,
            text,
            spaceWidth,
            glyph.GlyphRectangle.Rotation)
        {
            letter = glyph
        };

        // Only visible (non-whitespace) characters contribute to the minimum dimensions.
        if (!string.IsNullOrWhiteSpace(text))
        {
            this.minCharWidth = Math.Min(this.minCharWidth, element.Width);

            // Heights are clamped to at least 1 before being compared.
            this.minCharHeight = Math.Min(this.minCharHeight, Math.Max(element.Height, 1));
        }

        // Every printable letter feeds the running average, with the same clamp to 1.
        countHeight++;
        totalHeight += Math.Max(element.Height, 1);
        double avgHeight = totalHeight / countHeight;

        // A blank (null, empty or whitespace-only) element whose height reaches
        // AVG_HEIGHT_MULT_THRESHOLD times the running average is dropped.
        bool isOversizedBlank = avgHeight > 0
            && element.Height >= (avgHeight * AVG_HEIGHT_MULT_THRESHOLD)
            && string.IsNullOrEmpty(element.GetText()?.Trim());

        if (!isOversizedBlank)
        {
            textElements.Add(element);
            spatialIndex.Add(element);
        }
    }
}
/// <summary>
/// Registers every cell in <c>si</c>, groups the cells into rows with <c>RowsOfCells</c>,
/// and places each row's cells through <c>Add(cell, row, column)</c> in consecutive columns.
/// </summary>
/// <param name="cells">The cells to add. Nothing happens when the list is empty.</param>
private void AddCells(List<Cell> cells)
{
    if (cells.Count == 0)
    {
        return;
    }

    foreach (Cell candidate in cells)
    {
        si.Add(candidate);
    }

    List<List<Cell>> rows = RowsOfCells(cells);
    var bounds = si.GetBounds();

    for (int rowIndex = 0; rowIndex < rows.Count; rowIndex++)
    {
        List<Cell> row = rows[rowIndex];

        // First cell of the row; default(Cell) when the row has no cells.
        Cell first = row.Count > 0 ? row[0] : default(Cell);

        // NOTE(review): the original author (BobLd) flagged this rectangle as uncertain.
        // It is built from the left/bottom of the index bounds and the left/bottom of the
        // row's first cell - confirm against the PdfRectangle constructor's parameter order.
        var region = new PdfRectangle(bounds.Left, bounds.Bottom, first.Left, first.Bottom);
        List<List<Cell>> neighbourRows = RowsOfCells(si.Contains(region));

        // The row starts at the column equal to the largest cell count among those rows.
        int column = 0;
        foreach (List<Cell> neighbourRow in neighbourRows)
        {
            if (neighbourRow.Count > column)
            {
                column = neighbourRow.Count;
            }
        }

        Add(first, rowIndex, column);

        // The remaining cells of the row take the following columns, one each.
        for (int k = 1; k < row.Count; k++)
        {
            column++;
            Add(row[k], rowIndex, column);
        }
    }
}