/// <summary>
/// Process the page: wrap every printable letter of the page in a <see cref="TextElement"/>,
/// collect the elements, and track the smallest character width and height encountered.
/// </summary>
/// <remarks>
/// <c>textElements</c> and <c>spatialIndex</c> are replaced with fresh instances before the
/// letters are read. Blank elements that are unusually tall compared with the running
/// average height are left out of both collections.
/// </remarks>
public void Process()
{
    var currentPage = document.GetPage(pageNumber);

    textElements = new List<TextElement>();
    spatialIndex = new RectangleSpatialIndex<TextElement>();

    foreach (var glyph in currentPage.Letters)
    {
        string value = glyph.Value;

        // Letters that are not printable are skipped; processing moves on to the next letter.
        if (!IsPrintable(value))
        {
            continue;
        }

        // A non-breaking space is treated as a regular space.
        if (value.Equals(NBSP))
        {
            value = " ";
        }

        double whitespaceSize = GetExpectedWhitespaceSize(glyph);
        var element = new TextElement(
            GetBbox(glyph),
            glyph.Font,
            glyph.PointSize,
            value,
            whitespaceSize,
            glyph.GlyphRectangle.Rotation)
        {
            letter = glyph
        };

        // Heights are clamped to a minimum of 1 before they feed any statistic.
        var clampedHeight = Math.Max(element.Height, 1);

        // Only non-blank characters contribute to the minimum width / height.
        if (!string.IsNullOrWhiteSpace(value))
        {
            minCharWidth = Math.Min(minCharWidth, element.Width);
            minCharHeight = Math.Min(minCharHeight, clampedHeight);
        }

        // The running average is updated for every printable letter, including ones
        // that end up being skipped below.
        countHeight++;
        totalHeight += clampedHeight;
        double averageHeight = totalHeight / countHeight;

        // Skip elements whose text is null or blank when their height reaches
        // AVG_HEIGHT_MULT_THRESHOLD times the running average height.
        if (averageHeight > 0 && element.Height >= averageHeight * AVG_HEIGHT_MULT_THRESHOLD)
        {
            var text = element.GetText();
            if (text == null || text.Trim().Equals(""))
            {
                continue;
            }
        }

        textElements.Add(element);
        spatialIndex.Add(element);
    }
}
/// <summary>
/// Create a new page area from already-extracted page content.
/// </summary>
/// <param name="area">Rectangle describing the area; forwarded to the base constructor.</param>
/// <param name="rotation">Rotation value stored in <see cref="Rotation"/>.</param>
/// <param name="pageNumber">Page number stored in <see cref="PageNumber"/>.</param>
/// <param name="pdPage">Page object stored in <see cref="PdfPage"/>.</param>
/// <param name="doc">Document stored in <see cref="PdfDocument"/>.</param>
/// <param name="characters">Text elements kept as this area's texts. The list is stored as-is, not copied.</param>
/// <param name="rulings">Rulings kept for this area. The list is stored as-is, not copied.</param>
/// <param name="minCharWidth">Value stored in <see cref="MinCharWidth"/>.</param>
/// <param name="minCharHeight">Value stored in <see cref="MinCharHeight"/>.</param>
/// <param name="index">Spatial index of text elements kept for this area.</param>
public PageArea(
    PdfRectangle area,
    int rotation,
    int pageNumber,
    Page pdPage,
    PdfDocument doc,
    List<TextElement> characters,
    List<Ruling> rulings,
    double minCharWidth,
    double minCharHeight,
    RectangleSpatialIndex<TextElement> index)
    : base(area)
{
    // Page identity.
    Rotation = rotation;
    PageNumber = pageNumber;
    PdfPage = pdPage;
    PdfDocument = doc;

    // Page content.
    texts = characters;
    // Qualified with 'this' because the parameter has the same name as the member.
    this.rulings = rulings;

    // Character metrics and spatial lookup.
    MinCharHeight = minCharHeight;
    MinCharWidth = minCharWidth;
    spatial_index = index;
}