Exemplo n.º 1
0
        /// <summary>
        /// Reads the letters of the configured page and rebuilds the list of text elements
        /// together with its spatial index.
        /// <para>Letters rejected by <c>IsPrintable</c> are skipped, a non-breaking space is replaced
        /// by a regular space, and blank elements whose height reaches the running average height
        /// multiplied by <c>AVG_HEIGHT_MULT_THRESHOLD</c> are left out.</para>
        /// </summary>
        public void Process()
        {
            var currentPage = document.GetPage(pageNumber);

            textElements = new List<TextElement>();
            spatialIndex = new RectangleSpatialIndex<TextElement>();

            foreach (var glyph in currentPage.Letters)
            {
                string text = glyph.Value;

                // Letters that are not printable are ignored; move on to the next one.
                if (!IsPrintable(text))
                {
                    continue;
                }

                // A non-breaking space is handled as a regular space.
                text = text.Equals(NBSP) ? " " : text;

                double expectedSpaceWidth = GetExpectedWhitespaceSize(glyph);

                var element = new TextElement(GetBbox(glyph), glyph.Font, glyph.PointSize, text, expectedSpaceWidth, glyph.GlyphRectangle.Rotation)
                {
                    letter = glyph
                };

                // Only non-blank characters take part in the minimum character dimensions.
                if (!string.IsNullOrWhiteSpace(text))
                {
                    this.minCharWidth = Math.Min(this.minCharWidth, element.Width);
                    // Heights are counted as at least 1.
                    this.minCharHeight = Math.Min(this.minCharHeight, Math.Max(element.Height, 1));
                }

                // Update the running average height; the current element is included
                // and its height is counted as at least 1.
                countHeight++;
                totalHeight += Math.Max(element.Height, 1);
                double avgHeight = totalHeight / countHeight;

                // Drop blank (null or whitespace-only) elements that are tall compared to the average.
                if (avgHeight > 0 && element.Height >= avgHeight * AVG_HEIGHT_MULT_THRESHOLD)
                {
                    var content = element.GetText();
                    if (content == null || content.Trim().Equals(""))
                    {
                        continue;
                    }
                }

                textElements.Add(element);
                spatialIndex.Add(element);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Adds the given cells to the spatial index <c>si</c> and places each of them in the grid
        /// through <c>Add(cell, row, column)</c>.
        /// <para>The cells are grouped into rows by <c>RowsOfCells</c>. For every row, the first column
        /// is the largest cell count among the rows formed by the indexed cells that <c>si.Contains</c>
        /// returns for a rectangle built from the index bounds (left, bottom) and the first cell of the
        /// row (left, bottom). The cells of the row then take consecutive columns from there.</para>
        /// </summary>
        /// <param name="cells">The cells to add. Nothing is done when the list is empty.</param>
        private void AddCells(List<Cell> cells)
        {
            if (cells.Count == 0)
            {
                return;
            }

            foreach (Cell ce in cells)
            {
                si.Add(ce);
            }

            List<List<Cell>> rowsOfCells = RowsOfCells(cells);
            var siBounds = si.GetBounds();

            for (int i = 0; i < rowsOfCells.Count; i++)
            {
                List<Cell> row = rowsOfCells[i];

                // A row without cells has nothing to place. Without this guard the first cell
                // would be a default value and reading its coordinates below would fail.
                if (row.Count == 0)
                {
                    continue;
                }

                Cell cell = row[0];

                // NOTE(review): the original author flagged this rectangle as uncertain ("careful here",
                // "really not sure here"). The reference implementation it was ported from used
                //   new TableRectangle(cell.getBottom(),                             // top
                //                      si.getBounds().getLeft(),                     // left
                //                      cell.getLeft() - si.getBounds().getLeft(),    // width
                //                      si.getBounds().getBottom() - cell.getBottom()) // height
                // Confirm that the PdfRectangle below covers the same region.
                List<List<Cell>> others = RowsOfCells(
                    si.Contains(
                        new PdfRectangle(siBounds.Left, siBounds.Bottom, cell.Left, cell.Bottom)));

                // The row starts after the widest row found in that region.
                int startColumn = 0;
                foreach (List<Cell> r in others)
                {
                    startColumn = Math.Max(startColumn, r.Count);
                }

                // Place the cells of the row in consecutive columns, in list order.
                foreach (Cell rowCell in row)
                {
                    this.Add(rowCell, i, startColumn++);
                }
            }
        }