示例#1
0
        /// <summary>
        /// Joins the vertical lines and the horizontal lines and merges both results.
        /// </summary>
        /// <remarks>
        /// As a side effect the joined vertical lines are stored in
        /// <c>JoinedVerticalLines</c> and the joined horizontal lines in
        /// <c>JoinedHorizontalLines</c>.
        /// </remarks>
        /// <param name="allLines">All the lines.</param>
        /// <returns>
        /// The horizontal lines followed by the vertical lines (eventually joined),
        /// without duplicates.
        /// </returns>
        private List<Line> JoinLines(List<Line> allLines)
        {
            // Vertical lines are processed before the horizontal ones.
            JoinedVerticalLines = JoinVerticalLines(allLines);
            JoinedHorizontalLines = JoinHorizontalLines(allLines);

            // Union keeps the horizontal lines first and drops duplicated entries.
            var merged = JoinedHorizontalLines.Union(JoinedVerticalLines);

            return merged.ToList();
        }
示例#2
0
        /// <summary>
        /// Determines the table structures from the joined lines.
        /// </summary>
        /// <remarks>
        /// Works in five passes over the joined lines: detect the table borders,
        /// seed every table with its first row and first column, collect the
        /// remaining rows, collect the remaining columns and finally close every
        /// row and column, number them and create the table content.
        /// </remarks>
        public void DetermineTableStructures()
        {
            JoinedLines = JoinLines(AllLines);

            // Pass 1: table borders.
            // A horizontal line opens a new table when
            //   1. no table found so far contains the Y of the line, and
            //   2. a vertical line starts at one of the two end points of the line.
            foreach (Line borderCandidate in JoinedHorizontalLines.OrderBy(line => line.StartPoint.Y))
            {
                bool coveredByTable = Tables.Any(table => table.Contains(borderCandidate.StartPoint.Y));
                if (coveredByTable)
                {
                    continue;
                }

                // Among the vertical lines leaving either end of the candidate,
                // take the one with the greatest EndPoint.Y - StartPoint.Y.
                Line? longestSide = JoinedVerticalLines
                                    .Where(line => line.StartPoint == borderCandidate.StartPoint || line.StartPoint == borderCandidate.EndPoint)
                                    .OrderByDescending(line => line.EndPoint.Y - line.StartPoint.Y)
                                    .Cast<Line?>()
                                    .FirstOrDefault();

                if (!longestSide.HasValue)
                {
                    continue;
                }

                // The table spans from the start of the horizontal line to the
                // X of its end and the Y where the vertical line ends.
                Tables.Add(new Table()
                {
                    TopLeftPoint     = borderCandidate.StartPoint,
                    BottomRightPoint = new Point(borderCandidate.EndPoint.X, longestSide.Value.EndPoint.Y)
                });
            }

            // Pass 2: every table gets a first row and a first column
            // beginning at its top left point.
            foreach (Table seededTable in Tables)
            {
                var firstRow = new Row()
                {
                    BeginY = seededTable.TopLeftPoint.Y
                };
                seededTable.Rows.Add(firstRow);

                var firstColumn = new Column()
                {
                    BeginX = seededTable.TopLeftPoint.X
                };
                seededTable.Columns.Add(firstColumn);
            }

            // Pass 3: rows, one for each distinct horizontal line inside a table.
            foreach (Line rowLine in JoinedHorizontalLines.OrderBy(line => line.StartPoint.Y))
            {
                var owningTable = Tables.FirstOrDefault(table => table.Contains(rowLine));
                if (owningTable == null)
                {
                    // No table contains this line
                    continue;
                }

                var lineY = rowLine.StartPoint.Y;

                // Skip the line when a row already begins (within the tolerance) at
                // this Y, or when the line is the edge at BottomRightPoint.Y.
                bool rowAlreadyKnown = owningTable.Rows.Any(existing => Math.Abs(existing.BeginY - lineY) < ContentExtractor.Tolerance);
                if (rowAlreadyKnown || owningTable.BottomRightPoint.Y - lineY < ContentExtractor.Tolerance)
                {
                    continue;
                }

                owningTable.Rows.Add(new Row()
                {
                    BeginY = lineY
                });
            }

            // Pass 4: columns, one for each distinct vertical line inside a table.
            foreach (Line columnLine in JoinedVerticalLines.OrderBy(line => line.StartPoint.X))
            {
                var owningTable = Tables.FirstOrDefault(table => table.Contains(columnLine));
                if (owningTable == null)
                {
                    // No table contains this line
                    continue;
                }

                var lineX = columnLine.StartPoint.X;

                // Skip the line when a column already begins (within the tolerance) at
                // this X, or when the line is the edge at BottomRightPoint.X.
                bool columnAlreadyKnown = owningTable.Columns.Any(existing => Math.Abs(existing.BeginX - lineX) < ContentExtractor.Tolerance);
                if (columnAlreadyKnown || owningTable.BottomRightPoint.X - lineX < ContentExtractor.Tolerance)
                {
                    continue;
                }

                owningTable.Columns.Add(new Column()
                {
                    BeginX = lineX
                });
            }

            // Pass 5: close rows and columns, assign the indexes, create the content.
            foreach (Table finishedTable in Tables)
            {
                // A row ends a tenth of the tolerance before the next row begins;
                // the last row ends at the BottomRightPoint.Y of the table.
                var rows = finishedTable.Rows;
                for (int next = 1; next < rows.Count; next++)
                {
                    rows[next - 1].EndY = rows[next].BeginY - ContentExtractor.Tolerance * 0.1f;
                }

                rows[rows.Count - 1].EndY = finishedTable.BottomRightPoint.Y;

                // Same rule for the columns, along the X axis.
                var columns = finishedTable.Columns;
                for (int next = 1; next < columns.Count; next++)
                {
                    columns[next - 1].EndX = columns[next].BeginX - ContentExtractor.Tolerance * 0.1f;
                }

                columns[columns.Count - 1].EndX = finishedTable.BottomRightPoint.X;

                // Columns are numbered by ascending BeginX.
                int columnIndex = 0;
                foreach (var orderedColumn in columns.OrderBy(c => c.BeginX))
                {
                    orderedColumn.Index = columnIndex++;
                }

                // Rows are numbered by descending BeginY.
                int rowIndex = 0;
                foreach (var orderedRow in rows.OrderByDescending(r => r.BeginY))
                {
                    orderedRow.Index = rowIndex++;
                }

                finishedTable.CreateContent();
            }
        }