示例#1
0
        public static TableData ExtractTable(LineTable table, TablePattern pattern)
        {
            var tableInfo      = TableDataRowFinder.FindDataRowsAndColumns(table);
            var columnPatterns = AssignColumns(pattern, tableInfo);

            if (columnPatterns.Any(x => x.Key.IsRequired && x.Value == null))
            {
                return(null);
            }

            var columns = columnPatterns.Select(x => new TableDataColumn()
            {
                Name         = x.Key.Name,
                SourceBounds = x.Value?.Cells.Where(c => c.Col_Span == 1).Select(c => c.Box.Bounds).Where(b => b != new Bounds()).UnionBounds() ?? new Bounds(),
            }).ToList();

            var rows = tableInfo.Rows.Select(row =>
            {
                var rowOut = new TableDataRow()
                {
                    Values = columnPatterns.Select(col =>
                    {
                        var v = row.Values.Where(x => x.Cols.Contains(col.Value)).FirstOrDefault();
                        if (v == null)
                        {
                            return(null);
                        }

                        // Ensure value matches pattern
                        if (!col.Key.ValueMatchRegex.IsMatch(v.Cell.CellText))
                        {
                            return(null);
                        }

                        var column = columns.Where(c => c.Name == col.Key.Name).FirstOrDefault();

                        return(new TableDataValue()
                        {
                            Column = column,
                            Value = v.Cell.CellText,
                            MergeId = v.Cell.Col_Span > 1 || v.Cell.Row_Span > 1 ? "" + v.Cell.CellId : null as string,
                            SourceBounds = v.Cell.Box.Bounds,
                            FontHeight = v.Cell.Box.Texts.Average(x => x.FontHeight),
                        });
                    }).Where(x => x != null).ToList()
                };

                if (!columnPatterns.Keys.Where(x => x.IsRequired).All(x => rowOut.Values.Any(y => y.Column.Name == x.Name)))
                {
                    return(null);
                }

                return(rowOut);
            }).Where(x => x != null).ToList();

            // columns.ForEach(col => col.SourceBounds = rows.SelectMany(x => x.Values).Where(v => v.Column == col).Select(x => x.SourceBounds).UnionBounds());
            // columns.ForEach(col => col.SourceHeaderText =  rows.SelectMany(x => x.Values).Where(v => v.Column == col).Select(x => x.SourceBounds).UnionBounds());

            return(new TableData()
            {
                SourceBounds = table.Bounds,
                // SourceBounds = rows.SelectMany(x => x.Values.Select(v => v.SourceBounds)).UnionBounds(),
                SourceBounds_Cropped = rows.SelectMany(x => x.Values.Select(v => v.SourceBounds)).UnionBounds(),
                TableName = pattern.Name,
                Rows = rows,
                Columns = columns,
            });
        }
示例#2
0
        private static Dictionary <TablePatternColumn, TableDataRowFinder.ColInfo> AssignColumns(TablePattern pattern, TableDataRowFinder.TableInfo tableInfo)
        {
            var remainingCols = tableInfo.Cols.ToList();

            var colMatches = pattern.Columns.Select(c =>
            {
                var column = remainingCols.Where(x => c.ColumnMatchRegex.IsMatch(x.AllCellText) && c.ValueMatchRegex.IsMatch(x.AllCellText)).FirstOrDefault();
                remainingCols.Remove(column);
                return(new { cPattern = c, column });
            }).ToList();

            return(colMatches.ToDictionary(x => x.cPattern, x => x.column));
        }