/// <summary>
/// Converts the rows of a Word table into <c>OpenXmlTableRow</c> objects and appends
/// every non-empty one to <c>TableRows</c>. Afterwards the rows added by this call are
/// removed again if the table looks suspicious (see the check at the end of the method).
/// </summary>
/// <param name="docHolder">Document holder passed through to the width-info helper and to each created cell.</param>
/// <param name="table">The Word table whose <c>TableRow</c> descendants are processed.</param>
/// <param name="maxRowsToProcess">
/// Processing stops once <c>TableRows.Count</c> reaches this value; <c>-1</c> disables the limit.
/// Note that the comparison is made against the total row count, not only the rows of this table.
/// </param>
void ProcessWordTable(WordDocHolder docHolder, Table table, int maxRowsToProcess)
{
    var wordRows = table.Descendants<TableRow>().ToList();
    TableWidthInfo widthInfo = InitializeTableWidthInfo(docHolder, table);
    int firstNewRowIndex = TableRows.Count;
    int widestRow = 0;
    TableBorders tableBorders = GetTableBorders(table);
    int wordRowCount = wordRows.Count;

    for (int rowIndex = 0; rowIndex < wordRowCount; ++rowIndex)
    {
        var builtRow = new OpenXmlTableRow();
        int columnOffset = 0;
        var wordRow = wordRows[rowIndex];
        int rowGridBefore = GetRowGridBefore(wordRow);
        bool rowHasContent = false;
        var wordCells = wordRow.Elements<TableCell>().ToArray();

        for (var cellIndex = 0; cellIndex < wordCells.Length; ++cellIndex)
        {
            var cell = new OpenXmlWordCell(
                docHolder, wordCells, cellIndex, widthInfo, TableRows.Count, columnOffset, tableBorders);

            if (builtRow.RowCells.Count == 0)
            {
                // The first cell of the row additionally absorbs the row's "grid before" columns.
                cell.MergedColsCount += rowGridBefore;
                builtRow.RowCells.Add(cell);
            }
            else
            {
                var previous = builtRow.RowCells.Last();
                if (previous.HasRightBorder)
                {
                    builtRow.RowCells.Add(cell);
                }
                else
                {
                    // No right border on the previous cell: fold this cell into it
                    // instead of adding a separate cell.
                    previous.Text += cell.Text;
                    previous.CellWidth += cell.CellWidth;
                    previous.MergedColsCount += cell.MergedColsCount;
                    previous.HasRightBorder = cell.HasRightBorder;
                }
            }

            columnOffset += cell.MergedColsCount;
            rowHasContent = rowHasContent || !cell.IsEmpty;
        }

        if (!rowHasContent)
        {
            // Rows consisting only of empty cells are skipped entirely.
            continue;
        }

        widestRow = Math.Max(builtRow.RowCells.Count, widestRow);

        // Only the first row of this table may be merged into the last row collected so far.
        bool mergeIntoPreviousRow =
            rowIndex == 0
            && TableRows.Count > 0
            && BigramsHolder.CheckMergeRow(
                TableRows.Last().RowCells.ConvertAll(c => c.Text),
                builtRow.RowCells.ConvertAll(c => c.Text));

        if (mergeIntoPreviousRow)
        {
            MergeRow(TableRows.Last().RowCells, builtRow.RowCells);
        }
        else
        {
            TableRows.Add(builtRow);
        }

        bool limitReached = maxRowsToProcess != -1 && TableRows.Count >= maxRowsToProcess;
        if (limitReached)
        {
            break;
        }
    }

    if (TableRows.Count == 0)
    {
        return;
    }

    if (TableHeaderRecognizer.IsNamePositionAndIncomeTable(GetDataCells(0)))
    {
        return;
    }

    if (widestRow <= 4 || CheckNameColumnIsEmpty(firstNewRowIndex))
    {
        // remove this suspicious table
        TableRows.RemoveRange(firstNewRowIndex, TableRows.Count - firstNewRowIndex);
    }
}
/// <summary>
/// Converts the rows of an HTML table into lists of <c>HtmlAdapterCell</c> and appends
/// every non-empty one to <c>TableRows</c>. Cells spanning several columns are followed by
/// placeholder cells, and rows narrower than the widest row seen so far in this table are
/// padded with placeholder cells. Afterwards the rows added by this call are removed again
/// if the table looks suspicious (see the check at the end of the method).
/// </summary>
/// <param name="docHolder">Document holder passed through to each created cell.</param>
/// <param name="table">The HTML table element whose rows are processed.</param>
/// <param name="maxRowsToProcess">
/// Processing stops once <c>TableRows.Count</c> reaches this value; <c>-1</c> disables the limit.
/// Note that the comparison is made against the total row count, not only the rows of this table.
/// </param>
void ProcessHtmlTable(HtmlDocHolder docHolder, IElement table, int maxRowsToProcess)
{
    var rows = GetHtmlTableRows(table);
    int saveRowsCount = TableRows.Count;
    int maxCellsCount = 0;
    int maxSumSpan = 0;
    int rowCount = rows.Count();

    for (int r = 0; r < rowCount; ++r)
    {
        List<HtmlAdapterCell> newRow = new List<HtmlAdapterCell>();
        int sumspan = 0;
        var row = rows[r];
        bool isEmpty = true;

        foreach (var rowCell in GetHtmlTableCells(row))
        {
            var c = new HtmlAdapterCell(docHolder, rowCell, TableRows.Count, sumspan);
            newRow.Add(c);

            // A cell spanning several columns is followed by one placeholder cell
            // per additional column it covers.
            for (int k = 1; k < c.MergedColsCount; ++k)
            {
                newRow.Add(new HtmlAdapterCell(TableRows.Count, sumspan + k));
            }

            sumspan += c.MergedColsCount;
            isEmpty = isEmpty && c.IsEmpty;
        }

        if (isEmpty)
        {
            // Rows consisting only of empty cells are skipped entirely.
            continue;
        }

        // The cell count is measured before padding, so padding never inflates it.
        maxCellsCount = Math.Max(newRow.Count, maxCellsCount);
        maxSumSpan = Math.Max(sumspan, maxSumSpan);

        // see 7007_8.html in tests
        // Pad a short row up to the widest span seen so far. k already starts at sumspan,
        // so it is the column index of the padding cell itself; adding sumspan again would
        // leave a gap in the column numbering.
        for (int k = sumspan; k < maxSumSpan; ++k)
        {
            newRow.Add(new HtmlAdapterCell(TableRows.Count, k));
        }

        // Only the first row of this table may be merged into the last row collected so far.
        if (r == 0 && TableRows.Count > 0 &&
            BigramsHolder.CheckMergeRow(
                TableRows.Last().ConvertAll(x => x.Text),
                newRow.ConvertAll(x => x.Text)))
        {
            MergeRow(TableRows.Last(), newRow);
        }
        else
        {
            TableRows.Add(newRow);
        }

        if ((maxRowsToProcess != -1) && (TableRows.Count >= maxRowsToProcess))
        {
            break;
        }
    }

    if (saveRowsCount < TableRows.Count)
    {
        if (maxCellsCount <= 4)
        {
            // remove this suspicious table
            TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
        }
        else
        {
            InsertRowSpanCells(saveRowsCount, TableRows.Count);
            if (CheckNameColumnIsEmpty(saveRowsCount))
            {
                TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
            }
        }
    }
}