/// <summary>
/// Reads the rows of a Word table and appends the non-empty ones to <c>TableRows</c>.
/// If the table turns out to be "suspicious" (no row with more than four cells, or
/// <c>CheckNameColumnIsEmpty</c> reports true for the appended range), every row
/// appended by this call is removed again.
/// </summary>
/// <param name="docHolder">Document holder passed through to the width-info initializer and to each cell.</param>
/// <param name="table">The Word table whose <c>TableRow</c> descendants are processed.</param>
/// <param name="maxRowsToProcess">
/// Stop once <c>TableRows.Count</c> (the total, not only rows of this table) reaches this value;
/// -1 disables the limit.
/// </param>
void ProcessWordTable(WordDocHolder docHolder, Table table, int maxRowsToProcess)
{
    List<TableRow> wordRows = table.Descendants<TableRow>().ToList();
    TableWidthInfo widthInfo = InitializeTableWidthInfo(docHolder, table);

    // Remember where this table starts so that its rows can be rolled back later.
    int firstRowOfThisTable = TableRows.Count;
    int widestRowCellCount = 0;

    for (int rowIndex = 0; rowIndex < wordRows.Count; rowIndex++)
    {
        TableRow wordRow = wordRows[rowIndex];
        int gridBefore = GetRowGridBefore(wordRow);

        var cells = new List<OpenXmlWordCell>();
        int columnOffset = 0;
        bool hasContent = false;

        foreach (var wordCell in wordRow.Elements<TableCell>())
        {
            var cell = new OpenXmlWordCell(docHolder, widthInfo, wordCell, TableRows.Count, columnOffset);

            // The row's "grid before" value is added to the span of the first cell.
            if (cells.Count == 0)
            {
                cell.MergedColsCount += gridBefore;
            }

            cells.Add(cell);
            columnOffset += cell.MergedColsCount;
            hasContent = hasContent || !cell.IsEmpty;
        }

        // Rows in which every cell is empty are skipped entirely.
        if (!hasContent)
        {
            continue;
        }

        widestRowCellCount = Math.Max(cells.Count, widestRowCellCount);

        // Only the very first row of a table may be glued onto the last row collected so far,
        // and only when BigramsHolder.CheckMergeRow accepts the two rows' cell texts.
        bool mergeIntoPreviousRow =
            rowIndex == 0
            && TableRows.Count > 0
            && BigramsHolder.CheckMergeRow(
                TableRows.Last().ConvertAll(x => x.Text),
                cells.ConvertAll(x => x.Text));

        if (mergeIntoPreviousRow)
        {
            MergeRow(TableRows.Last(), cells);
        }
        else
        {
            TableRows.Add(cells);
        }

        if (maxRowsToProcess != -1 && TableRows.Count >= maxRowsToProcess)
        {
            break;
        }
    }

    // Keep the table only if it has a row with more than four cells and its name column is not empty.
    if (widestRowCellCount > 4 && !CheckNameColumnIsEmpty(TableRows, firstRowOfThisTable))
    {
        return;
    }

    //remove this suspicious table
    TableRows.RemoveRange(firstRowOfThisTable, TableRows.Count - firstRowOfThisTable);
}
/// <summary>
/// Reads the rows of an HTML table and appends the non-empty ones to <c>TableRows</c>.
/// Each cell spanning several columns is followed by placeholder cells, and rows narrower
/// than the widest row seen so far in this table are padded with placeholder cells.
/// If no row has more than four cells, or <c>CheckNameColumnIsEmpty</c> reports true after
/// <c>InsertRowSpanCells</c> has run, every row appended by this call is removed again.
/// </summary>
/// <param name="docHolder">Document holder passed through to each constructed cell.</param>
/// <param name="table">The HTML table element to process.</param>
/// <param name="maxRowsToProcess">
/// Stop once <c>TableRows.Count</c> (the total, not only rows of this table) reaches this value;
/// -1 disables the limit.
/// </param>
void ProcessHtmlTable(HtmlDocHolder docHolder, IElement table, int maxRowsToProcess)
{
    var rows = GetHtmlTableRows(table);
    int rowsCount = rows.Count();

    // Remember where this table starts so that its rows can be rolled back later.
    int saveRowsCount = TableRows.Count;
    int maxCellsCount = 0;
    int maxSumSpan = 0;

    for (int r = 0; r < rowsCount; ++r)
    {
        List<HtmlAdapterCell> newRow = new List<HtmlAdapterCell>();
        int sumspan = 0;
        var row = rows[r];
        bool isEmpty = true;

        foreach (var rowCell in GetHtmlTableCells(row))
        {
            var c = new HtmlAdapterCell(docHolder, rowCell, TableRows.Count, sumspan);
            newRow.Add(c);

            // One placeholder cell for every additional column covered by this cell.
            for (int k = 1; k < c.MergedColsCount; ++k)
            {
                newRow.Add(new HtmlAdapterCell(TableRows.Count, sumspan + k));
            }

            sumspan += c.MergedColsCount;
            isEmpty = isEmpty && c.IsEmpty;
        }

        // Rows in which every cell is empty are skipped entirely.
        if (isEmpty)
        {
            continue;
        }

        // Note: the cell count is taken before the row is padded below.
        maxCellsCount = Math.Max(newRow.Count, maxCellsCount);
        maxSumSpan = Math.Max(sumspan, maxSumSpan);

        // see 7007_8.html in tests
        // Pad a short row up to the widest span seen so far. k is already the absolute
        // column index (it starts at sumspan), so it is passed as the column directly;
        // the previous "sumspan + k" started the padding at column 2 * sumspan.
        for (int k = sumspan; k < maxSumSpan; ++k)
        {
            newRow.Add(new HtmlAdapterCell(TableRows.Count, k));
        }

        // Only the very first row of a table may be glued onto the last row collected so far,
        // and only when BigramsHolder.CheckMergeRow accepts the two rows' cell texts.
        if (r == 0 && TableRows.Count > 0 &&
            BigramsHolder.CheckMergeRow(
                TableRows.Last().ConvertAll(x => x.Text),
                newRow.ConvertAll(x => x.Text)))
        {
            MergeRow(TableRows.Last(), newRow);
        }
        else
        {
            TableRows.Add(newRow);
        }

        if ((maxRowsToProcess != -1) && (TableRows.Count >= maxRowsToProcess))
        {
            break;
        }
    }

    // Nothing was appended by this table, so there is nothing to validate or roll back.
    if (saveRowsCount >= TableRows.Count)
    {
        return;
    }

    if (maxCellsCount <= 4)
    {
        //remove this suspicious table
        TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
        return;
    }

    // NOTE(review): presumably fills in cells covered by rowspans for the new rows;
    // it runs before the name-column check — confirm against InsertRowSpanCells.
    InsertRowSpanCells(saveRowsCount, TableRows.Count);
    if (CheckNameColumnIsEmpty(TableRows, saveRowsCount))
    {
        TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
    }
}