/// <summary>
/// Expands vertically merged (rowspan) cells of the rows <paramref name="start"/>..<paramref name="end"/>-1
/// of <c>TableRows</c>: for every cell that starts a vertical span, a placeholder cell is inserted
/// at the same position into each following row covered by the span, and the <c>Col</c> of the
/// cells to the right of the placeholder is shifted by the spanning cell's <c>MergedColsCount</c>.
/// </summary>
/// <param name="start">Index of the first row in <c>TableRows</c> to process.</param>
/// <param name="end">Index one past the last row to process.</param>
void InsertRowSpanCells(int start, int end)
{
    // A range of fewer than two rows has no "following row" to receive placeholders.
    if (start + 1 >= end)
    {
        return;
    }

    for (int firstRow = start; firstRow < end; ++firstRow)
    {
        var firstLine = TableRows[firstRow];
        for (int cellIndex = 0; cellIndex < firstLine.Count; ++cellIndex)
        {
            var spanCell = firstLine[cellIndex];

            // Only the cell that starts the span is expanded; placeholders inserted earlier
            // keep FirstMergedRow of the original row and are therefore skipped here.
            if (spanCell.MergedRowsCount <= 1 || spanCell.FirstMergedRow != firstRow)
            {
                continue;
            }

            int rowSpan = spanCell.MergedRowsCount;
            for (int rowIndex = firstRow + 1; rowIndex < firstRow + rowSpan; ++rowIndex)
            {
                if (rowIndex >= TableRows.Count)
                {
                    // The span points past the last collected row
                    // (original note: "#-max-rows 100", i.e. presumably a truncated table).
                    break;
                }

                var additCell = new HtmlAdapterCell(rowIndex, cellIndex);
                additCell.FirstMergedRow = firstRow;
                // Rows of the span remaining from this row downwards. The offset inside the span
                // (rowIndex - firstRow) must be subtracted, not the absolute row index, otherwise
                // the value is wrong (often negative) for every span that does not start at row 0.
                additCell.MergedRowsCount = rowSpan - (rowIndex - firstRow);
                additCell.CellWidth = spanCell.CellWidth;

                var targetRow = TableRows[rowIndex];
                if (cellIndex < targetRow.Count)
                {
                    targetRow.Insert(cellIndex, additCell);
                }
                else
                {
                    // The row is shorter than the span position: append at the end.
                    targetRow.Add(additCell);
                }

                // Cells to the right of the placeholder move by the width of the spanning cell.
                for (int afterCellIndex = cellIndex + 1; afterCellIndex < targetRow.Count; ++afterCellIndex)
                {
                    targetRow[afterCellIndex].Col += spanCell.MergedColsCount;
                }
            }
        }
    }
}
/// <summary>
/// Reads the rows of one HTML table and appends them to <c>TableRows</c>.
/// Rows whose cells are all empty are skipped; horizontally merged cells are followed by
/// placeholder cells; rows narrower than the widest row seen so far are padded on the right.
/// After reading, the appended rows are removed again if the table looks suspicious
/// (no row has more than 4 cells) or if <c>CheckNameColumnIsEmpty</c> returns true for them;
/// otherwise vertically merged cells are expanded with <c>InsertRowSpanCells</c>.
/// </summary>
/// <param name="docHolder">Document holder passed through to the cell constructor.</param>
/// <param name="table">The HTML table element to read.</param>
/// <param name="maxRowsToProcess">
/// Upper limit for the total number of rows in <c>TableRows</c>; -1 disables the limit.
/// </param>
void ProcessHtmlTable(HtmlDocHolder docHolder, IElement table, int maxRowsToProcess)
{
    var rows = GetHtmlTableRows(table);
    int rowsCount = rows.Count(); // loop-invariant, evaluated once
    int saveRowsCount = TableRows.Count;
    int maxCellsCount = 0;
    int maxSumSpan = 0;

    for (int r = 0; r < rowsCount; ++r)
    {
        var newRow = new List<HtmlAdapterCell>();
        int sumspan = 0; // running column index, counting merged columns
        var row = rows[r];
        bool isEmpty = true;

        foreach (var rowCell in GetHtmlTableCells(row))
        {
            var c = new HtmlAdapterCell(docHolder, rowCell, TableRows.Count, sumspan);
            newRow.Add(c);
            // One placeholder for every additional column covered by a colspan cell.
            for (int k = 1; k < c.MergedColsCount; ++k)
            {
                newRow.Add(new HtmlAdapterCell(TableRows.Count, sumspan + k));
            }
            sumspan += c.MergedColsCount;
            isEmpty = isEmpty && c.IsEmpty;
        }

        if (isEmpty)
        {
            continue;
        }

        maxCellsCount = Math.Max(newRow.Count, maxCellsCount);
        maxSumSpan = Math.Max(sumspan, maxSumSpan);

        // see 7007_8.html in tests
        // Pad a short row up to the widest row seen so far. k already is the absolute
        // column index, so it is used directly (adding sumspan again would produce
        // columns starting at 2 * sumspan instead of continuing after the last cell).
        for (int k = sumspan; k < maxSumSpan; ++k)
        {
            newRow.Add(new HtmlAdapterCell(TableRows.Count, k));
        }

        // The first row of a table may continue the last row collected before this table.
        if (r == 0 && TableRows.Count > 0 &&
            BigramsHolder.CheckMergeRow(
                TableRows.Last().ConvertAll(x => x.Text),
                newRow.ConvertAll(x => x.Text)))
        {
            MergeRow(TableRows.Last(), newRow);
        }
        else
        {
            TableRows.Add(newRow);
        }

        if ((maxRowsToProcess != -1) && (TableRows.Count >= maxRowsToProcess))
        {
            break;
        }
    }

    // Nothing was appended by this table.
    if (saveRowsCount >= TableRows.Count)
    {
        return;
    }

    if (maxCellsCount <= 4)
    {
        // remove this suspicious table
        TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
        return;
    }

    InsertRowSpanCells(saveRowsCount, TableRows.Count);
    if (CheckNameColumnIsEmpty(saveRowsCount))
    {
        TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
    }
}