/// <summary>
/// Builds a <see cref="TableWidthInfo"/> for a Word table: overall width, left
/// indentation and the per-column widths declared in the table grid.
/// </summary>
/// <param name="docHolder">Supplies the page width and left margin, both in pixels.</param>
/// <param name="table">The table whose properties and grid are read.</param>
/// <returns>
/// The populated width info. <c>ColumnWidths</c> is only assigned when the table
/// has a <c>TableGrid</c> child.
/// </returns>
TableWidthInfo InitializeTableWidthInfo(WordDocHolder docHolder, Table table)
{
    var info = new TableWidthInfo();

    var properties = table.GetFirstChild<TableProperties>();
    if (properties == null)
    {
        // No table properties at all: the table width is taken to be the page width.
        info.TableWidthInPixels = docHolder.DocumentPageSizeInPixels;
    }
    else
    {
        var declaredWidth = properties.TableWidth;
        if (declaredWidth != null)
        {
            info.TableWidthInPixels = TableWidthInfo.TryReadWidth(
                declaredWidth.Width,
                declaredWidth.Type,
                docHolder.DocumentPageSizeInPixels);
        }

        var declaredIndent = properties.TableIndentation;
        if (declaredIndent != null)
        {
            info.TableIndentionInPixels = TableWidthInfo.TryReadWidth(
                declaredIndent.Width,
                declaredIndent.Type,
                docHolder.DocumentPageSizeInPixels);
        }

        // The left page margin is added on top of whatever indentation was read
        // (only on this branch, i.e. when the table has properties).
        info.TableIndentionInPixels += docHolder.DocumentPageLeftMaginInPixels;
    }

    var grid = table.GetFirstChild<TableGrid>();
    if (grid == null)
    {
        return info;
    }

    // Grid column widths are always read as Dxa values.
    info.ColumnWidths = new List<int>();
    foreach (var column in grid.Elements<GridColumn>())
    {
        int columnWidth = TableWidthInfo.TryReadWidth(
            column.Width,
            TableWidthUnitValues.Dxa,
            info.TableWidthInPixels);
        info.ColumnWidths.Add(columnWidth);
    }

    return info;
}
/// <summary>
/// Reads the page width and the left page margin from the first
/// <c>PageSize</c> / <c>PageMargin</c> elements found in the main document part
/// and stores them, converted to pixels, in <c>DocumentPageSizeInPixels</c> and
/// <c>DocumentPageLeftMaginInPixels</c>.
/// </summary>
void InitPageSize()
{
    var docPart = WordDocument.MainDocumentPart;

    // Fallback page width: 11906 dxa (twentieths of a point) is 210 mm,
    // the width of an ISO 216 A4 page.
    int pageDxa = 11906;
    var pageSize = docPart.Document.Descendants<PageSize>().FirstOrDefault();
    // The width attribute may be absent; casting a null value would throw,
    // so keep the default in that case (same guard as for the margin below).
    if (pageSize != null && pageSize.Width != null)
    {
        pageDxa = (int)(uint)pageSize.Width;
    }
    DocumentPageSizeInPixels = TableWidthInfo.DxaToPixels(pageDxa);

    // Fallback left margin: none.
    int pageMarginDxa = 0;
    var pageMargin = docPart.Document.Descendants<PageMargin>().FirstOrDefault();
    if (pageMargin != null && pageMargin.Left != null)
    {
        pageMarginDxa = (int)(uint)pageMargin.Left;
    }
    DocumentPageLeftMaginInPixels = TableWidthInfo.DxaToPixels(pageMarginDxa);
}
/// <summary>
/// Converts the rows of a Word table into <c>OpenXmlTableRow</c> objects and
/// appends them to <c>TableRows</c>.
/// </summary>
/// <remarks>
/// A cell is fused into the previous cell of the row when that previous cell has
/// no right border. Rows in which every source cell is empty are skipped. The
/// first row of the table is merged into the last already collected row when
/// <c>BigramsHolder.CheckMergeRow</c> accepts the pair. After the loop, the rows
/// added by this call are dropped again if the table looks suspicious.
/// </remarks>
/// <param name="docHolder">Document context passed on to the cell constructor.</param>
/// <param name="table">The Word table to read.</param>
/// <param name="maxRowsToProcess">
/// Stop once <c>TableRows</c> holds at least this many rows; -1 disables the limit.
/// </param>
void ProcessWordTable(WordDocHolder docHolder, Table table, int maxRowsToProcess)
{
    var sourceRows = table.Descendants<TableRow>().ToList();
    TableWidthInfo widthInfo = InitializeTableWidthInfo(docHolder, table);
    int rowsBeforeThisTable = TableRows.Count;
    int widestRowCellCount = 0;
    TableBorders tblBorders = GetTableBorders(table);

    for (int rowIndex = 0; rowIndex < sourceRows.Count; rowIndex++)
    {
        var parsedRow = new OpenXmlTableRow();
        TableRow sourceRow = sourceRows[rowIndex];
        int gridBefore = GetRowGridBefore(sourceRow);
        TableCell[] sourceCells = sourceRow.Elements<TableCell>().ToArray();

        int columnsConsumed = 0;
        bool hasContent = false;

        for (int cellIndex = 0; cellIndex < sourceCells.Length; cellIndex++)
        {
            var cell = new OpenXmlWordCell(
                docHolder, sourceCells, cellIndex, widthInfo,
                TableRows.Count, columnsConsumed, tblBorders);

            if (parsedRow.RowCells.Count == 0)
            {
                // The first stored cell also absorbs the row's grid-before columns.
                cell.MergedColsCount += gridBefore;
                parsedRow.RowCells.Add(cell);
            }
            else
            {
                var previous = parsedRow.RowCells.Last();
                if (previous.HasRightBorder)
                {
                    parsedRow.RowCells.Add(cell);
                }
                else
                {
                    // No border between the two cells: fold this one into the previous cell.
                    previous.Text += cell.Text;
                    previous.CellWidth += cell.CellWidth;
                    previous.MergedColsCount += cell.MergedColsCount;
                    previous.HasRightBorder = cell.HasRightBorder;
                }
            }

            columnsConsumed += cell.MergedColsCount;
            hasContent = hasContent || !cell.IsEmpty;
        }

        if (!hasContent)
        {
            continue;
        }

        widestRowCellCount = Math.Max(parsedRow.RowCells.Count, widestRowCellCount);

        bool mergeIntoLastRow =
            rowIndex == 0
            && TableRows.Count > 0
            && BigramsHolder.CheckMergeRow(
                TableRows.Last().RowCells.ConvertAll(x => x.Text),
                parsedRow.RowCells.ConvertAll(x => x.Text));

        if (mergeIntoLastRow)
        {
            MergeRow(TableRows.Last().RowCells, parsedRow.RowCells);
        }
        else
        {
            TableRows.Add(parsedRow);
        }

        bool limitReached = maxRowsToProcess != -1 && TableRows.Count >= maxRowsToProcess;
        if (limitReached)
        {
            break;
        }
    }

    bool suspiciousTable =
        TableRows.Count > 0
        && !TableHeaderRecognizer.IsNamePositionAndIncomeTable(GetDataCells(0))
        && (widestRowCellCount <= 4 || CheckNameColumnIsEmpty(rowsBeforeThisTable));

    if (suspiciousTable)
    {
        // Remove this suspicious table: drop every row added by this call.
        TableRows.RemoveRange(rowsBeforeThisTable, TableRows.Count - rowsBeforeThisTable);
    }
}
/// <summary>
/// Creates a cell from one <c>TableCell</c> of a Word table row, reading its text
/// properties, border visibility, vertical/horizontal merge state, position and width.
/// </summary>
/// <param name="docHolder">Document context passed to <c>InitTextProperties</c>.</param>
/// <param name="row">All cells of the row this cell belongs to.</param>
/// <param name="cellIndexInRow">Index of this cell inside <paramref name="row"/>.</param>
/// <param name="tableWidth">Table-level width info (table width, column widths, indentation).</param>
/// <param name="rowIndexInTable">Stored as <c>Row</c>.</param>
/// <param name="unmergedColumnIndex">Stored as <c>Col</c>; also used to look up the grid column width.</param>
/// <param name="tblBorders">Table-level borders; may be null.</param>
public OpenXmlWordCell(WordDocHolder docHolder, TableCell[] row, int cellIndexInRow, TableWidthInfo tableWidth, int rowIndexInTable, int unmergedColumnIndex, TableBorders tblBorders)
{
    TableCell inputCell = row[cellIndexInRow];
    InitTextProperties(docHolder, inputCell);

    // Cell properties are optional; every read below must tolerate their absence.
    var cellProps = inputCell?.TableCellProperties;

    var borders = cellProps?.TableCellBorders;
    if (borders != null)
    {
        HasBottomBorder = WordDocHolder.BorderIsVisible(borders.BottomBorder);
        HasTopBorder = WordDocHolder.BorderIsVisible(borders.TopBorder);
        HasRightBorder = WordDocHolder.BorderIsVisible(borders.RightBorder);

        // No own right border: fall back to the left border of the next cell in the row.
        if (!HasRightBorder && cellIndexInRow + 1 < row.Length)
        {
            var nextBorders = row[cellIndexInRow + 1].TableCellProperties?.TableCellBorders;
            if (nextBorders != null)
            {
                HasRightBorder = WordDocHolder.BorderIsVisible(nextBorders.LeftBorder);
            }
        }

        // Still none: fall back to the table-wide inside vertical border.
        if (!HasRightBorder
            && tblBorders?.InsideVerticalBorder != null
            && (uint)tblBorders.InsideVerticalBorder.Size > 0)
        {
            HasRightBorder = true;
        }
    }

    // Previously dereferenced TableCellProperties unconditionally and crashed on
    // cells that have no properties element.
    var vmerge = cellProps?.GetFirstChild<VerticalMerge>();
    VerticallyMerged = null;
    if (vmerge != null)
    {
        if ((vmerge.Val == null) || (vmerge.Val == MergedCellValues.Continue))
        {
            // null -> MergedCellValues.Continue
            VerticallyMerged = MergedCellValues.Continue;
        }
        else if (vmerge.Val == MergedCellValues.Restart)
        {
            VerticallyMerged = MergedCellValues.Restart;
        }
    }

    if (tblBorders?.InsideHorizontalBorder != null
        && (uint)tblBorders.InsideHorizontalBorder.Size > 0)
    {
        TableHasInsideHorizontalBorders = true;
    }

    var gridSpan = cellProps?.GetFirstChild<GridSpan>();
    IsMerged = gridSpan != null && gridSpan.Val > 1;
    FirstMergedRow = -1; // init afterwards
    MergedRowsCount = -1; // init afterwards
    MergedColsCount = (gridSpan == null) ? 1 : (int)gridSpan.Val;
    Row = rowIndexInTable;
    Col = unmergedColumnIndex;

    var declaredWidth = cellProps?.TableCellWidth;
    if (declaredWidth != null
        && declaredWidth.Type != null
        && declaredWidth.Type != TableWidthUnitValues.Auto)
    {
        CellWidth = TableWidthInfo.TryReadWidth(
            declaredWidth.Width,
            declaredWidth.Type,
            tableWidth.TableWidthInPixels);
    }
    else if (tableWidth.ColumnWidths != null && Col < tableWidth.ColumnWidths.Count)
    {
        // No explicit (non-auto) cell width: use the grid column width, if the
        // table declared a grid at all (ColumnWidths stays unset otherwise).
        CellWidth = tableWidth.ColumnWidths[Col];
    }

    AdditTableIndention = tableWidth.TableIndentionInPixels;
}