/// <summary>
/// Walks the table rows from the top, collecting title and section information,
/// until a row that passes <c>WeakHeaderCheck</c> is reached.
/// </summary>
/// <param name="adapter">Source of table rows and of the title text located outside the table.</param>
/// <param name="columnOrdering">Receives the detected section, title, year and ministry name.</param>
/// <returns>
/// The index of the first row that passes <c>WeakHeaderCheck</c>, or 0 when the end of the
/// table is reached without finding such a row.
/// </returns>
static int ProcessTitle(IAdapter adapter, ColumnOrdering columnOrdering)
{
    // A section row whose text is longer than this is tried as a document title;
    // shorter text is stored as the section name.
    const int maxSectionNameLength = 20;

    int row = 0;
    string title = null;
    string ministry = null;
    int? year = null;
    bool findTitle = false;
    bool prevRowIsSection = false;

    while (true)
    {
        var currRow = adapter.GetCells(row);
        string section_text;
        bool isSection = IAdapter.IsSectionRow(currRow, adapter.GetColsCount(), prevRowIsSection, out section_text);
        if (isSection)
        {
            if (section_text.Length > maxSectionNameLength)
            {
                // Keep an earlier success: a later row that fails to parse must not reset the flag.
                if (GetValuesFromTitle(section_text, ref title, ref year, ref ministry))
                {
                    findTitle = true;
                }
            }
            else
            {
                columnOrdering.Section = section_text;
            }
        }
        else if (WeakHeaderCheck(currRow))
        {
            break;
        }

        row += 1;
        if (row >= adapter.GetRowsCount())
        {
            // No header-like row anywhere in the table: fall back to the first row.
            row = 0;
            break;
        }
        prevRowIsSection = isSection;
    }

    if (!findTitle)
    {
        // Nothing usable inside the table: try the title text located outside of it.
        findTitle = GetValuesFromTitle(adapter.GetTitleOutsideTheTable(), ref title, ref year, ref ministry);
    }

    if (findTitle)
    {
        columnOrdering.Title = title;
        columnOrdering.Year = year;
        columnOrdering.MinistryName = ministry;
    }
    return row;
}
/// <summary>
/// Collects the non-empty cells of the row that follows <paramref name="cell"/>'s merged row span
/// and whose column lies inside the column span of <paramref name="cell"/>.
/// </summary>
/// <param name="adapter">Table accessor used to read the row below the cell.</param>
/// <param name="cell">The upper cell whose sub-cells are looked up.</param>
/// <returns>
/// The matching cells in row order; an empty list when there is no row below the cell or when
/// the cell has zero width and blank text.
/// </returns>
static List<Cell> FindSubcellsUnder(IAdapter adapter, Cell cell)
{
    var found = new List<Cell>();

    int rowBelow = cell.Row + cell.MergedRowsCount;
    bool hasRowBelow = rowBelow < adapter.GetRowsCount();
    if (!hasRowBelow)
    {
        return found;
    }

    bool isZeroWidthBlank = cell.CellWidth == 0 && cell.GetText(true).Trim() == "";
    if (isZeroWidthBlank)
    {
        return found;
    }

    foreach (var candidate in adapter.GetCells(rowBelow))
    {
        bool leftOfSpan = candidate.Col < cell.Col;
        if (leftOfSpan)
        {
            continue;
        }

        // The first cell at or past the right edge of the span ends the scan.
        bool pastSpan = candidate.Col >= cell.Col + cell.MergedColsCount;
        if (pastSpan)
        {
            break;
        }

        if (!candidate.IsEmpty)
        {
            found.Add(candidate);
        }
    }
    return found;
}
/// <summary>
/// Predicts the declaration field of a column from the texts of the cells found below its header.
/// </summary>
/// <param name="adapter">Table accessor used to read the rows below the header.</param>
/// <param name="headerCell">Header cell of the column; sampling starts right after its merged row span.</param>
/// <returns>
/// The field returned by <c>PredictByStrings</c> for the sampled texts. When the header cell has a
/// <c>TextAbove</c> and the prediction contains any <c>AllOwnTypes</c> bits, those bits are replaced
/// by the ownership type recognised in <c>TextAbove</c>.
/// </returns>
public static DeclarationField PredictEmptyColumnTitle(IAdapter adapter, Cell headerCell)
{
    const int maxRowToCollect = 10;
    List<string> texts = new List<string>();
    int rowIndex = headerCell.Row + headerCell.MergedRowsCount;

    // The row bound is checked before every read, so a header that ends on the last table row
    // yields no samples instead of reading past the end of the table.
    for (int i = 0; i < maxRowToCollect && rowIndex < adapter.GetRowsCount(); i++)
    {
        var cells = adapter.GetCells(rowIndex, IAdapter.MaxColumnsCount);
        string dummy;
        if (IAdapter.IsSectionRow(cells, adapter.GetColsCount(), false, out dummy))
        {
            // Section rows are skipped; they still count against maxRowToCollect.
            rowIndex += 1;
            continue;
        }

        var c = adapter.GetCell(rowIndex, headerCell.Col);
        if (c != null)
        {
            texts.Add(c.GetText(true));
            rowIndex += c.MergedRowsCount;
        }
        else
        {
            rowIndex += 1;
        }
    }

    var field = PredictByStrings(texts);
    if (headerCell.TextAbove != null && ((field & DeclarationField.AllOwnTypes) > 0))
    {
        string h = headerCell.TextAbove;
        // The ownership type is defined by the header text above, so the predicted
        // ownership bits are cleared first. If the text matches none of the checks
        // below, the field is left without any ownership bit.
        field &= ~DeclarationField.AllOwnTypes;
        if (HeaderHelpers.IsMixedColumn(h))
        {
            field |= DeclarationField.Mixed;
        }
        else if (HeaderHelpers.IsStateColumn(h))
        {
            field |= DeclarationField.State;
        }
        else if (HeaderHelpers.IsOwnedColumn(h))
        {
            field |= DeclarationField.Owned;
        }
    }

    Logger.Debug(string.Format("predict by {0} -> {1}", String.Join("\\n", texts), field));
    return field;
}
/// <summary>
/// Builds the list of cells that act as column titles for a header starting at
/// <paramref name="headerStartRow"/>, descending into a second header row where a top cell
/// has several sub-cells beneath it.
/// </summary>
/// <param name="adapter">Table accessor used to read the header rows.</param>
/// <param name="headerStartRow">Index of the first header row.</param>
/// <param name="headerEndRow">
/// Set to the largest "row + merged rows count" among the cells used to measure the header,
/// and never less than <paramref name="headerStartRow"/> + 1.
/// </param>
/// <returns>The column title cells, in the order they were encountered.</returns>
static public List<Cell> GetColumnCells(IAdapter adapter, int headerStartRow, out int headerEndRow)
{
    headerEndRow = headerStartRow + 1;
    var topRow = adapter.GetCells(headerStartRow);
    var titleCells = new List<Cell>();
    var titleTexts = new List<string>();
    bool secondLevelAllowed = true;
    int tallestSpan = 1;

    foreach (var top in topRow)
    {
        string topText = top.GetText(true);

        // A cell merged across every row of the table is not treated as a column title.
        if (adapter.GetRowsCount() == top.MergedRowsCount)
        {
            continue;
        }
        if (top.CellWidth == 0 && topText.Trim() == "")
        {
            continue;
        }

        tallestSpan = Math.Max(tallestSpan, top.MergedRowsCount);

        var below = FindSubcellsUnder(adapter, top);
        bool splitsIntoSubheaders = below.Count > 1 && secondLevelAllowed;
        if (splitsIntoSubheaders)
        {
            // The current cell spans several columns, so the header probably occupies two rows
            // instead of just one, with the second row reserved for subheaders.
            foreach (var sub in below)
            {
                sub.TextAbove = top.Text.NormSpaces();
                titleCells.Add(sub);
                titleTexts.Add(sub.TextAbove + "^" + sub.Text.NormSpaces());
            }
            headerEndRow = Math.Max(headerEndRow, below[0].Row + below[0].MergedRowsCount);
            continue;
        }

        headerEndRow = Math.Max(headerEndRow, top.Row + top.MergedRowsCount);

        // Sometimes in a two-level header the top cell is empty while the cell below it
        // holds the title (TwoRowHeaderEmptyTopCellTest).
        bool titleIsInCellBelow = topText.Trim() == ""
            && top.MergedRowsCount < tallestSpan
            && below.Count == 1;
        titleCells.Add(titleIsInCellBelow ? below[0] : top);
        titleTexts.Add(top.Text.NormSpaces());

        // Workaround for an error in the DepEnergo2010 document: checked only for the first
        // collected column; a cell below with text of 5 or more characters disables the second level.
        if (titleCells.Count == 1 && top.MergedRowsCount == 1 && below.Count == 1)
        {
            string cellBelowName = below[0].GetText(true);
            secondLevelAllowed = cellBelowName.Length < 5;
        }
    }

    Logger.Debug("column titles: " + String.Join("|", titleTexts));
    return titleCells;
}