/// <summary>
/// Creates a row bound to <paramref name="adapter"/> and loads its cells.
/// </summary>
/// <param name="adapter">Adapter the cells are read from; also stored on the row.</param>
/// <param name="columnOrdering">
/// Table header stored in <c>ColumnOrdering</c>; its maximal column end index
/// is passed to the adapter when the cells are requested.
/// </param>
/// <param name="row">Index of the row inside the adapter.</param>
public DataRow(IAdapter adapter, TableHeader columnOrdering, int row)
{
    this.adapter = adapter;
    this.ColumnOrdering = columnOrdering;
    this.row = row;

    int maxColumnEnd = columnOrdering.GetMaxColumnEndIndex();
    Cells = adapter.GetDataCells(row, maxColumnEnd);

    // MapCells is run for every adapter except the Excel ones.
    bool isExcel = this.adapter.IsExcel();
    if (!isExcel)
    {
        MapCells();
    }
}
/// <summary>
/// Converts a slice of the table into a <see cref="TJsonTablePortion"/>:
/// the header rows, the nearest section row at or above <paramref name="body_start"/>
/// (if any), and up to <c>body_end - body_start</c> non-empty data rows.
/// </summary>
/// <param name="columnOrdering">Header that supplies the header row range and the maximal column end index.</param>
/// <param name="body_start">Row index where the data slice starts; stored in <c>DataStart</c>.</param>
/// <param name="body_end">Together with <paramref name="body_start"/> defines the maximal number of data rows emitted.</param>
/// <returns>
/// The filled portion. <c>DataEnd</c> is left at the index of the first row
/// that was not examined by the data loop.
/// </returns>
public TJsonTablePortion TablePortionToJson(TableHeader columnOrdering, int body_start, int body_end)
{
    var portion = new TJsonTablePortion();
    portion.DataStart = body_start;
    int headerEnd = columnOrdering.GetPossibleHeaderEnd();

    // Header rows: [GetPossibleHeaderBegin(), GetPossibleHeaderEnd()).
    int headerRow = columnOrdering.GetPossibleHeaderBegin();
    while (headerRow < columnOrdering.GetPossibleHeaderEnd())
    {
        var headerJson = GetJsonByRow(GetDataCells(headerRow));
        portion.Header.Add(headerJson);
        headerRow++;
    }

    // find section before data: walk upwards from body_start down to headerEnd
    // and keep only the first section row encountered.
    int candidate = body_start;
    while (candidate >= headerEnd)
    {
        string ignoredSectionName; // cannot use prevRowIsSection
        var cells = GetDataCells(candidate);
        if (IsSectionRow(candidate, cells, columnOrdering.GetMaxColumnEndIndex(), false, out ignoredSectionName))
        {
            portion.Section.Add(GetJsonByRow(cells));
            break;
        }
        candidate--;
    }

    // Data rows: empty rows are stepped over and do not count towards the limit.
    int maxRowsCount = body_end - body_start;
    int addedRows = 0;
    for (portion.DataEnd = body_start;
         portion.DataEnd < GetRowsCount() && addedRows < maxRowsCount;
         portion.DataEnd++)
    {
        if (IsEmptyRow(portion.DataEnd))
        {
            continue;
        }
        portion.Data.Add(GetJsonByRow(GetDataCells(portion.DataEnd)));
        addedRows++;
    }

    return portion;
}
/// <summary>
/// Scans the table rows starting at <c>columnOrdering.FirstDataRow</c>, opening
/// sections, switching to a new header when one is found in the middle of the
/// table, creating declarants and relatives, and attaching each data row to
/// the current person.
/// </summary>
/// <param name="columnOrdering">
/// Header to start with. It is replaced locally whenever <c>IsHeaderRow</c>
/// recognizes a new header row.
/// </param>
/// <param name="updateTrigrams">
/// When true, every data row is passed to <c>ColumnByDataPredictor.UpdateByRow</c>
/// and <c>ColumnByDataPredictor.WriteData</c> is called after the scan.
/// </param>
/// <remarks>
/// The declaration year is taken from the first row whose income-year cell
/// parses as an integer. A blank cell is skipped silently; a non-blank cell
/// that cannot be parsed is logged as an error and the year stays unset so a
/// later row can still supply it (previously such a cell made <c>int.Parse</c>
/// throw and aborted the scan).
/// </remarks>
public void FindBordersAndPersonNames(TableHeader columnOrdering, bool updateTrigrams)
{
    int rowOffset = columnOrdering.FirstDataRow;
    if (columnOrdering.Section != null)
    {
        CreateNewSection(rowOffset, columnOrdering.Section);
    }

    // Set when a row cannot be attached to any person; rows are then dropped
    // until a new declarant is successfully created.
    bool skipEmptyPerson = false;
    string prevPersonName = "";

    for (int row = rowOffset; row < Adapter.GetRowsCount(); row++)
    {
        DataRow currRow = Adapter.GetRow(columnOrdering, row);
        if (currRow == null || currRow.IsEmpty())
        {
            continue;
        }
        if (IAdapter.IsNumbersRow(currRow.Cells))
        {
            continue;
        }
        Logger.Debug(String.Format("currRow {0}, col_count={1}: {2}", row, currRow.Cells.Count, currRow.DebugString()));

        string sectionName;
        if (Adapter.IsSectionRow(row, currRow.Cells, columnOrdering.GetMaxColumnEndIndex(), false, out sectionName))
        {
            CreateNewSection(row, sectionName);
            continue;
        }

        {
            TableHeader newColumnOrdering;
            if (IsHeaderRow(currRow, out newColumnOrdering))
            {
                columnOrdering = newColumnOrdering;
                Logger.Debug(String.Format("found a new table header {0}", currRow.DebugString()));
                row = newColumnOrdering.GetPossibleHeaderEnd() - 1; // row++ in "for" cycle
                continue;
            }
        }

        if (updateTrigrams)
        {
            ColumnByDataPredictor.UpdateByRow(columnOrdering, currRow);
        }

        if (!currRow.InitPersonData(prevPersonName))
        {
            // be robust, ignore errors see 8562.pdf.docx in tests
            continue;
        }

        if (currRow.PersonName != String.Empty)
        {
            prevPersonName = currRow.PersonName;
            CreateNewDeclarant(Adapter, currRow);
            if (CurrentPerson != null)
            {
                skipEmptyPerson = false;
            }
        }
        else if (currRow.RelativeType != String.Empty)
        {
            if (!skipEmptyPerson)
            {
                try
                {
                    CreateNewRelative(currRow);
                }
                catch (SmartParserRelativeWithoutPersonException e)
                {
                    skipEmptyPerson = true;
                    Logger.Error(e.Message);
                    continue;
                }
            }
        }
        else
        {
            if (CurrentPerson == null && FailOnRelativeOrphan)
            {
                skipEmptyPerson = true;
                Logger.Error(String.Format("No person to attach info on row={0}", row));
                continue;
            }
        }

        if (!skipEmptyPerson)
        {
            AddInputRowToCurrentPerson(columnOrdering, currRow);
            if (_Declaration.Properties.Year == null &&
                columnOrdering.ContainsField(DeclarationField.IncomeYear))
            {
                var incomeYear = currRow.GetDeclarationField(DeclarationField.IncomeYear);
                if (incomeYear != null)
                {
                    // The cell holds free text from the source document, so it
                    // must not be allowed to abort the whole scan.
                    int parsedYear;
                    if (int.TryParse(incomeYear.Text, out parsedYear))
                    {
                        _Declaration.Properties.Year = parsedYear;
                    }
                    else if (!String.IsNullOrWhiteSpace(incomeYear.Text))
                    {
                        Logger.Error(String.Format("Cannot parse income year \"{0}\" on row={1}", incomeYear.Text, row));
                    }
                }
            }
        }
    }

    if (updateTrigrams)
    {
        ColumnByDataPredictor.WriteData();
    }

    Logger.Info("Parsed {0} declarants", _Declaration.PublicServants.Count());
}