/// <summary>
/// When the header contains both a NameAndOccupationOrRelativeType column and a
/// separate Occupation column, re-registers the former as NameOrRelativeType.
/// </summary>
/// <param name="c">Table header whose column mapping is adjusted in place.</param>
static void FixBadColumnName02(TableHeader c)
{
    const DeclarationField combinedField = DeclarationField.NameAndOccupationOrRelativeType;

    // Nothing to fix unless the combined column and a dedicated Occupation column coexist.
    if (!c.ContainsField(combinedField) || !c.ContainsField(DeclarationField.Occupation))
    {
        return;
    }

    // Re-tag the column info, add it under the new field, then drop the old field.
    TColumnInfo renamedColumn = c.ColumnOrder[combinedField];
    renamedColumn.Field = DeclarationField.NameOrRelativeType;
    c.Add(renamedColumn);
    c.Delete(combinedField);
}
/// <summary>
/// Generic column rename: if the header contains <paramref name="naturalText"/> together with
/// both <paramref name="country"/> and <paramref name="square"/>, the column registered as
/// <paramref name="naturalText"/> is re-registered as <paramref name="type"/>.
/// (The original note gives the example MixedColumnWithNaturalText -> MixedRealEstateType.)
/// </summary>
/// <param name="c">Table header whose column mapping is adjusted in place.</param>
/// <param name="naturalText">Field to be renamed.</param>
/// <param name="country">Field that must also be present for the rename to happen.</param>
/// <param name="square">Field that must also be present for the rename to happen.</param>
/// <param name="type">Field the column is renamed to.</param>
static void FixBadColumnName01_Template(TableHeader c, DeclarationField naturalText, DeclarationField country, DeclarationField square, DeclarationField type)
{
    // Checked in the same order as before; short-circuiting stops at the first missing field.
    bool renameApplies = c.ContainsField(naturalText)
                         && c.ContainsField(country)
                         && c.ContainsField(square);
    if (!renameApplies)
    {
        return;
    }

    // Re-tag the column info, add it under the new field, then drop the old field.
    TColumnInfo renamedColumn = c.ColumnOrder[naturalText];
    renamedColumn.Field = type;
    c.Add(renamedColumn);
    c.Delete(naturalText);
}
/// <summary>
/// Replaces a single Vehicle column whose header cell spans two merged columns with
/// two explicit columns, VehicleType and VehicleModel, when the header text mentions
/// vehicles, make and kind.
/// </summary>
/// <param name="adapter">Adapter used to look up the header cell and the cells under it.</param>
/// <param name="columnOrdering">Table header whose column mapping is adjusted in place.</param>
static void FixMissingSubheadersForVehicle(IAdapter adapter, TableHeader columnOrdering)
{
    if (!columnOrdering.ContainsField(DeclarationField.Vehicle))
    {
        return;
    }

    TColumnInfo unusedColumnInfo;
    var vehicleHeader = adapter.GetDeclarationFieldWeak(
        columnOrdering,
        columnOrdering.HeaderBegin.Value,
        DeclarationField.Vehicle,
        out unusedColumnInfo);

    // Only a header cell merged over exactly two columns is handled.
    if (vehicleHeader.MergedColsCount != 2)
    {
        return;
    }

    // Nothing is done when exactly one sub-cell is found under the header.
    var cellsBelowHeader = FindSubcellsUnder(adapter, vehicleHeader);
    if (cellsBelowHeader.Count == 1)
    {
        return;
    }

    // Compare against the lower-cased header text with all spaces removed.
    string normalizedHeader = vehicleHeader.Text.ToLower().Replace(" ", "");
    bool describesTypeAndModel =
        normalizedHeader.Contains("транспортныесредства")   // "vehicles"
        && normalizedHeader.Contains("марка")               // "make"
        && normalizedHeader.Contains("вид");                // "kind"
    if (!describesTypeAndModel)
    {
        return;
    }

    var firstColumn = vehicleHeader.Col;
    var halfWidth = vehicleHeader.CellWidth / 2; // each new column gets half of the merged cell width

    TColumnInfo vehicleTypeColumn = new TColumnInfo
    {
        BeginColumn = firstColumn,
        EndColumn = firstColumn + 1,
        ColumnPixelWidth = halfWidth,
        Field = DeclarationField.VehicleType
    };
    columnOrdering.Add(vehicleTypeColumn);

    TColumnInfo vehicleModelColumn = new TColumnInfo
    {
        BeginColumn = firstColumn + 1,
        EndColumn = firstColumn + 2,
        ColumnPixelWidth = halfWidth,
        Field = DeclarationField.VehicleModel
    };
    columnOrdering.Add(vehicleModelColumn);

    // The combined Vehicle field is removed only after both replacements are registered.
    columnOrdering.Delete(DeclarationField.Vehicle);
}
/// <summary>
/// Replaces a merged column (<paramref name="mergedField"/>) with the given sub-columns when
/// the number of cells found under its header equals the number of sub-columns and the
/// square-column check passes.
/// </summary>
/// <param name="adapter">Adapter used to look up the header cell and the cells under it.</param>
/// <param name="columnOrdering">Table header whose column mapping is adjusted in place.</param>
/// <param name="mergedField">Field currently registered for the merged column.</param>
/// <param name="subColumns">Fields to register, one per sub-cell, in left-to-right order of the sub-cells list.</param>
static void FixMissingSubheadersForMergedColumns(IAdapter adapter, TableHeader columnOrdering, DeclarationField mergedField, DeclarationField[] subColumns)
{
    if (!columnOrdering.ContainsField(mergedField))
    {
        return;
    }

    TColumnInfo unusedColumnInfo;
    var mergedHeader = adapter.GetDeclarationFieldWeak(
        columnOrdering,
        columnOrdering.HeaderBegin.Value,
        mergedField,
        out unusedColumnInfo);
    var cellsBelowHeader = FindSubcellsUnder(adapter, mergedHeader);

    int expectedCount = subColumns.Length;
    if (cellsBelowHeader.Count != expectedCount)
    {
        return;
    }

    // we check only the second column (index 1), todo check the first one and the third
    // NOTE(review): the literal 5 is presumably the number of data rows sampled — confirm in CheckSquareColumn.
    if (!CheckSquareColumn(adapter, columnOrdering.FirstDataRow, 5, cellsBelowHeader, 1))
    {
        return;
    }

    for (int index = 0; index < expectedCount; ++index)
    {
        AddColumn(columnOrdering, subColumns[index], cellsBelowHeader[index]);
    }

    columnOrdering.Delete(mergedField);
}
/// <summary>
/// Walks the table rows starting at the header's first data row, opening new sections,
/// switching to newly found table headers, creating declarants and relatives, and attaching
/// each accepted row to the current person. Also fills the declaration year from the
/// IncomeYear column while it is still unset.
/// </summary>
/// <param name="columnOrdering">Initial table header; replaced locally whenever a new header row is found.</param>
/// <param name="updateTrigrams">When true, each data row is fed to ColumnByDataPredictor and its data is written at the end.</param>
public void FindBordersAndPersonNames(TableHeader columnOrdering, bool updateTrigrams)
{
    int rowOffset = columnOrdering.FirstDataRow;
    if (columnOrdering.Section != null)
    {
        CreateNewSection(rowOffset, columnOrdering.Section);
    }

    // Set when a row could not be attached to any person; rows are then skipped
    // until a new declarant is successfully created.
    bool skipEmptyPerson = false;
    string prevPersonName = "";

    for (int row = rowOffset; row < Adapter.GetRowsCount(); row++)
    {
        DataRow currRow = Adapter.GetRow(columnOrdering, row);
        if (currRow == null || currRow.IsEmpty())
        {
            continue;
        }
        if (IAdapter.IsNumbersRow(currRow.Cells))
        {
            continue;
        }
        Logger.Debug(String.Format("currRow {0}, col_count={1}: {2}", row, currRow.Cells.Count, currRow.DebugString()));

        string sectionName;
        if (Adapter.IsSectionRow(row, currRow.Cells, columnOrdering.GetMaxColumnEndIndex(), false, out sectionName))
        {
            CreateNewSection(row, sectionName);
            continue;
        }

        TableHeader newColumnOrdering;
        if (IsHeaderRow(currRow, out newColumnOrdering))
        {
            columnOrdering = newColumnOrdering;
            Logger.Debug(String.Format("found a new table header {0}", currRow.DebugString()));
            row = newColumnOrdering.GetPossibleHeaderEnd() - 1; // row++ in "for" cycle
            continue;
        }

        if (updateTrigrams)
        {
            ColumnByDataPredictor.UpdateByRow(columnOrdering, currRow);
        }

        if (!currRow.InitPersonData(prevPersonName))
        {
            // be robust, ignore errors see 8562.pdf.docx in tests
            continue;
        }

        if (currRow.PersonName != String.Empty)
        {
            prevPersonName = currRow.PersonName;
            CreateNewDeclarant(Adapter, currRow);
            if (CurrentPerson != null)
            {
                skipEmptyPerson = false;
            }
        }
        else if (currRow.RelativeType != String.Empty)
        {
            if (!skipEmptyPerson)
            {
                try
                {
                    CreateNewRelative(currRow);
                }
                catch (SmartParserRelativeWithoutPersonException e)
                {
                    skipEmptyPerson = true;
                    Logger.Error(e.Message);
                    continue;
                }
            }
        }
        else
        {
            if (CurrentPerson == null && FailOnRelativeOrphan)
            {
                skipEmptyPerson = true;
                Logger.Error(String.Format("No person to attach info on row={0}", row));
                continue;
            }
        }

        if (!skipEmptyPerson)
        {
            AddInputRowToCurrentPerson(columnOrdering, currRow);
            if (_Declaration.Properties.Year == null &&
                columnOrdering.ContainsField(DeclarationField.IncomeYear))
            {
                var incomeYear = currRow.GetDeclarationField(DeclarationField.IncomeYear);
                if (incomeYear != null)
                {
                    // int.Parse used to throw on an empty or non-numeric cell and abort the
                    // whole table; a bad year cell is now skipped so a later row can supply it.
                    string yearText = incomeYear.Text;
                    int parsedYear;
                    if (int.TryParse(yearText, out parsedYear))
                    {
                        _Declaration.Properties.Year = parsedYear;
                    }
                    else if (!String.IsNullOrWhiteSpace(yearText))
                    {
                        Logger.Error(String.Format("Cannot parse income year \"{0}\" on row={1}", yearText, row));
                    }
                }
            }
        }
    }

    if (updateTrigrams)
    {
        ColumnByDataPredictor.WriteData();
    }

    Logger.Info("Parsed {0} declarants", _Declaration.PublicServants.Count());
}
// see 8562.pdf.docx in tests
// NOTE(review): the original note says "calc string width using graphics.MeasureString methods";
// presumably that happens inside GetLinesWithSoftBreaks — confirm.
/// <summary>
/// Tries to split one input row into several rows when the NameOrRelativeType cell holds the
/// declarant and relatives on separate soft-broken lines. A new part starts at line 0 and at
/// every later line that parses as a relation type.
/// </summary>
/// <param name="columnOrdering">Table header; must contain NameOrRelativeType.</param>
/// <param name="row">Row to split.</param>
/// <returns>True when the row was divided into at least two parts and every part was converted; false otherwise.</returns>
bool DivideDeclarantAndRelativesBySoftEolns(TableHeader columnOrdering, DataRow row)
{
    if (CurrentDeclarant.Relatives.Count() > 0)
    {
        return false;
    }
    if (!columnOrdering.ContainsField(DeclarationField.NameOrRelativeType))
    {
        return false;
    }

    Cell nameCell = row.GetDeclarationField(DeclarationField.NameOrRelativeType);

    // Only Word and HTML cells are supported; the type tests are false for null as well,
    // so a missing cell is rejected here too.
    bool isSupportedCell = nameCell is OpenXmlWordCell || nameCell is HtmlAdapterCell;
    if (!isSupportedCell || nameCell.IsEmpty)
    {
        return false;
    }
    if (row.adapter.IsExcel())
    {
        return false; // no font info
    }

    List<string> lines = GetLinesWithSoftBreaks(nameCell);
    if (lines.Count < 2)
    {
        return false;
    }

    // Line 0 always opens the first part; each later line recognised as a relation type opens another.
    List<int> borders = new List<int> { 0 };
    borders.AddRange(
        Enumerable.Range(1, lines.Count - 1)
                  .Where(lineIndex => DataHelper.ParseRelationType(lines[lineIndex], false) != RelationType.Error));
    if (borders.Count == 1)
    {
        return false;
    }

    // One deep clone of the source row per part.
    List<DataRow> dividedLines = new List<DataRow>(borders.Count);
    for (int part = 0; part < borders.Count; ++part)
    {
        dividedLines.Add(row.DeepClone());
    }

    for (int cellIndex = 0; cellIndex < row.Cells.Count; ++cellIndex)
    {
        DivideCell(row, cellIndex, borders, dividedLines);
    }

    // Stop at the first part that cannot be converted.
    for (int part = 0; part < borders.Count; ++part)
    {
        if (!DividedLinesToDataRows(row, dividedLines, part))
        {
            return false;
        }
    }

    Logger.Debug(String.Format("Divide line to {0} parts", borders.Count));
    return true;
}