/// <summary>
/// Heuristically decides whether <paramref name="s"/> looks like a person name.
/// </summary>
/// <remarks>
/// The checks are applied in this order:
/// 1. null or blank input is rejected;
/// 2. any string containing a dot is accepted (presumably initials such as "I.I." - confirm with callers);
/// 3. a single word (no inner whitespace) is rejected;
/// 4. if the last word can be a patronymic, the string is accepted;
/// 5. exactly three words are accepted; any other word count is delegated to
///    <c>ColumnByDataPredictor.PredictByString</c>.
/// </remarks>
/// <param name="s">Raw cell text to test.</param>
/// <returns><c>true</c> when the text is considered a person name; otherwise <c>false</c>.</returns>
public static bool CheckPersonName(String s)
{
    // Blank input has no words at all; a null used to crash on s.Contains.
    if (string.IsNullOrWhiteSpace(s))
    {
        return false;
    }

    if (s.Contains('.'))
    {
        return true;
    }

    string trimmed = s.Trim();
    if (!trimmed.Any(Char.IsWhiteSpace))
    {
        return false;
    }

    // Regex.Split emits empty strings when a separator sits at the very start or end of the input,
    // so split the trimmed text and drop empty tokens (e.g. produced by a leading/trailing comma).
    // Otherwise "A B C\n" would be counted as four words with an empty last word.
    string[] words = Regex.Split(trimmed, @"[\,\s\n]+")
        .Where(w => w.Length > 0)
        .ToArray();

    if (words.Length > 0 && TextHelpers.CanBePatronymic(words[words.Length - 1]))
    {
        return true;
    }

    if (words.Length != 3)
    {
        // Unusual word count: let the data-driven predictor decide.
        var predictedField = ColumnByDataPredictor.PredictByString(s);
        return HeaderHelpers.IsNameDeclarationField(predictedField);
    }

    return true;
}
/// <summary>
/// Assigns a <c>DeclarationField</c> to every header cell in <paramref name="cells"/> and registers it
/// in <paramref name="columnOrdering"/> through <c>AddColumn</c>.
/// </summary>
/// <remarks>
/// A cell whose merged row count equals the adapter's total row count is skipped.
/// Empty or very short titles (except "№") are classified from the column data; other titles are looked
/// up by text, falling back to data prediction when the lookup fails and the cleaned title has at most
/// four characters. When <c>TableHeader.SearchForFioColumnOnly</c> is set, processing stops after the
/// first name column has been added.
/// </remarks>
/// <param name="adapter">Table adapter providing the row count and the data used for prediction.</param>
/// <param name="cells">Header cells to classify.</param>
/// <param name="columnOrdering">Header description that receives the recognised columns.</param>
/// <exception cref="SmartParserException">A non-trivial title could not be recognised.</exception>
/// <exception cref="ColumnDetectorException">No field was detected for a title with a non-empty value.</exception>
public static void MapColumnTitlesToInnerConstants(IAdapter adapter, List<Cell> cells, TableHeader columnOrdering)
{
    foreach (var headerCell in cells)
    {
        string title = headerCell.GetText(true);
        Logger.Debug(string.Format("column title: \"{0}\"[{1}]", title.ReplaceEolnWithSpace().CoalesceWhitespace(), headerCell.CellWidth));

        // Strip every absence marker from the title before measuring its length.
        string strippedTitle = title;
        foreach (var marker in AbsenceMarkers)
        {
            strippedTitle = strippedTitle.Replace(marker, "");
        }
        strippedTitle = strippedTitle.Trim();

        // A cell merged over all rows of the table is not treated as a column title.
        if (adapter.GetRowsCount() == headerCell.MergedRowsCount)
        {
            continue;
        }

        bool titleTooShort = (title == "" || strippedTitle.Length <= 1) && (title != "№");

        DeclarationField field;
        if (titleTooShort)
        {
            // too short title, try to predict by values
            field = ColumnByDataPredictor.PredictEmptyColumnTitle(adapter, headerCell);
            Logger.Debug("Predict: " + field.ToString());
        }
        else
        {
            field = HeaderHelpers.TryGetField(headerCell.TextAbove, title);

            bool lookupFailed = field == DeclarationField.None;
            if (lookupFailed && strippedTitle.Length <= 4)
            {
                field = ColumnByDataPredictor.PredictEmptyColumnTitle(adapter, headerCell);
                Logger.Debug("Predict: " + field.ToString());
            }

            if (field == DeclarationField.None)
            {
                throw new SmartParserException(String.Format("Cannot recognize field \"{0}\"", title.Replace('\n', ' ')));
            }
        }

        if (field == DeclarationField.None && !DataHelper.IsEmptyValue(title))
        {
            throw new ColumnDetectorException(String.Format("Fail to detect column type row: {0} title:{1}", headerCell.Row, title));
        }

        if (ColumnByDataPredictor.CalcPrecision)
        {
            ColumnByDataPredictor.PredictForPrecisionCheck(adapter, headerCell, field);
        }

        AddColumn(columnOrdering, field, headerCell);

        if (TableHeader.SearchForFioColumnOnly && HeaderHelpers.IsNameDeclarationField(field))
        {
            break;
        }
    }
}
/// <summary>
/// Checks whether the content of <paramref name="cell"/> is compatible with <paramref name="field"/>,
/// ignoring the bits covered by <c>DeclarationField.AllOwnTypes</c>.
/// </summary>
/// <param name="field">Field the cell is expected to belong to.</param>
/// <param name="cell">Cell whose text is examined.</param>
/// <returns>
/// <c>false</c> for an empty cell; <c>true</c> when the field has a square bit set and the text parses
/// as a square, or when the field predicted from the text equals <paramref name="field"/> once the
/// own-type bits are masked out of both; otherwise <c>false</c>.
/// </returns>
public static bool TestFieldWithoutOwntypes(DeclarationField field, Cell cell)
{
    if (cell.IsEmpty)
    {
        return false;
    }

    string cellText = cell.GetText(true);

    // Square columns: a value that parses as a square is accepted without consulting the predictor.
    bool expectsSquare = (field & DeclarationField.SquareMask) > 0;
    if (expectsSquare && DataHelper.ParseSquare(cellText).HasValue)
    {
        return true;
    }

    var guessedField = ColumnByDataPredictor.PredictByString(cellText);
    var withoutOwnTypes = ~DeclarationField.AllOwnTypes;
    return (guessedField & withoutOwnTypes) == (field & withoutOwnTypes);
}
/// <summary>
/// Maps the cells of a row onto the fields of <paramref name="columnOrdering"/> by position, checking
/// each pairing by horizontal pixel overlap and, when that fails, by the cell content.
/// </summary>
/// <remarks>
/// The i-th cell is paired with the i-th merged column. Cell extents are accumulated from the first
/// cell's <c>AdditTableIndention</c> and the cell widths. A pairing with no pixel overlap counts as a
/// pixel error; if that cell has a non-empty value it must pass
/// <c>ColumnByDataPredictor.TestFieldWithoutOwntypes</c>, which resets the error counter, otherwise the
/// whole mapping is rejected.
/// </remarks>
/// <param name="columnOrdering">Header describing the expected columns.</param>
/// <param name="cells">Cells of the row to map.</param>
/// <returns>
/// A field-to-cell dictionary, or <c>null</c> when the counts differ, a non-empty cell contradicts its
/// column, or the pixel error counter ends at three or more.
/// </returns>
static Dictionary<DeclarationField, Cell> MapByOrderAndIntersection(TableHeader columnOrdering, List<Cell> cells)
{
    var expectedColumns = columnOrdering.MergedColumnOrder;
    if (expectedColumns.Count != cells.Count)
    {
        return null;
    }

    var mapping = new Dictionary<DeclarationField, Cell>();
    int cellLeft = cells[0].AdditTableIndention;
    int pixelMismatches = 0;

    for (int index = 0; index < cells.Count; index++)
    {
        Cell cell = cells[index];
        var column = expectedColumns[index];

        int cellRight = cellLeft + cell.CellWidth;
        int columnLeft = column.ColumnPixelStart;
        int columnRight = columnLeft + column.ColumnPixelWidth;

        bool noOverlap = TableHeader.PeriodIntersection(cellLeft, cellRight, columnLeft, columnRight) == 0;
        if (noOverlap)
        {
            pixelMismatches++;

            if (!DataHelper.IsEmptyValue(cell.Text))
            {
                // Geometry disagrees, so fall back to the cell content to confirm or reject the pairing.
                bool contentMatches = ColumnByDataPredictor.TestFieldWithoutOwntypes(column.Field, cell);
                string escapedText = cell.Text.Replace("\n", "\\n");

                if (!contentMatches)
                {
                    Logger.Debug(string.Format("cannot map column N={0} text={1}", index, escapedText));
                    return null;
                }

                Logger.Debug(string.Format("found semantic argument for mapping N={0} text={1} to {2}", index, escapedText, column.Field));
                pixelMismatches = 0;
            }
        }

        mapping[column.Field] = cell;
        cellLeft = cellRight;
    }

    return pixelMismatches >= 3 ? null : mapping;
}
/// <summary>
/// Walks the table rows starting at the header's first data row, splitting them into sections,
/// declarants and relatives, and attaches each data row to the current person.
/// </summary>
/// <remarks>
/// For every non-empty row that is not a numbers row, the method checks in order whether it is a section
/// row, a new table header (which replaces <paramref name="columnOrdering"/> and moves the row cursor to
/// the end of that header), or a data row. Data rows with a person name start a new declarant, rows with
/// a relative type start a new relative, and remaining rows belong to the current person.
/// The declaration year is taken from the first row whose income-year cell parses as an integer.
/// </remarks>
/// <param name="columnOrdering">Header describing the columns of the rows that follow.</param>
/// <param name="updateTrigrams">
/// When <c>true</c>, every data row is passed to <c>ColumnByDataPredictor.UpdateByRow</c> and
/// <c>ColumnByDataPredictor.WriteData</c> is called at the end.
/// </param>
public void FindBordersAndPersonNames(TableHeader columnOrdering, bool updateTrigrams)
{
    int rowOffset = columnOrdering.FirstDataRow;
    if (columnOrdering.Section != null)
    {
        CreateNewSection(rowOffset, columnOrdering.Section);
    }

    // While set, rows are not attached to any person; it is cleared once a declarant
    // has been created and CurrentPerson is available again.
    bool skipEmptyPerson = false;
    string prevPersonName = "";

    for (int row = rowOffset; row < Adapter.GetRowsCount(); row++)
    {
        DataRow currRow = Adapter.GetRow(columnOrdering, row);
        if (currRow == null || currRow.IsEmpty())
        {
            continue;
        }
        if (IAdapter.IsNumbersRow(currRow.Cells))
        {
            continue;
        }
        Logger.Debug(String.Format("currRow {0}, col_count={1}: {2}", row, currRow.Cells.Count, currRow.DebugString()));

        string sectionName;
        if (Adapter.IsSectionRow(row, currRow.Cells, columnOrdering.GetMaxColumnEndIndex(), false, out sectionName))
        {
            CreateNewSection(row, sectionName);
            continue;
        }

        TableHeader newColumnOrdering;
        if (IsHeaderRow(currRow, out newColumnOrdering))
        {
            columnOrdering = newColumnOrdering;
            Logger.Debug(String.Format("found a new table header {0}", currRow.DebugString()));
            row = newColumnOrdering.GetPossibleHeaderEnd() - 1; // row++ in "for" cycle
            continue;
        }

        if (updateTrigrams)
        {
            ColumnByDataPredictor.UpdateByRow(columnOrdering, currRow);
        }

        if (!currRow.InitPersonData(prevPersonName))
        {
            // be robust, ignore errors see 8562.pdf.docx in tests
            continue;
        }

        if (currRow.PersonName != String.Empty)
        {
            prevPersonName = currRow.PersonName;
            CreateNewDeclarant(Adapter, currRow);
            if (CurrentPerson != null)
            {
                skipEmptyPerson = false;
            }
        }
        else if (currRow.RelativeType != String.Empty)
        {
            if (!skipEmptyPerson)
            {
                try
                {
                    CreateNewRelative(currRow);
                }
                catch (SmartParserRelativeWithoutPersonException e)
                {
                    // A relative without a declarant: skip rows until the next declarant appears.
                    skipEmptyPerson = true;
                    Logger.Error(e.Message);
                    continue;
                }
            }
        }
        else
        {
            if (CurrentPerson == null && FailOnRelativeOrphan)
            {
                skipEmptyPerson = true;
                Logger.Error(String.Format("No person to attach info on row={0}", row));
                continue;
            }
        }

        if (!skipEmptyPerson)
        {
            AddInputRowToCurrentPerson(columnOrdering, currRow);

            if (_Declaration.Properties.Year == null && columnOrdering.ContainsField(DeclarationField.IncomeYear))
            {
                var incomeYear = currRow.GetDeclarationField(DeclarationField.IncomeYear);
                if (incomeYear != null)
                {
                    // A non-numeric year cell must not abort the whole parse: leave Year unset
                    // so that a later row can still provide it.
                    int parsedYear;
                    if (int.TryParse(incomeYear.Text, out parsedYear))
                    {
                        _Declaration.Properties.Year = parsedYear;
                    }
                    else
                    {
                        Logger.Debug(String.Format("Cannot parse income year \"{0}\" on row={1}", incomeYear.Text, row));
                    }
                }
            }
        }
    }

    if (updateTrigrams)
    {
        ColumnByDataPredictor.WriteData();
    }

    Logger.Info("Parsed {0} declarants", _Declaration.PublicServants.Count());
}