示例#1
0
        /// <summary>
        /// Heuristically decides whether <paramref name="s"/> looks like a person name.
        /// </summary>
        /// <remarks>
        /// The checks are applied in this order:
        /// 1. a null or empty string is not a name;
        /// 2. a string containing a dot is accepted (presumably initials such as "I.I." - confirm);
        /// 3. a string that is a single token after trimming is rejected;
        /// 4. a string whose last token can be a patronymic is accepted;
        /// 5. a string of exactly three tokens is accepted;
        /// 6. otherwise the decision is delegated to <c>ColumnByDataPredictor.PredictByString</c>.
        /// </remarks>
        /// <param name="s">Cell text to examine; may be null.</param>
        /// <returns><c>true</c> if the text is considered a person name.</returns>
        public static bool CheckPersonName(String s)
        {
            if (String.IsNullOrEmpty(s))
            {
                return(false);
            }
            if (s.Contains('.'))
            {
                return(true);
            }

            string trimmed = s.Trim();

            if (!trimmed.Any(Char.IsWhiteSpace))
            {
                return(false);
            }

            // Split the trimmed text and drop empty tokens: Regex.Split yields an empty
            // string for every leading/trailing separator, which used to make the
            // "last word" empty and inflate the word count.
            string[] words = Regex.Split(trimmed, @"[\,\s\n]+").Where(w => w.Length > 0).ToArray();
            if (words.Length == 0)
            {
                // only separators, e.g. ", ,"
                return(false);
            }
            if (TextHelpers.CanBePatronymic(words[words.Length - 1]))
            {
                return(true);
            }
            if (words.Length != 3)
            {
                // not the three-token shape: ask the data predictor
                var predictedField = ColumnByDataPredictor.PredictByString(s);
                if (!HeaderHelpers.IsNameDeclarationField(predictedField))
                {
                    return(false);
                }
            }
            return(true);
        }
示例#2
0
        /// <summary>
        /// Resolves every header cell to a <c>DeclarationField</c> and registers it in
        /// <paramref name="columnOrdering"/> via <c>AddColumn</c>.
        /// </summary>
        /// <remarks>
        /// Cells whose merged row count equals the adapter's row count are skipped.
        /// Titles that are empty or at most one character long after removing absence
        /// markers (except the title "№") are resolved from column data; other titles are
        /// resolved by <c>HeaderHelpers.TryGetField</c>, with a data-based fallback for
        /// titles of at most four characters.
        /// </remarks>
        /// <param name="adapter">Table adapter passed to the data predictor.</param>
        /// <param name="cells">Header cells to map.</param>
        /// <param name="columnOrdering">Header description that receives the mapped columns.</param>
        /// <exception cref="SmartParserException">A non-short title could not be recognized.</exception>
        /// <exception cref="ColumnDetectorException">A short, non-empty title could not be predicted from data.</exception>
        public static void MapColumnTitlesToInnerConstants(IAdapter adapter, List <Cell> cells, TableHeader columnOrdering)
        {
            foreach (var cell in cells)
            {
                string title = cell.GetText(true);
                Logger.Debug(string.Format("column title: \"{0}\"[{1}]", title.ReplaceEolnWithSpace().CoalesceWhitespace(), cell.CellWidth));
                string titleWithoutMarkers = AbsenceMarkers.Aggregate(title, (acc, marker) => acc.Replace(marker, "")).Trim();

                // a cell merged over every row of the table is not a column title
                if (adapter.GetRowsCount() == cell.MergedRowsCount)
                {
                    continue;
                }

                bool titleTooShort = title != "№" && (title == "" || titleWithoutMarkers.Length <= 1);
                DeclarationField field;

                if (titleTooShort)
                {
                    // nothing useful in the title, rely on the column values
                    field = ColumnByDataPredictor.PredictEmptyColumnTitle(adapter, cell);
                    Logger.Debug("Predict: " + field.ToString());
                }
                else
                {
                    field = HeaderHelpers.TryGetField(cell.TextAbove, title);
                    bool unresolvedShortTitle = field == DeclarationField.None && titleWithoutMarkers.Length <= 4;
                    if (unresolvedShortTitle)
                    {
                        field = ColumnByDataPredictor.PredictEmptyColumnTitle(adapter, cell);
                        Logger.Debug("Predict: " + field.ToString());
                    }
                    if (field == DeclarationField.None)
                    {
                        throw new SmartParserException(String.Format("Cannot recognize field \"{0}\"", title.Replace('\n', ' ')));
                    }
                }

                // only reachable with None after the data-based prediction of a short title
                if (field == DeclarationField.None && !DataHelper.IsEmptyValue(title))
                {
                    throw new ColumnDetectorException(String.Format("Fail to detect column type row: {0} title:{1}", cell.Row, title));
                }
                if (ColumnByDataPredictor.CalcPrecision)
                {
                    ColumnByDataPredictor.PredictForPrecisionCheck(adapter, cell, field);
                }

                AddColumn(columnOrdering, field, cell);

                // in name-only mode stop as soon as the name column is registered
                if (TableHeader.SearchForFioColumnOnly && HeaderHelpers.IsNameDeclarationField(field))
                {
                    break;
                }
            }
        }
        /// <summary>
        /// Checks whether the content of <paramref name="cell"/> is compatible with
        /// <paramref name="field"/>, ignoring the own-type bits of the field.
        /// </summary>
        /// <param name="field">Field the cell is expected to hold.</param>
        /// <param name="cell">Cell whose text is examined.</param>
        /// <returns>
        /// <c>false</c> for an empty cell; <c>true</c> if the field has a square bit set and the
        /// text parses as a square, or if the field predicted from the text equals
        /// <paramref name="field"/> once <c>DeclarationField.AllOwnTypes</c> bits are masked out.
        /// </returns>
        public static bool TestFieldWithoutOwntypes(DeclarationField field, Cell cell)
        {
            if (cell.IsEmpty)
            {
                return false;
            }

            string cellText = cell.GetText(true);
            bool   expectsSquare = (field & DeclarationField.SquareMask) > 0;

            if (expectsSquare && DataHelper.ParseSquare(cellText).HasValue)
            {
                return true;
            }

            // compare the two fields with the own-type bits cleared on both sides
            var ownTypesCleared = ~DeclarationField.AllOwnTypes;
            var predictedKind   = ColumnByDataPredictor.PredictByString(cellText) & ownTypesCleared;
            var expectedKind    = field & ownTypesCleared;

            return predictedKind == expectedKind;
        }
示例#4
0
        /// <summary>
        /// Maps the cells of a row to header fields by position, checking that each cell
        /// horizontally intersects the header column with the same index.
        /// </summary>
        /// <remarks>
        /// The pixel span of a cell starts at the first cell's <c>AdditTableIndention</c> and
        /// advances by the width of every preceding cell. When
        /// <c>TableHeader.PeriodIntersection</c> returns 0 for a cell/column pair the mismatch
        /// is counted; a non-empty mismatching cell must additionally pass
        /// <c>ColumnByDataPredictor.TestFieldWithoutOwntypes</c>, which resets the mismatch counter.
        /// </remarks>
        /// <param name="columnOrdering">Header whose <c>MergedColumnOrder</c> is matched by index.</param>
        /// <param name="cells">Cells of the row to map.</param>
        /// <returns>
        /// Field-to-cell map, or <c>null</c> when the counts differ, the row has no cells,
        /// a non-empty cell contradicts its column, or three or more mismatches remain counted.
        /// </returns>
        static Dictionary <DeclarationField, Cell> MapByOrderAndIntersection(TableHeader columnOrdering, List <Cell> cells)
        {
            if (columnOrdering.MergedColumnOrder.Count != cells.Count)
            {
                return(null);
            }
            if (cells.Count == 0)
            {
                // nothing to map; cells[0] below would throw ArgumentOutOfRangeException
                return(null);
            }
            int start           = cells[0].AdditTableIndention;
            var res             = new Dictionary <DeclarationField, Cell>();
            int pixelErrorCount = 0;

            for (int i = 0; i < cells.Count; i++)
            {
                var cell    = cells[i];
                var colInfo = columnOrdering.MergedColumnOrder[i];
                int s1      = start;
                int e1      = start + cell.CellWidth;
                int s2      = colInfo.ColumnPixelStart;
                int e2      = colInfo.ColumnPixelStart + colInfo.ColumnPixelWidth;
                if (TableHeader.PeriodIntersection(s1, e1, s2, e2) == 0)
                {
                    pixelErrorCount += 1;
                    if (!DataHelper.IsEmptyValue(cell.Text))
                    {
                        // geometry disagrees: fall back to checking the cell content against the field
                        string escapedText = cell.Text.Replace("\n", "\\n");
                        if (!ColumnByDataPredictor.TestFieldWithoutOwntypes(colInfo.Field, cell))
                        {
                            Logger.Debug(string.Format("cannot map column N={0} text={1}", i, escapedText));
                            return(null);
                        }
                        Logger.Debug(string.Format("found semantic argument for mapping N={0} text={1} to {2}",
                                                   i, escapedText, colInfo.Field));
                        pixelErrorCount = 0;
                    }
                }
                res[colInfo.Field] = cell;

                // the next cell starts where this one ends
                start = e1;
            }
            if (pixelErrorCount >= 3)
            {
                return(null);
            }
            return(res);
        }
示例#5
0
        /// <summary>
        /// Walks the data rows of the table, splitting them into sections, declarants and
        /// relatives, and attaches every accepted row to the current person.
        /// </summary>
        /// <remarks>
        /// Empty rows and numbering rows are skipped. A section row opens a new section and
        /// a header row replaces the active column ordering. The declaration year is taken
        /// from the first row whose <c>IncomeYear</c> cell parses as an integer; unparsable
        /// values are logged and ignored.
        /// </remarks>
        /// <param name="columnOrdering">Initial table header; replaced when a new header row is met.</param>
        /// <param name="updateTrigrams">
        /// When <c>true</c>, each data row is passed to <c>ColumnByDataPredictor.UpdateByRow</c>
        /// and <c>ColumnByDataPredictor.WriteData</c> is called at the end.
        /// </param>
        public void FindBordersAndPersonNames(TableHeader columnOrdering, bool updateTrigrams)
        {
            int rowOffset = columnOrdering.FirstDataRow;

            if (columnOrdering.Section != null)
            {
                CreateNewSection(rowOffset, columnOrdering.Section);
            }

            // set once a relative or data row has no person to attach to;
            // cleared when the next declarant is created
            bool   skipEmptyPerson = false;
            string prevPersonName  = "";

            for (int row = rowOffset; row < Adapter.GetRowsCount(); row++)
            {
                DataRow currRow = Adapter.GetRow(columnOrdering, row);
                if (currRow == null || currRow.IsEmpty())
                {
                    continue;
                }
                if (IAdapter.IsNumbersRow(currRow.Cells))
                {
                    continue;
                }
                Logger.Debug(String.Format("currRow {0}, col_count={1}: {2}", row, currRow.Cells.Count, currRow.DebugString()));

                string sectionName;
                if (Adapter.IsSectionRow(row, currRow.Cells, columnOrdering.GetMaxColumnEndIndex(), false, out sectionName))
                {
                    CreateNewSection(row, sectionName);
                    continue;
                }

                TableHeader newColumnOrdering;
                if (IsHeaderRow(currRow, out newColumnOrdering))
                {
                    columnOrdering = newColumnOrdering;
                    Logger.Debug(String.Format("found a new table header {0}", currRow.DebugString()));
                    row = newColumnOrdering.GetPossibleHeaderEnd() - 1; // row++ in "for" cycle
                    continue;
                }

                if (updateTrigrams)
                {
                    ColumnByDataPredictor.UpdateByRow(columnOrdering, currRow);
                }

                if (!currRow.InitPersonData(prevPersonName))
                {
                    // be robust, ignore errors see 8562.pdf.docx in tests
                    continue;
                }

                if (currRow.PersonName != String.Empty)
                {
                    // the row starts a new declarant
                    prevPersonName = currRow.PersonName;
                    CreateNewDeclarant(Adapter, currRow);
                    if (CurrentPerson != null)
                    {
                        skipEmptyPerson = false;
                    }
                }
                else if (currRow.RelativeType != String.Empty)
                {
                    // the row starts a relative of the current declarant
                    if (!skipEmptyPerson)
                    {
                        try
                        {
                            CreateNewRelative(currRow);
                        }
                        catch (SmartParserRelativeWithoutPersonException e)
                        {
                            skipEmptyPerson = true;
                            Logger.Error(e.Message);
                            continue;
                        }
                    }
                }
                else
                {
                    // continuation row: it needs a person to belong to
                    if (CurrentPerson == null && FailOnRelativeOrphan)
                    {
                        skipEmptyPerson = true;
                        Logger.Error(String.Format("No person to attach info on row={0}", row));
                        continue;
                    }
                }
                if (!skipEmptyPerson)
                {
                    AddInputRowToCurrentPerson(columnOrdering, currRow);
                    if (_Declaration.Properties.Year == null && columnOrdering.ContainsField(DeclarationField.IncomeYear))
                    {
                        var incomeYear = currRow.GetDeclarationField(DeclarationField.IncomeYear);
                        if (incomeYear != null)
                        {
                            // TryParse instead of Parse: a malformed or empty year cell must not
                            // abort the whole document; a later row may still supply the year
                            int parsedYear;
                            if (int.TryParse(incomeYear.Text, out parsedYear))
                            {
                                _Declaration.Properties.Year = parsedYear;
                            }
                            else
                            {
                                Logger.Error(String.Format("Cannot parse income year \"{0}\" on row={1}", incomeYear.Text, row));
                            }
                        }
                    }
                }
            }
            if (updateTrigrams)
            {
                ColumnByDataPredictor.WriteData();
            }

            Logger.Info("Parsed {0} declarants", _Declaration.PublicServants.Count());
        }