/// <summary>
/// Resolves each non-empty word to a <c>DeclarationField</c>, tallies how often every
/// field was seen, and returns whatever <c>FindMin</c> picks from that tally.
/// </summary>
/// <param name="words">
/// Strings to classify; entries for which <c>DataHelper.IsEmptyValue</c> is true are skipped.
/// </param>
/// <returns>
/// The value returned by <c>FindMin</c> for the negated tally
/// (presumably the most frequently seen field — confirm against <c>FindMin</c>).
/// </returns>
public static DeclarationField PredictByStrings(List <string> words)
        {
            // Occurrences are stored negated (-1 per hit) so that the tally can be handed to FindMin.
            var negatedTally = new Dictionary <DeclarationField, double>();

            foreach (string word in words)
            {
                if (!DataHelper.IsEmptyValue(word))
                {
                    // First try to read the word as a header title; fall back to value-based prediction.
                    DeclarationField guess = HeaderHelpers.TryGetField("", word);
                    if (guess == DeclarationField.None)
                    {
                        guess = PredictByString(word);
                    }

                    // TryGetValue leaves 0 in 'soFar' on a miss, so a first hit is stored as -1.
                    double soFar;
                    negatedTally.TryGetValue(guess, out soFar);
                    negatedTally[guess] = soFar - 1;
                }
            }

            return FindMin(negatedTally);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Walks the header cells, determines a <c>DeclarationField</c> for each one and
        /// registers it in <paramref name="columnOrdering"/> through <c>AddColumn</c>.
        /// </summary>
        /// <param name="adapter">Table adapter; used for the row count and for data-based prediction.</param>
        /// <param name="cells">Header cells to process, in order.</param>
        /// <param name="columnOrdering">Header object that receives the recognized columns.</param>
        /// <exception cref="SmartParserException">
        /// A title long enough to be looked up could be neither recognized nor predicted.
        /// </exception>
        /// <exception cref="ColumnDetectorException">
        /// No field was determined and the title is not an empty value.
        /// </exception>
        static public void MapColumnTitlesToInnerConstants(IAdapter adapter, List <Cell> cells, TableHeader columnOrdering)
        {
            foreach (var headerCell in cells)
            {
                string title = headerCell.GetText(true);
                Logger.Debug(string.Format("column title: \"{0}\"[{1}]", title.ReplaceEolnWithSpace().CoalesceWhitespace(), headerCell.CellWidth));

                // Title with every absence marker removed; only its length is consulted below.
                string strippedTitle = AbsenceMarkers.Aggregate(title, (acc, marker) => acc.Replace(marker, "")).Trim();

                // Skip cells whose merged-row count equals the table's row count
                // (presumably such a cell is not a real column header — confirm).
                if (adapter.GetRowsCount() == headerCell.MergedRowsCount)
                {
                    continue;
                }

                // A title is looked up by text when it is "№", or when it is non-empty and
                // more than one character remains after stripping the absence markers.
                bool lookupByTitle = (title != "" && strippedTitle.Length > 1) || (title == "№");

                DeclarationField detected;
                if (lookupByTitle)
                {
                    detected = HeaderHelpers.TryGetField(headerCell.TextAbove, title);
                    if (detected == DeclarationField.None && strippedTitle.Length <= 4)
                    {
                        // Unrecognized and still short: fall back to predicting from the column values.
                        detected = ColumnByDataPredictor.PredictEmptyColumnTitle(adapter, headerCell);
                        Logger.Debug("Predict: " + detected.ToString());
                    }
                    if (detected == DeclarationField.None)
                    {
                        throw new SmartParserException(String.Format("Cannot recognize field \"{0}\"", title.Replace('\n', ' ')));
                    }
                }
                else
                {
                    // too short title, try to predict by values
                    detected = ColumnByDataPredictor.PredictEmptyColumnTitle(adapter, headerCell);
                    Logger.Debug("Predict: " + detected.ToString());
                }

                if (detected == DeclarationField.None && !DataHelper.IsEmptyValue(title))
                {
                    throw new ColumnDetectorException(String.Format("Fail to detect column type row: {0} title:{1}", headerCell.Row, title));
                }

                if (ColumnByDataPredictor.CalcPrecision)
                {
                    ColumnByDataPredictor.PredictForPrecisionCheck(adapter, headerCell, detected);
                }

                AddColumn(columnOrdering, detected, headerCell);

                // When only the name column is being searched for, stop right after it is added.
                if (TableHeader.SearchForFioColumnOnly && HeaderHelpers.IsNameDeclarationField(detected))
                {
                    break;
                }
            }
        }