/// <summary>
/// Classifies every non-empty value in <paramref name="words"/> and returns the field picked by
/// <c>FindMin</c> from the collected votes.
/// </summary>
/// <remarks>
/// A value is first resolved with <c>HeaderHelpers.TryGetField</c> (using an empty "text above");
/// when that yields <c>DeclarationField.None</c> the value is handed to <c>PredictByString</c>.
/// Each vote is recorded as -1, so a field's entry is the negated number of votes it received.
/// NOTE(review): FindMin presumably returns the key with the smallest value, i.e. the most
/// frequent field - confirm against its definition.
/// </remarks>
/// <param name="words">Values to classify; entries for which <c>DataHelper.IsEmptyValue</c> is true are skipped.</param>
/// <returns>The result of <c>FindMin</c> over the negated vote counts.</returns>
public static DeclarationField PredictByStrings(List<string> words)
{
    var votes = new Dictionary<DeclarationField, double>();

    foreach (string word in words)
    {
        if (DataHelper.IsEmptyValue(word))
        {
            continue;
        }

        DeclarationField guess = HeaderHelpers.TryGetField("", word);
        if (guess == DeclarationField.None)
        {
            guess = PredictByString(word);
        }

        // A missing key leaves 'tally' at 0, so the first vote for a field is stored as -1.
        double tally;
        votes.TryGetValue(guess, out tally);
        votes[guess] = tally - 1;
    }

    return FindMin(votes);
}
/// <summary>
/// Walks the header cells in order and registers each one in <paramref name="columnOrdering"/>
/// under the declaration field recognized from its title or predicted from the column data.
/// </summary>
/// <remarks>
/// Titles that are empty, or at most one character long once all <c>AbsenceMarkers</c> are removed
/// and the result is trimmed, are classified only by <c>ColumnByDataPredictor.PredictEmptyColumnTitle</c>;
/// the title "№" is exempt from this rule. Every other title is looked up with
/// <c>HeaderHelpers.TryGetField</c>, falling back to the data predictor when the lookup fails and
/// the cleaned title has at most 4 characters.
/// When <c>TableHeader.SearchForFioColumnOnly</c> is set, processing stops after the first cell
/// for which <c>HeaderHelpers.IsNameDeclarationField</c> returns true.
/// </remarks>
/// <param name="adapter">Table adapter; supplies the row count and is passed to the data-based predictor.</param>
/// <param name="cells">Header cells to classify, processed in list order.</param>
/// <param name="columnOrdering">Header object that receives the recognized columns through <c>AddColumn</c>.</param>
/// <exception cref="SmartParserException">
/// A title went through the lookup path and neither the lookup nor the fallback prediction produced a field.
/// </exception>
/// <exception cref="ColumnDetectorException">
/// Prediction for a too-short title returned <c>DeclarationField.None</c> although
/// <c>DataHelper.IsEmptyValue</c> reports the title as non-empty.
/// </exception>
public static void MapColumnTitlesToInnerConstants(IAdapter adapter, List<Cell> cells, TableHeader columnOrdering)
{
    foreach (var cell in cells)
    {
        string title = cell.GetText(true);
        string printableTitle = title.ReplaceEolnWithSpace().CoalesceWhitespace();
        Logger.Debug(string.Format("column title: \"{0}\"[{1}]", printableTitle, cell.CellWidth));

        // Remove every absence marker from the title, then trim, before measuring its length.
        string strippedTitle = title;
        foreach (var marker in AbsenceMarkers)
        {
            strippedTitle = strippedTitle.Replace(marker, "");
        }
        strippedTitle = strippedTitle.Trim();

        // Skip cells whose merged-row count equals the table's row count
        // (presumably a caption spanning the whole table - TODO confirm).
        if (adapter.GetRowsCount() == cell.MergedRowsCount)
        {
            continue;
        }

        DeclarationField field;
        bool titleTooShort = title != "№" && (title == "" || strippedTitle.Length <= 1);

        if (!titleTooShort)
        {
            field = HeaderHelpers.TryGetField(cell.TextAbove, title);

            // Unknown but still short title: give the data-based predictor a chance.
            if (field == DeclarationField.None && strippedTitle.Length <= 4)
            {
                field = ColumnByDataPredictor.PredictEmptyColumnTitle(adapter, cell);
                Logger.Debug("Predict: " + field.ToString());
            }

            if (field == DeclarationField.None)
            {
                throw new SmartParserException(String.Format("Cannot recognize field \"{0}\"", title.Replace('\n', ' ')));
            }
        }
        else
        {
            // too short title, try to predict by values
            field = ColumnByDataPredictor.PredictEmptyColumnTitle(adapter, cell);
            Logger.Debug("Predict: " + field.ToString());

            // Only this branch can leave the field unresolved; the lookup branch throws above.
            if (field == DeclarationField.None && !DataHelper.IsEmptyValue(title))
            {
                throw new ColumnDetectorException(String.Format("Fail to detect column type row: {0} title:{1}", cell.Row, title));
            }
        }

        if (ColumnByDataPredictor.CalcPrecision)
        {
            ColumnByDataPredictor.PredictForPrecisionCheck(adapter, cell, field);
        }

        AddColumn(columnOrdering, field, cell);

        // In name-column-only mode there is nothing left to do once the name column is found.
        if (TableHeader.SearchForFioColumnOnly && HeaderHelpers.IsNameDeclarationField(field))
        {
            break;
        }
    }
}