/// <summary>
/// Reports text columns whose string values all classify as one single value type
/// (<c>ValueType.Date</c>, <c>ValueType.Float</c> or <c>ValueType.Int</c>).
/// </summary>
/// <remarks>
/// A column is a candidate when its data type is contained in
/// <c>DataTypes.DataTypesLists.TextTypes()</c>. A candidate is dropped as soon as one of
/// its string values classifies as anything else, or as a different type than the first
/// classified value of that column. Values that are not strings (including DBNull) are
/// ignored. Surviving candidates are reported only when at least <c>minValues.Value</c>
/// values were classified.
/// </remarks>
/// <param name="table">Table whose columns and rows are inspected.</param>
/// <param name="issueCollector">Receives one issue per offending column.</param>
/// <param name="providers">Not used by this rule.</param>
public override void Execute(DataTable table, IIssueCollector issueCollector, IProviderCollection providers)
{
    var candidates = table.Columns
        .Where(c => DataTypes.DataTypesLists.TextTypes().Contains(c.DataType))
        .Select(c => new Candidate(c))
        .ToList();

    // Without any text column there is nothing to examine, so do not open a
    // row enumeration at all.
    if (candidates.Count == 0)
    {
        return;
    }

    using (var rowEnumerable = table.GetTableRowEnumerable())
    {
        foreach (var row in rowEnumerable)
        {
            // Index-based loop: rejected candidates are removed in place, so no
            // per-row snapshot of the list is needed. The index only advances
            // when the current candidate is kept.
            for (int index = 0; index < candidates.Count; )
            {
                var candidate = candidates[index];

                // 'as' yields null for DBNull, null and every other non-string value.
                var text = row[candidate.Column.ColumnName] as string;
                if (text == null)
                {
                    index++;
                    continue;
                }

                ValueType type = Classifier.Classify(text);
                bool isTypedValue = type == ValueType.Date
                                    || type == ValueType.Float
                                    || type == ValueType.Int;
                if (!isTypedValue)
                {
                    candidates.RemoveAt(index);
                    continue;
                }

                // The first classified value decides which type the column must keep.
                if (candidate.ValuesFound == 0)
                {
                    candidate.Type = type;
                }

                if (type != candidate.Type)
                {
                    candidates.RemoveAt(index);
                    continue;
                }

                candidate.ValuesFound += 1;
                index++;
            }

            // Every candidate has been rejected; reading further rows cannot change that.
            if (candidates.Count == 0)
            {
                break;
            }
        }
    }

    foreach (var candidate in candidates)
    {
        if (candidate.ValuesFound < this.minValues.Value)
        {
            continue;
        }

        issueCollector.ReportIssue(new Issue(this, this.Severity)
        {
            Name = "Numbers or Dates Stored in Varchar Column",
            Context = new ColumnContext(candidate.Column),
            Description = new Description("The varchar column '{0}' is used to store values of type '{1}'", candidate.Column, candidate.Type.ToString())
        });
    }
}
/// <summary>
/// Reports CHAR, NCHAR, NVARCHAR and VARCHAR columns whose values are almost exclusively
/// recognised as booleans by <c>Classifier.IsBool</c>.
/// </summary>
/// <remarks>
/// For every examined column the number of values that are DBNull or not recognised as
/// boolean is counted. A column is reported when that count stays below
/// <c>DirtinessFactor.Value</c> percent of the rows read. Columns that already exceed the
/// limit are dropped every 128 rows, and the scan stops once none remain.
/// </remarks>
/// <param name="table">Table whose columns and rows are inspected.</param>
/// <param name="issueCollector">Receives one issue per offending column.</param>
/// <param name="providers">Source of the <c>InformationContent</c> provider used to pre-filter columns.</param>
public override void Execute(DataTable table, IIssueCollector issueCollector, IProviderCollection providers)
{
    // Number of rows between two pruning passes over the remaining columns.
    const int pruneInterval = 128;

    var textTypes = new[] { DataType.CHAR, DataType.NCHAR, DataType.NVARCHAR, DataType.VARCHAR };
    var information = providers.GetProvider<InformationContent>();

    // Only columns with an information content below 3 are examined; per the
    // original note this avoids checking columns with more than 8 unique values.
    var remaining = table.Columns
        .Where(c => textTypes.Contains(c.DataType) && information[c] < 3)
        .ToArray();
    if (remaining.Length == 0)
    {
        return;
    }

    // Per column: how many values were DBNull or not recognised as boolean.
    var misses = DictionaryFactory.CreateColumnID<int>();
    foreach (var column in remaining)
    {
        misses[column] = 0;
    }

    int rowCount = 0;
    using (var rowEnumerable = table.GetTableRowEnumerable())
    {
        foreach (var row in rowEnumerable)
        {
            rowCount++;

            foreach (var column in remaining)
            {
                var cell = row[column.ColumnName];
                bool looksBoolean = !(cell is DBNull) && Classifier.IsBool(cell.ToString());
                if (!looksBoolean)
                {
                    misses[column]++;
                }
            }

            if (rowCount % pruneInterval != 0)
            {
                continue;
            }

            // Periodic pruning: keep only the columns still within the allowed dirtiness.
            var limit = rowCount * DirtinessFactor.Value / 100f;
            remaining = Array.FindAll(remaining, c => misses[c] < limit);
            if (remaining.Length == 0)
            {
                return;
            }
        }
    }

    foreach (var column in remaining)
    {
        var limit = rowCount * DirtinessFactor.Value / 100f;
        if (misses[column] < limit)
        {
            var issue = new Issue(this, this.Severity);
            issue.Name = "Text Column Used for Boolean Values";
            issue.Context = new ColumnContext(column);
            issue.Description = new Description("The column '{0}' contains boolean values. Consider using another data type", column);
            issue.Severity = this.Severity;
            issueCollector.ReportIssue(issue);
        }
    }
}