/// <summary>
/// Reports every queryable column whose number of <see cref="DBNull"/> values is
/// greater than <c>MaxPercentageNull</c> percent of the rows that were read.
/// </summary>
/// <param name="table">
/// Table to scan. Nothing is reported when its cardinality is below <c>MinRows</c>
/// or when it has no queryable columns.
/// </param>
/// <param name="issueCollector">Receives one issue per offending column.</param>
/// <param name="providers">Not used by this rule.</param>
public override void Execute(DataTable table, IIssueCollector issueCollector, IProviderCollection providers)
{
    if (table.Cardinality < MinRows.Value)
    {
        return;
    }

    var columns = table.QueryableColumns;
    if (columns.Count == 0)
    {
        return;
    }

    // Start every column at zero so columns without any nulls are still present in the map.
    var nullsPerColumn = DictionaryFactory.CreateColumnID<int>();
    foreach (var column in columns)
    {
        nullsPerColumn[column] = 0;
    }

    // Single pass over the rows: count the rows and, per column, the DBNull cells.
    int rowsRead = 0;
    using (var rows = table.GetTableRowEnumerable())
    {
        foreach (var row in rows)
        {
            rowsRead++;
            foreach (var column in columns)
            {
                if (row[column.ColumnName] is DBNull)
                {
                    nullsPerColumn[column] = nullsPerColumn[column] + 1;
                }
            }
        }
    }

    // Absolute number of nulls a column may contain before it is reported.
    var nullThreshold = MaxPercentageNull.Value * rowsRead / 100f;

    foreach (var entry in nullsPerColumn.Where(pair => pair.Value > nullThreshold))
    {
        var percentNull = Math.Round(entry.Value * 100f / rowsRead, 1);
        var issue = new Issue(this, this.Severity)
        {
            Name = "Column Containing Too Many Nulls",
            Context = new ColumnContext(entry.Key),
            Description = new Description("Column '{0}' has {1} percent nulls", entry.Key, percentNull)
        };
        issueCollector.ReportIssue(issue);
    }
}
/// <summary>
/// Reports text columns (CHAR, NCHAR, NVARCHAR, VARCHAR) whose values are almost
/// all accepted by <c>Classifier.IsBool</c>, i.e. text columns that appear to
/// store boolean values.
/// </summary>
/// <remarks>
/// A cell counts as "not boolean" when it is null, <see cref="DBNull"/>, or its string
/// form is rejected by <c>Classifier.IsBool</c>. A column is reported when its
/// not-boolean count is strictly below <c>DirtinessFactor</c> percent of the rows read.
/// </remarks>
/// <param name="table">Table whose columns and rows are inspected.</param>
/// <param name="issueCollector">Receives one issue per column that looks boolean.</param>
/// <param name="providers">Source of the <c>InformationContent</c> provider used to pre-filter columns.</param>
public override void Execute(DataTable table, IIssueCollector issueCollector, IProviderCollection providers)
{
    var dataTypes = new[] { DataType.CHAR, DataType.NCHAR, DataType.NVARCHAR, DataType.VARCHAR };
    var informationContent = providers.GetProvider<InformationContent>();

    // Avoid checking columns with more than 8 unique values
    // (that is what the "informationContent[c] < 3" filter is for).
    var columnsToCheck = (from c in table.Columns
                          where dataTypes.Contains(c.DataType) && informationContent[c] < 3
                          select c).ToArray();
    if (columnsToCheck.Length == 0)
    {
        return;
    }

    var columnNotBooleanCount = DictionaryFactory.CreateColumnID<int>();
    foreach (var col in columnsToCheck)
    {
        columnNotBooleanCount[col] = 0;
    }

    int rowCount = 0;
    using (var rowEnumerable = table.GetTableRowEnumerable())
    {
        foreach (var row in rowEnumerable)
        {
            rowCount++;
            foreach (var column in columnsToCheck)
            {
                var value = row[column.ColumnName];

                // The null test is defensive: it keeps ToString() from throwing should the
                // row ever hand back a plain null instead of DBNull.
                if (value == null || value is DBNull || !Classifier.IsBool(value.ToString()))
                {
                    columnNotBooleanCount[column] += 1;
                }
            }

            // Every 128th row, drop the columns that are already too dirty to be boolean
            // and stop scanning altogether once no candidate is left.
            if ((rowCount & 127) == 0)
            {
                var interimAllowedDirtiness = rowCount * DirtinessFactor.Value / 100f;
                columnsToCheck = columnsToCheck
                    .Where(c => columnNotBooleanCount[c] < interimAllowedDirtiness)
                    .ToArray();
                if (columnsToCheck.Length == 0)
                {
                    return;
                }
            }
        }
    }

    // The threshold depends only on the final row count, so compute it once for all columns.
    var allowedDirtiness = rowCount * DirtinessFactor.Value / 100f;
    foreach (var column in columnsToCheck)
    {
        if (columnNotBooleanCount[column] < allowedDirtiness)
        {
            issueCollector.ReportIssue(new Issue(this, this.Severity)
            {
                Name = "Text Column Used for Boolean Values",
                Context = new ColumnContext(column),
                Description = new Description("The column '{0}' contains boolean values. Consider using another data type", column),
                Severity = this.Severity
            });
        }
    }
}