예제 #1
0
        /// <summary>
        /// Reports every queryable column of <paramref name="table"/> whose number of
        /// <see cref="DBNull"/> values exceeds <c>MaxPercentageNull</c> percent of the rows read.
        /// Tables whose cardinality is below <c>MinRows</c> are skipped entirely.
        /// </summary>
        /// <param name="table">Table whose rows are scanned.</param>
        /// <param name="issueCollector">Receives one issue per offending column.</param>
        /// <param name="providers">Not used by this rule.</param>
        public override void Execute(DataTable table, IIssueCollector issueCollector, IProviderCollection providers)
        {
            if (table.Cardinality < MinRows.Value)
            {
                return;
            }

            var columns = table.QueryableColumns;
            if (columns.Count == 0)
            {
                return;
            }

            // Seed a zero counter per column so the scan below can increment unconditionally.
            var nullsPerColumn = DictionaryFactory.CreateColumnID <int>();
            foreach (var column in columns)
            {
                nullsPerColumn[column] = 0;
            }

            // Single pass over the table: count rows and, per column, the DBNull cells.
            int rowsSeen = 0;
            using (var rows = table.GetTableRowEnumerable())
            {
                foreach (var row in rows)
                {
                    rowsSeen++;
                    foreach (var column in columns)
                    {
                        if (row[column.ColumnName] is DBNull)
                        {
                            nullsPerColumn[column]++;
                        }
                    }
                }
            }

            // Absolute number of nulls a column may contain before it is reported.
            var nullThreshold = this.MaxPercentageNull.Value * rowsSeen / 100f;

            foreach (var entry in nullsPerColumn)
            {
                if (entry.Value > nullThreshold)
                {
                    var percentNull = Math.Round(entry.Value * 100f / rowsSeen, 1);
                    issueCollector.ReportIssue(new Issue(this, this.Severity)
                    {
                        Name        = "Column Containing Too Many Nulls",
                        Context     = new ColumnContext(entry.Key),
                        Description = new Description("Column '{0}' has {1} percent nulls", entry.Key, percentNull)
                    });
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Reports CHAR, NCHAR, NVARCHAR and VARCHAR columns whose values are (almost) exclusively
        /// text that <c>Classifier.IsBool</c> accepts. A column is reported when the number of
        /// null or non-boolean values stays below <c>DirtinessFactor</c> percent of the rows read.
        /// </summary>
        /// <param name="table">Table whose columns and rows are examined.</param>
        /// <param name="issueCollector">Receives one issue per column that looks boolean.</param>
        /// <param name="providers">Source of the <c>InformationContent</c> provider used to pre-filter columns.</param>
        public override void Execute(DataTable table, IIssueCollector issueCollector, IProviderCollection providers)
        {
            var dataTypes = new[] { DataType.CHAR, DataType.NCHAR, DataType.NVARCHAR, DataType.VARCHAR };

            var informationContent = providers.GetProvider <InformationContent>();

            var columnsToCheck = (from c in table.Columns
                                  where dataTypes.Contains(c.DataType) && informationContent[c] < 3 // Avoid checking columns with more than 8 unique values
                                  select c).ToArray();

            if (columnsToCheck.Length == 0)
            {
                return;
            }

            // Per-column count of values that are null or not recognised as boolean text.
            var columnNotBooleanCount = DictionaryFactory.CreateColumnID <int>();

            foreach (var col in columnsToCheck)
            {
                columnNotBooleanCount[col] = 0;
            }

            int rowCount = 0;

            using (var rowEnumerable = table.GetTableRowEnumerable())
            {
                foreach (var row in rowEnumerable)
                {
                    rowCount++;
                    foreach (var column in columnsToCheck)
                    {
                        var value = row[column.ColumnName];
                        // A CLR null is treated like DBNull so that ToString() is never called on null.
                        if (value == null || value is DBNull || !Classifier.IsBool(value.ToString()))
                        {
                            columnNotBooleanCount[column] += 1;
                        }
                    }
                    // Every 128th row, drop the columns that are already too dirty to be boolean
                    // and stop scanning once no candidate is left.
                    if ((rowCount & 127) == 0)
                    {
                        var allowedDirtiness = rowCount * DirtinessFactor.Value / 100f;
                        columnsToCheck = columnsToCheck.Where(c => columnNotBooleanCount[c] < allowedDirtiness).ToArray();
                        if (columnsToCheck.Length == 0)
                        {
                            return;
                        }
                    }
                }
            }

            // The threshold depends only on the final row count, so compute it once for all columns.
            // Named differently from the in-loop local to avoid a scope clash.
            var finalAllowedDirtiness = rowCount * DirtinessFactor.Value / 100f;

            foreach (var column in columnsToCheck)
            {
                // Re-check: rows read since the last periodic pruning may have pushed a column over the limit.
                if (columnNotBooleanCount[column] < finalAllowedDirtiness)
                {
                    issueCollector.ReportIssue(new Issue(this, this.Severity)
                    {
                        Name        = "Text Column Used for Boolean Values",
                        Context     = new ColumnContext(column),
                        Description = new Description("The column '{0}' contains boolean values. Consider using another data type", column),
                        Severity    = this.Severity
                    });
                }
            }
        }