예제 #1
0
        /// <summary>
        /// Selects the table-importance backend: the <c>TableDataImportance</c> provider when
        /// <paramref name="providers"/> returns one, otherwise the <c>PageRank</c> provider.
        /// </summary>
        /// <param name="database">Database being processed; not read by this method.</param>
        /// <param name="providers">Provider collection queried for the importance implementation.</param>
        public override void Execute(Database database, IProviderCollection providers)
        {
            ITableImportance dataBased = providers.GetProvider <TableDataImportance>();

            // The PageRank provider is only requested when no data-based provider came back.
            this._importanceBackend = dataBased ?? providers.GetProvider <PageRank>();
        }
예제 #2
0
        /// <summary>
        /// Renders the HTML report for a DBLint run into the directory <paramref name="dirName"/>.
        /// </summary>
        /// <remarks>
        /// Steps, in order: calculate scores; when <c>DBLint.Settings.IsNormalContext</c> is set, serialize
        /// the run as XML for incremental viewing; assign an HTML file name to every table; fill the
        /// Velocity context; merge <c>mainpage.vm</c>, one <c>issuegroup.vm</c> page per issue group and
        /// (normal context only) one <c>increment.vm</c> page per diff; finally call <c>writeTableViews</c>.
        /// Every output writer is wrapped in <c>using</c> so the file handle is released even when a
        /// template merge throws.
        /// </remarks>
        /// <param name="dirName">Target directory of the report; created when it does not exist.</param>
        /// <param name="dblint">Run whose model, issue collector, rule controller and execution summary are rendered.</param>
        private void Write(String dirName, DatabaseLint dblint)
        {
            this.scoring = new IScoringImpl();
            this.scoring.CalculateScores(dblint);

            if (DBLint.Settings.IsNormalContext)
            {
                //Save run for incremental viewing
                //File name is a timestamp
                // NOTE(review): the components are not zero-padded, so names are neither fixed-width nor
                // unambiguous; left unchanged because the reader of these files is not visible here.
                DateTime now      = DateTime.Now;
                String   fileName = String.Format("{0}{1}{2}{3}{4}{5}.xml", now.Year, now.Month, now.Day, now.Hour,
                                                  now.Minute, now.Second);
                //Folder, i.e.: runs/dbname/
                // NOTE(review): the folder suffix is the literal "testtest" while the runs are later loaded by
                // database name (see GetRuns below) - looks like a debugging leftover; confirm before changing.
                String folder   = Settings.INCREMENTAL_FOLDER + "testtest"; // dblint.DatabaseModel.DatabaseName;
                String filePath = folder + "/" + fileName;
                if (!Directory.Exists(folder))
                {
                    Directory.CreateDirectory(folder);
                }

                //Create run
                DBLint.IncrementalRuns.Run run = new IncrementalRuns.Run(dblint.DatabaseModel, dblint.IssueCollector, scoring.GetScores());
                //Write run
                using (FileStream writer = new FileStream(filePath, FileMode.Create))
                {
                    DataContractSerializer ser = new DataContractSerializer(typeof(DBLint.IncrementalRuns.Run));
                    ser.WriteObject(writer, run);
                    writer.Flush();
                }
            }

            DirectoryInfo dir = new DirectoryInfo(dirName);

            // Give every table a stable, file-system-safe name: table1, table2, ...
            int tableNameCounter = 1;

            foreach (Table table in dblint.DatabaseModel.Tables)
            {
                String tName = "table" + tableNameCounter.ToString();
                this.tableNames.Add(table, tName);
                this.tableFiles.Add(table, "tables/" + tName + ".html");
                tableNameCounter++;
            }

            this.dblint = dblint;

            this.formatter = new HTMLDescriptionFormatter(this.tableFiles);

            IssueCollector issues = dblint.IssueCollector;

            //Create result directory if it does not exist
            if (!dir.Exists)
            {
                dir.Create();
            }

            VelocityContext context = new VelocityContext();

            context.Put("db", dblint.DatabaseModel);
            context.Put("totalScore", this.scoring.GetScore());
            context.Put("issuesTotal", issues.Count());
            context.Put("rulesExecuted", this.getRulesExecuted());
            context.Put("ruleTypes", this.getRuleTypes());
            context.Put("formatter", this.formatter);
            context.Put("HTMLBuilder", this);
            context.Put("summaries", this.dblint.ExecutionSummary);
            context.Put("executionTime", this.formatTimeSpan(this.dblint.ExecutionSummary.ExecutionTime));

            //Pagerank
            IProviderCollection providers = dblint.RuleController.ProviderCollection;
            var rank = providers.GetProvider <DBLint.Rules.SchemaProviders.ImportanceProvider>();

            //List all tables
            var tables = (from t in dblint.DatabaseModel.Tables
                          select new
                          {
                              Table = t,
                              Name = t.TableName,
                              IssueCount = issues.GetIssues(t).Count(),
                              Score = this.scoring.GetScore(t),
                              Importance = Math.Round(rank[t], 1)
                          }).ToList();

            context.Put("tables", tables);

            //Bottom tables: the five lowest-scoring ones
            var bottom = tables.OrderBy(t => t.Score).Take(5).ToList();

            context.Put("bottomTables", bottom);

            int groupId = 0; //Used in the template to identify a group of issues
            //Group issues by name; ToList() runs the query once so every group gets exactly one id
            var issueGroups = (from i in issues
                               group i by i.Name into g
                               orderby g.First().Severity
                               select new
                               {
                                   Name = g.Key,
                                   Count = g.Count(),
                                   Issues = g,
                                   GroupID = ++groupId,
                                   Severity = g.First().Severity
                               }).ToList();

            context.Put("issueGroups", issueGroups);

            //Put issueGroups into severity groups
            var severityGroups = (from issueGroup in issueGroups
                                  group issueGroup by issueGroup.Severity into g
                                  orderby g.First().Severity
                                  select new
                                  {
                                      Severity = g.First().Severity,
                                      IssueGroups = g
                                  });

            context.Put("severityGroups", severityGroups);

            //Incremental runs list
            var diffs = new List <DBLint.IncrementalRuns.Diff>();

            if (DBLint.Settings.IsNormalContext)
            {
                //Incremental runs: compare each of the loaded runs with the one before it in the list
                try
                {
                    var runs = DBLint.IncrementalRuns.Run.GetRuns(dblint.DatabaseModel.DatabaseName, 5).ToList();
                    for (int i = 1; i < runs.Count; i++)
                    {
                        var diff = new DBLint.IncrementalRuns.Diff();
                        diff.Compare(runs[i], runs[i - 1]);
                        diffs.Add(diff);
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a failure here must not prevent the report from being written,
                    // but it is traced instead of being dropped silently.
                    global::System.Diagnostics.Trace.TraceWarning("Could not compute incremental run diffs: {0}", ex);
                }
                context.Put("diffs", diffs);
            }
            //Create template for the main html page
            Template template = Velocity.GetTemplate("mainpage.vm");

            //Write the main page; the writer is disposed even if the merge fails
            using (TextWriter htmlOut = new StreamWriter(Path.Combine(dir.FullName, "mainpage.html")))
            {
                template.Merge(context, htmlOut);
            }

            //Write issue groups
            String issuePath = Path.Combine(dir.FullName, "issues");

            if (!Directory.Exists(issuePath))
            {
                Directory.CreateDirectory(issuePath);
            }
            Template issueGroupTemplate = Velocity.GetTemplate("issuegroup.vm");

            // Pages below live one directory deeper than the main page.
            formatter.PathPrefix = "../";
            foreach (var g in issueGroups)
            {
                context.Put("groupIssues", g.Issues);
                using (TextWriter issueOut = new StreamWriter(Path.Combine(issuePath, g.GroupID.ToString() + ".html")))
                {
                    issueGroupTemplate.Merge(context, issueOut);
                }
            }
            if (DBLint.Settings.IsNormalContext)
            {
                //Write diffs/increments to files:
                String incPath = Path.Combine(dir.FullName, "increments");
                if (!Directory.Exists(incPath))
                {
                    Directory.CreateDirectory(incPath);
                }
                Template incrementTemplate = Velocity.GetTemplate("increment.vm");
                int      diffId            = 0;
                foreach (var diff in diffs)
                {
                    diffId++;
                    context.Put("diff", diff);
                    using (TextWriter incOut = new StreamWriter(Path.Combine(incPath, diffId.ToString() + ".html")))
                    {
                        incrementTemplate.Merge(context, incOut);
                    }
                }
            }

            formatter.PathPrefix = "";
            writeTableViews(dirName);
        }
예제 #3
0
        /// <summary>
        /// Reports an issue when a proper subset of the table's primary-key columns already identifies
        /// every row, i.e. the declared primary key is a superkey rather than a minimal key.
        /// </summary>
        /// <remarks>
        /// Primary-key columns are taken in descending order of their information content and added one
        /// at a time. For each prefix that passes the entropy heuristic, a GROUP BY query counts the groups
        /// holding more than one row; zero such groups means the prefix is unique. Tables without a primary
        /// key or with fewer than <c>MinRows.Value</c> rows are skipped.
        /// </remarks>
        /// <param name="table">Table whose primary key is checked.</param>
        /// <param name="issueCollector">Receives the issue when a smaller key is found.</param>
        /// <param name="providers">Source of the <c>InformationContent</c> provider.</param>
        public override void Execute(DataTable table, IIssueCollector issueCollector, IProviderCollection providers)
        {
            var informationContent = providers.GetProvider <InformationContent>();

            if (table.PrimaryKey == null || table.Cardinality < MinRows.Value)
            {
                return;
            }
            var pkcolumns = table.PrimaryKey.Columns;
            var escaper   = table.Database.Escaper;
            // Heuristic: Only check columns storing a lot of entropy

            var entropyOrderedColumns = pkcolumns.OrderByDescending(col => informationContent[col]).ToArray();

            double maxPossibleCardinality = 1;
            var    currentColumns         = new List <Column>(entropyOrderedColumns.Length);
            var    currentColumnsString   = new StringBuilder();

            foreach (var col in entropyOrderedColumns)
            {
                currentColumns.Add(col);
                if (currentColumns.Count == entropyOrderedColumns.Length)
                {
                    break; // Last column added. It is known to be a key.
                }

                // Keep the GROUP BY list in step with currentColumns. The column must be appended before
                // the heuristic below can skip the query, otherwise later queries would test a column set
                // that differs from the one named in the reported issue.
                if (currentColumns.Count > 1)
                {
                    currentColumnsString.Append(", ");
                }
                currentColumnsString.AppendFormat("{0}", escaper.Escape(col));

                maxPossibleCardinality *= informationContent[col];
                if (table.Cardinality - maxPossibleCardinality > 0.2f)
                {
                    continue; // If not enough entropy to generate a higher card. no need to query the data.
                }

                var query = string.Format(@"SELECT  COUNT(*)
                                            FROM    ( SELECT    COUNT(*) AS rowcnt
                                                      FROM      {0}
                                                      GROUP BY  {1}
                                                    ) AS exp1
                                            WHERE   rowcnt > 1 ", escaper.Escape(table), currentColumnsString.ToString());
                var res   = table.QueryTable(query);
                // A missing result must stop the check: Convert would turn null into 0 and
                // 0 is exactly the value that raises the issue.
                if (res == null || res is DBNull)
                {
                    break;
                }

                var duplicateGroups = Convert.ToInt64(res);
                if (duplicateGroups == 0)
                {
                    issueCollector.ReportIssue(new Issue(this, this.Severity)
                    {
                        Name                = "Defined Primary Key is not a Minimal Key",
                        Context             = new TableContext(table),
                        Description         = new Description("Primary key for table {0}, is a superkey.", table),
                        ExtendedDescription = new Description("Columns {0} are enough to uniquely identify a tuple. Currently used are {1}", currentColumns, table.PrimaryKey.Columns),
                    });
                    break;
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Computes an importance value per table and stores the result in <c>_importanceVector</c>.
        /// </summary>
        /// <remarks>
        /// Every foreign key contributes two join edges (one per direction) weighted by an entropy value.
        /// The edges are turned into a weight matrix, each table receives a self-loop equal to one minus the
        /// weights found under its key in all rows, and a vector seeded with the tables' information content
        /// is multiplied with the matrix 100 times. The final vector is scaled so its entries sum to 100, or
        /// set to equal shares when the sum is zero.
        /// </remarks>
        /// <param name="database">Database whose tables and foreign keys are analysed.</param>
        /// <param name="providers">Source of the <c>InformationContent</c> provider.</param>
        public override void Finalize(Model.Database database, IProviderCollection providers)
        {
            var informationContent = providers.GetProvider <InformationContent>();

            DatabaseDictionary <TableID, List <JoinEdge> > edgesByTable    = DictionaryFactory.CreateTableID <List <JoinEdge> >();
            DatabaseDictionary <TableID, double>           entropyTransfer = DictionaryFactory.CreateTableID <double>();

            foreach (var tbl in database.Tables)
            {
                edgesByTable[tbl]    = new List <JoinEdge>(4);
                entropyTransfer[tbl] = 0;
            }

            // Register one edge per direction for every foreign key in the database.
            foreach (var foreignKey in database.Tables.SelectMany(t => t.ForeignKeys))
            {
                var pkColumns = foreignKey.ColumnPairs.Select(cp => cp.PKColumn).ToArray();
                var fkColumns = foreignKey.ColumnPairs.Select(cp => cp.FKColumn).ToArray();

                double fkEdgeEntropy = foreignKey.IsSingleColumn
                                       ? informationContent[foreignKey.FKColumn]
                                       : informationContent.GetMultiColumnEntropy((DataTable)foreignKey.FKTable, fkColumns);

                // Primary key guarantees uniqueness across pkcolumns, hence entropy equals log of cardinality.
                var pkEdgeEntropy = Math.Log(Math.Max(foreignKey.PKTable.Cardinality, 1), 2);

                edgesByTable[foreignKey.PKTable].Add(new JoinEdge
                {
                    Table       = foreignKey.FKTable,
                    Columns     = fkColumns,
                    EdgeEntropy = fkEdgeEntropy
                });
                edgesByTable[foreignKey.FKTable].Add(new JoinEdge
                {
                    Table       = foreignKey.PKTable,
                    Columns     = pkColumns,
                    EdgeEntropy = pkEdgeEntropy
                });

                entropyTransfer[foreignKey.PKTable] += pkEdgeEntropy;
                entropyTransfer[foreignKey.FKTable] += fkEdgeEntropy;
            }

            // weights[to][from]: share of the "from" table's value that is passed on to the "to" table.
            DatabaseDictionary <TableID, DatabaseDictionary <TableID, double> > weights = DictionaryFactory.CreateTableID <DatabaseDictionary <TableID, double> >();

            foreach (var tbl in database.Tables)
            {
                weights[tbl] = DictionaryFactory.CreateTableID <double>();
            }

            foreach (var toTable in database.Tables)
            {
                foreach (var edge in edgesByTable[toTable])
                {
                    var source      = edge.Table;
                    var edgeEntropy = edge.EdgeEntropy;
                    var denominator = informationContent[source] + entropyTransfer[source];
                    var incoming    = weights[toTable];

                    if (!incoming.ContainsKey(source))
                    {
                        incoming[source] = 0;
                    }

                    // Sources without any entropy contribute nothing (and would divide by zero).
                    if (denominator > 0)
                    {
                        incoming[source] += edgeEntropy / denominator;
                    }
                }
            }

            // Self-loop: whatever a table does not hand to other tables stays with it.
            foreach (var self in weights.Keys)
            {
                double remaining = 1d;
                foreach (var other in weights.Keys)
                {
                    var otherRow = weights[other];
                    if (otherRow.ContainsKey(self))
                    {
                        remaining -= otherRow[self];
                    }
                }
                weights[self][self] = remaining;
            }

            DatabaseDictionary <TableID, double> importanceVector = DictionaryFactory.CreateTableID <double>();
            DatabaseDictionary <TableID, double> scratchVector    = DictionaryFactory.CreateTableID <double>();

            foreach (var table in database.Tables)
            {
                importanceVector[table] = Math.Max(informationContent[table], 0);
            }

            // Fixed number of multiplication rounds; the two vectors swap roles after each round.
            for (int round = 0; round < 100; round++)
            {
                foreach (var table in importanceVector.Keys)
                {
                    double accumulated = 0;
                    foreach (var contribution in weights[table])
                    {
                        var sourceRank = importanceVector[contribution.Key];
                        accumulated += sourceRank * contribution.Value;
                    }
                    scratchVector[table] = accumulated;
                }

                var previous = importanceVector;
                importanceVector = scratchVector;
                scratchVector    = previous;
            }

            var  totalEntropy = importanceVector.Values.Sum();
            bool noEntropy    = totalEntropy == 0;

            // NOTE(review): entries are assigned while Keys is being enumerated, exactly as before;
            // this relies on DatabaseDictionary tolerating it - confirm against its implementation.
            foreach (var tableKey in importanceVector.Keys)
            {
                if (noEntropy)
                {
                    importanceVector[tableKey] = 100f / database.Tables.Count;
                }
                else
                {
                    importanceVector[tableKey] *= 100f / totalEntropy;
                }
            }
            this._importanceVector = importanceVector;
        }
예제 #5
0
        /// <summary>
        /// Reports text columns (CHAR, NCHAR, NVARCHAR, VARCHAR) whose values are almost exclusively
        /// recognised as booleans by <c>Classifier.IsBool</c>.
        /// </summary>
        /// <remarks>
        /// Only columns with an information content below 3 are scanned. While reading the rows, every
        /// 128th row the candidates whose count of non-boolean values has reached the allowed dirtiness
        /// (<c>DirtinessFactor.Value</c> percent of the rows read so far) are dropped; the scan stops as
        /// soon as no candidate is left.
        /// </remarks>
        /// <param name="table">Table whose rows are scanned.</param>
        /// <param name="issueCollector">Receives one issue per offending column.</param>
        /// <param name="providers">Source of the <c>InformationContent</c> provider.</param>
        public override void Execute(DataTable table, IIssueCollector issueCollector, IProviderCollection providers)
        {
            var textTypes = new[] { DataType.CHAR, DataType.NCHAR, DataType.NVARCHAR, DataType.VARCHAR };

            var informationContent = providers.GetProvider <InformationContent>();

            // Avoid checking columns with more than 8 unique values
            var candidates = table.Columns
                             .Where(c => textTypes.Contains(c.DataType) && informationContent[c] < 3)
                             .ToArray();

            if (candidates.Length == 0)
            {
                return;
            }

            // Per column: number of rows whose value was DBNull or not recognised as boolean.
            var nonBooleanHits = DictionaryFactory.CreateColumnID <int>();

            foreach (var candidate in candidates)
            {
                nonBooleanHits[candidate] = 0;
            }

            int rowsSeen = 0;

            using (var rows = table.GetTableRowEnumerable())
            {
                foreach (var row in rows)
                {
                    rowsSeen++;
                    foreach (var column in candidates)
                    {
                        var  cell         = row[column.ColumnName];
                        bool looksBoolean = !(cell is DBNull) && Classifier.IsBool(cell.ToString());
                        if (!looksBoolean)
                        {
                            nonBooleanHits[column]++;
                        }
                    }

                    // Prune the candidate list only on every 128th row.
                    if (rowsSeen % 128 != 0)
                    {
                        continue;
                    }

                    var tolerated = rowsSeen * DirtinessFactor.Value / 100f;
                    candidates = candidates.Where(c => nonBooleanHits[c] < tolerated).ToArray();
                    if (candidates.Length == 0)
                    {
                        return;
                    }
                }
            }

            // Final check against the complete row count.
            var finalTolerance = rowsSeen * DirtinessFactor.Value / 100f;
            foreach (var column in candidates.Where(c => nonBooleanHits[c] < finalTolerance))
            {
                issueCollector.ReportIssue(new Issue(this, this.Severity)
                {
                    Name        = "Text Column Used for Boolean Values",
                    Context     = new ColumnContext(column),
                    Description = new Description("The column '{0}' contains boolean values. Consider using another data type", column),
                    Severity    = this.Severity
                });
            }
        }
예제 #6
0
        /// <summary>
        /// Detects the naming convention of column names and of table names and reports deviations.
        /// </summary>
        /// <remarks>
        /// For columns and tables separately: a detector is trained on the names, then every name is
        /// validated. If no convention is detected, or the share of invalid names exceeds
        /// <c>InvalidThreshold.Value</c> percent, a single database-level issue is reported; otherwise one
        /// issue per invalid column/table is reported. Column names containing a digit are left out of the
        /// training set (but are still validated). Nothing is done for a database without tables.
        /// </remarks>
        /// <param name="database">Database whose column and table names are checked.</param>
        /// <param name="issueCollector">Receives the reported issues.</param>
        /// <param name="providers">Provider collection; not used by this rule.</param>
        public override void Execute(Database database, IIssueCollector issueCollector, IProviderCollection providers)
        {
            if (database.Tables.Count == 0)
            {
                return;
            }

            float invalidThreshold = this.InvalidThreshold.Value / 100f;

            // One factory for both passes so columns and tables always use the same detector setup.
            Func <INameConventionDetector> createDetector = () =>
            {
                if (this.NamingConventionRepresentation.Value == DataTypes.NamingConventionRepresentation.Markov)
                {
                    return new MarkovConventionDetector(MarkovTolerance.Value / 100f);
                }
                return new TrieNameDetector(30);
            };

            //Columns
            int columnsTotal = database.Columns.Count;
            INameConventionDetector naming = createDetector();

            // Names containing digits are excluded from training.
            Regex numPat      = new Regex("[0-9]");
            var   columnNames = database.Columns
                                .Select(column => column.ColumnName)
                                .Where(name => !numPat.IsMatch(name))
                                .ToList();

            //Detect convention
            bool detected       = naming.DetectConvention(columnNames);
            var  invalidColumns = (from col in database.Columns
                                   where naming.IsValid(col.ColumnName) == false
                                   select col).ToList();
            float percentInvalid = (float)invalidColumns.Count / columnsTotal;

            if (detected == false || percentInvalid > invalidThreshold)
            {
                issueCollector.ReportIssue(new Issue(this, NoNamingConventionSeverity.Value)
                {
                    Name        = this.nonExisting,
                    Context     = new DatabaseContext(database),
                    Description = new Description("Unable to find a naming convention for columns")
                });
            }
            else
            {
                //Raise an issue for all columns that don't use the convention
                foreach (Column col in invalidColumns)
                {
                    issueCollector.ReportIssue(new Issue(this, this.DefaultSeverity.Value)
                    {
                        Name        = this.inconsistent,
                        Context     = new ColumnContext(col),
                        Description = new Description("Column '{0}' in table {1} does not follow the naming convention", col, col.Table)
                    });
                }
            }

            //Tables
            naming = createDetector();

            // Materialized so the detector can enumerate the names more than once without re-running the query.
            var tableNames = database.Tables.Select(t => t.TableName).ToList();

            detected = naming.DetectConvention(tableNames);

            var invalidTables = (from table in database.Tables
                                 where naming.IsValid(table.TableName) == false
                                 select table).ToList();
            float percentInvalidTables = (float)invalidTables.Count / database.Tables.Count;

            if (detected == false || percentInvalidTables > invalidThreshold)
            {
                issueCollector.ReportIssue(new Issue(this, NoNamingConventionSeverity.Value)
                {
                    Name        = this.nonExisting,
                    Context     = new DatabaseContext(database),
                    Description = new Description("Unable to find a naming convention for tables")
                });
            }
            else
            {
                //Raise an issue for all tables that don't use the convention
                foreach (Table table in invalidTables)
                {
                    issueCollector.ReportIssue(new Issue(this, this.DefaultSeverity.Value)
                    {
                        Name        = this.inconsistent,
                        Context     = new TableContext(table),
                        Description = new Description("Table name '{0}' does not follow the naming convention", table)
                    });
                }
            }
        }