public override void Execute(Database database, IProviderCollection providers)
{
    // Prefer the data-based importance provider; fall back to the structural PageRank provider.
    ITableImportance importance = providers.GetProvider<TableDataImportance>();
    if (importance == null)
    {
        importance = providers.GetProvider<PageRank>();
    }
    this._importanceBackend = importance;
}
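// Illustrative variant (an assumption, not part of the DBLint sources): the same
// fallback written with the null-coalescing operator. ResolveImportance is a
// hypothetical helper name; it assumes both providers implement ITableImportance,
// as the method above implies.
private static ITableImportance ResolveImportance(IProviderCollection providers)
{
    return (ITableImportance)providers.GetProvider<TableDataImportance>()
        ?? (ITableImportance)providers.GetProvider<PageRank>();
}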
private void Write(String dirName, DatabaseLint dblint)
{
    this.scoring = new IScoringImpl();
    this.scoring.CalculateScores(dblint);

    if (DBLint.Settings.IsNormalContext)
    {
        //Save run for incremental viewing. The file name is a zero-padded
        //timestamp, so lexicographic order equals chronological order.
        String fileName = DateTime.Now.ToString("yyyyMMddHHmmss") + ".xml";
        //Folder, i.e.: runs/dbname/
        String folder = Settings.INCREMENTAL_FOLDER + dblint.DatabaseModel.DatabaseName;
        String filePath = Path.Combine(folder, fileName);
        if (!Directory.Exists(folder))
        {
            Directory.CreateDirectory(folder);
        }

        //Create run
        DBLint.IncrementalRuns.Run run = new IncrementalRuns.Run(
            dblint.DatabaseModel, dblint.IssueCollector, scoring.GetScores());

        //Write run
        using (FileStream writer = new FileStream(filePath, FileMode.Create))
        {
            DataContractSerializer ser = new DataContractSerializer(typeof(DBLint.IncrementalRuns.Run));
            ser.WriteObject(writer, run);
            writer.Flush();
        }
    }

    //Assign each table a stable file name: tables/table1.html, tables/table2.html, ...
    DirectoryInfo dir = new DirectoryInfo(dirName);
    int tableNameCounter = 1;
    foreach (Table table in dblint.DatabaseModel.Tables)
    {
        String tName = "table" + tableNameCounter.ToString();
        this.tableNames.Add(table, tName);
        this.tableFiles.Add(table, "tables/" + tName + ".html");
        tableNameCounter++;
    }

    this.dblint = dblint;
    this.formatter = new HTMLDescriptionFormatter(this.tableFiles);
    IssueCollector issues = dblint.IssueCollector;

    //Create result directory if it does not exist
    if (!dir.Exists)
    {
        dir.Create();
    }

    VelocityContext context = new VelocityContext();
    context.Put("db", dblint.DatabaseModel);
    context.Put("totalScore", this.scoring.GetScore());
    context.Put("issuesTotal", issues.Count());
    context.Put("rulesExecuted", this.getRulesExecuted());
    context.Put("ruleTypes", this.getRuleTypes());
    context.Put("formatter", this.formatter);
    context.Put("HTMLBuilder", this);
    context.Put("summaries", this.dblint.ExecutionSummary);
    context.Put("executionTime", this.formatTimeSpan(this.dblint.ExecutionSummary.ExecutionTime));

    //Table importance (PageRank-based)
    IProviderCollection providers = dblint.RuleController.ProviderCollection;
    var rank = providers.GetProvider<DBLint.Rules.SchemaProviders.ImportanceProvider>();

    //List all tables
    var tables = (from t in dblint.DatabaseModel.Tables
                  select new
                  {
                      Table = t,
                      Name = t.TableName,
                      IssueCount = issues.GetIssues(t).Count(),
                      Score = this.scoring.GetScore(t),
                      Importance = Math.Round(rank[t], 1)
                  }).ToList();
    context.Put("tables", tables);

    //Five lowest-scoring tables
    var bottom = tables.OrderBy(t => t.Score).Take(5).ToList();
    context.Put("bottomTables", bottom);

    int groupId = 0; //Used in the template to identify a group of issues

    //Group issues by name
    var issueGroups = (from i in issues
                       group i by i.Name into g
                       orderby g.First().Severity
                       select new
                       {
                           Name = g.Key,
                           Count = g.Count(),
                           Issues = g,
                           GroupID = ++groupId,
                           Severity = g.First().Severity
                       }).ToList();
    context.Put("issueGroups", issueGroups);

    //Put issueGroups into severity groups
    var severityGroups = (from issueGroup in issueGroups
                          group issueGroup by issueGroup.Severity into g
                          orderby g.First().Severity
                          select new
                          {
                              Severity = g.First().Severity,
                              IssueGroups = g
                          });
    context.Put("severityGroups", severityGroups);

    //Incremental runs list
    var diffs = new List<DBLint.IncrementalRuns.Diff>();
    if (DBLint.Settings.IsNormalContext)
    {
        //Compare the five most recent runs pairwise. Failure to load old runs
        //is deliberately non-fatal: the report is simply built without diffs.
        try
        {
            var runs = DBLint.IncrementalRuns.Run.GetRuns(dblint.DatabaseModel.DatabaseName, 5).ToList();
            for (int i = 1; i < runs.Count; i++)
            {
                var diff = new DBLint.IncrementalRuns.Diff();
                diff.Compare(runs[i], runs[i - 1]);
                diffs.Add(diff);
            }
        }
        catch { }
        context.Put("diffs", diffs);
    }

    //Create template for the main html page and write it
    Template template = Velocity.GetTemplate("mainpage.vm");
    using (TextWriter htmlOut = new StreamWriter(Path.Combine(dir.FullName, "mainpage.html")))
    {
        template.Merge(context, htmlOut);
    }

    //Write issue groups
    String issuePath = Path.Combine(dir.FullName, "issues");
    if (!Directory.Exists(issuePath))
    {
        Directory.CreateDirectory(issuePath);
    }
    Template issueGroupTemplate = Velocity.GetTemplate("issuegroup.vm");
    formatter.PathPrefix = "../";
    foreach (var g in issueGroups)
    {
        context.Put("groupIssues", g.Issues);
        using (TextWriter issueOut = new StreamWriter(Path.Combine(issuePath, g.GroupID.ToString() + ".html")))
        {
            issueGroupTemplate.Merge(context, issueOut);
        }
    }

    if (DBLint.Settings.IsNormalContext)
    {
        //Write diffs/increments to files
        String incPath = Path.Combine(dir.FullName, "increments");
        if (!Directory.Exists(incPath))
        {
            Directory.CreateDirectory(incPath);
        }
        Template incrementTemplate = Velocity.GetTemplate("increment.vm");
        int diffId = 0;
        foreach (var diff in diffs)
        {
            diffId++;
            context.Put("diff", diff);
            using (TextWriter incOut = new StreamWriter(Path.Combine(incPath, diffId.ToString() + ".html")))
            {
                incrementTemplate.Merge(context, incOut);
            }
        }
    }

    formatter.PathPrefix = "";
    writeTableViews(dirName);
}
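// Hedged sketch (a hypothetical helper, not from the DBLint sources): reading a
// persisted run back mirrors the write path above. Run.GetRuns presumably
// performs a similar deserialization internally.
private static DBLint.IncrementalRuns.Run ReadRun(String filePath)
{
    using (FileStream reader = new FileStream(filePath, FileMode.Open, FileAccess.Read))
    {
        var ser = new DataContractSerializer(typeof(DBLint.IncrementalRuns.Run));
        return (DBLint.IncrementalRuns.Run)ser.ReadObject(reader);
    }
}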
public override void Execute(DataTable table, IIssueCollector issueCollector, IProviderCollection providers)
{
    var informationContent = providers.GetProvider<InformationContent>();
    if (table.PrimaryKey == null || table.Cardinality < MinRows.Value)
    {
        return;
    }

    var pkcolumns = table.PrimaryKey.Columns;
    var escaper = table.Database.Escaper;

    // Heuristic: consider columns in order of decreasing information content,
    // so the columns most likely to form a key on their own are tested first.
    var entropyOrderedColumns = pkcolumns.OrderByDescending(col => informationContent[col]).ToArray();

    double maxPossibleCardinality = 1;
    var currentColumns = new List<Column>(entropyOrderedColumns.Length);
    var first = true;
    var currentColumnsString = new StringBuilder();

    foreach (var col in entropyOrderedColumns)
    {
        currentColumns.Add(col);
        if (currentColumns.Count == entropyOrderedColumns.Length)
        {
            break; // Last column added; the full primary key is already known to be a key.
        }

        // The column always joins the GROUP BY prefix, whether or not the
        // prefix is actually queried below.
        var escapedCol = escaper.Escape(col);
        if (first)
        {
            currentColumnsString.Append(escapedCol);
            first = false;
        }
        else
        {
            currentColumnsString.Append(", ").Append(escapedCol);
        }

        maxPossibleCardinality *= informationContent[col];
        if (table.Cardinality - maxPossibleCardinality > 0.2f)
        {
            continue; // This prefix cannot possibly distinguish all rows; no need to query the data.
        }

        // Count groups of rows sharing the same values in the current column prefix.
        var query = string.Format(@"SELECT COUNT(*) FROM (
                SELECT COUNT(*) AS rowcnt
                FROM {0}
                GROUP BY {1}
            ) AS exp1
            WHERE rowcnt > 1", escaper.Escape(table), currentColumnsString.ToString());

        var res = table.QueryTable(query);
        if (res is DBNull)
        {
            break;
        }

        var num = Convert.ToInt32(res);
        if (num == 0)
        {
            // No duplicate groups: the prefix alone is a key, so the declared
            // primary key is a non-minimal superkey.
            issueCollector.ReportIssue(new Issue(this, this.Severity)
            {
                Name = "Defined Primary Key is not a Minimal Key",
                Context = new TableContext(table),
                Description = new Description("Primary key for table {0} is a superkey.", table),
                ExtendedDescription = new Description(
                    "Columns {0} are enough to uniquely identify a tuple. Currently used are {1}",
                    currentColumns, table.PrimaryKey.Columns),
            });
            break;
        }
    }
}
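// Worked illustration (hypothetical table and column names): for a table Orders
// with declared primary key (OrderId, LineNo), where OrderId has the higher
// information content, the first candidate prefix (OrderId) produces roughly:
//
//     SELECT COUNT(*) FROM (
//         SELECT COUNT(*) AS rowcnt FROM "Orders" GROUP BY "OrderId"
//     ) AS exp1 WHERE rowcnt > 1
//
// A result of 0 means no OrderId value occurs twice, so OrderId alone is a key
// and the declared key (OrderId, LineNo) triggers the superkey issue above.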
public override void Finalize(Model.Database database, IProviderCollection providers)
{
    var informationContent = providers.GetProvider<InformationContent>();
    var fks = database.Tables.SelectMany(t => t.ForeignKeys);

    DatabaseDictionary<TableID, List<JoinEdge>> dbJoinEdges = DictionaryFactory.CreateTableID<List<JoinEdge>>();
    DatabaseDictionary<TableID, double> tableTotalEntropyTransfer = DictionaryFactory.CreateTableID<double>();
    foreach (var tbl in database.Tables)
    {
        dbJoinEdges[tbl] = new List<JoinEdge>(4);
        tableTotalEntropyTransfer[tbl] = 0;
    }

    //Build a join edge in both directions for every foreign key, weighted by
    //the entropy of the participating columns.
    foreach (var foreignKey in fks)
    {
        var pkColumns = (from cp in foreignKey.ColumnPairs select cp.PKColumn).ToArray();
        var fkColumns = (from cp in foreignKey.ColumnPairs select cp.FKColumn).ToArray();

        double fkEdgeEntropy;
        if (foreignKey.IsSingleColumn)
        {
            fkEdgeEntropy = informationContent[foreignKey.FKColumn];
        }
        else
        {
            fkEdgeEntropy = informationContent.GetMultiColumnEntropy((DataTable)foreignKey.FKTable, fkColumns);
        }

        // A primary key guarantees uniqueness across its columns, so its
        // entropy equals the log of the table's cardinality.
        var pkEdgeEntropy = Math.Log(Math.Max(foreignKey.PKTable.Cardinality, 1), 2);

        dbJoinEdges[foreignKey.PKTable].Add(new JoinEdge { Table = foreignKey.FKTable, Columns = fkColumns, EdgeEntropy = fkEdgeEntropy });
        dbJoinEdges[foreignKey.FKTable].Add(new JoinEdge { Table = foreignKey.PKTable, Columns = pkColumns, EdgeEntropy = pkEdgeEntropy });

        tableTotalEntropyTransfer[foreignKey.PKTable] += pkEdgeEntropy;
        tableTotalEntropyTransfer[foreignKey.FKTable] += fkEdgeEntropy;
    }

    //Transition matrix: pmatrix[to][from] is the fraction of 'from's total
    //entropy that flows to 'to' along their join edges.
    DatabaseDictionary<TableID, DatabaseDictionary<TableID, double>> pmatrix = DictionaryFactory.CreateTableID<DatabaseDictionary<TableID, double>>();
    foreach (var tbl in database.Tables)
    {
        pmatrix[tbl] = DictionaryFactory.CreateTableID<double>();
    }
    foreach (var toTable in database.Tables)
    {
        var joinEdges = dbJoinEdges[toTable];
        foreach (var joinEdge in joinEdges)
        {
            var fromTable = joinEdge.Table;
            var columnsEntropy = joinEdge.EdgeEntropy;
            var tableInformationContent = informationContent[fromTable];
            var tableTotalTransfer = tableTotalEntropyTransfer[fromTable];
            var todic = pmatrix[toTable];
            if (!todic.ContainsKey(fromTable))
            {
                todic[fromTable] = 0;
            }
            if (tableInformationContent + tableTotalTransfer > 0)
            {
                todic[fromTable] += columnsEntropy / (tableInformationContent + tableTotalTransfer);
            }
        }
    }

    //Self-loops: the entropy a table keeps is whatever it does not transfer away.
    foreach (var keyRow in pmatrix.Keys)
    {
        double selfLoopValue = 1d;
        foreach (var keyColumn in pmatrix.Keys)
        {
            var row = pmatrix[keyColumn];
            if (row.ContainsKey(keyRow))
            {
                selfLoopValue -= row[keyRow];
            }
        }
        pmatrix[keyRow][keyRow] = selfLoopValue;
    }

    //Power iteration: repeatedly multiply the importance vector by the
    //transition matrix, seeded with each table's own information content.
    DatabaseDictionary<TableID, double> importanceVector = DictionaryFactory.CreateTableID<double>();
    DatabaseDictionary<TableID, double> calculateVector = DictionaryFactory.CreateTableID<double>();
    foreach (var table in database.Tables)
    {
        importanceVector[table] = Math.Max(informationContent[table], 0);
    }
    for (int i = 0; i < 100; i++)
    {
        foreach (var table in importanceVector.Keys)
        {
            double newRank = 0;
            var fromTables = pmatrix[table];
            foreach (var fromTable in fromTables)
            {
                var fromRank = importanceVector[fromTable.Key];
                newRank += fromRank * fromTable.Value;
            }
            calculateVector[table] = newRank;
        }
        //Swap the two vectors instead of allocating a new one each iteration
        {
            var tmp = calculateVector;
            calculateVector = importanceVector;
            importanceVector = tmp;
        }
    }

    //Normalize the importance scores so they sum to 100
    var totalEntropy = importanceVector.Values.Sum();
    foreach (var k in importanceVector.Keys)
    {
        if (totalEntropy == 0)
        {
            importanceVector[k] = 100f / database.Tables.Count;
        }
        else
        {
            importanceVector[k] *= 100f / totalEntropy;
        }
    }
    this._importanceVector = importanceVector;
}
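// Minimal self-contained sketch (an illustration under stated assumptions, not
// DBLint code) of the power iteration above, with DBLint's dictionary types
// replaced by plain Dictionary<string, ...> keyed by table name. The update
// rule is: rank_{k+1}(t) = sum over s of rank_k(s) * p[t][s]. Assumes the seed
// vector contains an entry for every table appearing in p.
static Dictionary<string, double> PowerIterate(
    Dictionary<string, Dictionary<string, double>> p, // p[to][from] = transfer fraction
    Dictionary<string, double> seed,                  // initial importance per table
    int iterations = 100)
{
    var rank = new Dictionary<string, double>(seed);
    var next = new Dictionary<string, double>();
    for (int i = 0; i < iterations; i++)
    {
        foreach (var to in p.Keys)
        {
            double sum = 0;
            foreach (var from in p[to])
                sum += rank[from.Key] * from.Value;
            next[to] = sum;
        }
        var tmp = next; next = rank; rank = tmp; // swap buffers, as above
    }
    return rank;
}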
public override void Execute(DataTable table, IIssueCollector issueCollector, IProviderCollection providers)
{
    var dataTypes = new[] { DataType.CHAR, DataType.NCHAR, DataType.NVARCHAR, DataType.VARCHAR };
    var informationContent = providers.GetProvider<InformationContent>();

    // Only consider text columns with less than 3 bits of information content,
    // i.e. roughly at most 8 distinct values.
    var columnsToCheck = (from c in table.Columns
                          where dataTypes.Contains(c.DataType) && informationContent[c] < 3
                          select c).ToArray();
    if (columnsToCheck.Length == 0)
    {
        return;
    }

    var columnNotBooleanCount = DictionaryFactory.CreateColumnID<int>();
    foreach (var col in columnsToCheck)
    {
        columnNotBooleanCount[col] = 0;
    }

    int rowCount = 0;
    using (var rowEnumerable = table.GetTableRowEnumerable())
    {
        foreach (var row in rowEnumerable)
        {
            rowCount++;
            foreach (var column in columnsToCheck)
            {
                var value = row[column.ColumnName];
                if (value is DBNull || !Classifier.IsBool(value.ToString()))
                {
                    columnNotBooleanCount[column] += 1;
                }
            }

            // Every 128th row, drop columns that are no longer likely to be boolean.
            if ((rowCount & 127) == 0)
            {
                var allowedDirtiness = rowCount * DirtinessFactor.Value / 100f;
                columnsToCheck = columnsToCheck.Where(c => columnNotBooleanCount[c] < allowedDirtiness).ToArray();
                if (columnsToCheck.Length == 0)
                {
                    return;
                }
            }
        }
    }

    foreach (var column in columnsToCheck)
    {
        var allowedDirtiness = rowCount * DirtinessFactor.Value / 100f;
        if (columnNotBooleanCount[column] < allowedDirtiness)
        {
            issueCollector.ReportIssue(new Issue(this, this.Severity)
            {
                Name = "Text Column Used for Boolean Values",
                Context = new ColumnContext(column),
                Description = new Description("The column '{0}' contains boolean values. Consider using another data type", column),
                Severity = this.Severity
            });
        }
    }
}
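// Hedged sketch: Classifier.IsBool's actual rules are not shown in this file,
// so this is an assumed, illustrative stand-in using a whitelist of common
// boolean encodings (requires System and System.Collections.Generic).
private static readonly HashSet<string> BoolLiterals = new HashSet<string>(
    new[] { "0", "1", "t", "f", "y", "n", "true", "false", "yes", "no" },
    StringComparer.OrdinalIgnoreCase);

private static bool LooksBoolean(string value)
{
    return BoolLiterals.Contains(value.Trim());
}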
public override void Execute(Database database, IIssueCollector issueCollector, IProviderCollection providers)
{
    if (database.Tables.Count == 0)
    {
        return;
    }

    float invalidThreshold = this.InvalidThreshold.Value / 100f;
    int columnsTotal = database.Columns.Count;

    INameConventionDetector naming;
    if (this.NamingConventionRepresentation.Value == DataTypes.NamingConventionRepresentation.Markov)
    {
        naming = new MarkovConventionDetector(MarkovTolerance.Value / 100f);
    }
    else
    {
        naming = new TrieNameDetector(30);
    }

    var columnNames = new List<String>();
    var importance = providers.GetProvider<SchemaProviders.ImportanceProvider>();
    foreach (var column in database.Columns)
    {
        double tableRank = importance[column.Table];
        int weight = tableRank < 1 ? 1 : (int)tableRank;
        //Importance weighting is currently disabled; each name is added once.
        //for (int i = 0; i < weight; i++)
        //    columnNames.Add(column.ColumnName);
        columnNames.Add(column.ColumnName);
    }

    //Names containing digits are excluded from convention detection
    Regex numPat = new Regex("[0-9]");
    columnNames.RemoveAll(name => numPat.Match(name).Success);

    //Detect convention
    bool detected = naming.DetectConvention(columnNames);
    var invalidColumns = (from col in database.Columns
                          where naming.IsValid(col.ColumnName) == false
                          select col).ToList();
    float percentInvalid = (float)invalidColumns.Count / columnsTotal;

    if (detected == false || percentInvalid > invalidThreshold)
    {
        var issue = new Issue(this, NoNamingConventionSeverity.Value);
        issue.Name = this.nonExisting;
        issue.Context = new DatabaseContext(database);
        issue.Description = new Description("Unable to find a naming convention for columns");
        issueCollector.ReportIssue(issue);
    }
    else
    {
        //Raise an issue for every column that does not follow the convention
        foreach (Column col in invalidColumns)
        {
            var issue = new Issue(this, this.DefaultSeverity.Value);
            issue.Name = this.inconsistent;
            issue.Context = new ColumnContext(col);
            issue.Description = new Description("Column '{0}' in table {1} does not follow the naming convention", col, col.Table);
            issueCollector.ReportIssue(issue);
        }
    }

    //Tables: repeat the detection with a fresh detector on table names
    if (this.NamingConventionRepresentation.Value == DataTypes.NamingConventionRepresentation.Markov)
    {
        naming = new MarkovConventionDetector(MarkovTolerance.Value / 100f);
    }
    else
    {
        naming = new TrieNameDetector(30);
    }

    var tableNames = database.Tables.Select(t => t.TableName);
    detected = naming.DetectConvention(tableNames);
    var invalidTables = (from table in database.Tables
                         where naming.IsValid(table.TableName) == false
                         select table).ToList();
    float percentInvalidTables = (float)invalidTables.Count / database.Tables.Count;

    if (detected == false || percentInvalidTables > invalidThreshold)
    {
        var issue = new Issue(this, NoNamingConventionSeverity.Value);
        issue.Name = this.nonExisting;
        issue.Context = new DatabaseContext(database);
        issue.Description = new Description("Unable to find a naming convention for tables");
        issueCollector.ReportIssue(issue);
    }
    else
    {
        foreach (Table table in invalidTables)
        {
            var issue = new Issue(this, this.DefaultSeverity.Value);
            issue.Name = this.inconsistent;
            issue.Context = new TableContext(table);
            issue.Description = new Description("Table name '{0}' does not follow the naming convention", table);
            issueCollector.ReportIssue(issue);
        }
    }
}
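// Illustrative toy detector (an assumption: the INameConventionDetector
// interface is inferred from the two calls used above). A snake_case checker
// showing the DetectConvention/IsValid contract; requires System.Collections.Generic,
// System.Linq, and System.Text.RegularExpressions.
class SnakeCaseDetector
{
    private static readonly Regex Pattern = new Regex("^[a-z][a-z0-9]*(_[a-z0-9]+)*$");

    // "Detected" when a clear majority of the given names already match.
    public bool DetectConvention(IEnumerable<string> names)
    {
        var list = names.ToList();
        return list.Count > 0 && list.Count(IsValid) * 2 > list.Count;
    }

    public bool IsValid(string name)
    {
        return Pattern.IsMatch(name);
    }
}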