// --------------------------- process steps ---------------------------------------

// Matches data asset names to table names.
//
// Builds a TeamDataAssetSupplement for every row of dataContext.TeamDataAssets and a
// TableSupplement for every table of every database of every server, registers each
// one in internalData, then scores every asset name against every table name and
// offers each score to the asset via KeepIfWorthy. Finally each asset's best matches
// are dumped to the console.
//
// mutuallyExclusive, associations, associationStrengths:
//   passed through unchanged to the TeamDataAssetSupplement constructor.
void IdentifyDatabaseTableMatchesForAssets(string[,] mutuallyExclusive, string[,] associations, float[] associationStrengths)
{
    // Supplements are remembered locally as they are built so the comparison pass
    // below can walk exactly what was loaded, without re-running the asset query
    // and without guessing table IDs.
    var loadedAssets = new System.Collections.Generic.List<TeamDataAssetSupplement>();
    var loadedTables = new System.Collections.Generic.List<TableSupplement>();

    // for each data asset
    //   load and prepare the names
    var assets = from a in this.dataContext.TeamDataAssets select a;
    Console.WriteLine("Data Assets:");
    foreach (var asset in assets)
    {
        TeamDataAssetSupplement semanticAsset = new TeamDataAssetSupplement("TeamDataAsset", asset.ID, asset.DataAssetName, mutuallyExclusive, associations, associationStrengths);
        internalData.Put(semanticAsset);
        loadedAssets.Add(semanticAsset);

        // temp: echo each prepared asset name
        Console.WriteLine(" " + semanticAsset.GetSemanticName().GetText());
    }

    // for each database table
    //   load and prepare the name (database + table)
    var servers = from a in this.dataContext.Servers select a;
    foreach (var server in servers)
    {
        foreach (var database in server.Databases)
        {
            foreach (var table in database.Tables)
            {
                TableSupplement semanticTable = new TableSupplement("Table", table.ID, database.Name + " " + table.Name, table);
                internalData.Put(semanticTable);
                loadedTables.Add(semanticTable);
            }
        }
    }

    // compare each data asset to every table, keeping the best matches
    //
    // The tables come from the list built above rather than from probing
    // internalData for IDs 1, 2, 3, ... until the first miss: that probe stopped
    // at the first absent ID, so any table after a gap in the ID sequence was
    // never compared.
    foreach (TeamDataAssetSupplement curAsset in loadedAssets)
    {
        SemanticString curAssetName = curAsset.GetSemanticName();

        foreach (TableSupplement curTable in loadedTables)
        {
            SemanticString curTableName = curTable.GetSemanticName();
            float matchScore = curAssetName.Match(curTableName);
            curAsset.KeepIfWorthy(matchScore, curTable);
        }

        // output to console
        curAsset.DumpBestMatches();
    }
}
// Smoke test for SupplementSet: stores a single TableSupplement under ID 1,
// reads it back, and then looks up ID 2, which was never stored. Prints one
// line for each lookup that behaves as expected.
void exerciseHashStore()
{
    SupplementSet store = new SupplementSet();
    store.Put(new TableSupplement("Table", 1));

    // ID 1 was stored above, so this lookup is expected to succeed
    TableSupplement found = (TableSupplement)store.Get("Table", 1);
    if (found != null)
    {
        Console.WriteLine("retrieved " + found.GetEntityType() + " " + found.GetID());
    }

    // ID 2 was never stored, so this lookup is expected to come back null
    TableSupplement missing = (TableSupplement)store.Get("Table", 2);
    if (missing == null)
    {
        Console.WriteLine("second retrieval correctly fails");
    }
}
// maintains the top k scores for distinct simplified strings
//
// score: match score being offered for the given table.
// table: candidate table; its semantic-name text is what IsNewTableName is asked about.
//
// Does nothing when score is not greater than the score in the last queue slot.
// Otherwise walks the queue from the front; at the first slot whose score is lower
// and for which IsNewTableName accepts the table's name, the entries from that slot
// onward move down one position (the last one falls off) and the new score, table
// and table database are written into the slot.
public void KeepIfWorthy(float score, TableSupplement table)
{
    int last = MATCH_QUEUE_SIZE - 1;

    // not better than the weakest entry currently kept
    if (score <= bestMatchScores[last])
    {
        return;
    }

    for (int slot = 0; slot <= last; slot++)
    {
        // insert in the queue only if the score is good enough ...
        if (!(score > bestMatchScores[slot]))
        {
            continue;
        }

        // ... and the simplified string is different
        if (!IsNewTableName(table.GetSemanticName().GetText()))
        {
            continue;
        }

        // open up the slot: shift entries slot..last-1 down by one, back to front
        for (int k = last; k > slot; k--)
        {
            bestMatchScores[k] = bestMatchScores[k - 1];
            bestMatches[k] = bestMatches[k - 1];
            bestMatchDatabases[k] = bestMatchDatabases[k - 1];
        }

        bestMatchScores[slot] = score;
        bestMatches[slot] = table;
        bestMatchDatabases[slot] = table.GetTableEntity().Database;
        return;
    }
}