/// <summary>
/// Generate best mappings from the specified source set to all possible target tables in the specified schema.
/// Best mappings from the source greater tables will be (re)used and created if they do not already exist in the mapper.
/// </summary>
/// <param name="sourceSet">Source table for which a mapping is generated.</param>
/// <param name="targetSchema">Schema whose tables are the candidate mapping targets.</param>
/// <returns>
/// The mappings created by this call (they are also appended to <c>Mappings</c>).
/// For a primitive source set the result of <c>MapPrimitiveSet</c> is returned as is.
/// </returns>
public List<Mapping> MapSet(DcTable sourceSet, DcSchema targetSchema)
{
    if (sourceSet.IsPrimitive)
    {
        return MapPrimitiveSet((Schema.Table)sourceSet, targetSchema);
    }

    List<Mapping> maps = new List<Mapping>();
    Dictionary<DcColumn, Mapping> greaterMappings = new Dictionary<DcColumn, Mapping>();

    //
    // 1. Find target greater tables. They are found among mappings and hence can contain both existing (in the schema) and new tables.
    //
    List<DcTable> targetOutputTabs = new List<DcTable>();
    foreach (DcColumn sd in sourceSet.Columns)
    {
        Mapping gMapping = GetBestMapping(sd.Output, targetSchema);
        if (gMapping == null) // Either does not exist or cannot be built (for example, formally not possible or meaningless)
        {
            MapSet(sd.Output, targetSchema); // Recursion up to primitive tables if not computed and stored earlier
            gMapping = GetBestMapping(sd.Output, targetSchema); // Try again after generation
        }

        // The mapping may still be null here. It is stored anyway so that every source column has an entry;
        // consumers below must check for null before using it.
        greaterMappings.Add(sd, gMapping);
        targetOutputTabs.Add(gMapping != null ? gMapping.TargetTab : null);
    }

    //
    // 2. Now find the best (existing) lesser set for the target greater tables.
    //    The best set should cover most of them by its greater columns.
    //
    List<DcTable> allTargetTabs = targetSchema.AllSubTables;
    int greaterCount = targetOutputTabs.Count;
    double maxCoverage = 0;
    int maxCoverageIndex = -1;
    for (int i = 0; i < allTargetTabs.Count; i++)
    {
        DcTable candidate = allTargetTabs[i];

        // Find coverage of this target set (how many best greater target tables it covers)
        double coverage = 0;
        foreach (DcColumn tgc in candidate.Columns)
        {
            if (!targetOutputTabs.Contains(tgc.Output))
            {
                continue;
            }

            // TODO: Compare column names and then use it as a weight [0,1] instead of simply incrementing
            coverage += 1;
        }

        // Normalize to [0,1]. A source set without columns has nothing to cover, so avoid 0/0 (NaN).
        coverage = greaterCount > 0 ? coverage / greaterCount : 0;
        if (coverage > 1)
        {
            coverage = 1; // A lesser set can use (reference, cover) a greater set more than once
        }

        // Take into account individual similarity of the target set with the source set
        double nameSimilarity = StringSimilarity.ComputeStringSimilarity(sourceSet.Name, candidate.Name, 3);
        coverage *= nameSimilarity;

        // TODO: Take into account difference in max ranks

        if (coverage > maxCoverage)
        {
            maxCoverage = coverage;
            maxCoverageIndex = i;
        }
    }

    //
    // 3. Create and store a mapping (or several mappings)
    //
    Mapping newMapping;
    if (maxCoverageIndex < 0 || maxCoverage < SetCreationThreshold)
    {
        // Create new target set for mapping (and its greater columns) which will be accessible only via
        // the mapping object (not via the schema). This branch is also taken when no existing table had
        // positive coverage, so that allTargetTabs is never indexed with -1.
        DcTable ts = new Schema.Table(sourceSet.Name, sourceSet.Space); // New set has the same name as the source set

        newMapping = new Mapping(sourceSet, ts);
        foreach (DcColumn sd in sourceSet.Columns) // For each source column, create one new target column
        {
            Mapping gMapping = greaterMappings[sd];
            if (gMapping == null)
            {
                continue; // No mapping for the greater table could be found or built, so the column cannot be matched
            }

            DcTable gts = gMapping.TargetTab;

            DcColumn td = targetSchema.Space.CreateColumn(sd.Name, ts, gts, sd.IsKey); // Create a clone for the source column

            newMapping.AddPaths(sd, td, gMapping); // Add a pair of columns as a match (with expansion using the specified greater mapping)
        }

        newMapping.Similarity = 1.0;
        maps.Add(newMapping);
    }
    else // Use existing target set(s) for mapping(s)
    {
        DcTable ts = allTargetTabs[maxCoverageIndex];

        newMapping = new Mapping(sourceSet, ts);
        foreach (DcColumn sd in sourceSet.Columns) // For each source column, find best target column
        {
            Mapping gMapping = greaterMappings[sd];
            if (gMapping == null)
            {
                continue; // No mapping for the greater table could be found or built, so the column cannot be matched
            }

            DcTable gts = gMapping.TargetTab;

            // Find an existing column from ts to gts with the best similarity to source column sd.
            // TODO: In fact, we need to choose the best column, for example, by comparing their names, usages, ranks and other semantic factors
            DcColumn td = ts.Columns.Where(d => d.Output == gts).FirstOrDefault();

            if (td == null) // No good target column found (the source column is not covered)
            {
                continue; // TODO: Maybe create a new target column rather than simply ignoring it
            }

            newMapping.AddPaths(sd, td, gMapping); // Add a pair of columns as a match (with expansion using the specified greater mapping)
        }

        newMapping.Similarity = maxCoverage;
        maps.Add(newMapping);
    }

    Mappings.AddRange(maps);
    return maps;
}