示例#1
0
        /// <summary>
        /// Visits this tree node and, recursively, all of its non-empty children with the purpose of including
        /// their sets and columns into a schema.
        /// NOTE: the calls that would actually add a table or a column are currently commented out, so at
        /// present the method only evaluates the membership checks and descends into the children.
        /// TODO: In fact, all elements should have super-columns which specify the parent set or the root of the schema to include into, and then the parameter is not needed.
        /// </summary>
        /// <param name="top">Schema whose root is used to check whether a non-primitive set is already a sub-table.</param>
        public void AddToSchema(DcSchema top)
        {
            if (!Set.IsPrimitive)
            {
                bool alreadyIncluded = Set.IsSubTable(top.Root);
                if (!alreadyIncluded)
                {
                    //top.AddTable(Set, null, null);
                }
            }
            // Primitive case (not implemented): check that the schema has this primitive set and add an
            // equivalent primitive set if it is absent (determined via default mapping)

            bool hasRealColumn = Column != null && Column.Input != Column.Output;
            if (hasRealColumn)
            {
                //Column.Add();
            }

            foreach (ColumnTree child in Children)
            {
                if (!child.IsEmpty) // Empty nodes are skipped (root has no column)
                {
                    child.AddToSchema(top); // Recursion
                }
            }
        }
示例#2
0
        /// <summary>
        /// Looks up the best cached mapping in the opposite direction (from <paramref name="targetSet"/> to
        /// <paramref name="sourceSchema"/>) and returns it after inverting it.
        /// </summary>
        /// <param name="sourceSchema">Schema that plays the source role in the returned (inverted) mapping.</param>
        /// <param name="targetSet">Table that plays the target role in the returned (inverted) mapping.</param>
        /// <returns>The inverted mapping, or null if no suitable mapping is cached.</returns>
        public Mapping GetBestMapping(DcSchema sourceSchema, DcTable targetSet)
        {
            Mapping bestMapping = GetBestMapping(targetSet, sourceSchema);

            // The lookup returns null when nothing is cached; inverting null would throw
            if (bestMapping == null)
            {
                return null;
            }

            // NOTE(review): Invert() is called on the instance returned by the lookup, which appears to be
            // the cached object itself - confirm that mutating the cache entry is intended.
            bestMapping.Invert();
            return bestMapping;
        }
示例#3
0
        /// <summary>
        /// Creates the import mapping dialog for the given column.
        /// The dialog works in "new" mode when the column output table has no super-column yet, and in
        /// "edit" mode otherwise. In edit mode the CSV options of the input table are shown in the controls.
        /// </summary>
        /// <param name="sourceSchema">Schema the data is imported from.</param>
        /// <param name="targetSchema">Schema the data is imported into.</param>
        /// <param name="column">Import column being created or edited.</param>
        /// <param name="initialColumns">Not used by this constructor.</param>
        public ImportMappingBox(DcSchema sourceSchema, DcSchema targetSchema, DcColumn column, List <DcColumn> initialColumns)
        {
            SourceColumnEntries = new ObservableCollection <ImportMappingEntry>();

            this.chooseSourceCommand = new DelegateCommand(this.ChooseSourceCommand_Executed, this.ChooseSourceCommand_CanExecute);
            this.okCommand           = new DelegateCommand(this.OkCommand_Executed, this.OkCommand_CanExecute);

            // A column whose output table already has a super-column is an existing one
            IsNew = column.Output.SuperColumn == null;

            SourceSchema = sourceSchema;
            Column       = column;
            TargetSchema = targetSchema;

            Initialize();

            InitializeComponent();

            // The named controls are only available after InitializeComponent() (assuming they are
            // declared in XAML), so they must not be touched earlier.
            if (!IsNew)
            {
                SetCsv csvInput = (SetCsv)Column.Input;
                hasHeaderRecord.IsChecked = csvInput.HasHeaderRecord;
                decimalChar.SelectedItem  = csvInput.Delimiter;
            }
        }
示例#4
0
        /// <summary>
        /// Handler of the OK command: copies the dialog state into the column as an aggregation definition
        /// and closes the dialog with a positive result.
        /// </summary>
        /// <param name="state">Command parameter (not used).</param>
        private void OkCommand_Executed(object state)
        {
            DcSchema schema = Column.Input.Schema;

            // Name as entered by the user
            Column.Name = newColumnName.Text;

            // Output type: COUNT always produces an integer, any other function keeps the type of the measure
            DcTable outputTable;
            if (AggregationFunction != "COUNT")
            {
                outputTable = MeasurePath.Output; // The same as the measure path
            }
            else
            {
                outputTable = schema.GetPrimitive("Integer");
            }
            Column.Output = outputTable;

            // Aggregation definition: fact table, single grouping path, single measure path, updater function
            var definition = Column.Definition;
            definition.DefinitionType = DcColumnDefinitionType.AGGREGATION;
            definition.FactTable      = FactTable;

            definition.GroupPaths.Clear();
            definition.GroupPaths.Add(GroupingPath);

            definition.MeasurePaths.Clear();
            definition.MeasurePaths.Add(MeasurePath);

            definition.Updater = AggregationFunction;

            this.DialogResult = true;
        }
示例#5
0
        /// <summary>
        /// Fills the given schema with sample tables: Departments (with two records), Employees and
        /// Managers, where Managers is created with Employees as its parent table.
        /// </summary>
        /// <param name="schema">Schema that receives the sample tables; its space is used to create them.</param>
        public void CreateSampleSchema(DcSchema schema)
        {
            DcSpace space = schema.Space;

            // Departments
            DcTable departments = space.CreateTable(DcSchemaKind.Dc, "Departments", schema.Root);

            DcColumn deptName     = space.CreateColumn("name", departments, schema.GetPrimitiveType("String"), true);
            DcColumn deptLocation = space.CreateColumn("location", departments, schema.GetPrimitiveType("String"), false);

            DcColumn[] deptColumns = new DcColumn[] { deptName, deptLocation };

            DcTableWriter writer = departments.GetData().GetTableWriter();
            writer.Open();
            try
            {
                writer.Append(deptColumns, new object[] { "SALES", "Dresden" });
                writer.Append(deptColumns, new object[] { "HR", "Walldorf" });
            }
            finally
            {
                // Close the writer even if appending fails
                writer.Close();
            }

            // Employees (the returned columns are not needed here)
            DcTable employees = space.CreateTable(DcSchemaKind.Dc, "Employees", schema.Root);

            space.CreateColumn("name", employees, schema.GetPrimitiveType("String"), true);
            space.CreateColumn("age", employees, schema.GetPrimitiveType("Double"), false);
            space.CreateColumn("salary", employees, schema.GetPrimitiveType("Double"), false);
            space.CreateColumn("dept", employees, departments, false);

            // Managers (parent table is Employees)
            DcTable managers = space.CreateTable(DcSchemaKind.Dc, "Managers", employees);

            space.CreateColumn("title", managers, schema.GetPrimitiveType("String"), false);
            space.CreateColumn("is project manager", managers, schema.GetPrimitiveType("Boolean"), false);
        }
示例#6
0
        /// <summary>
        /// Creates the dialog for the given table. A table without a super-column is treated as new,
        /// a table that already has one is treated as existing.
        /// </summary>
        /// <param name="schema">Schema stored in the dialog.</param>
        /// <param name="table">Table being created or edited.</param>
        public FreeColumnBox(DcSchema schema, DcTable table)
        {
            this.okCommand = new DelegateCommand(this.OkCommand_Executed, this.OkCommand_CanExecute);

            // Options and regime of the dialog
            IsNew = table.SuperColumn == null;

            Schema    = schema;
            Table     = table;
            TableName = Table.Name;

            // Entries have to be prepared before the visual tree is built
            Entries = new ObservableCollection <GreaterTableEntry>();
            CreateEntries();

            InitializeComponent();

            RefreshAll();
        }
示例#7
0
        /// <summary>
        /// Mirrors additions and removals of schemas into the schema list of the main window.
        /// Only the first changed item is considered, and only if it is a schema. Other actions are ignored.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Description of the collection change.</param>
        public void CollectionChanged(object sender, NotifyCollectionChangedEventArgs e)
        {
            MainWindow vm = (MainWindow)DataContext;

            switch (e.Action)
            {
                case NotifyCollectionChangedAction.Add: // Decide if this node has to add a new child node
                {
                    DcSchema added = null;
                    if (e.NewItems != null && e.NewItems.Count > 0)
                    {
                        added = e.NewItems[0] as DcSchema;
                    }

                    // Add only schemas which are not in the list yet
                    if (added != null && !vm.SchemaList.Contains(added))
                    {
                        vm.SchemaList.Add(added);
                    }
                    break;
                }

                case NotifyCollectionChangedAction.Remove:
                {
                    DcSchema removed = null;
                    if (e.OldItems != null && e.OldItems.Count > 0)
                    {
                        removed = e.OldItems[0] as DcSchema;
                    }

                    // Remove only schemas which are really in the list
                    if (removed != null && vm.SchemaList.Contains(removed))
                    {
                        vm.SchemaList.Remove(removed);
                    }
                    break;
                }
            }
        }
示例#8
0
        /// <summary>
        /// Restores the content of this object from its JSON representation in several passes:
        /// schemas, then tables, then columns, and finally column definitions.
        /// The order matters because each pass relies on the elements loaded by the previous ones.
        /// </summary>
        /// <param name="json">JSON object with the "schemas" and "tables" arrays; each schema has a "columns" array.</param>
        /// <param name="ws">Not used by this method: nested elements are deserialized with <c>this</c> as their context.</param>
        public virtual void FromJson(JObject json, DcSpace ws)
        {
            // List of schemas
            foreach (JObject schema in json["schemas"])
            {
                DcSchema sch = (DcSchema)Utils.CreateObjectFromJson(schema);
                if (sch != null) // Elements which cannot be instantiated are skipped
                {
                    sch.FromJson(schema, this);
                    _schemas.Add(sch);
                }
            }

            // List of tables
            foreach (JObject table in json["tables"])
            {
                DcTable tab = (DcTable)Utils.CreateObjectFromJson(table);
                if (tab != null)
                {
                    tab.FromJson(table, this);
                    _tables.Add(tab);
                }
            }

            // Load all columns from all schema (now all tables are present)
            foreach (JObject schema in json["schemas"])
            {
                foreach (JObject column in schema["columns"]) // List of columns
                {
                    DcColumn col = (DcColumn)Utils.CreateObjectFromJson(column);
                    if (col != null)
                    {
                        col.FromJson(column, this);
                        _columns.Add(col);
                    }
                }
            }

            // Second pass on all columns with the purpose to load their definitions (now all columns are present)
            foreach (JObject schema in json["schemas"])
            {
                foreach (JObject column in schema["columns"]) // List of columns
                {
                    // A temporary column object is created only to find out the input table and the name;
                    // it is not added to the list of columns
                    DcColumn col = (DcColumn)Utils.CreateObjectFromJson(column);
                    if (col != null)
                    {
                        col.FromJson(column, this);

                        // Find the same existing column (possibly without a definition)
                        // NOTE(review): there is no null check here - presumably the column is always found
                        // because it was added in the previous pass; confirm
                        DcColumn existing = col.Input.GetColumn(col.Name);

                        // Copy the definition
                        existing.FromJson(column, this);
                    }
                }
            }
        }
示例#9
0
        /// <summary>
        /// Builds primitive mappings in the opposite direction (from <paramref name="targetSet"/> to
        /// <paramref name="sourceSchema"/>), inverts them and stores the inverted versions in the cache.
        /// </summary>
        /// <param name="sourceSchema">Schema passed as the target of the underlying call.</param>
        /// <param name="targetSet">Table passed as the source of the underlying call.</param>
        /// <returns>The list of inverted mappings.</returns>
        public List <Mapping> MapPrimitiveSet(DcSchema sourceSchema, DcTable targetSet)
        {
            List <Mapping> inverted = MapPrimitiveSet(targetSet, sourceSchema);

            // Take the mappings out of the cache before they are changed
            foreach (Mapping map in inverted)
            {
                Mappings.Remove(map);
            }

            foreach (Mapping map in inverted)
            {
                map.Invert();
            }

            // Put them back in their inverted form
            Mappings.AddRange(inverted);

            return inverted;
        }
示例#10
0
 /// <summary>
 /// Returns the tables of the given schema, or all tables if no schema is specified.
 /// The result is always a new list.
 /// </summary>
 /// <param name="schema">Schema to filter by, or null for no filtering.</param>
 public virtual List <DcTable> GetTables(DcSchema schema)
 {
     if (schema != null)
     {
         return _tables.Where(table => table.Schema == schema).ToList();
     }

     // No filter: return a copy of the complete list
     return new List <DcTable>(_tables);
 }
示例#11
0
        /// <summary>
        /// Finds the best mapping in the cache: among the mappings from <paramref name="sourceSet"/> whose
        /// target table either has no schema or belongs to <paramref name="targetSchema"/>, the first one
        /// with the highest similarity is returned.
        /// </summary>
        /// <param name="sourceSet">Source table of the mapping.</param>
        /// <param name="targetSchema">Schema the target table has to belong to (targets without a schema are also accepted).</param>
        /// <returns>The best mapping or null if there are no suitable mappings.</returns>
        public Mapping GetBestMapping(DcTable sourceSet, DcSchema targetSchema) // Find best mapping in the cache
        {
            // Materialize the available mappings once so that the filter is not re-evaluated for every query below
            var candidates = Mappings
                .Where(m => m.SourceTab == sourceSet && (m.TargetTab.Schema == null || m.TargetTab.Schema == targetSchema))
                .ToList();

            if (candidates.Count == 0)
            {
                return null;
            }

            double bestSimilarity = candidates.Max(m => m.Similarity);
            return candidates.First(m => m.Similarity == bestSimilarity);
        }
示例#12
0
        /// <summary>
        /// Creates a table of the given kind together with its super-column pointing to the parent table,
        /// registers both in this object and sends an add notification for each of them.
        /// </summary>
        /// <param name="schemaType">Kind of the schema which determines the classes of the table and its super-column.</param>
        /// <param name="name">Name of the new table.</param>
        /// <param name="parent">Parent table; the super-column is named "Top" if it is a schema and "Super" otherwise.</param>
        /// <returns>The new table.</returns>
        /// <exception cref="NotImplementedException">The schema kind is not supported.</exception>
        public virtual DcTable CreateTable(DcSchemaKind schemaType, string name, DcTable parent)
        {
            DcSchema schema = parent.Schema; // Not used below
            //DcSchemaKind schemaType = schema.GetSchemaKind();

            string superColumnName = parent is DcSchema ? "Top" : "Super";

            DcTable table;
            Column  column;

            switch (schemaType)
            {
                case DcSchemaKind.Dc:
                    table  = new Table(name, this);
                    column = new Column(superColumnName, table, parent, true, true);
                    break;

                case DcSchemaKind.Csv:
                    table  = new TableCsv(name, this);
                    column = new ColumnCsv(superColumnName, table, parent, true, true);
                    break;

                case DcSchemaKind.Oledb: // Both relational kinds use the same classes
                case DcSchemaKind.Rel:
                    table  = new TableRel(name, this);
                    column = new ColumnRel(superColumnName, table, parent, true, true);
                    break;

                default:
                    throw new NotImplementedException("This schema type is not implemented.");
            }

            // The table is registered first and then its super-column
            _tables.Add(table);
            NotifyAdd(table);

            _columns.Add(column);
            NotifyAdd(column);

            return table;
        }
示例#13
0
        /// <summary>
        /// Handler of the OK command: stores the name and the expression entered in the dialog either in the
        /// Where definition of the source table (if <c>IsWhere</c> is set) or in the formula of the column,
        /// and closes the dialog with a positive result.
        /// </summary>
        /// <param name="state">Command parameter (not used).</param>
        private void OkCommand_Executed(object state)
        {
            DcSchema schema = Column.Input.Schema;

            // Name of the edited element
            if (IsWhere)
            {
                SourceTable.Name = sourceTableName.Text;
            }
            else
            {
                Column.Name = newColumnName.Text;
            }

            // The first element of the model is the expression; an absent or empty model means no expression
            ExprNode expr = null;
            if (ExpressionModel != null && ExpressionModel.Count > 0)
            {
                expr = ExpressionModel[0];
            }

            if (IsWhere) // Expression is part of the table Where definition
            {
                if (expr != null)
                {
                    expr.OutputVariable.TypeName  = "Boolean";
                    expr.OutputVariable.TypeTable = schema.GetPrimitive("Boolean");
                }

                SourceTable.Definition.WhereExpr = expr;
            }
            else // Expression belongs to the column definition
            {
                // Column type
                // Derive output type of the expression and use it to set the type of the column.
                // Alternatively, the type could be chosen by the user precisely as it is done for link columns.
                // Without an expression there is nothing to resolve and the column type is left as it is.
                if (expr != null)
                {
                    expr.Resolve(schema.Workspace, new List <DcVariable>()
                    {
                        new Variable(SourceTable, "this")
                    });
                    Column.Output = expr.OutputVariable.TypeTable;
                }

                Column.Definition.FormulaExpr = expr;
            }

            this.DialogResult = true;
        }
示例#14
0
        /// <summary>
        /// Ensures that all target elements exist in the specified schema by passing the target tree to
        /// <c>AddToSchema</c>. The mapping can reference new elements which are not in the schema yet.
        /// </summary>
        /// <param name="schema">
        /// Schema to add to. If null, the schema of the first match with a primitive target output is used
        /// (it stays null if there is no such match).
        /// </param>
        public void AddTargetToSchema(DcSchema schema = null) // Ensure that all target elements exist in the specified schema
        {
            if (schema == null) // Find the schema from the mapping elements
            {
                // We assume that primitive tables always have root defined (other tables might not have been added yet).
                PathMatch primitiveMatch = Matches.FirstOrDefault(m => m.TargetPath.Output.IsPrimitive);
                if (primitiveMatch != null)
                {
                    schema = primitiveMatch.TargetPath.Output.Schema;
                }
            }

            GetTargetTree().AddToSchema(schema);
        }
示例#15
0
        /// <summary>
        /// Loads the schema of a CSV file, checks it, then imports the data into a table of another schema
        /// by means of an import column with a tuple formula and checks the imported values.
        /// </summary>
        public void CsvReadTest() // Load Csv schema and data as a result of evaluation
        {
            DcSpace space = new Space();

            //
            // Remote side: schema and table describing the file
            //
            SchemaCsv top   = (SchemaCsv)space.CreateSchema("My Files", DcSchemaKind.Csv);
            TableCsv  table = (TableCsv)space.CreateTable(DcSchemaKind.Csv, "Products", top.Root);

            table.FilePath = CsvRead;
            top.LoadSchema(table); // The returned value is not needed

            Assert.AreEqual(1, top.Root.SubTables.Count);
            Assert.AreEqual(15, top.GetSubTable("Products").Columns.Count);

            Assert.AreEqual("String", top.GetSubTable("Products").GetColumn("Product Name").Output.Name);
            Assert.AreEqual("3", ((ColumnCsv)top.GetSubTable("Products").GetColumn("ID")).SampleValues[1]);

            //
            // Configure import
            //
            DcSchema schema        = space.CreateSchema("My Schema", DcSchemaKind.Dc);
            DcTable  productsTable = space.CreateTable(DcSchemaKind.Dc, "Products", schema.Root);

            // Manually create column to be imported (we need an automatic mechanism for appending missing columns specified in the formula)
            // Only the columns which are checked below are kept in variables
            space.CreateColumn("ID", productsTable, schema.GetPrimitiveType("Integer"), true);
            space.CreateColumn("Product Code", productsTable, schema.GetPrimitiveType("String"), false);
            DcColumn customName = space.CreateColumn("Custom Product Name", productsTable, schema.GetPrimitiveType("String"), false);
            space.CreateColumn("List Price", productsTable, schema.GetPrimitiveType("Double"), false);
            DcColumn constant = space.CreateColumn("Constant Column", productsTable, schema.GetPrimitiveType("Double"), false);

            // Define import column: from the file table to the new table
            DcColumn importColumn = space.CreateColumn("Import", top.GetSubTable("Products"), productsTable, false);

            importColumn.GetData().IsAppendData   = true;
            importColumn.GetData().Formula        = "(( [Integer] [ID] = this.[ID], [String] [Product Code] = [Product Code], [String] [Custom Product Name] = [Product Name], [Double] [List Price] = [List Price], [Double] [Constant Column] = 20.02 ))"; // Tuple structure corresponds to output table
            importColumn.GetData().IsAppendData   = true;
            importColumn.GetData().IsAppendSchema = true;

            productsTable.GetData().Populate();

            Assert.AreEqual(45, productsTable.GetData().Length);
            Assert.AreEqual("Northwind Traders Dried Pears", customName.GetData().GetValue(5));
            Assert.AreEqual(20.02, constant.GetData().GetValue(5));
        }
示例#16
0
        /// <summary>
        /// Renames this column. Before the name is changed, the Where expressions of all non-primitive
        /// tables of the schema are searched for uses of this column and the found nodes get the new name.
        /// Uses in column formulas are not updated yet (see the REFACTOR note below).
        /// </summary>
        /// <param name="newName">New name of the column.</param>
        protected void ColumnRenamed(string newName)
        {
            DcSpace  space  = this.Input.Space;
            DcSchema schema = this.Input.Schema;

            //
            // Check all elements of the schema that can store column name (tables, columns etc.)
            // Update their definition so that it uses the new name of the specified element
            //
            foreach (DcTable tab in space.GetTables(schema)) // schema.AllSubTables;
            {
                if (tab.IsPrimitive)
                {
                    continue;
                }

                foreach (var col in tab.Columns)
                {
                    DcColumnData data = col.GetData();
                    if (data == null)
                    {
                        continue;
                    }

                    /* REFACTOR: Here essentially we want to manually find all uses and hence have to use dependencies API
                     * if (data.FormulaExpr != null)
                     * {
                     *  nodes = data.FormulaExpr.Find((DcColumn)column);
                     *  nodes.ForEach(x => x.Name = newName);
                     * }
                     */
                }

                // Update table definitions by finding the uses of the specified column
                var whereExpr = tab.GetData().WhereExpr;
                if (whereExpr != null)
                {
                    List <ExprNode> uses = whereExpr.Find((DcColumn)this);
                    foreach (ExprNode use in uses)
                    {
                        use.Name = newName;
                    }
                }
            }

            this.Name = newName;
        }
示例#17
0
        /// <summary>
        /// Import the specified set along with all its greater tables.
        /// The set is not populated but is ready to be populated.
        /// It is a convenience method simplifying a typical operation: a new mapper maps the set to the
        /// target schema, the best mapping is chosen and its target elements are added to the schema.
        /// </summary>
        /// <param name="sourceSet">Table to be imported.</param>
        /// <param name="targetSchema">Schema to import into.</param>
        /// <returns>The target table of the best mapping.</returns>
        public static DcTable ImportSet(DcTable sourceSet, DcSchema targetSchema)
        {
            Mapper mapper = new Mapper { SetCreationThreshold = 1.0 };
            mapper.MapSet(sourceSet, targetSchema);

            Mapping best = mapper.GetBestMapping(sourceSet, targetSchema);
            best.AddTargetToSchema(targetSchema);

            // Define the column
            //DcColumn colImport = new Column(mapping);
            //colImport.Add();

            // Define the table
            return best.TargetTab;
        }
示例#18
0
        /// <summary>
        /// Resolves the schema and table names of this object into the schema and table objects of the space.
        /// If the schema name is known, the table is searched only in this schema. Otherwise the table is
        /// searched by its name first in the first schema of kind Dc (mashup) and then in all schemas, and
        /// the schema name is reconstructed from the schema where the table has been found.
        /// If nothing can be resolved, the method simply returns.
        /// </summary>
        /// <param name="space">Space with the schemas to search in.</param>
        public void Resolve(DcSpace space)
        {
            if (!string.IsNullOrEmpty(SchemaName))
            {
                // 1. Resolve schema name
                TypeSchema = space.GetSchema(SchemaName);
                if (TypeSchema != null)
                {
                    // 2. Resolve table name (it stays null if it cannot be resolved)
                    TypeTable = TypeSchema.GetSubTable(TypeName);
                }
                return;
            }

            if (string.IsNullOrEmpty(TypeName))
            {
                return; // Neither schema name nor table name
            }

            // No schema name (incomplete info)

            // 1. try to find the table in the mashup
            DcSchema mashup = null;
            foreach (DcSchema candidate in space.GetSchemas())
            {
                if (candidate.GetSchemaKind() == DcSchemaKind.Dc)
                {
                    mashup = candidate;
                    break;
                }
            }

            if (mashup != null)
            {
                TypeTable = mashup.GetSubTable(TypeName);
                if (TypeTable != null)
                {
                    TypeSchema = mashup;
                    SchemaName = TypeSchema.Name; // We also reconstruct the name
                    return;
                }
            }

            // 2. try to find the table in any other schema
            foreach (DcSchema schema in space.GetSchemas())
            {
                TypeTable = schema.GetSubTable(TypeName);
                if (TypeTable == null)
                {
                    continue;
                }

                TypeSchema = schema;
                SchemaName = TypeSchema.Name; // We also reconstruct the name
                return;
            }
        }
示例#19
0
        // Prepares the view model of the dialog for adding or editing a column.
        // It is called when this dialog is prepared or when the context changes (not during the process),
        // that is, essentially when the data context is set.
        private void initViewModel()
        {
            // Populate possible schemas using all schemas from the space with some limitations.
            // Rule/constraints for possible schemas:
            // - If input schema (always non-null) is remote, then output schema is only Mashup
            DcSpace  space  = mainVM.Space;
            DcSchema schema = Table.Schema;

            OutputSchemas.Clear();
            foreach (DcSchema candidate in space.GetSchemas())
            {
                OutputSchemas.Add(candidate);
            }

            if (!IsNew)
            {
                // Existing column: show its parameters (name, key, formula)
                ColumnName    = Column.Name;
                IsKey         = Column.IsKey;
                ColumnFormula = Column.GetData().Formula;

                // Select the current output schema and table
                SelectedOutputSchema = Column.Output.Schema;
                SelectedOutputTable  = Column.Output;

                // Set enabled/disabled
                //targetSchemaList.IsEnabled = false;
                //targetTableList.IsEnabled = false;
            }
            else
            {
                // New column: default parameters (name, key, formula)
                ColumnName    = "New Column";
                IsKey         = false;
                ColumnFormula = "";

                // Mashup is the default output schema
                SelectedOutputSchema = mainVM.MashupTop;

                // Set enabled/disabled
                //targetSchemaList.IsEnabled = true;
                //targetTableList.IsEnabled = true;
            }

            FirePropertyNotifyChanged("");
        }
示例#20
0
        /// <summary>
        /// Deletes the schema: first all its non-primitive tables are deleted one by one, then the schema
        /// itself is removed from the list of schemas and a remove notification is sent.
        /// </summary>
        /// <param name="schema">Schema to be deleted.</param>
        public virtual void DeleteSchema(DcSchema schema)
        {
            // We have to ensure that inter-schema (import/export) columns are also deleted
            foreach (DcTable table in this.GetTables(schema)) // schema.AllSubTables;
            {
                if (!table.IsPrimitive)
                {
                    this.DeleteTable(table);
                }
            }

            _schemas.Remove(schema);
            NotifyRemove(schema);
        }
示例#21
0
        /// <summary>
        /// Handler of the OK command: copies the dialog state into the column as a link definition
        /// and closes the dialog with a positive result.
        /// </summary>
        /// <param name="state">Command parameter (not used).</param>
        private void OkCommand_Executed(object state)
        {
            DcSchema schema = Column.Input.Schema;

            // Name as entered by the user
            Column.Name = newColumnName.Text;

            // The output table is taken from the selection only for new columns
            if (IsNew)
            {
                Column.Output = (DcTable)targetTables.SelectedItem;
            }

            // Link definition based on the mapping edited in the dialog
            var definition = Column.Definition;
            definition.DefinitionType = DcColumnDefinitionType.LINK;
            definition.Mapping        = MappingModel.Mapping;
            definition.IsAppendData   = false;

            this.DialogResult = true;
        }
示例#22
0
        /// <summary>
        /// Creates two sample tables in the given schema: "Table 1" with four primitive columns and
        /// "Table 2" with three primitive columns and one column referencing "Table 1".
        /// </summary>
        /// <param name="schema">Schema that receives the sample tables; its space is used to create them.</param>
        public static void CreateSampleSchema(DcSchema schema)
        {
            DcSpace space = schema.Space;

            // Primitive types shared by both tables
            var integerType = schema.GetPrimitiveType("Integer");
            var stringType  = schema.GetPrimitiveType("String");
            var doubleType  = schema.GetPrimitiveType("Double");
            var decimalType = schema.GetPrimitiveType("Decimal");

            // Table 1
            DcTable first = space.CreateTable(DcSchemaKind.Dc, "Table 1", schema.Root);

            space.CreateColumn("Column 11", first, integerType, true);
            space.CreateColumn("Column 12", first, stringType, true);
            space.CreateColumn("Column 13", first, doubleType, false);
            space.CreateColumn("Column 14", first, decimalType, false);

            // Table 2
            DcTable second = space.CreateTable(DcSchemaKind.Dc, "Table 2", schema.Root);

            space.CreateColumn("Column 21", second, stringType, true);
            space.CreateColumn("Column 22", second, integerType, true);
            space.CreateColumn("Column 23", second, doubleType, false);
            space.CreateColumn("Table 1", second, first, false); // Reference to the first table
        }
示例#23
0
        /// <summary>
        /// Creates the sample schema with data, describes a CSV file as a table of a CSV schema and
        /// populates this table by means of an export column with a tuple formula.
        /// </summary>
        public void CsvWriteTest() // Store schema and data to a CSV file as a result of evaluation
        {
            DcSpace  space  = new Space();
            DcSchema schema = space.CreateSchema("My Schema", DcSchemaKind.Dc);

            CoreTest.CreateSampleSchema(schema);
            CoreTest.CreateSampleData(schema);

            DcTable secondTable = schema.GetSubTable("Table 2");

            DcColumn c21 = secondTable.GetColumn("Column 21");
            DcColumn c22 = secondTable.GetColumn("Column 22");
            DcColumn c23 = secondTable.GetColumn("Column 23");

            //
            // Remote side: schema and table describing the file
            //
            SchemaCsv csvSchema = (SchemaCsv)space.CreateSchema("My Files", DcSchemaKind.Csv);
            TableCsv  csvTable  = (TableCsv)space.CreateTable(DcSchemaKind.Csv, "Table_1", csvSchema.Root);

            csvTable.FilePath = CsvWrite;

            // Manually create column to be imported (we need an automatic mechanism for appending missing columns specified in the formula)
            space.CreateColumn("Column 11", csvTable, csvSchema.GetPrimitiveType("String"), true);
            space.CreateColumn("Column 12", csvTable, csvSchema.GetPrimitiveType("String"), true);
            space.CreateColumn("Custom Column 13", csvTable, csvSchema.GetPrimitiveType("String"), true);
            space.CreateColumn("Constant Column", csvTable, csvSchema.GetPrimitiveType("String"), true);

            // Define export column: from the first sample table to the file table
            DcColumn exportColumn = space.CreateColumn("Export", schema.GetSubTable("Table 1"), csvTable, false);

            exportColumn.GetData().IsAppendData   = true;
            exportColumn.GetData().Formula        = "(( [String] [Column 11] = this.[Column 11], [String] [Column 12] = [Column 12], [String] [Custom Column 13] = [Column 13], [String] [Constant Column] = 20.02 ))"; // Tuple structure corresponds to output table
            exportColumn.GetData().IsAppendData   = true;
            exportColumn.GetData().IsAppendSchema = true;

            csvTable.Populate();
        }
示例#24
0
 /// <summary>
 /// Prepares the fixture: a new space with one schema of kind Dc filled with the sample tables.
 /// </summary>
 public void SetUp()
 {
     const string schemaName = "My Schema";

     space  = new Space();
     schema = space.CreateSchema(schemaName, DcSchemaKind.Dc);

     CoreTest.CreateSampleSchema(schema);
 }
示例#25
0
        public List <Mapping> MapPrimitiveSet(DcTable sourceSet, DcSchema targetSchema)
        {
            DcSchema       sourceSchema = sourceSet.Schema;
            List <Mapping> maps         = new List <Mapping>();
            DcTable        targetSet;

            if (sourceSchema.GetType() == typeof(Schema.Schema))     // Schema -> *
            {
                if (targetSchema.GetType() == typeof(Schema.Schema)) // Schema -> Schema
                {
                    targetSet = targetSchema.GetPrimitiveType(sourceSet.Name);
                    Mapping map = new Mapping(sourceSet, targetSet);
                    map.Similarity = 1.0;
                    maps.Add(map);
                }
                else if (targetSchema.GetType() == typeof(SchemaOledb)) // Schema -> SchemaOledb
                {
                    throw new NotImplementedException();
                }
            }
            else if (sourceSchema is SchemaOledb)                                                           // SchemaOledb -> *
            {
                if (targetSchema.GetType() == typeof(Schema.Schema))                                        // SchemaOledb -> Schema
                {
                    OleDbType sourceType = (OleDbType)Enum.Parse(typeof(OleDbType), sourceSet.Name, false); // Convert type representation: from name to enum (equivalent)
                    string    targetType;

                    // Mappings:
                    // http://msdn.microsoft.com/en-us/library/system.data.oledb.oledbtype(v=vs.110).aspx
                    // http://msdn.microsoft.com/en-us/library/cc668759(v=vs.110).aspx
                    switch (sourceType)
                    {                                // Integers
                    case OleDbType.BigInt:           // DBTYPE_I8 -> Int64
                    case OleDbType.Integer:          // DBTYPE_I4 -> Int32
                    case OleDbType.SmallInt:         // DBTYPE_I2 -> Int16
                    case OleDbType.TinyInt:          // DBTYPE_I1 -> SByte
                    case OleDbType.UnsignedBigInt:   // DBTYPE_UI8 -> UInt64
                    case OleDbType.UnsignedInt:      // DBTYPE_UI4 -> UInt32
                    case OleDbType.UnsignedSmallInt: // DBTYPE_UI2 -> UInt16
                    case OleDbType.UnsignedTinyInt:  // DBTYPE_UI1 -> Byte
                        targetType = "Integer";
                        break;

                    // Double
                    case OleDbType.Double:     // DBTYPE_R8
                    case OleDbType.Single:     // DBTYPE_R4 -> Single
                        targetType = "Double";
                        break;

                    // Decimal
                    case OleDbType.Currency:    // DBTYPE_CY
                    case OleDbType.Decimal:     // DBTYPE_DECIMAL
                    case OleDbType.Numeric:     // DBTYPE_NUMERIC
                    case OleDbType.VarNumeric:
                        targetType = "Decimal";
                        break;

                    // Boolean
                    case OleDbType.Boolean:     // DBTYPE_BOOL
                        targetType = "Boolean";
                        break;

                    // DateTime
                    case OleDbType.Date:        // DBTYPE_DATE
                    case OleDbType.DBDate:      // DBTYPE_DBDATE
                    case OleDbType.DBTime:      // DBTYPE_DBTIME ->  TimeSpan
                    case OleDbType.DBTimeStamp: // DBTYPE_DBTIMESTAMP
                    case OleDbType.Filetime:    // DBTYPE_FILETIME
                        targetType = "DateTime";
                        break;

                    // Strings
                    case OleDbType.BSTR:         // DBTYPE_BSTR
                    case OleDbType.Char:         // DBTYPE_STR
                    case OleDbType.LongVarChar:  //
                    case OleDbType.LongVarWChar: //
                    case OleDbType.VarChar:      //
                    case OleDbType.VarWChar:     //
                    case OleDbType.WChar:        // DBTYPE_WSTR
                        targetType = "String";
                        break;

                    // Binary
                    case OleDbType.Binary:        // DBTYPE_BYTES -> Array of type Byte
                    case OleDbType.LongVarBinary: // Array of type Byte
                    case OleDbType.VarBinary:     // Array of type Byte

                    // NULL
                    case OleDbType.Empty:       // DBTYPE_EMPTY

                    case OleDbType.Guid:        // DBTYPE_GUID -> Guid
                    case OleDbType.Error:       // DBTYPE_ERROR -> Exception
                    case OleDbType.IDispatch:   // DBTYPE_IDISPATCH -> Object
                    case OleDbType.IUnknown:    // DBTYPE_UNKNOWN -> Object

                    case OleDbType.PropVariant: // DBTYPE_PROP_VARIANT -> Object
                    case OleDbType.Variant:     // DBTYPE_VARIANT -> Object
                        targetType = null;
                        break;

                    default:
                        targetType = null;
                        break;
                    }

                    targetSet = targetSchema.GetPrimitiveType(targetType);

                    Mapping map = new Mapping(sourceSet, targetSet);
                    map.Similarity = 1.0;
                    maps.Add(map);
                }
                else if (targetSchema.GetType() == typeof(SchemaOledb)) // SchemaOledb -> SchemaOledb
                {
                    targetSet = targetSchema.GetPrimitiveType(sourceSet.Name);
                    Mapping map = new Mapping(sourceSet, targetSet);
                    map.Similarity = 1.0;
                    maps.Add(map);
                }
            }
            else if (sourceSchema is SchemaCsv)                      // SchemaCsv -> *
            {
                if (targetSchema.GetType() == typeof(Schema.Schema)) // SchemaCsv -> Schema
                {
                    string targetType = "String";
                }
            }

            Mappings.AddRange(maps);
            return(maps);
        }
示例#26
0
        /// <summary>
        /// Build a new mapping from the source table to the target table in which every source column (or relational attribute) is paired with one newly created target column.
        /// For a SchemaOledb source, the attributes listed in GreaterPaths of the source table are used and the target column gets the relational column name of the attribute.
        /// For a SchemaCsv source, every non-super column is used and the target primitive type ("Integer", "Double" or "String") is chosen from the sample values of the column.
        /// For any other kind of source schema the returned mapping has no matches added by this method.
        /// </summary>
        /// <param name="sourceSet">Non-primitive source table.</param>
        /// <param name="targetSet">Non-primitive target table the new columns are created for.</param>
        /// <param name="targetSchema">Schema used to resolve target primitive types; if null, the schema of <paramref name="targetSet"/> is used.</param>
        /// <returns>The new mapping with one match (similarity 1.0) per processed source column.</returns>
        public Mapping CreatePrimitive(DcTable sourceSet, DcTable targetSet, DcSchema targetSchema)
        {
            Debug.Assert(!sourceSet.IsPrimitive && !targetSet.IsPrimitive, "Wrong use: copy mapping can be created for only non-primitive tables.");
            Debug.Assert(targetSchema != null || targetSet.Schema != null, "Wrong use: target schema must be specified.");

            Mapping result     = new Mapping(sourceSet, targetSet);
            DcSchema fromSchema = result.SourceTab.Schema;

            // Fall back to the schema of the target table when no schema is given explicitly
            targetSchema = targetSchema ?? targetSet.Schema;

            if (fromSchema is SchemaOledb)
            {
                TableRel relTable = (TableRel)result.SourceTab;
                foreach (ColumnAtt att in relTable.GreaterPaths)
                {
                    ColumnPath sourcePath = new ColumnAtt(att);

                    // Generate primitive type mappings first, then pick the recommended target type from them
                    this.MapPrimitiveSet(att.Output, targetSchema);
                    DcTable primitiveType = this.GetBestTargetSet(att.Output, targetSchema);

                    DcColumn   targetColumn = new Schema.Column(att.RelationalColumnName, result.TargetTab, primitiveType, att.IsKey, false);
                    ColumnPath targetPath   = new ColumnPath(targetColumn);
                    targetPath.Name = sourcePath.Name; // The target path is named after the source path

                    result.Matches.Add(new PathMatch(sourcePath, targetPath, 1.0));
                }
            }
            else if (fromSchema is SchemaCsv)
            {
                DcTable csvTable = (DcTable)result.SourceTab;
                foreach (DcColumn sourceColumn in csvTable.Columns)
                {
                    if (!sourceColumn.IsSuper) // Super-columns are not copied
                    {
                        // Choose the most specific target type that fits the sample values of the column
                        List <string> samples = ((ColumnCsv)sourceColumn).SampleValues;

                        string typeName = Com.Schema.Utils.isInt32(samples.ToArray()) ? "Integer"
                                        : Com.Schema.Utils.isDouble(samples.ToArray()) ? "Double"
                                        : "String";

                        DcTable  primitiveType = targetSchema.GetPrimitiveType(typeName);
                        DcColumn targetColumn  = targetSchema.Space.CreateColumn(sourceColumn.Name, result.TargetTab, primitiveType, sourceColumn.IsKey);

                        ColumnPath sourcePath = new ColumnPath(sourceColumn);
                        ColumnPath targetPath = new ColumnPath(targetColumn);

                        result.Matches.Add(new PathMatch(sourcePath, targetPath, 1.0));
                    }
                }
            }

            return(result);
        }
示例#27
0
        /// <summary>
        /// Generate the best mapping from the specified source set to a target table of the specified schema.
        /// Best mappings from the source greater tables are (re)used and are created recursively if they do not already exist in the mapper.
        /// If no existing target table reaches SetCreationThreshold, a new target table (reachable only via the mapping) is created as a copy of the source set.
        /// </summary>
        /// <param name="sourceSet">Source table to map. A primitive table is delegated to MapPrimitiveSet.</param>
        /// <param name="targetSchema">Schema whose sub-tables are the candidate targets.</param>
        /// <returns>The generated mappings; the same mappings are also appended to Mappings.</returns>
        public List <Mapping> MapSet(DcTable sourceSet, DcSchema targetSchema)
        {
            if (sourceSet.IsPrimitive)
            {
                return(MapPrimitiveSet((Schema.Table)sourceSet, targetSchema));
            }

            List <Mapping> maps = new List <Mapping>();

            Dictionary <DcColumn, Mapping> greaterMappings = new Dictionary <DcColumn, Mapping>();

            //
            // 1. Find target greater tables. They are found among mappings and hence can contain both existing (in the schema) and new tables.
            //
            List <DcTable> targetOutputTabs = new List <DcTable>();

            foreach (DcColumn sd in sourceSet.Columns)
            {
                Mapping gMapping = GetBestMapping(sd.Output, targetSchema);

                if (gMapping == null)                                   // Either does not exist or cannot be built (for example, formally not possible or meaningless)
                {
                    MapSet(sd.Output, targetSchema);                    // Recursion up to primitive tables if not computed and stored earlier
                    gMapping = GetBestMapping(sd.Output, targetSchema); // Try again after generation
                }

                // The mapping may still be null here; consumers below must check for that
                greaterMappings.Add(sd, gMapping);

                targetOutputTabs.Add(gMapping != null ? gMapping.TargetTab : null);
            }

            //
            // 2. Now find the best (existing) lesser set for the target greater tables. The best set should cover most of them by its greater columns
            //
            List <DcTable> allTargetTabs = targetSchema.AllSubTables;

            double[] coverage         = new double[allTargetTabs.Count];
            double   maxCoverage      = 0;
            int      maxCoverageIndex = -1;

            for (int i = 0; i < allTargetTabs.Count; i++)
            {
                // Find coverage of this target set (how many best greater target tables it covers)
                coverage[i] = 0;
                foreach (DcColumn tgc in allTargetTabs[i].Columns)
                {
                    if (!targetOutputTabs.Contains(tgc.Output))
                    {
                        continue;
                    }

                    // TODO: Compare column names and then use it as a weight [0,1] instead of simply incrementing
                    coverage[i] += 1;
                }

                // Normalize to [0,1]. A source set without columns would give 0/0 = NaN, so leave the coverage at 0 in that case.
                if (targetOutputTabs.Count > 0)
                {
                    coverage[i] /= targetOutputTabs.Count;
                }
                if (coverage[i] > 1)
                {
                    coverage[i] = 1; // A lesser set can use (reference, cover) a greater set more than once
                }

                // Take into account individual similarity of the target set with the source set
                double nameSimilarity = StringSimilarity.ComputeStringSimilarity(sourceSet.Name, allTargetTabs[i].Name, 3);
                coverage[i] *= nameSimilarity;

                // TODO: Take into account difference in max ranks

                if (coverage[i] > maxCoverage)
                {
                    maxCoverage      = coverage[i];
                    maxCoverageIndex = i;
                }
            }

            //
            // 3. Create and store a mapping (or several mappings)
            //
            Mapping newMapping = null;

            // A negative index means that no existing table has positive coverage, so there is nothing to reuse regardless of the threshold value.
            if (maxCoverageIndex < 0 || maxCoverage < SetCreationThreshold)     // Create new target set for mapping (and its greater columns) which will be accessible only via the mapping object (not via the schema)
            {
                DcTable ts = new Schema.Table(sourceSet.Name, sourceSet.Space); // New set has the same name as the source set

                newMapping = new Mapping(sourceSet, ts);

                foreach (DcColumn sd in sourceSet.Columns) // For each source column, create one new target column
                {
                    Mapping gMapping = greaterMappings[sd];
                    if (gMapping == null) // No target type could be found for this column, so it cannot be cloned
                    {
                        continue;
                    }
                    DcTable gts = gMapping.TargetTab;

                    DcColumn td = targetSchema.Space.CreateColumn(sd.Name, ts, gts, sd.IsKey); // Create a clone for the source column

                    newMapping.AddPaths(sd, td, gMapping);                                     // Add a pair of columns as a match (with expansion using the specified greater mapping)
                }

                newMapping.Similarity = 1.0;
                maps.Add(newMapping);
            }
            else // Use existing target set(s) for mapping(s)
            {
                DcTable ts = allTargetTabs[maxCoverageIndex];

                newMapping = new Mapping(sourceSet, ts);

                foreach (DcColumn sd in sourceSet.Columns) // For each source column, find best target column
                {
                    Mapping gMapping = greaterMappings[sd];
                    if (gMapping == null) // No target type is known for this column, so it cannot be covered
                    {
                        continue;
                    }
                    DcTable gts = gMapping.TargetTab;

                    // Find an existing column from ts to gts (currently simply the first one).
                    // TODO: In fact, we need to choose the best column, for example, by comparing their names, usages, ranks and other semantic factors
                    DcColumn td = ts.Columns.FirstOrDefault(d => d.Output == gts);

                    if (td == null) // No good target column found (the source column is not covered)
                    {
                        continue;   // TODO: Maybe create a new target column rather than simply ignoring it
                    }

                    //td.IsIdentity = sd.IsIdentity;

                    newMapping.AddPaths(sd, td, gMapping); // Add a pair of columns as a match (with expansion using the specified greater mapping)
                }

                newMapping.Similarity = maxCoverage;
                maps.Add(newMapping);
            }

            Mappings.AddRange(maps);
            return(maps);
        }
示例#28
0
        /// <summary>
        /// Generate best mappings from the source set to the target set.
        /// This is a placeholder: no mapping is generated yet. The comments in the body are design notes for a future implementation.
        /// </summary>
        /// <param name="sourceSet">Source table; must not be primitive.</param>
        /// <param name="targetSet">Target table; currently not used.</param>
        /// <returns>Currently always null for a non-primitive source set.</returns>
        /// <exception cref="NotImplementedException">The source set is primitive.</exception>
        public List <Mapping> MapSet_NEW(DcTable sourceSet, DcTable targetSet)
        {
            // For the first simplest version, we generate only column mappings for relational source tables

            if (sourceSet.IsPrimitive)
            {
                throw new NotImplementedException();
            }

            // Mapping usage scenarios:
            // - type change (MapCol). Here we have old col with its type and want to change this type.
            //   we list all formally possible new types and for each of them generate a mapping (old type -> new type) by taking into account their usage by the old/new columns
            // - from concrete source set to target schema. new (greater) could be created if they are needed (if all existing are bad)
            //   it is used for importing tables as a whole by finding the best position in the schema
            // - from concrete set to concrete set (new columns could be created if it is allowed and all existing are bad, greater tables could be created if they are allowed and all existing are bad)
            //    here the goal is to find best map to the concrete target but it assumes that all existing target tables could be used as targets for greater tables (types)

            // Building a mapping always means finding a good target type (set as a whole, among all available) for each source type
            // It can be viewed as finding type usage which means a pair <lesser col, type set>.
            // A good type means a set with all its greater columns which means recursion

            // One algorithm is that in order to find a good set mapping we have to find good mappings for its greater tables (recursively)
            // Another algorithm is to build all source and target primitive paths and then evaluate all matches among them. First, we choose only good path matches to decrease the space. Then build one possible mapping and evaluate its quality.

            // One formal and semantic way to think about a mapping is that target col tree has to be fit into the source col tree (or vice versa) by maximizing relevance factor
            // Leaves of the paths must be primitive tables and these tables must be matched
            // The root also must be matched (the question is whether it is a col or set)
            // It is a kind of semantically best tree coverage.
            // Note that intermediate nodes represent trees and hence are also matches that can be evaluated so we get recursion
            // The main question here is how to generate all possible tree coverages (by satisfying some formal conditions like leaf matching)
            // Second question is what are nodes of the tree: cols or tables?
            // What is being matched: nodes (tables or cols), edges, or a pair of <edge, node>
            // How a whole node quality is evaluated as opposed to one edge evaluation?

            // One algorithm to fit a graph into another graph is as follows:
            // Enumerate all possible path matches which satisfy formal constraints:
            // - Leaves are matched (that is, connects starts and ends of the paths - not in the middle)
            // - Next path match must satisfy constraints of all the existing path matches (see CanMatch method)
            //   - If source path has non-null prefix intersection with some existing source path, the target must continue the previous target. Set match inference: these intermediate tables are matched.
            // - Set inference rules:
            //   - common prefix of two source paths in different matches (target paths must also have the same set in the middle)
            //   - (a single variant) The only possible matching set for another set. As a consequence, other tables of this path might also get the only matching set.
            //   - (no variants) Having no matching set rule. If this set is between immediately connected tables which have been already matched (no free places, no options).

            // This algorithm could be implemented on paths or on tuples (trees)
            // In the case of trees, we match the tree leaf nodes and then choose other matches and derive set matches (so intermediate set nodes also could be matched).
            // A tree can be then converted to a mapping (path matches) and vice versa
            // The algorithm finds all possible leaf matches
            // For each chosen next free (non-matched) source, it is necessary to choose (best) next non-matched target leaf (taking into account formal constraints)
            // After choosing next match, derive intermediate set matches. A set match can be represented as a list of formally possible set matches (including empty list, a single set and more)
            // Choosing best options is based on evaluating primitive set (predefined or user-defined) matches. Then we can evaluate complex tables (tree) quality. Here we need to aggregate its column and set matches by taking into account coverage.


            // How will it be used?
            // We will give a relational set as a source (with cols and atts)
            // Some existing non-relational set will be used as a target
            // Normally this call is made before opening an editor for mappings in a dialog box to initialize/recommend mappings
            // The target can be empty or non-empty
            // If it is empty (first call of the dialog for import) then we simply generate identical target columns (copy)
            // If it is non-empty then we do not make any recommendations and can simply edit the existing mapping

            // Columns or attributes?
            // Mappings use column paths by definition
            // One use of mappings is generating a tuple expression which is then used by the interpreter to access functions of the current record
            // 1. For relational set (flat), a record is always a DataRow with attribute names as functions - columns are not used.
            // 2. Relational expanded tables can however generate new attributes which correspond to fk-attribute-paths. Their names are generated and specified in the SQL-query (with joins to attach fk-tables).
            // Second usage of mappings is in editor where the user can choose manually which source attributes have to be chosen for import.
            // Third use is in the mapper for recommendation and schema matching. Here the mapping stores important semantic data.

            // Our first use is to simply store which source attributes have to be imported and which target primitive types have to be used.
            // Automatic mapping is not needed here. We list all source attributes - are they columns or attributes? Indeed, we use ColumnPath object. May be use ColumnAtt?
            // If it has to be imported then we add a target column as a match. If not, then either do not add the source or leave the matching target path empty.
            // Parameterize the target path: its greater primitive set, its name.
            // We need an initializer (constructor) for this structure.
            // And we need a dialog to be able to edit this structure, say, by listing all source paths, and for each of them having a checkbox for inclusion as well as name, target primitive set in combo box.

            return(null);
        }
示例#29
0
        /// <summary>
        /// Find the table of the specified source schema that is the best source for the given target table.
        /// </summary>
        /// <param name="sourceSchema">Schema in which the source table is searched for.</param>
        /// <param name="targetSet">Target table a source is needed for.</param>
        /// <returns>The source table of the best mapping, or null if GetBestMapping returns null.</returns>
        public DcTable GetBestSourceSet(DcSchema sourceSchema, DcTable targetSet)
        {
            Mapping bestMapping = GetBestMapping(sourceSchema, targetSet);

            // The (schema, table) overload of GetBestMapping inverts the mapping it finds, so the result
            // is oriented source -> target and its TargetTab is the targetSet argument itself.
            // The table we are looking for is therefore on the source side of the mapping.
            // NOTE(review): this relies on Mapping.Invert() swapping SourceTab and TargetTab - confirm.
            return(bestMapping == null ? null : bestMapping.SourceTab);
        }
示例#30
0
        }                                                                         // Size of the search space. Do not build more potential mappings.

        /// <summary>
        /// Look up the best mapping from the source table into the target schema and return the table it maps to.
        /// </summary>
        /// <param name="sourceSet">Source table a target is needed for.</param>
        /// <param name="targetSchema">Schema in which the target table is searched for.</param>
        /// <returns>The target table of the best mapping, or null if GetBestMapping returns null.</returns>
        public DcTable GetBestTargetSet(DcTable sourceSet, DcSchema targetSchema) // Find target in the cache
        {
            Mapping found = GetBestMapping(sourceSet, targetSchema);
            if (found == null)
            {
                return(null); // Nothing is known for this source table
            }

            return(found.TargetTab);
        }