Esempio n. 1
0
        /// <summary>
        /// Add matches for a pair of starting columns by expanding the pair with the matches of the given mapping.
        /// </summary>
        /// <param name="sd">Column used as the first segment of every new source path.</param>
        /// <param name="td">Column used as the first segment of every new target path.</param>
        /// <param name="gMapping">Mapping whose matches supply the continuations appended after the starting columns.</param>
        public void AddPaths(DcColumn sd, DcColumn td, Mapping gMapping)
        {
            Debug.Assert(sd != null && sd.Input == SourceTab, "Wrong use: source path must start from the source table.");
            Debug.Assert(td != null && td.Input == TargetTab, "Wrong use: target path must start from the target table.");

            Debug.Assert(sd != null && sd.Output == gMapping.SourceTab, "Wrong use: source path must end where the mapping starts.");
            Debug.Assert(td != null && td.Output == gMapping.TargetTab, "Wrong use: target path must end where the mapping starts.");

            // Without continuations (for example, a mapping between primitive tables)
            // the pair itself is added as two one-segment paths.
            if (gMapping.Matches.Count == 0)
            {
                ColumnPath sourceOnly = new ColumnPath();
                sourceOnly.InsertLast(sd);

                ColumnPath targetOnly = new ColumnPath();
                targetOnly.InsertLast(td);

                Matches.Add(new PathMatch(sourceOnly, targetOnly));
            }

            // One new match per continuation: starting column followed by the continuation path.
            foreach (PathMatch continuation in gMapping.Matches)
            {
                ColumnPath extendedSource = new ColumnPath();
                extendedSource.InsertLast(sd);
                extendedSource.InsertLast(continuation.SourcePath);

                ColumnPath extendedTarget = new ColumnPath();
                extendedTarget.InsertLast(td);
                extendedTarget.InsertLast(continuation.TargetPath);

                Matches.Add(new PathMatch(extendedSource, extendedTarget));
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Find the node corresponding to the path by following its segments from this node. The tree is not modified.
        /// </summary>
        /// <param name="path">Path whose segments are looked up one after another.</param>
        /// <returns>The node reached by the last segment, or null if the path has no segments or some segment has no matching child.</returns>
        public ColumnTree FindPath(ColumnPath path)
        {
            Debug.Assert(path != null && path.Input == Set, "Wrong use: path must start from the node it is added to.");

            if (path.Segments == null || path.Segments.Count == 0)
            {
                return null;
            }

            ColumnTree current = this;
            foreach (DcColumn segment in path.Segments)
            {
                ColumnTree next = current.GetChild(segment);
                if (next == null)
                {
                    return null; // The segment is not in the tree, so the whole path is not found
                }

                current = next;
            }

            return current;
        }
Esempio n. 3
0
        /// <summary>
        /// Find or create the nodes corresponding to the path, following its segments from this node.
        /// </summary>
        /// <param name="path">Path whose segments are looked up, and created where missing, one after another.</param>
        /// <returns>The node for the last segment, or null if the path has no segments.</returns>
        public ColumnTree AddPath(ColumnPath path)
        {
            Debug.Assert(path != null && path.Input == Set, "Wrong use: path must start from the node it is added to.");

            if (path.Segments == null || path.Segments.Count == 0)
            {
                return null;
            }

            ColumnTree current = this;
            foreach (DcColumn segment in path.Segments)
            {
                ColumnTree next = current.GetChild(segment);
                if (next == null)
                {
                    // No child for this segment yet: create one with the same runtime type as its parent node
                    next        = (ColumnTree)Activator.CreateInstance(current.GetType());
                    next.Column = segment;
                    current.AddChild(next);
                }

                current = next;
            }

            return current;
        }
Esempio n. 4
0
 /// <summary>
 /// Check whether the path may be used as a source path of this mapping.
 /// </summary>
 /// <returns>True if the path starts at SourceTab or at a table accepted by SourceTab.IsInput; otherwise false.</returns>
 public bool IsSourcePathValid(ColumnPath path)
 {
     return path.Input == SourceTab || SourceTab.IsInput(path.Input);
 }
Esempio n. 5
0
 /// <summary>
 /// Check whether the path may be used as a target path of this mapping.
 /// </summary>
 /// <returns>True if the path starts at TargetTab or at a table accepted by TargetTab.IsInput; otherwise false.</returns>
 public bool IsTargetPathValid(ColumnPath path)
 {
     return path.Input == TargetTab || TargetTab.IsInput(path.Input);
 }
Esempio n. 6
0
        /// <summary>
        /// Insert all segments of the specified path at the beginning of this path.
        /// </summary>
        /// <param name="path">Prefix path. If this path is not empty, the prefix is expected to end where this path starts (checked by a debug assertion only).</param>
        public void InsertFirst(ColumnPath path)
        {
            Debug.Assert(Size == 0 || path.Output == Input, "A path must continue the first segment inserted in the beginning.");

            var prefixSegments = path.Segments;
            Segments.InsertRange(0, prefixSegments);

            // The combined path starts where the prefix starts
            Input = path.Input;

            // An already known output is kept; otherwise it is taken from the prefix
            if (Output != null)
            {
                return;
            }

            Output = path.Output;
        }
Esempio n. 7
0
 /// <summary>
 /// Find the first match whose target path matches the specified path.
 /// </summary>
 /// <param name="path">Path passed to PathMatch.MatchesTarget for every stored match.</param>
 /// <returns>The first match in Matches for which MatchesTarget returns true, or null if there is none.</returns>
 public PathMatch GetMatchForTarget(ColumnPath path)
 {
     return Matches.Find(candidate => candidate.MatchesTarget(path));
 }
Esempio n. 8
0
 /// <summary>
 /// Remove the given prefixes from the source and target paths of all matches and move the start tables of the mapping to the ends of these prefixes.
 /// </summary>
 /// <param name="sourcePath">Prefix to remove from every source path; null leaves the source side unchanged.</param>
 /// <param name="targetPath">Prefix to remove from every target path; null leaves the target side unchanged.</param>
 public void RemoveFirst(ColumnPath sourcePath, ColumnPath targetPath)
 {
     if (sourcePath != null)
     {
         // The mapping now starts where the removed source prefix ends
         _sourceTab = sourcePath.Output;
         foreach (PathMatch match in Matches)
         {
             match.SourcePath.RemoveFirst(sourcePath);
         }
     }

     if (targetPath == null)
     {
         return;
     }

     // The mapping now starts where the removed target prefix ends
     _targetTab = targetPath.Output;
     foreach (PathMatch match in Matches)
     {
         match.TargetPath.RemoveFirst(targetPath);
     }
 }
Esempio n. 9
0
        /// <summary>
        /// Remove the specified match and all more specific matches (continuations of both paths).
        /// </summary>
        /// <param name="sourcePath">Source path that a match must satisfy via MatchesSource to be removed.</param>
        /// <param name="targetPath">Target path that a match must satisfy via MatchesTarget to be removed.</param>
        public void RemoveMatch(ColumnPath sourcePath, ColumnPath targetPath)
        {
            Debug.Assert(sourcePath.Input == SourceTab, "Wrong use: source path must start from the source table.");
            Debug.Assert(targetPath.Input == TargetTab, "Wrong use: target path must start from the target table.");

            // Collect first, remove afterwards, so that the list is not modified while it is being scanned
            List<PathMatch> obsolete = Matches.FindAll(
                existing => existing.MatchesSource(sourcePath) && existing.MatchesTarget(targetPath));

            foreach (PathMatch match in obsolete)
            {
                Matches.Remove(match);
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Return a new path consisting of a contiguous range of segments of this path.
        /// </summary>
        /// <param name="index">Zero-based position of the first segment to copy.</param>
        /// <param name="count">Number of segments to copy; 0 (the default) means all segments from <paramref name="index"/> to the end.</param>
        /// <returns>
        /// A new path that references the same segment objects. Its Input is the Input of the first copied segment
        /// and its Output is the Output of the last copied segment.
        /// </returns>
        /// <remarks>As before, selecting no segments or a range outside of this path fails in the segment indexer.</remarks>
        public ColumnPath SubPath(int index, int count = 0)
        {
            ColumnPath ret = new ColumnPath();

            if (count == 0)
            {
                count = Segments.Count - index; // Everything up to the end of this path
            }

            for (int i = 0; i < count; i++)
            {
                ret.Segments.Add(Segments[index + i]);
            }

            // A path starts where its first segment starts and ends where its last segment ends.
            // (The two assignments used to be swapped, which produced a path with reversed end points.)
            ret.Input  = ret.Segments[0].Input;
            ret.Output = ret.Segments[ret.Segments.Count - 1].Output;

            return ret;
        }
Esempio n. 11
0
        /// <summary>
        /// Append all segments of the specified path to the end of this path.
        /// </summary>
        /// <param name="path">
        /// Path to append. A null or empty path is ignored. If this path is not empty, the appended path is expected
        /// to start where this path ends (checked by a debug assertion only).
        /// </param>
        public void InsertLast(ColumnPath path)
        {
            // The null check comes first so that the assertion below never dereferences a null argument
            if (path == null)
            {
                return;
            }

            Debug.Assert(Size == 0 || path.Input == Output, "A an appended path must continue this path.");

            if (path.Size == 0)
            {
                return; // Nothing to append
            }

            // AddRange also copes with a path appended to itself, where an element-by-element loop would never end
            Segments.AddRange(path.Segments);

            // The combined path ends where the appended path ends
            Output = path.Output;
            if (Input == null)
            {
                Input = path.Input; // The start is taken from the appended path only if it was not known before
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Remove the specified prefix from the beginning of this path.
        /// </summary>
        /// <param name="path">Prefix to remove. Nothing happens if it is longer than this path or this path does not start with it.</param>
        public void RemoveFirst(ColumnPath path)
        {
            bool canRemove = Segments.Count >= path.Segments.Count && this.StartsWith(path);
            if (!canRemove)
            {
                return;
            }

            Segments.RemoveRange(0, path.Segments.Count);

            // The path now starts at its new first segment; an emptied path starts where it ends
            Input = Segments.Count > 0 ? Segments[0].Input : Output;
        }
Esempio n. 13
0
        /// <summary>
        /// Find the best path starting from the target set and corresponding to the source path.
        /// </summary>
        /// <param name="sourcePath">Path that every candidate target path is compared with.</param>
        /// <param name="targetSet">Table from which candidate target paths are enumerated.</param>
        /// <returns>
        /// The enumerated path with the highest value of StringSimilarity.ComputePathSimilarity (the first one wins a tie),
        /// or null if no path is enumerated or no similarity exceeds Double.MinValue.
        /// </returns>
        public ColumnPath MapCol(ColumnPath sourcePath, DcTable targetSet)
        {
            List<ColumnPath> candidates = (new PathEnumerator(targetSet, ColumnType.IDENTITY_ENTITY)).ToList();

            ColumnPath winner      = null;
            double     winnerScore = Double.MinValue;

            // With no candidates the loop body never runs and null is returned
            for (int i = 0; i < candidates.Count; i++)
            {
                ColumnPath candidate = candidates[i];
                double     score     = StringSimilarity.ComputePathSimilarity(sourcePath, candidate);
                if (score > winnerScore)
                {
                    winnerScore = score;
                    winner      = candidate;
                }
            }

            return winner;
        }
Esempio n. 14
0
        /// <summary>
        /// Build mappings from the source set to the target set. The tables are greater tables of the specified columns.
        /// The mapping should take into account (semantically) that these tables are used from these columns.
        /// </summary>
        /// <param name="sourcePath">Prefix path on the source side; its continuations enumerated from its Output are the paths to be matched.</param>
        /// <param name="targetPath">Prefix path on the target side; its continuations enumerated from its Output are the candidate targets.</param>
        /// <returns>
        /// The accepted mappings ordered by descending Similarity, with the two prefixes removed again.
        /// The same mappings are also appended to Mappings.
        /// </returns>
        public List <Mapping> MapCol(ColumnPath sourcePath, ColumnPath targetPath)
        {
            // We analyze all continuations of the specified prefix paths
            List <ColumnPath> sourcePaths = (new PathEnumerator(sourcePath.Output, ColumnType.IDENTITY_ENTITY)).ToList();

            // Each enumerated continuation is extended in place with the prefix
            sourcePaths.ForEach(p => p.InsertFirst(sourcePath));
            if (sourcePaths.Count == 0)
            {
                // No continuations: the prefix itself is the only path to match
                sourcePaths.Add(sourcePath);
            }

            List <ColumnPath> targetPaths = (new PathEnumerator(targetPath.Output, ColumnType.IDENTITY_ENTITY)).ToList();

            targetPaths.ForEach(p => p.InsertFirst(targetPath));
            if (targetPaths.Count == 0)
            {
                targetPaths.Add(targetPath);
            }

            List <Mapping> mappings = new List <Mapping>();

            int colCount = sourcePaths.Count();

            var matches = new List <Tuple <ColumnPath, List <ColumnPath> > >(); // List of: <srcPath, targetPaths>

            int[] lengths = new int[colCount];                                  // Each column has some length (some valid target paths)
            for (int i = 0; i < colCount; i++)
            {
                ColumnPath        sp  = sourcePaths[i];
                List <ColumnPath> tps = new List <ColumnPath>();

                // Sort target paths according to their similarity
                tps.AddRange(targetPaths);
                tps = tps.OrderByDescending(p => StringSimilarity.ComputePathSimilarity(sp, p)).ToList();
                if (tps.Count > MaxPossibleTargetPaths) // Leave only top n target paths with the best similarity
                {
                    tps.RemoveRange(MaxPossibleTargetPaths, tps.Count - MaxPossibleTargetPaths);
                }

                // TODO: Cut the tail with similarity less than MinPathSimilarity

                matches.Add(Tuple.Create(sp, tps));
                lengths[i] = tps.Count;
            }

            // Search state. offsets[i] is the index of the target path currently chosen for source path i:
            // -1 means "not chosen yet", a value equal to lengths[i] means "no target path chosen".
            int[] offsets = new int[colCount]; // Here we store the current state of choices for each columns (target path number)
            for (int i = 0; i < colCount; i++)
            {
                offsets[i] = -1;
            }

            int top = -1; // The current level/top where we change the offset. Depth of recursion.

            // Start at the first source path that has at least one candidate target path
            do
            {
                ++top;
            } while (top < colCount && lengths[top] == 0);

            int mappingsBuilt = 0; // The number of all hypothesis (mappings) built and checked

            // Builds a mapping from the choices currently stored in offsets for the first sourcePathCount source paths.
            // withPrefix is always true here, so the mapping starts at the prefix inputs and the RemoveFirst calls are never executed.
            Func <int, Mapping> BuildSetMapping = delegate(int sourcePathCount)
            {
                bool    withPrefix = true;
                Mapping mapping;
                if (withPrefix)
                {
                    mapping = new Mapping(sourcePath.Input, targetPath.Input);
                }
                else
                {
                    mapping = new Mapping(sourcePath.Output, targetPath.Output);
                }

                for (int i = 0; i < sourcePathCount; i++)
                {
                    // Skip source paths that have no target path chosen
                    if (offsets[i] < 0 || offsets[i] >= lengths[i])
                    {
                        continue;
                    }

                    ColumnPath sp = matches[i].Item1;
                    if (!withPrefix)
                    {
                        sp.RemoveFirst();
                    }
                    ColumnPath tp = matches[i].Item2[offsets[i]];
                    if (!withPrefix)
                    {
                        tp.RemoveFirst();
                    }

                    // NOTE(review): the same sp/tp instances are handed to every mapping built here; unless AddMatch
                    // copies them, all mappings share these path objects - confirm this is intended.
                    mapping.AddMatch(new PathMatch(sp, tp));
                }

                return(mapping);
            };

            // Iterative backtracking over the choices in offsets; ends when top drops below 0 or the build limit is hit
            while (top >= 0)
            {
                if (top == colCount) // Element is ready. Process new element.
                {
                    // Stop the whole search once more than MaxMappingsToBuild complete assignments have been reached
                    if (++mappingsBuilt > MaxMappingsToBuild)
                    {
                        break;
                    }

                    // Check coverage. However many source paths have been assigned a non-null target path
                    double coverage = 0;
                    for (int i = 0; i < top; i++)
                    {
                        if (offsets[i] >= 0 && offsets[i] < lengths[i])
                        {
                            coverage += 1;
                        }
                    }

                    // Fraction of source paths with a chosen target path
                    coverage /= colCount;

                    if (coverage >= MinSourcePathsMatched)
                    {
                        // Evaluate the whole mapping (aggregated quality with coverage and other parameters)
                        Mapping currentMapping = BuildSetMapping(top);

                        currentMapping.ComputeSimilarity();
                        currentMapping.Similarity *= coverage;
                        if (currentMapping.Similarity >= MinSetMappingQuality)
                        {
                            mappings.Add(currentMapping);
                        }
                    }

                    // Backtrack: reset exhausted levels so that the next iteration advances the deepest level that still has options
                    top--;
                    while (top >= 0 && (offsets[top] >= lengths[top] || lengths[top] == 0)) // Go up by skipping finished and empty columns
                    {
                        offsets[top--] = -1;
                    }
                }
                else // Find the next valid offset
                {
                    // Mapping made of the choices for the source paths before this level; used for the compatibility check below
                    Mapping currentMapping = BuildSetMapping(top);

                    // Advance to the next usable target path; if none is left the offset ends at lengths[top] (no target chosen)
                    for (offsets[top]++; offsets[top] < lengths[top]; offsets[top]++)
                    {
                        ColumnPath sp = matches[top].Item1;
                        ColumnPath tp = matches[top].Item2[offsets[top]]; // New target path

                        bool canUse = true;

                        // Check if it has not been already used as a target for previous paths
                        for (int i = 0; i < top; i++)
                        {
                            if (offsets[i] < 0 || offsets[i] >= lengths[i])
                            {
                                continue;
                            }
                            ColumnPath usedtp = matches[i].Item2[offsets[i]]; // Used target path (by i-th source path)
                            if (usedtp == tp)
                            {
                                canUse = false; break;
                            }
                        }
                        if (!canUse)
                        {
                            continue;
                        }

                        canUse = currentMapping.Compatible(new PathMatch(sp, tp));
                        if (!canUse)
                        {
                            continue;
                        }

                        break; // Found
                    }

                    // Offset chosen. Go forward by skipping empty columns.
                    top++;
                    while (top < colCount && (offsets[top] >= lengths[top] || lengths[top] == 0)) // Go up (forward) by skipping finished and empty columns
                    {
                        top++;
                    }
                }
            }

            mappings = mappings.OrderByDescending(m => m.Similarity).ToList();

            // Remove prefixes
            foreach (Mapping m in mappings)
            {
                m.RemoveFirst(sourcePath, targetPath);
            }

            // The results are also accumulated in the Mappings collection of this object
            Mappings.AddRange(mappings);
            return(mappings);
        }
Esempio n. 15
0
 /// <summary>
 /// Check whether this path consists of the same segments as the specified path.
 /// </summary>
 /// <param name="path">Path whose Segments are passed to the segment-list overload of SamePath.</param>
 public bool SamePath(ColumnPath path)
 {
     var otherSegments = path.Segments;
     return SamePath(otherSegments);
 }
Esempio n. 16
0
 /// <summary>
 /// Copy constructor: the new match gets its own ColumnPath copies of both paths and the same similarity.
 /// </summary>
 /// <param name="m">Match to copy.</param>
 public PathMatch(PathMatch m)
 {
     this.SourcePath = new ColumnPath(m.SourcePath);
     this.TargetPath = new ColumnPath(m.TargetPath);
     this.Similarity = m.Similarity;
 }
Esempio n. 17
0
 /// <summary>
 /// Create a match between two paths with an explicit similarity. The path objects are stored as given, not copied.
 /// </summary>
 /// <param name="sourcePath">Path on the source side of the match.</param>
 /// <param name="targetPath">Path on the target side of the match.</param>
 /// <param name="similarity">Similarity value stored with the match.</param>
 public PathMatch(ColumnPath sourcePath, ColumnPath targetPath, double similarity)
 {
     this.SourcePath = sourcePath;
     this.TargetPath = targetPath;
     this.Similarity = similarity;
 }
Esempio n. 18
0
 /// <summary>
 /// Create a match between two paths with the similarity 1.0.
 /// </summary>
 /// <param name="sourcePath">Path on the source side of the match.</param>
 /// <param name="targetPath">Path on the target side of the match.</param>
 public PathMatch(ColumnPath sourcePath, ColumnPath targetPath) : this(sourcePath, targetPath, 1.0)
 {
     // Everything is done by the three-argument constructor
 }
Esempio n. 19
0
 /// <summary>
 /// Check whether the target path of this match is the same as or more specific (longer) than the argument.
 /// </summary>
 /// <param name="path">Path that the target path is expected to start with.</param>
 /// <returns>The result of TargetPath.StartsWith for the specified path.</returns>
 public bool MatchesTarget(ColumnPath path)
 {
     bool continuesPath = TargetPath.StartsWith(path);
     return continuesPath;
 }
Esempio n. 20
0
 /// <summary>
 /// Not implemented: every call throws NotImplementedException.
 /// Intended to insert new segments at a specified position.
 /// NOTE(review): the signature has no position parameter yet - to be decided when this is implemented.
 /// </summary>
 public void InsertAt(ColumnPath path)
 {
     throw new NotImplementedException();
 }
Esempio n. 21
0
 /// <summary>
 /// Check whether this path starts with the specified path.
 /// </summary>
 /// <param name="path">Path whose Segments are passed to the segment-list overload of StartsWith.</param>
 public bool StartsWith(ColumnPath path)
 {
     var prefixSegments = path.Segments;
     return StartsWith(prefixSegments);
 }
Esempio n. 22
0
 /// <summary>
 /// Copy constructor. The new path gets its own segment list, while the segment objects themselves are shared with the original path.
 /// </summary>
 /// <param name="path">Path to copy; it is also passed to the base class constructor.</param>
 public ColumnPath(ColumnPath path) : base(path)
 {
     Segments = new List<DcColumn>(path.Segments);
 }
Esempio n. 23
0
 /// <summary>
 /// Not implemented: every call throws NotImplementedException.
 /// Intended to remove the last segments (suffix) of this path.
 /// </summary>
 public void RemoveLast(ColumnPath path)
 {
     throw new NotImplementedException();
 }
Esempio n. 24
0
        /// <summary>
        /// Create and initialize a new mapping which produces a flat target set with all primitive columns for copying primitive data from the source set.
        /// Only identity (PK) source columns are expanded recursively.
        /// For a relational source, this means that all primitive columns of the source table will be mapped with their relational names, no FK-referenced tables will be joined and no artificial column names will be used.
        /// If it is necessary to expand entity columns (non-PK columns of joined tables) then a different implementation is needed (which will require joins, artificial column/path names etc.)
        /// </summary>
        /// <param name="sourceSet">Non-primitive source table.</param>
        /// <param name="targetSet">Non-primitive target table for which new columns are created.</param>
        /// <param name="targetSchema">Schema providing the target types; if null, the schema of the target table is used.</param>
        /// <returns>The new mapping. It has no matches if the source schema is neither SchemaOledb nor SchemaCsv.</returns>
        public Mapping CreatePrimitive(DcTable sourceSet, DcTable targetSet, DcSchema targetSchema)
        {
            Debug.Assert(!sourceSet.IsPrimitive && !targetSet.IsPrimitive, "Wrong use: copy mapping can be created for only non-primitive tables.");
            Debug.Assert(targetSchema != null || targetSet.Schema != null, "Wrong use: target schema must be specified.");

            Mapping  map          = new Mapping(sourceSet, targetSet);
            DcSchema sourceSchema = map.SourceTab.Schema;

            if (targetSchema == null)
            {
                targetSchema = targetSet.Schema;
            }

            if (sourceSchema is SchemaOledb)
            {
                // Relational source: one match per attribute path of the source table
                TableRel relTable = (TableRel)map.SourceTab;
                foreach (ColumnAtt att in relTable.GreaterPaths)
                {
                    ColumnPath sourcePath = new ColumnAtt(att);

                    // Recommend matching target type (mapping primitive types)
                    this.MapPrimitiveSet(att.Output, targetSchema);
                    DcTable targetType = this.GetBestTargetSet(att.Output, targetSchema);

                    DcColumn   targetColumn = new Schema.Column(att.RelationalColumnName, map.TargetTab, targetType, att.IsKey, false);
                    ColumnPath targetPath   = new ColumnPath(targetColumn);
                    targetPath.Name = sourcePath.Name;

                    map.Matches.Add(new PathMatch(sourcePath, targetPath, 1.0));
                }

                return map;
            }

            if (sourceSchema is SchemaCsv)
            {
                // CSV source: one match per column of the source table, except super columns
                DcTable csvTable = (DcTable)map.SourceTab;
                foreach (DcColumn sd in csvTable.Columns)
                {
                    if (sd.IsSuper)
                    {
                        continue;
                    }

                    // Analyze sample values of the column and choose the most specific target type
                    List<string> samples = ((ColumnCsv)sd).SampleValues;

                    string targetTypeName =
                        Com.Schema.Utils.isInt32(samples.ToArray()) ? "Integer"
                        : Com.Schema.Utils.isDouble(samples.ToArray()) ? "Double"
                        : "String";

                    DcTable  targetType   = targetSchema.GetPrimitiveType(targetTypeName);
                    DcColumn targetColumn = targetSchema.Space.CreateColumn(sd.Name, map.TargetTab, targetType, sd.IsKey);

                    ColumnPath sourcePath = new ColumnPath(sd);
                    ColumnPath targetPath = new ColumnPath(targetColumn);

                    map.Matches.Add(new PathMatch(sourcePath, targetPath, 1.0));
                }
            }

            return map;
        }
Esempio n. 25
0
 /// <summary>
 /// Not implemented: every call throws NotImplementedException.
 /// Intended to return the index at which the specified path begins within this path.
 /// </summary>
 public int IndexOf(ColumnPath path)
 {
     throw new NotImplementedException();
 }