Example #1
0
        /// <summary>
        /// Initializes this instance.
        /// </summary>
        protected override void Initialize()
        {
            var sw = new Stopwatch();

            sw.Start();
            _options.Validate();

            SendMessage("Setting up left and right sides of the join");

            // Abstract:
            //
            // The current built-in join infrastructure is very naive as it uses nested
            // loops. Being a O(m*n) operation, the slowdown becomes readily apparent
            // as the size of the data you're working with increases. As such, the
            // current infrastructure is woefully inadequate for large datasets.
            //
            // How can we fix this problem? We could try to implement various join
            // algorithms for different scenarios, which would be a laborious exercise
            // in itself.
            //
            // Or, we can just delegate this problem to the universal swiss army-knife of
            // databases, SQLite.
            //
            // SQLite has many things going for it, including:
            //  - Support for common types of joins (important!)
            //  - LIGHTING FAST insert performance. The current FdoInputOperation is already optimised for SQLite
            //  - LIGHTING FAST read performance
            //  - Ability to use SQL to modify the database internals, such as creating indexes (FDO provider supports SQL commands)
            //
            // As such, SQLite is the perfect candidate for a temp data store to merge two
            // disparate data sources. The time spent setting up this temp SQLite database (ie. Copying "left" and
            // "right" side data into it) is negligible in the grand scheme of things.
            //
            // Process Overview:
            //
            // 1. Create temp SQLite database
            // 2. Pump left and right sides into this database
            // 3. Create indexes on join columns of both tables (IMPORTANT)
            // 4. Create a view encapsulating our join
            // 5. Copy this view out to our target
            //
            // Additional Notes:
            //
            // We will have to change our supported join types to line up with what SQLite supports, which
            // are:
            //  - INNER JOIN
            //  - LEFT OUTER JOIN
            //
            // SQLite does not support RIGHT OUTER JOINs but these could be emulated by inverting the
            // "left" and "right" tables for the LEFT OUTER JOIN. FULL OUTER JOIN is not supporte by
            // SQLite so this will be removed from our API.
            //
            // Since this SQLite database is temporary, we don't bother with putting
            // the right spatial context in there. Spatial contexts do not (should not) affect
            // the underlying coordinates of any geometries moving to and from the data store.
            //
            // SQLite views by default are represented as non-Feature classes. Geometry properties
            // default to BLOB data types. To "fix" this we need to add a new entry to the geometry_columns
            // metadata table. This may produce an incorrect feature class (ie. Has 1-n geometry properties
            // but no designated one), this is okay as we only care that the properties are there and the
            // temp-target property mappings check out.
            //
            // Although the implementation will change, the requirements remain the same, which are:
            //
            // 1. The target class must not already exist (as it will be created)
            // 2. If no designated geometry is specified, then the class definition will be FdoClass and not FdoFeatureClass

            ClassDefinition leftCls   = null;
            ClassDefinition rightCls  = null;
            ClassDefinition mergedCls = null;

            using (var leftSvc = _options.Left.Connection.CreateFeatureService())
                using (var rightSvc = _options.Right.Connection.CreateFeatureService())
                {
                    leftCls  = leftSvc.GetClassByName(_options.Left.SchemaName, _options.Left.ClassName);
                    rightCls = rightSvc.GetClassByName(_options.Right.SchemaName, _options.Right.ClassName);

                    if (leftCls == null)
                    {
                        throw new FdoETLException("Left class not found " + _options.Left.SchemaName + ":" + _options.Left.ClassName);
                    }

                    if (rightCls == null)
                    {
                        throw new FdoETLException("Right class not found " + _options.Right.SchemaName + ":" + _options.Right.ClassName);
                    }

                    var leftJoinProps  = new List <string>(_options.JoinPairs.AllKeys);
                    var rightJoinProps = new List <string>();
                    foreach (var p in leftJoinProps)
                    {
                        rightJoinProps.Add(_options.JoinPairs[p]);
                    }

                    var leftGeom  = (!string.IsNullOrEmpty(_options.GeometryProperty) && _options.Side == JoinSide.Left) ? _options.GeometryProperty : null;
                    var rightGeom = (!string.IsNullOrEmpty(_options.GeometryProperty) && _options.Side == JoinSide.Right) ? _options.GeometryProperty : null;

                    PrepareClass(leftCls, _options.LeftProperties, leftJoinProps, _options.LeftPrefix, leftGeom);
                    PrepareClass(rightCls, _options.RightProperties, rightJoinProps, _options.RightPrefix, rightGeom);

                    mergedCls = CreateMergedClass(leftCls, rightCls);
                }

            var dprops = new NameValueCollection();

            dprops["File"] = Path.GetTempFileName();

            var tempSchema = new FeatureSchema("Default", "");
            var leftCopy   = FdoSchemaUtil.CloneClass(leftCls);
            var rightCopy  = FdoSchemaUtil.CloneClass(rightCls);

            string leftClassName  = "LEFT_SIDE";
            string rightClassName = "RIGHT_SIDE";

            leftCopy.Name  = leftClassName;
            rightCopy.Name = rightClassName;

            tempSchema.Classes.Add(leftCopy);
            tempSchema.Classes.Add(rightCopy);

            //Create SQLite database
            Register(new FdoCreateDataStoreOperation("OSGeo.SQLite", dprops, null));

            //Apply temp schema
            var tempConn = new FdoConnection("OSGeo.SQLite", "File=" + dprops["File"]);

            Register(new FdoApplySchemaOperation(tempConn, tempSchema));

#if DEBUG
            Register(new FdoSingleActionOperation(() => { SendMessage("Temp db created in: " + dprops["File"]); }));
#endif

            //Prep property mappings for bulk copy
            var leftMaps  = new NameValueCollection();
            var rightMaps = new NameValueCollection();

            var leftQuery  = new FeatureQueryOptions(leftCls.Name);
            var rightQuery = new FeatureQueryOptions(rightCls.Name);

            foreach (var leftp in _options.LeftProperties)
            {
                if (string.IsNullOrEmpty(_options.LeftPrefix))
                {
                    leftMaps.Add(leftp, leftp);
                }
                else
                {
                    leftMaps.Add(leftp, _options.LeftPrefix + leftp);
                }
                leftQuery.AddFeatureProperty(leftp);
            }
            foreach (var rightp in _options.RightProperties)
            {
                if (string.IsNullOrEmpty(_options.RightPrefix))
                {
                    rightMaps.Add(rightp, rightp);
                }
                else
                {
                    rightMaps.Add(rightp, _options.RightPrefix + rightp);
                }
                rightQuery.AddFeatureProperty(rightp);
            }

            if (!string.IsNullOrEmpty(_options.LeftFilter))
            {
                leftQuery.Filter = _options.LeftFilter;
            }

            if (!string.IsNullOrEmpty(_options.RightFilter))
            {
                rightQuery.Filter = _options.RightFilter;
            }

            //don't forget join keys
            foreach (string l in _options.JoinPairs.Keys)
            {
                string r = _options.JoinPairs[l];

                if (!_options.LeftProperties.Contains(l))
                {
                    leftQuery.AddFeatureProperty(l);

                    if (string.IsNullOrEmpty(_options.LeftPrefix))
                    {
                        leftMaps.Add(l, l);
                    }
                    else
                    {
                        leftMaps.Add(l, _options.LeftPrefix + l);
                    }
                }

                if (!_options.RightProperties.Contains(r))
                {
                    rightQuery.AddFeatureProperty(r);

                    if (string.IsNullOrEmpty(_options.RightPrefix))
                    {
                        rightMaps.Add(r, r);
                    }
                    else
                    {
                        rightMaps.Add(r, _options.RightPrefix + r);
                    }
                }
            }

            //don't forget geometry!
            if (!string.IsNullOrEmpty(_options.GeometryProperty))
            {
                if (_options.Side == JoinSide.Left)
                {
                    if (!leftQuery.PropertyList.Contains(_options.GeometryProperty))
                    {
                        leftQuery.AddFeatureProperty(_options.GeometryProperty);

                        if (string.IsNullOrEmpty(_options.LeftPrefix))
                        {
                            leftMaps.Add(_options.GeometryProperty, _options.GeometryProperty);
                        }
                        else
                        {
                            leftMaps.Add(_options.GeometryProperty, _options.LeftPrefix + _options.GeometryProperty);
                        }
                    }
                }
                else
                {
                    if (!rightQuery.PropertyList.Contains(_options.GeometryProperty))
                    {
                        rightQuery.AddFeatureProperty(_options.GeometryProperty);

                        if (string.IsNullOrEmpty(_options.RightPrefix))
                        {
                            rightMaps.Add(_options.GeometryProperty, _options.GeometryProperty);
                        }
                        else
                        {
                            rightMaps.Add(_options.GeometryProperty, _options.RightPrefix + _options.GeometryProperty);
                        }
                    }
                }
            }

            var copyLeftErrors   = new List <Exception>();
            var copyRightErrors  = new List <Exception>();
            var copyTargetErrors = new List <Exception>();

            //Copy left source
            ParameterlessAction copyLeft = () =>
            {
                SendMessage("Copying left source with filter: " + _options.LeftFilter);
                var copy = ExpressUtility.CreateBulkCopy(
                    _options.Left.Connection,
                    tempConn,
                    _options.Left.SchemaName,
                    leftQuery,
                    tempSchema.Name,    //temp sqlite schema name
                    leftClassName,      //sqlite "left" class name
                    leftMaps);

                copy.ProcessMessage += delegate(object sender, MessageEventArgs e)
                {
                    SendMessage(e.Message);
                };
                copy.Execute();
                copyLeftErrors.AddRange(copy.GetAllErrors());
            };
            Register(new FdoSingleActionOperation(copyLeft));

            //Register(new FdoInputOperation(_options.Left.Connection, leftQuery));
            //Register(new FdoOutputOperation(tempConn, leftClassName, leftMaps));

            //Copy right source
            ParameterlessAction copyRight = () =>
            {
                SendMessage("Copying right source with filter: " + _options.RightFilter);
                var copy = ExpressUtility.CreateBulkCopy(
                    _options.Right.Connection,
                    tempConn,
                    _options.Right.SchemaName,
                    rightQuery,
                    tempSchema.Name,    //temp sqlite schema name
                    rightClassName,     //sqlite "right" class name
                    rightMaps);

                copy.ProcessMessage += delegate(object sender, MessageEventArgs e)
                {
                    SendMessage(e.Message);
                };
                copy.Execute();
                copyRightErrors.AddRange(copy.GetAllErrors());
            };
            Register(new FdoSingleActionOperation(copyRight));

            //Register(new FdoInputOperation(_options.Right.Connection, rightQuery));
            //Register(new FdoOutputOperation(tempConn, rightClassName, rightMaps));

            string srcClass = "VIEW_INPUT";

            //Create indexes on left and right sides to optimize read performance
            ParameterlessAction indexLeft = () =>
            {
                using (var svc = tempConn.CreateFeatureService())
                {
                    SendMessage("Creating left side index in temp db");
                    string sql    = "CREATE INDEX IDX_LEFT_ID ON " + leftClassName + "(";
                    var    tokens = new List <string>();
                    foreach (string p in _options.JoinPairs.Keys)
                    {
                        if (!string.IsNullOrEmpty(_options.LeftPrefix))
                        {
                            tokens.Add(_options.LeftPrefix + p);
                        }
                        else
                        {
                            tokens.Add(p);
                        }
                    }
                    sql = sql + string.Join(", ", tokens.ToArray()) + ")";
                    SendMessage("Executing SQL: " + sql);
                    svc.ExecuteSQLNonQuery(sql);
                }
            };
            ParameterlessAction indexRight = () =>
            {
                using (var svc = tempConn.CreateFeatureService())
                {
                    SendMessage("Creating right side index in temp db");
                    string sql    = "CREATE INDEX IDX_RIGHT_ID ON " + rightClassName + "(";
                    var    tokens = new List <string>();
                    foreach (string p in _options.JoinPairs.Keys)
                    {
                        string prop = _options.JoinPairs[p];
                        if (!string.IsNullOrEmpty(_options.RightPrefix))
                        {
                            tokens.Add(_options.RightPrefix + prop);
                        }
                        else
                        {
                            tokens.Add(prop);
                        }
                    }
                    sql = sql + string.Join(", ", tokens.ToArray()) + ")";
                    SendMessage("Executing SQL: " + sql);
                    svc.ExecuteSQLNonQuery(sql);
                }
            };
            Register(new FdoSingleActionOperation(indexLeft));
            Register(new FdoSingleActionOperation(indexRight));

            //Create view
            ParameterlessAction createView = () =>
            {
                using (var svc = tempConn.CreateFeatureService())
                {
                    SendMessage("Creating view in temp db");
                    StringBuilder sql = new StringBuilder("CREATE VIEW ");
                    sql.Append(srcClass + " AS SELECT ");
                    foreach (var p in _options.LeftProperties)
                    {
                        if (!string.IsNullOrEmpty(_options.LeftPrefix))
                        {
                            sql.Append("l." + _options.LeftPrefix + p + ", ");
                        }
                        else
                        {
                            sql.Append("l." + p + ", ");
                        }
                    }

                    if (!string.IsNullOrEmpty(_options.GeometryProperty))
                    {
                        if (_options.Side == JoinSide.Left)
                        {
                            if (!_options.LeftProperties.Contains(_options.GeometryProperty))
                            {
                                if (!string.IsNullOrEmpty(_options.LeftPrefix))
                                {
                                    sql.Append("l." + _options.LeftPrefix + _options.GeometryProperty + ", ");
                                }
                                else
                                {
                                    sql.Append("l." + _options.GeometryProperty + ", ");
                                }
                            }
                        }
                        else
                        {
                            if (!_options.RightProperties.Contains(_options.GeometryProperty))
                            {
                                if (!string.IsNullOrEmpty(_options.RightPrefix))
                                {
                                    sql.Append("r." + _options.RightPrefix + _options.GeometryProperty + ", ");
                                }
                                else
                                {
                                    sql.Append("r." + _options.GeometryProperty + ", ");
                                }
                            }
                        }
                    }

                    int rc = _options.RightProperties.Count;
                    int i  = 0;
                    foreach (var p in _options.RightProperties)
                    {
                        string pn = p;
                        if (!string.IsNullOrEmpty(_options.RightPrefix))
                        {
                            pn = _options.RightPrefix + pn;
                        }

                        if (i == rc - 1)
                        {
                            sql.Append("r." + pn + " FROM ");
                        }
                        else
                        {
                            sql.Append("r." + pn + ", ");
                        }
                        i++;
                    }
                    sql.Append(leftClassName + " l ");

                    switch (_options.JoinType)
                    {
                    case FdoJoinType.Inner:
                        sql.Append("INNER JOIN " + rightClassName + " r ON ");
                        break;

                    case FdoJoinType.Left:
                        sql.Append("LEFT OUTER JOIN " + rightClassName + " r ON ");
                        break;

                    default:
                        throw new FdoETLException("Unsupported join type: " + _options.JoinType);
                    }

                    rc = _options.JoinPairs.Count;
                    i  = 0;
                    foreach (string l in _options.JoinPairs.Keys)
                    {
                        string r = _options.JoinPairs[l];

                        string left  = l;
                        string right = r;

                        if (!string.IsNullOrEmpty(_options.LeftPrefix))
                        {
                            left = _options.LeftPrefix + left;
                        }

                        if (!string.IsNullOrEmpty(_options.RightPrefix))
                        {
                            right = _options.RightPrefix + right;
                        }

                        if (i == rc - 1)
                        {
                            sql.Append("l." + left + " = r." + right);
                        }
                        else
                        {
                            sql.Append("l." + left + " = r." + right + " AND ");
                        }

                        i++;
                    }
                    SendMessage("Executing SQL: " + sql.ToString());
                    svc.ExecuteSQLNonQuery(sql.ToString());
                }
            };
            Register(new FdoSingleActionOperation(createView));

            //Hack FDO metadata to make this a feature class
            if (!string.IsNullOrEmpty(_options.GeometryProperty))
            {
                ParameterlessAction reg = () =>
                {
                    using (var svc = tempConn.CreateFeatureService())
                    {
                        SendMessage("Exposing view as a FDO feature class");
                        string sql = "INSERT INTO geometry_columns(f_table_name, f_geometry_column, geometry_type, geometry_dettype, coord_dimension, srid, geometry_format) VALUES('" + srcClass + "','" + _options.GeometryProperty + "',15,7743,0,0,'FGF')";
                        SendMessage("Executing SQL: " + sql.ToString());
                        svc.ExecuteSQLNonQuery(sql);
                    }
                };
                Register(new FdoSingleActionOperation(reg));
            }

            //Copy view to target
            ParameterlessAction applyTarget = () =>
            {
                using (var svc = _options.Target.Connection.CreateFeatureService())
                {
                    SendMessage("Fetching target schema");
                    var schema = svc.GetSchemaByName(_options.Target.SchemaName);

                    IncompatibleClass cls;
                    if (!svc.CanApplyClass(mergedCls, out cls))
                    {
                        SendMessage("Fixing incompatibilities in merged class");
                        mergedCls = svc.AlterClassDefinition(mergedCls, cls);
                    }

                    SendMessage("Adding merged class to target schema");
                    schema.Classes.Add(mergedCls);
                    SendMessage("Applying modified target schema");
                    svc.ApplySchema(schema);
                }
            };
            Register(new FdoSingleActionOperation(applyTarget));

            var tempQuery     = new FeatureQueryOptions("VIEW_INPUT");
            var targetMapping = new NameValueCollection();
            foreach (PropertyDefinition p in mergedCls.Properties)
            {
                tempQuery.AddFeatureProperty(p.Name);
                //Target class is a replica of the temp one, so all properties
                //have the same name in both source and target
                targetMapping[p.Name] = p.Name;
            }

            ParameterlessAction copyToTarget = () =>
            {
                var copy = ExpressUtility.CreateBulkCopy(
                    tempConn,
                    _options.Target.Connection,
                    tempSchema.Name,
                    tempQuery,
                    _options.Target.SchemaName,
                    _options.Target.ClassName,
                    targetMapping);

                copy.ProcessMessage += delegate(object sender, MessageEventArgs e)
                {
                    SendMessage(e.Message);
                };
                copy.Execute();
                copyTargetErrors.AddRange(copy.GetAllErrors());
                sw.Stop();
            };
            Register(new FdoSingleActionOperation(copyToTarget));

            //Log all errors
            ParameterlessAction logErrors = () =>
            {
                SendMessage(copyLeftErrors.Count + " errors encountered copying left source to temp db");
                _allErrors.AddRange(copyLeftErrors);
                SendMessage(copyRightErrors.Count + " errors encountered copying right source to temp db");
                _allErrors.AddRange(copyRightErrors);
                SendMessage(copyTargetErrors.Count + " errors encountered copying merged source to target");
                _allErrors.AddRange(copyTargetErrors);
                SendMessage("Join Operation completed in " + sw.Elapsed.ToString());
            };
            Register(new FdoSingleActionOperation(logErrors));
        }
 public FdoSingleActionOperation(ParameterlessAction action)
 {
     _action = action;
 }
Example #3
0
 public FdoSingleActionOperation(ParameterlessAction action)
 {
     _action = action;
 }