/// <summary>
/// Initializes this instance by validating the join options and registering, in
/// execution order, the operations that perform the join through a temporary
/// SQLite database: create the database, apply the temp schema, copy the left and
/// right sides in, index the join columns, create the join view, create the merged
/// class in the target and finally copy the view into the target.
/// </summary>
/// <exception cref="FdoETLException">
/// Thrown immediately if the left or right class cannot be found. Thrown when the
/// registered "create view" action runs if no properties are selected or the join
/// type is neither inner nor left outer.
/// </exception>
protected override void Initialize()
{
    var sw = new Stopwatch();
    sw.Start();

    _options.Validate();

    SendMessage("Setting up left and right sides of the join");

    // Abstract:
    //
    // The current built-in join infrastructure is very naive as it uses nested
    // loops. Being a O(m*n) operation, the slowdown becomes readily apparent
    // as the size of the data you're working with increases. As such, the
    // current infrastructure is woefully inadequate for large datasets.
    //
    // How can we fix this problem? We could try to implement various join
    // algorithms for different scenarios, which would be a laborious exercise
    // in itself.
    //
    // Or, we can just delegate this problem to the universal swiss army-knife of
    // databases, SQLite.
    //
    // SQLite has many things going for it, including:
    //  - Support for common types of joins (important!)
    //  - LIGHTNING FAST insert performance. The current FdoInputOperation is already optimised for SQLite
    //  - LIGHTNING FAST read performance
    //  - Ability to use SQL to modify the database internals, such as creating indexes (FDO provider supports SQL commands)
    //
    // As such, SQLite is the perfect candidate for a temp data store to merge two
    // disparate data sources. The time spent setting up this temp SQLite database (ie. Copying "left" and
    // "right" side data into it) is negligible in the grand scheme of things.
    //
    // Process Overview:
    //
    // 1. Create temp SQLite database
    // 2. Pump left and right sides into this database
    // 3. Create indexes on join columns of both tables (IMPORTANT)
    // 4. Create a view encapsulating our join
    // 5. Copy this view out to our target
    //
    // Additional Notes:
    //
    // We will have to change our supported join types to line up with what SQLite supports, which
    // are:
    //  - INNER JOIN
    //  - LEFT OUTER JOIN
    //
    // SQLite does not support RIGHT OUTER JOINs but these could be emulated by inverting the
    // "left" and "right" tables for the LEFT OUTER JOIN. FULL OUTER JOIN is not supported by
    // SQLite so this will be removed from our API.
    //
    // Since this SQLite database is temporary, we don't bother with putting
    // the right spatial context in there. Spatial contexts do not (should not) affect
    // the underlying coordinates of any geometries moving to and from the data store.
    //
    // SQLite views by default are represented as non-Feature classes. Geometry properties
    // default to BLOB data types. To "fix" this we need to add a new entry to the geometry_columns
    // metadata table. This may produce an incorrect feature class (ie. Has 1-n geometry properties
    // but no designated one), this is okay as we only care that the properties are there and the
    // temp-target property mappings check out.
    //
    // Although the implementation will change, the requirements remain the same, which are:
    //
    // 1. The target class must not already exist (as it will be created)
    // 2. If no designated geometry is specified, then the class definition will be FdoClass and not FdoFeatureClass

    ClassDefinition leftCls = null;
    ClassDefinition rightCls = null;
    ClassDefinition mergedCls = null;

    using (var leftSvc = _options.Left.Connection.CreateFeatureService())
    using (var rightSvc = _options.Right.Connection.CreateFeatureService())
    {
        leftCls = leftSvc.GetClassByName(_options.Left.SchemaName, _options.Left.ClassName);
        rightCls = rightSvc.GetClassByName(_options.Right.SchemaName, _options.Right.ClassName);

        if (leftCls == null)
            throw new FdoETLException("Left class not found " + _options.Left.SchemaName + ":" + _options.Left.ClassName);

        if (rightCls == null)
            throw new FdoETLException("Right class not found " + _options.Right.SchemaName + ":" + _options.Right.ClassName);

        //Join pairs map a left property name (key) to a right property name (value)
        var leftJoinProps = new List<string>(_options.JoinPairs.AllKeys);
        var rightJoinProps = new List<string>();
        foreach (var p in leftJoinProps)
        {
            rightJoinProps.Add(_options.JoinPairs[p]);
        }

        //The designated geometry (if any) belongs to exactly one side of the join
        bool hasGeometry = !string.IsNullOrEmpty(_options.GeometryProperty);
        var leftGeom = (hasGeometry && _options.Side == JoinSide.Left) ? _options.GeometryProperty : null;
        var rightGeom = (hasGeometry && _options.Side == JoinSide.Right) ? _options.GeometryProperty : null;

        PrepareClass(leftCls, _options.LeftProperties, leftJoinProps, _options.LeftPrefix, leftGeom);
        PrepareClass(rightCls, _options.RightProperties, rightJoinProps, _options.RightPrefix, rightGeom);

        mergedCls = CreateMergedClass(leftCls, rightCls);
    }

    var dprops = new NameValueCollection();
    dprops["File"] = Path.GetTempFileName();

    var tempSchema = new FeatureSchema("Default", "");
    var leftCopy = FdoSchemaUtil.CloneClass(leftCls);
    var rightCopy = FdoSchemaUtil.CloneClass(rightCls);

    string leftClassName = "LEFT_SIDE";
    string rightClassName = "RIGHT_SIDE";

    leftCopy.Name = leftClassName;
    rightCopy.Name = rightClassName;

    tempSchema.Classes.Add(leftCopy);
    tempSchema.Classes.Add(rightCopy);

    //Create SQLite database
    Register(new FdoCreateDataStoreOperation("OSGeo.SQLite", dprops, null));

    //Apply temp schema
    var tempConn = new FdoConnection("OSGeo.SQLite", "File=" + dprops["File"]);
    Register(new FdoApplySchemaOperation(tempConn, tempSchema));

#if DEBUG
    Register(new FdoSingleActionOperation(() => { SendMessage("Temp db created in: " + dprops["File"]); }));
#endif

    //Prep property mappings for bulk copy. Each mapping goes from the source
    //property name to the (optionally prefixed) property name in the temp db
    var leftMaps = new NameValueCollection();
    var rightMaps = new NameValueCollection();

    var leftQuery = new FeatureQueryOptions(leftCls.Name);
    var rightQuery = new FeatureQueryOptions(rightCls.Name);

    foreach (var leftp in _options.LeftProperties)
    {
        leftMaps.Add(leftp, ApplyPrefix(_options.LeftPrefix, leftp));
        leftQuery.AddFeatureProperty(leftp);
    }
    foreach (var rightp in _options.RightProperties)
    {
        rightMaps.Add(rightp, ApplyPrefix(_options.RightPrefix, rightp));
        rightQuery.AddFeatureProperty(rightp);
    }

    if (!string.IsNullOrEmpty(_options.LeftFilter))
        leftQuery.Filter = _options.LeftFilter;

    if (!string.IsNullOrEmpty(_options.RightFilter))
        rightQuery.Filter = _options.RightFilter;

    //don't forget join keys
    foreach (string l in _options.JoinPairs.Keys)
    {
        string r = _options.JoinPairs[l];

        if (!_options.LeftProperties.Contains(l))
        {
            leftQuery.AddFeatureProperty(l);
            leftMaps.Add(l, ApplyPrefix(_options.LeftPrefix, l));
        }

        if (!_options.RightProperties.Contains(r))
        {
            rightQuery.AddFeatureProperty(r);
            rightMaps.Add(r, ApplyPrefix(_options.RightPrefix, r));
        }
    }

    //don't forget geometry!
    if (!string.IsNullOrEmpty(_options.GeometryProperty))
    {
        if (_options.Side == JoinSide.Left)
        {
            if (!leftQuery.PropertyList.Contains(_options.GeometryProperty))
            {
                leftQuery.AddFeatureProperty(_options.GeometryProperty);
                leftMaps.Add(_options.GeometryProperty, ApplyPrefix(_options.LeftPrefix, _options.GeometryProperty));
            }
        }
        else
        {
            if (!rightQuery.PropertyList.Contains(_options.GeometryProperty))
            {
                rightQuery.AddFeatureProperty(_options.GeometryProperty);
                rightMaps.Add(_options.GeometryProperty, ApplyPrefix(_options.RightPrefix, _options.GeometryProperty));
            }
        }
    }

    //Errors are collected per copy step while the pipeline runs and are
    //reported together by the final registered action
    var copyLeftErrors = new List<Exception>();
    var copyRightErrors = new List<Exception>();
    var copyTargetErrors = new List<Exception>();

    //Copy left source
    ParameterlessAction copyLeft = () =>
    {
        SendMessage("Copying left source with filter: " + _options.LeftFilter);
        var copy = ExpressUtility.CreateBulkCopy(
            _options.Left.Connection,
            tempConn,
            _options.Left.SchemaName,
            leftQuery,
            tempSchema.Name,    //temp sqlite schema name
            leftClassName,      //sqlite "left" class name
            leftMaps);

        copy.ProcessMessage += delegate(object sender, MessageEventArgs e)
        {
            SendMessage(e.Message);
        };
        copy.Execute();
        copyLeftErrors.AddRange(copy.GetAllErrors());
    };
    Register(new FdoSingleActionOperation(copyLeft));

    //Copy right source
    ParameterlessAction copyRight = () =>
    {
        SendMessage("Copying right source with filter: " + _options.RightFilter);
        var copy = ExpressUtility.CreateBulkCopy(
            _options.Right.Connection,
            tempConn,
            _options.Right.SchemaName,
            rightQuery,
            tempSchema.Name,    //temp sqlite schema name
            rightClassName,     //sqlite "right" class name
            rightMaps);

        copy.ProcessMessage += delegate(object sender, MessageEventArgs e)
        {
            SendMessage(e.Message);
        };
        copy.Execute();
        copyRightErrors.AddRange(copy.GetAllErrors());
    };
    Register(new FdoSingleActionOperation(copyRight));

    string srcClass = "VIEW_INPUT";

    //Create indexes on left and right sides to optimize read performance
    ParameterlessAction indexLeft = () =>
    {
        using (var svc = tempConn.CreateFeatureService())
        {
            SendMessage("Creating left side index in temp db");
            var tokens = new List<string>();
            foreach (string p in _options.JoinPairs.Keys)
            {
                tokens.Add(ApplyPrefix(_options.LeftPrefix, p));
            }
            string sql = "CREATE INDEX IDX_LEFT_ID ON " + leftClassName + "(" + string.Join(", ", tokens.ToArray()) + ")";
            SendMessage("Executing SQL: " + sql);
            svc.ExecuteSQLNonQuery(sql);
        }
    };
    ParameterlessAction indexRight = () =>
    {
        using (var svc = tempConn.CreateFeatureService())
        {
            SendMessage("Creating right side index in temp db");
            var tokens = new List<string>();
            foreach (string p in _options.JoinPairs.Keys)
            {
                tokens.Add(ApplyPrefix(_options.RightPrefix, _options.JoinPairs[p]));
            }
            string sql = "CREATE INDEX IDX_RIGHT_ID ON " + rightClassName + "(" + string.Join(", ", tokens.ToArray()) + ")";
            SendMessage("Executing SQL: " + sql);
            svc.ExecuteSQLNonQuery(sql);
        }
    };
    Register(new FdoSingleActionOperation(indexLeft));
    Register(new FdoSingleActionOperation(indexRight));

    //Create view
    ParameterlessAction createView = () =>
    {
        using (var svc = tempConn.CreateFeatureService())
        {
            SendMessage("Creating view in temp db");

            //Collect the select list first and join it afterwards. This keeps the
            //statement well-formed no matter which of the lists below is empty
            //(the FROM keyword no longer depends on there being a last right property)
            var columns = new List<string>();

            foreach (var p in _options.LeftProperties)
            {
                columns.Add("l." + ApplyPrefix(_options.LeftPrefix, p));
            }

            if (!string.IsNullOrEmpty(_options.GeometryProperty))
            {
                if (_options.Side == JoinSide.Left)
                {
                    if (!_options.LeftProperties.Contains(_options.GeometryProperty))
                        columns.Add("l." + ApplyPrefix(_options.LeftPrefix, _options.GeometryProperty));
                }
                else
                {
                    if (!_options.RightProperties.Contains(_options.GeometryProperty))
                        columns.Add("r." + ApplyPrefix(_options.RightPrefix, _options.GeometryProperty));
                }
            }

            foreach (var p in _options.RightProperties)
            {
                columns.Add("r." + ApplyPrefix(_options.RightPrefix, p));
            }

            if (columns.Count == 0)
                throw new FdoETLException("No properties were selected for the join");

            StringBuilder sql = new StringBuilder("CREATE VIEW ");
            sql.Append(srcClass + " AS SELECT ");
            sql.Append(string.Join(", ", columns.ToArray()));
            sql.Append(" FROM " + leftClassName + " l ");

            switch (_options.JoinType)
            {
                case FdoJoinType.Inner:
                    sql.Append("INNER JOIN " + rightClassName + " r ON ");
                    break;
                case FdoJoinType.Left:
                    sql.Append("LEFT OUTER JOIN " + rightClassName + " r ON ");
                    break;
                default:
                    throw new FdoETLException("Unsupported join type: " + _options.JoinType);
            }

            var conditions = new List<string>();
            foreach (string l in _options.JoinPairs.Keys)
            {
                string left = ApplyPrefix(_options.LeftPrefix, l);
                string right = ApplyPrefix(_options.RightPrefix, _options.JoinPairs[l]);
                conditions.Add("l." + left + " = r." + right);
            }
            sql.Append(string.Join(" AND ", conditions.ToArray()));

            SendMessage("Executing SQL: " + sql.ToString());
            svc.ExecuteSQLNonQuery(sql.ToString());
        }
    };
    Register(new FdoSingleActionOperation(createView));

    //Hack FDO metadata to make this a feature class
    if (!string.IsNullOrEmpty(_options.GeometryProperty))
    {
        ParameterlessAction reg = () =>
        {
            using (var svc = tempConn.CreateFeatureService())
            {
                SendMessage("Exposing view as a FDO feature class");

                //The view exposes the geometry under its prefixed name (see the
                //select list built above), so that is the column name which has
                //to be registered in the metadata table
                string geomPrefix = (_options.Side == JoinSide.Left) ? _options.LeftPrefix : _options.RightPrefix;
                string geomColumn = ApplyPrefix(geomPrefix, _options.GeometryProperty);

                string sql = "INSERT INTO geometry_columns(f_table_name, f_geometry_column, geometry_type, geometry_dettype, coord_dimension, srid, geometry_format) VALUES('" + srcClass + "','" + geomColumn + "',15,7743,0,0,'FGF')";
                SendMessage("Executing SQL: " + sql);
                svc.ExecuteSQLNonQuery(sql);
            }
        };
        Register(new FdoSingleActionOperation(reg));
    }

    //Copy view to target
    ParameterlessAction applyTarget = () =>
    {
        using (var svc = _options.Target.Connection.CreateFeatureService())
        {
            SendMessage("Fetching target schema");
            var schema = svc.GetSchemaByName(_options.Target.SchemaName);

            IncompatibleClass cls;
            if (!svc.CanApplyClass(mergedCls, out cls))
            {
                SendMessage("Fixing incompatibilities in merged class");
                mergedCls = svc.AlterClassDefinition(mergedCls, cls);
            }

            SendMessage("Adding merged class to target schema");
            schema.Classes.Add(mergedCls);
            SendMessage("Applying modified target schema");
            svc.ApplySchema(schema);
        }
    };
    Register(new FdoSingleActionOperation(applyTarget));

    //NOTE(review): this mapping is built now, from the merged class as it is
    //before applyTarget runs. If AlterClassDefinition renames properties, the
    //mapping would not reflect that - confirm whether this can happen
    var tempQuery = new FeatureQueryOptions(srcClass);
    var targetMapping = new NameValueCollection();
    foreach (PropertyDefinition p in mergedCls.Properties)
    {
        tempQuery.AddFeatureProperty(p.Name);
        //Target class is a replica of the temp one, so all properties
        //have the same name in both source and target
        targetMapping[p.Name] = p.Name;
    }

    ParameterlessAction copyToTarget = () =>
    {
        var copy = ExpressUtility.CreateBulkCopy(
            tempConn,
            _options.Target.Connection,
            tempSchema.Name,
            tempQuery,
            _options.Target.SchemaName,
            _options.Target.ClassName,
            targetMapping);

        copy.ProcessMessage += delegate(object sender, MessageEventArgs e)
        {
            SendMessage(e.Message);
        };
        copy.Execute();
        copyTargetErrors.AddRange(copy.GetAllErrors());
        sw.Stop();
    };
    Register(new FdoSingleActionOperation(copyToTarget));

    //Log all errors
    ParameterlessAction logErrors = () =>
    {
        SendMessage(copyLeftErrors.Count + " errors encountered copying left source to temp db");
        _allErrors.AddRange(copyLeftErrors);
        SendMessage(copyRightErrors.Count + " errors encountered copying right source to temp db");
        _allErrors.AddRange(copyRightErrors);
        SendMessage(copyTargetErrors.Count + " errors encountered copying merged source to target");
        _allErrors.AddRange(copyTargetErrors);
        SendMessage("Join Operation completed in " + sw.Elapsed.ToString());
    };
    Register(new FdoSingleActionOperation(logErrors));
}

/// <summary>
/// Returns the name a property has in the temp database: the name itself when
/// no prefix is configured, otherwise the prefix followed by the name.
/// </summary>
/// <param name="prefix">The configured prefix. May be null or empty.</param>
/// <param name="name">The property name.</param>
/// <returns>The (optionally prefixed) property name.</returns>
private static string ApplyPrefix(string prefix, string name)
{
    return string.IsNullOrEmpty(prefix) ? name : prefix + name;
}
/// <summary>
/// Builds a query against the class on the left side of the join. No
/// properties or filter are set on the returned query.
/// </summary>
/// <returns>A new query whose class name is the left side's class name</returns>
internal FeatureQueryOptions CreateLeftQuery()
{
    return new FeatureQueryOptions(_Left.ClassName);
}
/// <summary>
/// Builds a query against the class on the right side of the join. No
/// properties or filter are set on the returned query.
/// </summary>
/// <returns>A new query whose class name is the right side's class name</returns>
internal FeatureQueryOptions CreateRightQuery()
{
    return new FeatureQueryOptions(_Right.ClassName);
}
/// <summary>
/// Creates a new <see cref="FdoInputOperation"/> for the given connection and query.
/// </summary>
/// <param name="conn">The connection stored by this operation.</param>
/// <param name="query">The query assigned to <c>Query</c>.</param>
public FdoInputOperation(FdoConnection conn, FeatureQueryOptions query)
{
    this._conn = conn;
    Query = query;
}
/// <summary>
/// Creates a bulk copy that copies a single class from one connection to another.
/// The copy is configured to flatten geometries and force WKB.
/// </summary>
/// <param name="sourceConn">The connection to copy from</param>
/// <param name="targetConn">The connection to copy to</param>
/// <param name="srcSchemaName">The name of the source schema</param>
/// <param name="srcQuery">Supplies the source class name and, if set, the source filter</param>
/// <param name="targetSchemaName">The name of the target schema</param>
/// <param name="targetClassName">The name of the target class</param>
/// <param name="propertyMapping">Source property names (keys) mapped to target property names (values)</param>
/// <returns>The bulk copy, ready to be executed</returns>
public static FdoBulkCopy CreateBulkCopy(
    FdoConnection sourceConn,
    FdoConnection targetConn,
    string srcSchemaName,
    FeatureQueryOptions srcQuery,
    string targetSchemaName,
    string targetClassName,
    NameValueCollection propertyMapping)
{
    //Names under which the two connections are registered with the copy options
    const string sourceName = "SOURCE";
    const string targetName = "TARGET";

    var connections = new Dictionary<string, FdoConnection>();
    connections.Add(sourceName, sourceConn);
    connections.Add(targetName, targetConn);

    var copyOptions = new FdoBulkCopyOptions(connections, false);
    var classCopy = new FdoClassCopyOptions(
        sourceName,
        targetName,
        srcSchemaName,
        srcQuery.ClassName,
        targetSchemaName,
        targetClassName);

    //Only carry the filter over when the query actually has one
    string filter = srcQuery.Filter;
    if (!string.IsNullOrEmpty(filter))
    {
        classCopy.SourceFilter = filter;
    }

    foreach (string sourceProperty in propertyMapping.AllKeys)
    {
        classCopy.AddPropertyMapping(sourceProperty, propertyMapping[sourceProperty]);
    }

    classCopy.FlattenGeometries = true;
    classCopy.ForceWkb = true;

    copyOptions.AddClassCopyOption(classCopy);
    return new FdoBulkCopy(copyOptions, 100);
}
/// <summary>
/// Utility method to create a feature class dump bulk copy: a copy of one class
/// of the source connection into a newly created flat-file data store.
/// </summary>
/// <param name="source">The connection to dump from</param>
/// <param name="schemaName">The name of the schema containing the class to dump</param>
/// <param name="className">The name of the class to dump</param>
/// <param name="provider">The provider of the flat-file data store to create</param>
/// <param name="savePath">The path of the flat-file data store to create</param>
/// <returns>
/// The bulk copy. It is marked as the owner of its "TARGET" connection.
/// </returns>
/// <exception cref="FdoException">
/// Thrown if the source class does not exist or the data store could not be created
/// </exception>
public static FdoBulkCopy CreateBulkCopy(FdoConnection source, string schemaName, string className, string provider, string savePath)
{
    //Resolve the source class before anything is created, so that a wrong
    //schema/class name fails with a clear message and leaves no file behind
    ClassDefinition srcClass = null;
    using (var svc = source.CreateFeatureService())
    {
        srcClass = svc.GetClassByName(schemaName, className);
    }

    if (srcClass == null)
        throw new FdoException("Class not found " + schemaName + ":" + className);

    if (!ExpressUtility.CreateFlatFileDataSource(provider, savePath))
        throw new FdoException("Could not create " + savePath);

    //Apply a copy of the source class to target
    ClassDefinition clone = FdoSchemaUtil.CloneClass(srcClass, true);
    FeatureSchema fs = null;

    FdoConnection target = ExpressUtility.CreateFlatFileConnection(provider, savePath);
    using (var svc = target.CreateFeatureService())
    {
        //Re-use the target's schema if it has exactly one, otherwise create one
        var schemas = svc.DescribeSchema();
        if (schemas != null && schemas.Count == 1)
            fs = schemas[0];

        if (fs == null)
            fs = new FeatureSchema("Default", "");

        var classes = fs.Classes;
        classes.Add(clone);

        svc.ApplySchema(fs, null, true);
    }

    //Setup mappings. Only data and geometric properties are copied, each to
    //the property of the same name in the clone
    var mappings = new NameValueCollection();
    foreach (PropertyDefinition prop in srcClass.Properties)
    {
        if (prop.PropertyType == PropertyType.PropertyType_DataProperty ||
            prop.PropertyType == PropertyType.PropertyType_GeometricProperty)
        {
            mappings.Add(prop.Name, prop.Name);
        }
    }

    //Compile query
    var query = new FeatureQueryOptions(className);

    var bcp = CreateBulkCopy(source, target, schemaName, query, fs.Name, clone.Name, mappings);

    //The target connection needs to be cleaned up when done
    bcp.Options.MarkOwnerOfConnection("TARGET");
    return bcp;
}
/// <summary>
/// Fetches the distinct values of the property selected in cmbProperty, shows
/// them in lstValues and shows their count in lblValueCount. Does nothing if no
/// property is selected.
/// </summary>
/// <param name="sender">The event source (not used)</param>
/// <param name="e">The event data (not used)</param>
private void btnFetch_Click(object sender, EventArgs e)
{
    if (cmbProperty.SelectedItem == null)
        return;

    string prop = cmbProperty.SelectedItem.ToString();

    //Try in order of support
    //
    // 1 - ISelectAggregate w/ distinct = true
    // 2 - "SELECT DISTINCT [property] FROM [table]"
    // 3 - Brute force. Prompt user for confirmation first

    using (new TempCursor(Cursors.WaitCursor))
    {
        using (FdoFeatureService svc = _conn.CreateFeatureService())
        {
            bool supportsDistinct = _conn.Capability.GetBooleanCapability(CapabilityType.FdoCapabilityType_SupportsSelectDistinct);
            if (svc.SupportsCommand(OSGeo.FDO.Commands.CommandType.CommandType_SelectAggregates) && supportsDistinct)
            {
                //SortedList not only allows us to hackishly get set-like qualities, but we get sorting for free.
                SortedList<string, string> values = new SortedList<string, string>();
                FeatureAggregateOptions opts = new FeatureAggregateOptions(_ClassDef.Name);
                opts.AddFeatureProperty(prop);
                opts.Distinct = true;
                using (IFdoReader reader = svc.SelectAggregates(opts))
                {
                    while (reader.ReadNext())
                    {
                        if (!reader.IsNull(prop))
                        {
                            //Indexer instead of Add(): Add() throws on a key that is
                            //already present, which distinct values can still produce
                            //if two of them have the same string representation
                            values[reader[prop].ToString()] = string.Empty;
                        }
                    }
                }
                lstValues.DataSource = new List<string>(values.Keys);
                lblValueCount.Text = "(" + values.Keys.Count + ")";
            }
            else if (svc.SupportsCommand(OSGeo.FDO.Commands.CommandType.CommandType_SQLCommand))
            {
                //The SQL statement itself takes care of uniqueness and ordering
                string sql = string.Format("SELECT DISTINCT {0} FROM {1} ORDER BY {0}", prop, _ClassDef.Name);
                List<string> values = new List<string>();
                using (IFdoReader reader = svc.ExecuteSQLQuery(sql))
                {
                    while (reader.ReadNext())
                    {
                        if (!reader.IsNull(prop))
                        {
                            values.Add(reader[prop].ToString());
                        }
                    }
                }
                lstValues.DataSource = values;
                lblValueCount.Text = "(" + values.Count + ")";
            }
            else
            {
                if (MessageService.AskQuestion("About to fetch distinct values by brute force. Continue?", "Get Values"))
                {
                    //SortedList not only allows us to hackishly get set-like qualities, but we get sorting for free.
                    SortedList<string, string> values = new SortedList<string, string>();
                    FeatureQueryOptions query = new FeatureQueryOptions(_ClassDef.QualifiedName);
                    query.AddFeatureProperty(prop);
                    using (IFdoReader reader = svc.SelectFeatures(query))
                    {
                        while (reader.ReadNext())
                        {
                            if (!reader.IsNull(prop))
                            {
                                //This reads every feature, so repeated values are expected.
                                //Add() would throw an ArgumentException on the first
                                //repeat; the indexer simply overwrites the entry
                                values[reader[prop].ToString()] = string.Empty;
                            }
                        }
                    }
                    lstValues.DataSource = new List<string>(values.Keys);
                    lblValueCount.Text = "(" + values.Keys.Count + ")";
                }
            }
        }
    }
}
/// <summary>
/// Builds the query for the source class from the current options: the
/// schema-qualified class name, the source properties, one computed property
/// per source alias and, if one is set, the source filter.
/// </summary>
/// <returns>The source query</returns>
private FeatureQueryOptions CreateSourceQuery()
{
    string qualifiedClassName = Options.SourceSchema + ":" + Options.SourceClassName;
    var sourceQuery = new FeatureQueryOptions(qualifiedClassName);

    sourceQuery.AddFeatureProperty(Options.SourcePropertyNames);

    //Each alias names a computed property whose expression is held by the options
    foreach (string computedAlias in Options.SourceAliases)
    {
        sourceQuery.AddComputedProperty(computedAlias, Options.GetExpression(computedAlias));
    }

    if (!string.IsNullOrEmpty(Options.SourceFilter))
    {
        sourceQuery.Filter = Options.SourceFilter;
    }

    return sourceQuery;
}