/// <summary>
/// Executes the SQL of <paramref name="qd"/> against the source database and appends
/// every entity with a non-null, not-yet-seen key to <paramref name="list"/>.
/// </summary>
/// <typeparam name="T">Entity type the materialized rows are cast to.</typeparam>
/// <param name="list">Collection that receives the de-duplicated entities.</param>
/// <param name="qd">Query definition that supplies the SQL text.</param>
/// <param name="ed">Entity definition used to build an entity from each row.</param>
private void FillList<T>(ICollection<T> list, QueryDefinition qd, EntityDefinition ed) where T : IEntity
{
    var building = Settings.Current.Building;
    var sql = GetSqlHelper.GetSql(
        building.SourceEngine.Database,
        qd.GetSql(building.Vendor, building.SourceSchema),
        building.SourceSchema);

    // Nothing to run for this definition.
    if (string.IsNullOrEmpty(sql))
    {
        return;
    }

    // Keys already added to the list; used only for membership checks.
    var seenKeys = new HashSet<string>();

    using (var connection = new OdbcConnection(building.SourceConnectionString))
    {
        connection.Open();

        using (var command = new OdbcCommand(sql, connection))
        {
            command.CommandTimeout = 30000;

            using (var reader = command.ExecuteReader())
            {
                while (reader.Read())
                {
                    // Use the first concept definition when there is one, otherwise pass null.
                    Concept conceptDef = (ed.Concepts != null && ed.Concepts.Any())
                        ? ed.Concepts[0]
                        : null;

                    // Only the first entity produced for the row is used.
                    var entity = (T)ed.GetConcepts(conceptDef, reader, null).ToList()[0];
                    var key = entity.GetKey();

                    // Skip rows without a key and rows whose key was already collected.
                    if (key == null || !seenKeys.Add(key))
                    {
                        continue;
                    }

                    list.Add(entity);

                    // The state is only checked after a successful add.
                    if (CurrentState != BuilderState.Running)
                    {
                        break;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Streams person keys from the source database grouped into batches of at most
/// <paramref name="batchSize"/> entries.
/// </summary>
/// <param name="batches">Passed through unchanged to <c>dbSource.GetPersonKeys</c>.</param>
/// <param name="batchSize">Maximum number of key pairs per yielded list.</param>
/// <returns>
/// A lazy sequence of lists of (id, source) pairs, both values trimmed. Every yielded list is
/// a distinct instance, so callers may keep or buffer batches safely. The trailing batch is
/// always yielded, which means a single empty list is produced when the query returns no rows.
/// </returns>
public IEnumerable<List<KeyValuePair<string, string>>> GetPersonKeys(long batches, int batchSize)
{
    var batch = new List<KeyValuePair<string, string>>(batchSize);
    var query = GetSqlHelper.GetSql(
        Settings.Current.Building.SourceEngine.Database,
        Settings.Current.Building.BatchScript,
        Settings.Current.Building.SourceSchemaName);

    foreach (var reader in dbSource.GetPersonKeys(query, batches, batchSize))
    {
        if (batch.Count == batchSize)
        {
            yield return batch;

            // Start a fresh list instead of clearing the one just handed out: the consumer may
            // still hold a reference to it (e.g. after ToList() or when processing in parallel).
            batch = new List<KeyValuePair<string, string>>(batchSize);
        }

        var id = reader[0].ToString().Trim();

        // TODO(review): consider using reader[1].ToString() without Trim().
        var source = reader[1].ToString().Trim();

        batch.Add(new KeyValuePair<string, string>(id, source));
    }

    // Flush whatever is left (possibly a full batch, possibly an empty one).
    yield return batch;
}
/// <summary>
/// Streams person keys from the source database grouped into batches of at most
/// <paramref name="batchSize"/> entries.
/// </summary>
/// <param name="batches">Passed through unchanged to <c>_dbSource.GetPersonKeys</c>.</param>
/// <param name="batchSize">Maximum number of key pairs per yielded list.</param>
/// <returns>
/// A lazy sequence of lists of (id, source) pairs, both values trimmed. Every yielded list is
/// a distinct instance, so callers may keep or buffer batches safely. The trailing batch is
/// always yielded, which means a single empty list is produced when the query returns no rows.
/// </returns>
public IEnumerable<List<KeyValuePair<string, string>>> GetPersonKeys(long batches, int batchSize)
{
    var batch = new List<KeyValuePair<string, string>>(batchSize);
    var query = GetSqlHelper.GetSql(
        _settings.SourceEngine.Database,
        _settings.BatchScript,
        _settings.ConversionSettings.SourceSchema);

    foreach (var reader in _dbSource.GetPersonKeys(query, batches, batchSize))
    {
        if (batch.Count == batchSize)
        {
            yield return batch;

            // Start a fresh list instead of clearing the one just handed out: the consumer may
            // still hold a reference to it (e.g. after ToList() or when processing in parallel).
            batch = new List<KeyValuePair<string, string>>(batchSize);
        }

        var id = reader[0].ToString().Trim();
        var source = reader[1].ToString().Trim();

        batch.Add(new KeyValuePair<string, string>(id, source));
    }

    // Flush whatever is left (possibly a full batch, possibly an empty one).
    yield return batch;
}
/// <summary>
/// Reads the chunk data of every applicable query definition through
/// <paramref name="sourceEngine"/> and feeds each row to <c>PopulateData</c>.
/// Definitions that carry Providers, Locations or CareSites, or that resolve to empty SQL,
/// are skipped.
/// </summary>
/// <param name="sourceEngine">Engine used to create the command and the chunk reader.</param>
/// <param name="sourceSchemaName">Schema name used when resolving the SQL text.</param>
/// <param name="sourceQueryDefinitions">Query definitions to load.</param>
/// <param name="sourceConnection">Connection the commands are executed on.</param>
/// <param name="vendor">Vendor passed to <c>QueryDefinition.GetSql</c>.</param>
/// <returns>
/// A (null, null) pair on success; otherwise a pair of a diagnostic text (engine, file name,
/// the query being processed) and the exception that was caught.
/// </returns>
public KeyValuePair<string, Exception> Load(IDatabaseEngine sourceEngine, string sourceSchemaName, List<QueryDefinition> sourceQueryDefinitions, OdbcConnection sourceConnection, string vendor)
{
    var fileName = string.Empty;
    var query = string.Empty;

    // NOTE(review): nothing in this method ever assigns the connection string, so the
    // "SourceConnectionString=" line of the report is always blank. Left as is because the
    // intended source (and whether credentials may be logged) is not visible here.
    var connectionString = string.Empty;

    try
    {
        foreach (var qd in sourceQueryDefinitions)
        {
            if (qd.Providers != null)
            {
                continue;
            }

            if (qd.Locations != null)
            {
                continue;
            }

            if (qd.CareSites != null)
            {
                continue;
            }

            fileName = qd.FileName;

            var sql = GetSqlHelper.GetSql(sourceEngine.Database, qd.GetSql(vendor, sourceSchemaName), sourceSchemaName);
            if (string.IsNullOrEmpty(sql))
            {
                continue;
            }

            // Remember the template first so a failing string.Format is still reported with
            // the SQL that caused it, then the final text once the chunk id is substituted.
            query = sql;
            var q = string.Format(sql, ChunkId);
            query = q;

            using (var cdm = sourceEngine.GetCommand(q, sourceConnection))
            {
                cdm.CommandTimeout = 30000;

                using (var reader = sourceEngine.ReadChunkData(sourceConnection, cdm, qd, ChunkId, Prefix))
                {
                    while (reader.Read())
                    {
                        PopulateData(qd, reader);
                    }
                }
            }
        }
    }
    catch (Exception e)
    {
        // Errors are returned to the caller rather than thrown.
        var info = new StringBuilder();
        info.AppendLine("SourceEngine=" + sourceEngine);
        info.AppendLine("SourceConnectionString=" + connectionString);
        info.AppendLine("File name=" + fileName);
        info.AppendLine("Query:");
        info.AppendLine(query);

        return new KeyValuePair<string, Exception>(info.ToString(), e);
    }

    return new KeyValuePair<string, Exception>(null, null);
}
/// <summary>
/// Resolves the SQL for <paramref name="vendor"/> and <paramref name="schemaName"/> via the
/// two-argument <c>GetSql</c> overload and passes it through <c>GetSqlHelper.GetSql</c> for
/// <paramref name="sourceDatabase"/>.
/// </summary>
/// <param name="sourceDatabase">Database kind handed to <c>GetSqlHelper.GetSql</c>.</param>
/// <param name="vendor">Vendor used to pick the SQL text.</param>
/// <param name="schemaName">Schema name used by both calls.</param>
/// <returns>The string returned by <c>GetSqlHelper.GetSql</c>.</returns>
public string GetSql(Database sourceDatabase, Vendors vendor, string schemaName)
{
    var vendorSql = GetSql(vendor, schemaName);
    var result = GetSqlHelper.GetSql(sourceDatabase, vendorSql, schemaName);
    return result;
}
/// <summary>
/// Lists the distinct part indexes of the raw files stored in S3 for the current chunk.
/// </summary>
/// <remarks>
/// The file name is taken from the first query definition that has <c>Persons</c> and non-empty
/// SQL, falling back to the first query definition. The part index is the text between that
/// file name and the next underscore in each object key.
/// </remarks>
/// <returns>
/// A lazy sequence of part indexes; each index is yielded once for the whole listing, even when
/// its objects are spread over several result pages.
/// </returns>
private IEnumerable<string> GetParts()
{
    var building = Settings.Settings.Current.Building;
    var folder = $"{building.Vendor}/{building.Id}/raw";
    var fileName = string.Empty;

    foreach (var qd in building.SourceQueryDefinitions)
    {
        // Check the cheap condition first so SQL is resolved only for person definitions.
        if (qd.Persons == null)
        {
            continue;
        }

        var sql = GetSqlHelper.GetSql(
            building.SourceEngine.Database,
            qd.GetSql(building.Vendor, building.SourceSchemaName),
            building.SourceSchemaName);

        if (string.IsNullOrEmpty(sql))
        {
            continue;
        }

        fileName = qd.FileName;
        break;
    }

    if (string.IsNullOrEmpty(fileName))
    {
        fileName = building.SourceQueryDefinitions[0].FileName;
    }

    using (var client = new AmazonS3Client(
        Settings.Settings.Current.S3AwsAccessKeyId,
        Settings.Settings.Current.S3AwsSecretAccessKey,
        Amazon.RegionEndpoint.USEast1))
    {
        var request = new ListObjectsV2Request
        {
            BucketName = Settings.Settings.Current.Bucket,
            Prefix = $"{folder}/{_chunkId}/{fileName}/{fileName}"
        };

        // Shared across pages: objects of one part can straddle a page boundary, and a
        // per-page set would report that part twice.
        var seenPartIndexes = new HashSet<string>();

        ListObjectsV2Response response;
        do
        {
            // An iterator cannot await, so the listing call is blocked on.
            var responseTask = client.ListObjectsV2Async(request);
            responseTask.Wait();
            response = responseTask.Result;

            foreach (var entry in response.S3Objects)
            {
                // Everything after the last occurrence of the file name, up to the first '_'.
                var end = entry.Key.LastIndexOf(fileName, StringComparison.InvariantCultureIgnoreCase) + fileName.Length;
                var suffix = entry.Key.Substring(end);
                var partIndex = suffix.Substring(0, suffix.IndexOf('_'));

                if (seenPartIndexes.Add(partIndex))
                {
                    yield return partIndex;
                }
            }

            request.ContinuationToken = response.NextContinuationToken;
        }
        while (response.IsTruncated);
    }
}
/// <summary>
/// Processes the current chunk: restores the field headers of every applicable query
/// definition from its metadata object in S3, then loads, builds and saves each part returned
/// by <c>GetParts</c>, and finally marks the chunk as complete.
/// </summary>
/// <remarks>
/// Any exception is logged with <c>Logger.WriteError</c> and rethrown.
/// </remarks>
public void Process()
{
    try
    {
        var dbChunk = new DbChunk(Settings.Settings.Current.Building.BuilderConnectionString);
        var timer = new Stopwatch();
        timer.Start();

        var folder = $"{Settings.Settings.Current.Building.Vendor}/{Settings.Settings.Current.Building.Id}/raw";

        // Step 1: rebuild qd.FieldHeaders (column name -> position) from the metadata files.
        Parallel.ForEach(Settings.Settings.Current.Building.SourceQueryDefinitions, qd =>
        {
            if (qd.Providers != null)
            {
                return;
            }

            if (qd.Locations != null)
            {
                return;
            }

            if (qd.CareSites != null)
            {
                return;
            }

            var sql = GetSqlHelper.GetSql(
                Settings.Settings.Current.Building.SourceEngine.Database,
                qd.GetSql(Settings.Settings.Current.Building.Vendor, Settings.Settings.Current.Building.SourceSchemaName),
                Settings.Settings.Current.Building.SourceSchemaName);

            if (string.IsNullOrEmpty(sql))
            {
                return;
            }

            qd.FieldHeaders = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

            var metadataKey = $"{folder}/metadata/{qd.FileName}.txt";

            using (var client = new AmazonS3Client(
                Settings.Settings.Current.S3AwsAccessKeyId,
                Settings.Settings.Current.S3AwsSecretAccessKey,
                Amazon.RegionEndpoint.USEast1))
            using (var stream = new MemoryStream())
            using (var sr = new StreamReader(stream))
            {
                var request = new GetObjectRequest
                {
                    BucketName = Settings.Settings.Current.Bucket,
                    Key = metadataKey
                };

                var getObject = client.GetObjectAsync(request);
                getObject.Wait();

                // Dispose the whole response, not only its stream.
                using (var response = getObject.Result)
                {
                    response.ResponseStream.CopyTo(stream);
                }

                // Rewind so the reader sees the bytes that were just copied in.
                stream.Position = 0;

                var headerLine = sr.ReadLine();
                if (headerLine == null)
                {
                    // An empty metadata object would otherwise surface as a NullReferenceException.
                    throw new InvalidDataException("[RestoreMetadataFromS3] metadata is empty: " + metadataKey);
                }

                var index = 0;
                foreach (var fieldName in headerLine.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
                {
                    // Header names are compared case-insensitively (see the comparer above).
                    if (qd.FieldHeaders.ContainsKey(fieldName))
                    {
                        throw new Exception("[RestoreMetadataFromS3] fieldName duplication: " + fieldName + " - " + qd.FileName);
                    }

                    qd.FieldHeaders.Add(fieldName, index);
                    index++;
                }
            }
        });

        // Step 2: load, build and save every part, at most two at a time.
        Parallel.ForEach(GetParts(), new ParallelOptions { MaxDegreeOfParallelism = 2 }, p =>
        {
            Logger.Write(_chunkId, LogMessageTypes.Info, "load part=" + p);

            var part = new DatabaseChunkPart(_chunkId, _createPersonBuilder, p, 0);
            LoadPart(part, p);
            part.Build();
            SavePart(part, p);
        });

        Logger.Write(_chunkId, LogMessageTypes.Info, $"Loaded - {timer.ElapsedMilliseconds} ms | {(GC.GetTotalMemory(false) / 1024f) / 1024f} Mb");

        dbChunk.ChunkComplete(_chunkId, Settings.Settings.Current.Building.Id.Value);
    }
    catch (Exception e)
    {
        Logger.WriteError(_chunkId, e);
        throw;
    }
}
/// <summary>
/// Reads the chunk data of every applicable source query definition and passes each row to
/// <c>PopulateData</c>, then logs the elapsed time and the managed memory in use.
/// </summary>
/// <remarks>
/// Definitions that carry Providers, Locations or CareSites, or that resolve to empty SQL, are
/// skipped. For Redshift the reader is requested without a connection or command; for every
/// other database the SQL is formatted with <c>ChunkId</c> and run on a new connection.
/// </remarks>
public void Load()
{
    var timer = Stopwatch.StartNew();

    foreach (var qd in Settings.Settings.Current.Building.SourceQueryDefinitions)
    {
        if (qd.Providers != null || qd.Locations != null || qd.CareSites != null)
        {
            continue;
        }

        var building = Settings.Settings.Current.Building;
        var engine = building.SourceEngine;

        var sql = GetSqlHelper.GetSql(
            engine.Database,
            qd.GetSql(building.Vendor, building.SourceSchemaName),
            building.SourceSchemaName);

        if (string.IsNullOrEmpty(sql))
        {
            continue;
        }

        if (engine.Database == Database.Redshift)
        {
            // Redshift path: the engine is given neither a connection nor a command.
            using (var redshiftReader = engine.ReadChunkData(null, null, qd, ChunkId, Prefix))
            {
                while (redshiftReader.Read())
                {
                    PopulateData(qd, redshiftReader);
                }
            }

            continue;
        }

        // Generic path: substitute the chunk id and run the query on a dedicated connection.
        var chunkSql = string.Format(sql, ChunkId);

        using (var connection = engine.GetConnection(building.SourceConnectionString))
        using (var command = engine.GetCommand(chunkSql, connection))
        using (var dataReader = engine.ReadChunkData(connection, command, qd, ChunkId, Prefix))
        {
            while (dataReader.Read())
            {
                PopulateData(qd, dataReader);
            }
        }
    }

    timer.Stop();

    var megabytes = (GC.GetTotalMemory(false) / 1024f) / 1024f;
    Logger.Write(
        ChunkId,
        LogMessageTypes.Info,
        string.Format(Prefix + ") loaded - {0} ms | {1} Mb", timer.ElapsedMilliseconds, megabytes));
}
/// <summary>
/// Exports the raw data of every chunk returned by <c>_dbChunk.GetNotMovedToS3Chunks</c> to S3
/// and marks each chunk as created once all of its queries have been unloaded.
/// </summary>
/// <remarks>
/// First pass: for every applicable query definition without field headers,
/// <c>StoreMetadataToS3</c> is called with the SQL formatted for the first pending chunk.
/// Second pass: for each chunk (two at a time) and each applicable query definition (five at a
/// time), a '#'-prefixed table is created from the query and unloaded to
/// <c>{bucket}/{vendor}/{buildingId}/raw/{chunkId}/{fileName}/{fileName}</c>.
/// Failures in the second pass are logged and rethrown.
/// </remarks>
private void MoveChunkDataToS3()
{
    var pendingChunkIds = _dbChunk.GetNotMovedToS3Chunks(Settings.Settings.Current.Building.Id.Value).ToArray();
    if (pendingChunkIds.Length == 0)
    {
        return;
    }

    var baseFolder = $"{Settings.Settings.Current.Bucket}/{Settings.Settings.Current.Building.Vendor}/{Settings.Settings.Current.Building.Id}/raw";
    Console.WriteLine("S3 raw folder - " + baseFolder);

    // Pass 1: make sure metadata exists for every query definition that will be unloaded.
    Parallel.ForEach(Settings.Settings.Current.Building.SourceQueryDefinitions, queryDefinition =>
    {
        if (queryDefinition.Providers != null || queryDefinition.Locations != null || queryDefinition.CareSites != null)
        {
            return;
        }

        var building = Settings.Settings.Current.Building;
        var sql = GetSqlHelper.GetSql(
            building.SourceEngine.Database,
            queryDefinition.GetSql(building.Vendor, building.SourceSchemaName),
            building.SourceSchemaName);

        if (string.IsNullOrEmpty(sql))
        {
            return;
        }

        // The first pending chunk id is used to produce a runnable query.
        sql = string.Format(sql, pendingChunkIds[0]);

        if (queryDefinition.FieldHeaders == null)
        {
            StoreMetadataToS3(queryDefinition, sql);
        }
    });

    // Format template for the statement pair executed per chunk and query definition.
    const string unloadTemplate =
        @"create table {0} sortkey ({1}) distkey ({1}) as {2}; " +
        @"UNLOAD ('select * from {0} order by {1}') to 's3://{3}' " +
        @"DELIMITER AS '\t' " +
        @"credentials 'aws_access_key_id={4};aws_secret_access_key={5}' " +
        @"GZIP ALLOWOVERWRITE PARALLEL ON";

    // Pass 2: unload the data chunk by chunk.
    Parallel.ForEach(pendingChunkIds, new ParallelOptions { MaxDegreeOfParallelism = 2 }, chunkId =>
    {
        Parallel.ForEach(
            Settings.Settings.Current.Building.SourceQueryDefinitions,
            new ParallelOptions { MaxDegreeOfParallelism = 5 },
            queryDefinition =>
            {
                try
                {
                    if (queryDefinition.Providers != null || queryDefinition.Locations != null || queryDefinition.CareSites != null)
                    {
                        return;
                    }

                    var building = Settings.Settings.Current.Building;
                    var sql = GetSqlHelper.GetSql(
                        building.SourceEngine.Database,
                        queryDefinition.GetSql(building.Vendor, building.SourceSchemaName),
                        building.SourceSchemaName);

                    if (string.IsNullOrEmpty(sql))
                    {
                        return;
                    }

                    sql = string.Format(sql, chunkId);

                    var personIdField = queryDefinition.GetPersonIdFieldName();
                    var tmpTableName = "#" + queryDefinition.FileName + "_" + chunkId;
                    var folder = $"{baseFolder}/{chunkId}/{queryDefinition.FileName}";
                    var fileName = $"{folder}/{queryDefinition.FileName}";

                    var unloadQuery = string.Format(
                        unloadTemplate,
                        tmpTableName,                                    // {0}
                        personIdField,                                   // {1}
                        sql,                                             // {2}
                        fileName,                                        // {3}
                        Settings.Settings.Current.S3AwsAccessKeyId,      // {4}
                        Settings.Settings.Current.S3AwsSecretAccessKey); // {5}

                    using (var connection = SqlConnectionHelper.OpenOdbcConnection(building.SourceConnectionString))
                    using (var command = new OdbcCommand(unloadQuery, connection))
                    {
                        command.CommandTimeout = 999999999;
                        command.ExecuteNonQuery();
                    }
                }
                catch (Exception e)
                {
                    Logger.WriteError(chunkId, e);
                    throw;
                }
            });

        // Reached only when every query definition of this chunk finished without throwing.
        _dbChunk.ChunkCreated(chunkId, Settings.Settings.Current.Building.Id.Value);
        Console.WriteLine("Raw data for chunkId=" + chunkId + " is available on S3");
    });
}