Exemple #1
0
        /// <summary>
        /// Executes the SQL produced for <paramref name="qd"/> against the source ODBC connection and
        /// appends the first entity built from each row to <paramref name="list"/>, keeping only the
        /// first entity seen for every non-null key.
        /// </summary>
        /// <typeparam name="T">Entity type the rows are cast to.</typeparam>
        /// <param name="list">Collection that receives the de-duplicated entities.</param>
        /// <param name="qd">Query definition that supplies the SQL text.</param>
        /// <param name="ed">Entity definition used to turn a row into entities.</param>
        /// <remarks>
        /// Nothing is read when the resulting SQL is null or empty. Reading stops after the first added
        /// row for which <c>CurrentState</c> is no longer <c>BuilderState.Running</c>.
        /// </remarks>
        private void FillList <T>(ICollection <T> list, QueryDefinition qd, EntityDefinition ed) where T : IEntity
        {
            var database = Settings.Current.Building.SourceEngine.Database;
            var schema   = Settings.Current.Building.SourceSchema;
            var sql      = GetSqlHelper.GetSql(database, qd.GetSql(Settings.Current.Building.Vendor, schema), schema);

            if (string.IsNullOrEmpty(sql))
            {
                return;
            }

            // Keys of the entities already placed into the list.
            var seenKeys = new HashSet <string>();

            using (var connection = new OdbcConnection(Settings.Current.Building.SourceConnectionString))
            {
                connection.Open();

                using (var command = new OdbcCommand(sql, connection))
                {
                    command.CommandTimeout = 30000;

                    using (var reader = command.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            // Use the first configured concept definition when there is one.
                            var hasConcepts    = ed.Concepts != null && ed.Concepts.Any();
                            Concept conceptDef = hasConcepts ? ed.Concepts[0] : null;

                            var entity = (T)ed.GetConcepts(conceptDef, reader, null).ToList()[0];
                            var key    = entity.GetKey();

                            // Rows without a key and rows repeating a known key are ignored;
                            // HashSet.Add returns false when the key is already present.
                            if (key == null || !seenKeys.Add(key))
                            {
                                continue;
                            }

                            list.Add(entity);

                            if (CurrentState != BuilderState.Running)
                            {
                                break;
                            }
                        }
                    }
                }
            }
        }
Exemple #2
0
        /// <summary>
        /// Streams the rows returned by <c>dbSource.GetPersonKeys</c> for the configured batch script,
        /// grouped into lists of at most <paramref name="batchSize"/> key/value pairs.
        /// </summary>
        /// <param name="batches">Passed through unchanged to <c>dbSource.GetPersonKeys</c>.</param>
        /// <param name="batchSize">
        /// Number of pairs collected before a list is yielded; also passed through to
        /// <c>dbSource.GetPersonKeys</c>.
        /// </param>
        /// <returns>
        /// A lazily evaluated sequence of lists. Each pair holds the trimmed string form of column 0
        /// (key) and column 1 (value) of a row. Every yielded list is a distinct instance, so callers
        /// may keep it after requesting the next one. The last list may be shorter than
        /// <paramref name="batchSize"/> and is empty when no rows were read.
        /// </returns>
        public IEnumerable <List <KeyValuePair <string, string> > > GetPersonKeys(long batches, int batchSize)
        {
            var batch = new List <KeyValuePair <string, string> >(batchSize);

            var query = GetSqlHelper.GetSql(Settings.Current.Building.SourceEngine.Database, Settings.Current.Building.BatchScript, Settings.Current.Building.SourceSchemaName);

            foreach (var reader in dbSource.GetPersonKeys(query, batches, batchSize))
            {
                if (batch.Count == batchSize)
                {
                    yield return(batch);

                    // Start a new list instead of clearing the yielded one: the consumer may still
                    // hold a reference to it (e.g. after materializing the sequence).
                    batch = new List <KeyValuePair <string, string> >(batchSize);
                }

                var id     = reader[0].ToString().Trim();
                var source = reader[1].ToString().Trim(); // Open question from the original author: should this value be left untrimmed?

                batch.Add(new KeyValuePair <string, string>(id, source));
            }

            // Remaining (possibly partial or empty) batch.
            yield return(batch);
        }
        /// <summary>
        /// Streams the rows returned by <c>_dbSource.GetPersonKeys</c> for the configured batch script,
        /// grouped into lists of at most <paramref name="batchSize"/> key/value pairs.
        /// </summary>
        /// <param name="batches">Passed through unchanged to <c>_dbSource.GetPersonKeys</c>.</param>
        /// <param name="batchSize">
        /// Number of pairs collected before a list is yielded; also passed through to
        /// <c>_dbSource.GetPersonKeys</c>.
        /// </param>
        /// <returns>
        /// A lazily evaluated sequence of lists. Each pair holds the trimmed string form of column 0
        /// (key) and column 1 (value) of a row. Every yielded list is a distinct instance, so callers
        /// may keep it after requesting the next one. The last list may be shorter than
        /// <paramref name="batchSize"/> and is empty when no rows were read.
        /// </returns>
        public IEnumerable <List <KeyValuePair <string, string> > > GetPersonKeys(long batches, int batchSize)
        {
            var batch = new List <KeyValuePair <string, string> >(batchSize);

            var query = GetSqlHelper.GetSql(_settings.SourceEngine.Database, _settings.BatchScript, _settings.ConversionSettings.SourceSchema);

            foreach (var reader in _dbSource.GetPersonKeys(query, batches, batchSize))
            {
                if (batch.Count == batchSize)
                {
                    yield return(batch);

                    // Start a new list instead of clearing the yielded one: the consumer may still
                    // hold a reference to it (e.g. after materializing the sequence).
                    batch = new List <KeyValuePair <string, string> >(batchSize);
                }

                var id     = reader[0].ToString().Trim();
                var source = reader[1].ToString().Trim();

                batch.Add(new KeyValuePair <string, string>(id, source));
            }

            // Remaining (possibly partial or empty) batch.
            yield return(batch);
        }
        /// <summary>
        /// Runs every applicable source query for the current chunk and feeds each returned row to
        /// <c>PopulateData</c>.
        /// </summary>
        /// <param name="sourceEngine">Engine used to create commands and chunk readers.</param>
        /// <param name="sourceSchemaName">Schema name used when building the SQL.</param>
        /// <param name="sourceQueryDefinitions">
        /// Query definitions to run. Definitions with <c>Providers</c>, <c>Locations</c> or
        /// <c>CareSites</c> set, and definitions that produce no SQL, are skipped.
        /// </param>
        /// <param name="sourceConnection">Connection the commands are executed on.</param>
        /// <param name="vendor">Vendor passed to <c>QueryDefinition.GetSql</c>.</param>
        /// <returns>
        /// A pair of (null, null) on success. On failure, a pair whose key is a diagnostic text
        /// (engine, file name and the query being executed) and whose value is the caught exception.
        /// </returns>
        public KeyValuePair <string, Exception> Load(IDatabaseEngine sourceEngine, string sourceSchemaName, List <QueryDefinition> sourceQueryDefinitions, OdbcConnection sourceConnection, string vendor)
        {
            var fileName = string.Empty;
            var query    = string.Empty;

            // NOTE(review): never populated in this method, so the diagnostic line below is always
            // blank. Left as-is on purpose: a connection string may carry credentials.
            var connectionString = string.Empty;

            try
            {
                foreach (var qd in sourceQueryDefinitions)
                {
                    if (qd.Providers != null || qd.Locations != null || qd.CareSites != null)
                    {
                        continue;
                    }

                    fileName = qd.FileName;

                    var sql = GetSqlHelper.GetSql(sourceEngine.Database,
                                                  qd.GetSql(vendor, sourceSchemaName),
                                                  sourceSchemaName);

                    if (string.IsNullOrEmpty(sql))
                    {
                        continue;
                    }

                    // Remember the template first so that a failure inside string.Format is still
                    // reported with the offending SQL, then the final text once it is formatted.
                    query = sql;
                    query = string.Format(sql, ChunkId);

                    using (var cdm = sourceEngine.GetCommand(query, sourceConnection))
                    {
                        cdm.CommandTimeout = 30000;
                        using (var reader =
                                   sourceEngine.ReadChunkData(sourceConnection, cdm, qd, ChunkId,
                                                              Prefix))
                        {
                            while (reader.Read())
                            {
                                PopulateData(qd, reader);
                            }
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Failures are reported to the caller through the return value, not rethrown.
                var info = new StringBuilder();
                info.AppendLine("SourceEngine=" + sourceEngine);
                info.AppendLine("SourceConnectionString=" + connectionString);
                info.AppendLine("File name=" + fileName);
                info.AppendLine("Query:");
                info.AppendLine(query);

                return(new KeyValuePair <string, Exception>(info.ToString(), e));
            }


            return(new KeyValuePair <string, Exception>(null, null));
        }
Exemple #5
0
 /// <summary>
 /// Obtains the SQL text from the <c>GetSql(vendor, schemaName)</c> overload and passes it, together
 /// with the database and schema name, to <c>GetSqlHelper.GetSql</c>.
 /// </summary>
 /// <param name="sourceDatabase">Database kind forwarded to <c>GetSqlHelper.GetSql</c>.</param>
 /// <param name="vendor">Vendor forwarded to the two-argument overload.</param>
 /// <param name="schemaName">Schema name forwarded to both calls.</param>
 /// <returns>The value returned by <c>GetSqlHelper.GetSql</c>.</returns>
 public string GetSql(Database sourceDatabase, Vendors vendor, string schemaName)
 {
     var vendorSql = GetSql(vendor, schemaName);

     return GetSqlHelper.GetSql(sourceDatabase, vendorSql, schemaName);
 }
        /// <summary>
        /// Lists the S3 objects stored for the current chunk and yields each distinct part index
        /// found in their keys.
        /// </summary>
        /// <remarks>
        /// The listing prefix is built from the file name of the first query definition that has
        /// <c>Persons</c> set and produces SQL; when there is none, the first query definition's file
        /// name is used. A part index is the text between the last occurrence of that file name in an
        /// object key and the next underscore.
        /// </remarks>
        /// <returns>
        /// Each part index exactly once, in the order first encountered, across all listing pages.
        /// </returns>
        private IEnumerable <string> GetParts()
        {
            var folder = $"{Settings.Settings.Current.Building.Vendor}/{Settings.Settings.Current.Building.Id}/raw";

            var fileName = string.Empty;

            foreach (var qd in Settings.Settings.Current.Building.SourceQueryDefinitions)
            {
                var sql = GetSqlHelper.GetSql(Settings.Settings.Current.Building.SourceEngine.Database,
                                              qd.GetSql(Settings.Settings.Current.Building.Vendor, Settings.Settings.Current.Building.SourceSchemaName), Settings.Settings.Current.Building.SourceSchemaName);

                if (qd.Persons == null || string.IsNullOrEmpty(sql))
                {
                    continue;
                }

                fileName = qd.FileName;
                break;
            }

            if (string.IsNullOrEmpty(fileName))
            {
                fileName = Settings.Settings.Current.Building.SourceQueryDefinitions[0].FileName;
            }

            // Shared by all listing pages: objects of one part can be split across two pages, and
            // the part must still be reported only once.
            var partIndexes = new HashSet <string>();

            using (var client = new AmazonS3Client(Settings.Settings.Current.S3AwsAccessKeyId, Settings.Settings.Current.S3AwsSecretAccessKey, Amazon.RegionEndpoint.USEast1))
            {
                var request = new ListObjectsV2Request
                {
                    BucketName = Settings.Settings.Current.Bucket,
                    Prefix     = $"{folder}/{_chunkId}/{fileName}/{fileName}"
                };

                ListObjectsV2Response response;
                do
                {
                    // An iterator cannot await, so the listing call is completed synchronously.
                    var responseTask = client.ListObjectsV2Async(request);
                    responseTask.Wait();
                    response = responseTask.Result;

                    foreach (var entry in response.S3Objects)
                    {
                        // Drop everything up to and including the last occurrence of the file name,
                        // then keep the text that precedes the first underscore.
                        var end = entry.Key.LastIndexOf(fileName, StringComparison.InvariantCultureIgnoreCase) + fileName.Length;
                        var key = entry.Key.Replace(entry.Key.Substring(0, end), "");
                        key = key.Substring(0, key.IndexOf('_'));

                        // Add returns false for an index that was already reported.
                        if (partIndexes.Add(key))
                        {
                            yield return(key);
                        }
                    }

                    request.ContinuationToken = response.NextContinuationToken;
                } while (response.IsTruncated);
            }
        }
        /// <summary>
        /// Restores the field headers of every applicable query definition from its metadata object
        /// on S3, then loads, builds and saves each part returned by <c>GetParts</c>, and finally
        /// marks the chunk as complete.
        /// </summary>
        /// <remarks>
        /// Query definitions with <c>Providers</c>, <c>Locations</c> or <c>CareSites</c> set, and
        /// those that produce no SQL, are skipped. Any failure is written to the log and rethrown.
        /// </remarks>
        /// <exception cref="AggregateException">
        /// Thrown by the parallel loops when a worker fails, e.g. on an empty metadata object or a
        /// duplicated field name.
        /// </exception>
        public void Process()
        {
            try
            {
                var dbChunk = new DbChunk(Settings.Settings.Current.Building.BuilderConnectionString);
                var timer   = new Stopwatch();
                timer.Start();

                var folder = $"{Settings.Settings.Current.Building.Vendor}/{Settings.Settings.Current.Building.Id}/raw";

                Parallel.ForEach(Settings.Settings.Current.Building.SourceQueryDefinitions, qd =>
                {
                    if (qd.Providers != null || qd.Locations != null || qd.CareSites != null)
                    {
                        return;
                    }

                    var sql = GetSqlHelper.GetSql(Settings.Settings.Current.Building.SourceEngine.Database,
                                                  qd.GetSql(Settings.Settings.Current.Building.Vendor, Settings.Settings.Current.Building.SourceSchemaName), Settings.Settings.Current.Building.SourceSchemaName);

                    if (string.IsNullOrEmpty(sql))
                    {
                        return;
                    }

                    qd.FieldHeaders = new Dictionary <string, int>(StringComparer.OrdinalIgnoreCase);

                    var metadataKey = $"{folder}/metadata/{qd.FileName + ".txt"}";

                    using (var client = new AmazonS3Client(Settings.Settings.Current.S3AwsAccessKeyId, Settings.Settings.Current.S3AwsSecretAccessKey, Amazon.RegionEndpoint.USEast1))
                        using (var stream = new MemoryStream())
                            using (var sr = new StreamReader(stream))
                            {
                                var request = new GetObjectRequest {
                                    BucketName = Settings.Settings.Current.Bucket, Key = metadataKey
                                };
                                var getObject = client.GetObjectAsync(request);
                                getObject.Wait();

                                // Dispose the response object itself, not only its stream.
                                using (var objectResponse = getObject.Result)
                                    using (var response = objectResponse.ResponseStream)
                                    {
                                        response.CopyTo(stream);
                                    }
                                stream.Position = 0;

                                // The first line holds the comma-separated field names.
                                var headerLine = sr.ReadLine();
                                if (headerLine == null)
                                {
                                    throw new InvalidDataException("[RestoreMetadataFromS3] metadata is empty: " + metadataKey);
                                }

                                var index = 0;
                                foreach (var fieldName in headerLine.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
                                {
                                    // Field names are compared case-insensitively (see the comparer above).
                                    if (qd.FieldHeaders.ContainsKey(fieldName))
                                    {
                                        throw new Exception("[RestoreMetadataFromS3] fieldName duplication: " + fieldName + " - " + qd.FileName);
                                    }

                                    qd.FieldHeaders.Add(fieldName, index);
                                    index++;
                                }
                            }
                });

                Parallel.ForEach(GetParts(), new ParallelOptions {
                    MaxDegreeOfParallelism = 2
                }, p =>
                {
                    Logger.Write(_chunkId, LogMessageTypes.Info, "load part=" + p);
                    var part = new DatabaseChunkPart(_chunkId, _createPersonBuilder, p, 0);

                    LoadPart(part, p);

                    part.Build();

                    SavePart(part, p);
                });


                Logger.Write(_chunkId, LogMessageTypes.Info,
                             $"Loaded - {timer.ElapsedMilliseconds} ms | {(GC.GetTotalMemory(false) / 1024f) / 1024f} Mb");

                dbChunk.ChunkComplete(_chunkId, Settings.Settings.Current.Building.Id.Value);
            }
            catch (Exception e)
            {
                Logger.WriteError(_chunkId, e);

                throw;
            }
        }
Exemple #8
0
        /// <summary>
        /// Reads the chunk data of every applicable source query definition and passes each row to
        /// <c>PopulateData</c>, then logs the elapsed time and the managed memory in use.
        /// </summary>
        /// <remarks>
        /// Query definitions with <c>Providers</c>, <c>Locations</c> or <c>CareSites</c> set, and
        /// those that produce no SQL, are skipped. For a Redshift source the reader is requested
        /// without a connection or command; for any other source a connection and a command for the
        /// chunk-specific SQL are created first.
        /// </remarks>
        public void Load()
        {
            var timer = new Stopwatch();
            timer.Start();

            var building = Settings.Settings.Current.Building;

            foreach (var qd in building.SourceQueryDefinitions)
            {
                var skipDefinition = qd.Providers != null || qd.Locations != null || qd.CareSites != null;
                if (skipDefinition)
                {
                    continue;
                }

                var sql = GetSqlHelper.GetSql(building.SourceEngine.Database,
                                              qd.GetSql(building.Vendor, building.SourceSchemaName),
                                              building.SourceSchemaName);

                if (string.IsNullOrEmpty(sql))
                {
                    continue;
                }

                var engine = building.SourceEngine;

                if (engine.Database == Database.Redshift)
                {
                    // Redshift path: no connection and no command are supplied to the reader.
                    using (var reader = engine.ReadChunkData(null, null, qd, ChunkId, Prefix))
                    {
                        while (reader.Read())
                        {
                            PopulateData(qd, reader);
                        }
                    }

                    continue;
                }

                // Every other engine: run the SQL with the chunk id substituted into it.
                var chunkSql = string.Format(sql, ChunkId);

                using (var conn = engine.GetConnection(building.SourceConnectionString))
                {
                    using (var cdm = engine.GetCommand(chunkSql, conn))
                    {
                        using (var reader = engine.ReadChunkData(conn, cdm, qd, ChunkId, Prefix))
                        {
                            while (reader.Read())
                            {
                                PopulateData(qd, reader);
                            }
                        }
                    }
                }
            }

            timer.Stop();

            var elapsedMs = timer.ElapsedMilliseconds;
            var usedMb    = (GC.GetTotalMemory(false) / 1024f) / 1024f;

            Logger.Write(ChunkId, LogMessageTypes.Info,
                         string.Format(Prefix + ") loaded - {0} ms | {1} Mb", elapsedMs, usedMb));
        }
Exemple #9
0
        /// <summary>
        /// Unloads the source data of every chunk returned by <c>_dbChunk.GetNotMovedToS3Chunks</c>
        /// to S3 and records each chunk as created.
        /// </summary>
        /// <remarks>
        /// Does nothing when there are no such chunks. First, metadata is stored (via
        /// <c>StoreMetadataToS3</c>) for every applicable query definition whose <c>FieldHeaders</c>
        /// is null, using the SQL formatted with the first chunk id. Then, for each chunk and each
        /// applicable query definition, a <c>create table ... as</c> statement followed by an
        /// <c>UNLOAD</c> statement is executed over ODBC. Query definitions with <c>Providers</c>,
        /// <c>Locations</c> or <c>CareSites</c> set, and those that produce no SQL, are skipped.
        /// Errors of the unload step are logged with the chunk id and rethrown.
        /// </remarks>
        private void MoveChunkDataToS3()
        {
            var pendingChunkIds = _dbChunk.GetNotMovedToS3Chunks(Settings.Settings.Current.Building.Id.Value).ToArray();

            if (pendingChunkIds.Length == 0)
            {
                return;
            }

            var baseFolder =
                $"{Settings.Settings.Current.Bucket}/{Settings.Settings.Current.Building.Vendor}/{Settings.Settings.Current.Building.Id}/raw";

            Console.WriteLine("S3 raw folder - " + baseFolder);

            // Step 1: store metadata for the definitions that have no field headers yet.
            Parallel.ForEach(Settings.Settings.Current.Building.SourceQueryDefinitions, definition =>
            {
                if (definition.Providers != null || definition.Locations != null || definition.CareSites != null)
                {
                    return;
                }

                var metadataSql = GetSqlHelper.GetSql(Settings.Settings.Current.Building.SourceEngine.Database,
                                                      definition.GetSql(Settings.Settings.Current.Building.Vendor,
                                                                        Settings.Settings.Current.Building.SourceSchemaName),
                                                      Settings.Settings.Current.Building.SourceSchemaName);

                if (string.IsNullOrEmpty(metadataSql))
                {
                    return;
                }

                metadataSql = string.Format(metadataSql, pendingChunkIds[0]);

                if (definition.FieldHeaders == null)
                {
                    StoreMetadataToS3(definition, metadataSql);
                }
            });

            // Step 2: unload every chunk, at most two chunks and five definitions per chunk at a time.
            var chunkOptions = new ParallelOptions {
                MaxDegreeOfParallelism = 2
            };

            Parallel.ForEach(pendingChunkIds, chunkOptions, chunkId =>
            {
                var definitionOptions = new ParallelOptions {
                    MaxDegreeOfParallelism = 5
                };

                Parallel.ForEach(Settings.Settings.Current.Building.SourceQueryDefinitions, definitionOptions, definition =>
                {
                    try
                    {
                        if (definition.Providers != null || definition.Locations != null || definition.CareSites != null)
                        {
                            return;
                        }

                        var chunkSql = GetSqlHelper.GetSql(Settings.Settings.Current.Building.SourceEngine.Database,
                                                           definition.GetSql(Settings.Settings.Current.Building.Vendor,
                                                                             Settings.Settings.Current.Building.SourceSchemaName),
                                                           Settings.Settings.Current.Building.SourceSchemaName);

                        if (string.IsNullOrEmpty(chunkSql))
                        {
                            return;
                        }

                        chunkSql = string.Format(chunkSql, chunkId);

                        var personIdField = definition.GetPersonIdFieldName();
                        var tmpTableName  = "#" + definition.FileName + "_" + chunkId;

                        var targetFolder = $"{baseFolder}/{chunkId}/{definition.FileName}";
                        var targetFile   = $@"{targetFolder}/{definition.FileName}";

                        // {0} table, {1} person id field, {2} source query, {3} S3 target,
                        // {4} access key id, {5} secret access key.
                        var unloadTemplate = @"create table {0} sortkey ({1}) distkey ({1}) as {2}; " +
                                             @"UNLOAD ('select * from {0} order by {1}') to 's3://{3}' " +
                                             @"DELIMITER AS '\t' " +
                                             @"credentials 'aws_access_key_id={4};aws_secret_access_key={5}' " +
                                             @"GZIP ALLOWOVERWRITE PARALLEL ON";

                        var unloadQuery = string.Format(unloadTemplate,
                                                        tmpTableName,
                                                        personIdField,
                                                        chunkSql,
                                                        targetFile,
                                                        Settings.Settings.Current.S3AwsAccessKeyId,
                                                        Settings.Settings.Current.S3AwsSecretAccessKey);

                        using (var connection = SqlConnectionHelper.OpenOdbcConnection(Settings.Settings.Current.Building.SourceConnectionString))
                        {
                            using (var command = new OdbcCommand(unloadQuery, connection))
                            {
                                command.CommandTimeout = 999999999;
                                command.ExecuteNonQuery();
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        Logger.WriteError(chunkId, e);
                        throw;
                    }
                });

                _dbChunk.ChunkCreated(chunkId, Settings.Settings.Current.Building.Id.Value);
                Console.WriteLine("Raw data for chunkId=" + chunkId + " is available on S3");
            });
        }