/// <summary>
/// Reads the table metadata of the keyspace named in the container's connection string and
/// returns one <c>DataCollection</c> per table together with the data entities produced
/// for each table by the per-table <c>GetSchema</c> overload.
/// </summary>
/// <param name="container">Container whose <c>ConnectionString</c> supplies the contact point address and keyspace.</param>
/// <returns>A tuple of (collections, entities) covering every table of the keyspace.</returns>
/// <exception cref="InvalidOperationException">The keyspace is not known to the cluster.</exception>
public override (List<DataCollection>, List<DataEntity>) GetSchema(DataContainer container)
{
    var connection = CassandraConnectionString.Parse(container.ConnectionString);

    using (var cluster = Cluster.Builder().AddContactPoint(connection.Address).Build())
    {
        // The driver reports an unknown keyspace as null; fail with a clear message
        // instead of a NullReferenceException on the chained call.
        var keyspace = cluster.Metadata.GetKeyspace(connection.Keyspace);
        if (keyspace == null)
        {
            throw new InvalidOperationException(
                $"Keyspace '{connection.Keyspace}' was not found on the Cassandra cluster at '{connection.Address}'.");
        }

        // Materialize once: the table list is enumerated twice below.
        var tables = keyspace.GetTablesMetadata().ToList();

        var dataEntities = tables
            .SelectMany(t => GetSchema(connection.Keyspace, t.Name, cluster, container))
            .ToList();
        var dataCollections = tables
            .Select(t => new DataCollection(container, t.Name))
            .ToList();

        return (dataCollections, dataEntities);
    }
}
/// <summary>
/// Probes whether a session can be opened against the contact point named in the
/// container's connection string.
/// </summary>
/// <param name="container">Container whose <c>ConnectionString</c> supplies the contact point address.</param>
/// <returns>
/// <c>true</c> when the cluster could be built and a session opened; <c>false</c> when any
/// step throws, including parsing of the connection string.
/// </returns>
public override bool TestConnection(DataContainer container)
{
    try
    {
        // Parsing happens inside the try so that an unusable connection string is
        // reported as a failed probe rather than as an exception.
        var connection = CassandraConnectionString.Parse(container.ConnectionString);

        using (var cluster = Cluster.Builder().AddContactPoint(connection.Address).Build())
        using (cluster.Connect())
        {
            // Opening the session is the whole test; it is disposed immediately.
            return true;
        }
    }
    catch (Exception)
    {
        return false;
    }
}
/// <summary>
/// Collects a row count for every table that <c>system_schema.tables</c> lists for the
/// keyspace named in the container's connection string.
/// </summary>
/// <param name="container">Container whose <c>ConnectionString</c> supplies the contact point address and keyspace.</param>
/// <returns>
/// One <c>DataCollectionMetrics</c> per table counted. Collection is best-effort: if any
/// query fails, the failure is traced and the metrics gathered up to that point are returned.
/// </returns>
public override List<DataCollectionMetrics> GetDataCollectionMetrics(DataContainer container)
{
    var dataCollectionMetrics = new List<DataCollectionMetrics>();
    var connection = CassandraConnectionString.Parse(container.ConnectionString);

    // CQL identifiers cannot be bound as parameters, so they are double-quoted with embedded
    // quotes doubled. Quoting also keeps case-sensitive names working; both names used below
    // are the exact stored names (they matched / came from system_schema.tables).
    string Quote(string identifier) => "\"" + identifier.Replace("\"", "\"\"") + "\"";

    try
    {
        using (var cluster = Cluster.Builder().AddContactPoint(connection.Address).Build())
        using (var session = cluster.Connect())
        {
            // Bind the keyspace as a value instead of splicing it into the CQL text,
            // and fetch only the column that is actually read.
            var tablesStatement = new SimpleStatement(
                "select table_name from system_schema.tables where keyspace_name = ?",
                connection.Keyspace);

            foreach (Row row in session.Execute(tablesStatement).GetRows())
            {
                var tableName = row.GetValue<string>("table_name");
                var countQuery = $"select count(*) from {Quote(connection.Keyspace)}.{Quote(tableName)}";
                var countRow = session.Execute(countQuery).First();

                dataCollectionMetrics.Add(new DataCollectionMetrics
                {
                    Name = tableName,
                    RowCount = countRow.GetValue<long>(0)
                });
                //TODO: what about space usage?
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort by design: keep whatever was collected, but leave a trace of the failure.
        System.Diagnostics.Trace.TraceWarning(
            $"Cassandra metrics collection for keyspace '{connection.Keyspace}' stopped early: {ex}");
    }

    return dataCollectionMetrics;
}
/// <summary>
/// Draws a random sample of string values from one column of a Cassandra table.
/// </summary>
/// <remarks>
/// The table is counted first to derive a per-value selection probability of
/// <c>0.1 + sampleSize / totalRows</c>; the column is then read row by row and each value
/// is kept with that probability until <paramref name="sampleSize"/> values are collected.
/// List columns contribute their items, map columns contribute their keys, and scalar
/// columns contribute the value converted with <c>ToString()</c>. Null cells are skipped.
/// Sampling is best-effort: if a query or conversion fails, the failure is traced and the
/// values gathered so far are returned.
/// </remarks>
/// <param name="dataEntity">Column to sample; its container supplies the connection string and its collection the table name.</param>
/// <param name="sampleSize">Maximum number of values to return. Non-positive values yield an empty list.</param>
/// <returns>Up to <paramref name="sampleSize"/> sampled values.</returns>
public override List<string> CollectSample(DataEntity dataEntity, int sampleSize)
{
    var result = new List<string>();
    var connection = CassandraConnectionString.Parse(dataEntity.Container.ConnectionString);

    // Nothing to sample; also avoids returning one value because the size check runs after the add.
    if (sampleSize <= 0)
    {
        return result;
    }

    try
    {
        using (var cluster = Cluster.Builder().AddContactPoint(connection.Address).Build())
        using (var session = cluster.Connect())
        {
            session.Execute($"USE {connection.Keyspace}");

            var countRow = session.Execute($"SELECT COUNT(*) FROM {dataEntity.Collection.Name}").GetRows().First();
            long totalRows = (long)countRow[0];

            // The 0.1 head-room makes it likely that enough values are picked in a single pass.
            double pct = 0.1 + (double)sampleSize / (totalRows <= 0 ? sampleSize : totalRows);
            var random = new Random();

            // Loop-invariant; object.Equals is null-safe should DbDataType be unset.
            bool isList = object.Equals(dataEntity.DbDataType, ColumnTypeCode.List.ToString());
            bool isMap = object.Equals(dataEntity.DbDataType, ColumnTypeCode.Map.ToString());

            //TODO: support UDT types
            var rows = session.Execute($"SELECT {dataEntity.Name} FROM {dataEntity.Collection.Name}").GetRows();

            foreach (Row row in rows)
            {
                // A null cell would otherwise throw during conversion or enumeration
                // and silently end the whole sample.
                if (row.IsNull(dataEntity.Name))
                {
                    continue;
                }

                if (isList)
                {
                    var items = row.GetValue<List<string>>(dataEntity.Name);
                    foreach (var item in items)
                    {
                        if (random.NextDouble() < pct)
                        {
                            result.Add(item);
                        }

                        if (result.Count >= sampleSize)
                        {
                            break;
                        }
                    }
                }
                else if (isMap)
                {
                    //TODO: only map<string,int> type supported????
                    var entries = row.GetValue<IDictionary<string, int>>(dataEntity.Name);
                    foreach (var entry in entries)
                    {
                        if (random.NextDouble() < pct)
                        {
                            result.Add(entry.Key);
                        }

                        if (result.Count >= sampleSize)
                        {
                            break;
                        }
                    }
                }
                else if (random.NextDouble() < pct)
                {
                    switch (dataEntity.DataType)
                    {
                        case DataType.String:
                            result.Add(row.GetValue<string>(dataEntity.Name));
                            break;
                        case DataType.Int:
                            result.Add(row.GetValue<int>(dataEntity.Name).ToString());
                            break;
                        case DataType.Double:
                            result.Add(row.GetValue<double>(dataEntity.Name).ToString());
                            break;
                        case DataType.DateTime:
                            result.Add(row.GetValue<DateTime>(dataEntity.Name).ToString());
                            break;
                        default:
                            result.Add(row.GetValue<object>(dataEntity.Name).ToString());
                            break;
                    }
                }

                if (result.Count >= sampleSize)
                {
                    break;
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort by design: return what was sampled, but leave a trace of the failure.
        System.Diagnostics.Trace.TraceWarning(
            $"Cassandra sampling of '{dataEntity.Name}' stopped early: {ex}");
    }

    return result;
}