public static string FromDataTable(string tableName, DataTable schema, int[] primaryKeys = null) { var sqlTable = new SqlTableDefinition { TableName = tableName }; // columns foreach (DataRow column in schema.Rows) { var sqlCol = new SqlColumnDefinition(); if (!(schema.Columns.Contains("IsHidden") && (bool)column["IsHidden"])) { sqlCol.ColumnName = column["ColumnName"].ToString(); sqlCol.DataType = CreateTableSqlInternal.SqlGetType(column); if (schema.Columns.Contains("AllowDBNull") && ((bool)column["AllowDBNull"] == false)) { sqlCol.IsNullable = false; } } sqlTable.ColumnDefinitions.Add(sqlCol); } // primary keys var hasKeys = (primaryKeys != null) && (primaryKeys.Length > 0); if (hasKeys) { // user defined keys foreach (var key in primaryKeys) { sqlTable.PrimaryKeyColumnNames.Add(schema.Rows[key]["ColumnName"].ToString()); } } else { // check schema for keys sqlTable.PrimaryKeyColumnNames.AddRange(schema.GetPrimaryKeys()); hasKeys = sqlTable.PrimaryKeyColumnNames.Count > 0; } return(CreateTableSqlInternal.FromSqlTableDefinition(sqlTable)); }
public static string FromSqlTableDefinition(SqlTableDefinition schema) { var sql = "CREATE TABLE [" + schema.TableName + "] (\n"; // columns foreach (var column in schema.ColumnDefinitions) { sql += "\t[" + column.ColumnName + "] " + column.DataType; if (column.IsNullable == false) { sql += " NOT NULL"; } sql += ",\n"; } sql = sql.TrimEnd(',', '\n') + "\n"; // primary keys var pk = ", CONSTRAINT PK_" + schema.TableName + " PRIMARY KEY CLUSTERED ("; var hasKeys = (schema.PrimaryKeyColumnNames?.Count ?? 0) > 0; if (hasKeys) { pk = schema.PrimaryKeyColumnNames.Aggregate(pk, (current, key) => current + key + ", "); } pk = pk.TrimEnd(',', ' ', '\n') + ")\n"; if (hasKeys) { sql += pk; } sql += ")"; return(sql); }
/// <summary> /// Gets schema from a data table, but parses the data to determine what datatype is best fit. /// </summary> /// <param name="outputTableName">The output table name e.g. "CREATE TABLE [NAME] (..."</param> /// <param name="data">Data to analyze</param> /// <returns></returns> public static string FromDataTable_Smart(string outputTableName, DataTable data) { var sqlTable = new SqlTableDefinition { TableName = outputTableName }; //loop through each column for (var colIndex = 0; colIndex < data.Columns.Count; colIndex++) { var rowVals = new HashSet <string>(StringComparer.OrdinalIgnoreCase); //collect row values into hashset for (var rowIndex = 0; rowIndex < data.Rows.Count; rowIndex++) { rowVals.Add(data.Rows[rowIndex][colIndex].ToString()); } sqlTable.ColumnDefinitions.Add(CreateTableSqlInternal.GetBestFitSqlColumnType(rowVals, data.Columns[colIndex].ColumnName)); } return(CreateTableSqlInternal.FromSqlTableDefinition(sqlTable)); }
public static string FromDataReader_Smart(string outputTableName, IEnumerable <Func <DataReaderInfo> > dataReaders, int?numberOfRowsToExamine = null) { //maps column names to hashset of unique values present for those columns var uniqueValList = new ConcurrentDictionary <string, ConcurrentHashSet <string> >(); //we want the field names because these are our output table sql's column names //this is not thread safe var masterListOfFieldNames = new ConcurrentHashSet <string>(); //fit the data types from the data readers' data var result = Parallel.ForEach(dataReaders, new ParallelOptions { MaxDegreeOfParallelism = 12 }, dataReaderInfoFac => { //get column names and setup hash dictionaries var numRows = 0; var drInfo = dataReaderInfoFac(); using var dataReader = drInfo.DataReader; var readerFieldNames = dataReader.GetFieldNames(); //if there was a problem we may have to read to read first to initialize in order to get fields var tryAgain = (readerFieldNames == null) || (readerFieldNames.Length == 0); if (tryAgain) { dataReader.Read(); readerFieldNames = dataReader.GetFieldNames(); numRows++; } if ((readerFieldNames == null) || (readerFieldNames.Length == 0)) { throw new Exception($"Could not get field names for file {drInfo.FilePath}"); } //unfortunately this duplicated code is here for a reason.. we want to read to initialize the reader, but we also want to capture the first row if we already read. Please refactor though foreach (var readerFieldName in readerFieldNames) { if (!uniqueValList.ContainsKey(readerFieldName)) { uniqueValList.AddOrUpdate( readerFieldName, //our key for the dictionary new ConcurrentHashSet <string>(), //if it doesn't exist create a new concurrent dictionary (field, uniqueVals) => //otherwise we want to return the new updated value uniqueVals); } ; if (masterListOfFieldNames.Contains(readerFieldName) == false) { masterListOfFieldNames.Add(readerFieldName); } } //we tried again and got the field names so we can go ahead and add the values that are currently on record if (tryAgain) { foreach (var col in readerFieldNames) { var val = dataReader[col]?.ToString(); var uvl = uniqueValList[col]; uvl.Add(val); } } //collect unique row values by column while (dataReader.Read() && ((numRows < numberOfRowsToExamine) || (numberOfRowsToExamine == null))) { foreach (var col in readerFieldNames) { var val = dataReader[col]?.ToString(); var uvl = uniqueValList[col]; uvl.Add(val); } numRows++; } }); var sqlTable = new SqlTableDefinition { TableName = outputTableName }; foreach (var col in masterListOfFieldNames.ToArray()) { sqlTable.ColumnDefinitions.Add(CreateTableSqlInternal.GetBestFitSqlColumnType(uniqueValList[col].Hashset, col)); } return(CreateTableSqlInternal.FromSqlTableDefinition(sqlTable)); }