示例#1
0
        /// <summary>
        ///     Gets schema from a data table, but parses the data to determine what datatype is best fit.
        /// </summary>
        /// <param name="outputTableName">The output table name e.g. "CREATE TABLE [NAME] (..."</param>
        /// <param name="data">Data to analyze</param>
        /// <returns></returns>
        public static string FromDataTable_Smart(string outputTableName, DataTable data)
        {
            var sqlTable = new SqlTableDefinition
            {
                TableName = outputTableName
            };


            //loop through each column
            for (var colIndex = 0; colIndex < data.Columns.Count; colIndex++)
            {
                var rowVals = new HashSet <string>(StringComparer.OrdinalIgnoreCase);

                //collect row values into hashset
                for (var rowIndex = 0; rowIndex < data.Rows.Count; rowIndex++)
                {
                    rowVals.Add(data.Rows[rowIndex][colIndex].ToString());
                }

                sqlTable.ColumnDefinitions.Add(CreateTableSqlInternal.GetBestFitSqlColumnType(rowVals, data.Columns[colIndex].ColumnName));
            }

            return(CreateTableSqlInternal.FromSqlTableDefinition(sqlTable));
        }
示例#2
0
        public static string FromDataReader_Smart(string outputTableName, IEnumerable <Func <DataReaderInfo> > dataReaders,
                                                  int?numberOfRowsToExamine = null)
        {
            //maps column names to hashset of unique values present for those columns
            var uniqueValList = new ConcurrentDictionary <string, ConcurrentHashSet <string> >();

            //we want the field names because these are our output table sql's column names

            //this is not thread safe
            var masterListOfFieldNames = new ConcurrentHashSet <string>();

            //fit the data types from the data readers' data
            var result = Parallel.ForEach(dataReaders, new ParallelOptions {
                MaxDegreeOfParallelism = 12
            },
                                          dataReaderInfoFac =>
            {
                //get column names and setup hash dictionaries
                var numRows = 0;

                var drInfo = dataReaderInfoFac();

                using var dataReader = drInfo.DataReader;

                var readerFieldNames = dataReader.GetFieldNames();

                //if there was a problem we may have to read to read first to initialize in order to get fields
                var tryAgain = (readerFieldNames == null) || (readerFieldNames.Length == 0);
                if (tryAgain)
                {
                    dataReader.Read();
                    readerFieldNames = dataReader.GetFieldNames();
                    numRows++;
                }

                if ((readerFieldNames == null) || (readerFieldNames.Length == 0))
                {
                    throw new Exception($"Could not get field names for file {drInfo.FilePath}");
                }

                //unfortunately this duplicated code is here for a reason.. we want to read to initialize the reader, but we also want to capture the first row if we already read. Please refactor though
                foreach (var readerFieldName in readerFieldNames)
                {
                    if (!uniqueValList.ContainsKey(readerFieldName))
                    {
                        uniqueValList.AddOrUpdate(
                            readerFieldName,     //our key for the dictionary
                            new ConcurrentHashSet <string>(),
                            //if it doesn't exist create a new concurrent dictionary
                            (field, uniqueVals) =>     //otherwise we want to return the new updated value
                            uniqueVals);
                    }
                    ;

                    if (masterListOfFieldNames.Contains(readerFieldName) == false)
                    {
                        masterListOfFieldNames.Add(readerFieldName);
                    }
                }

                //we tried again and got the field names so we can go ahead and add the values that are currently on record
                if (tryAgain)
                {
                    foreach (var col in readerFieldNames)
                    {
                        var val = dataReader[col]?.ToString();
                        var uvl = uniqueValList[col];
                        uvl.Add(val);
                    }
                }

                //collect unique row values by column
                while (dataReader.Read() && ((numRows < numberOfRowsToExamine) || (numberOfRowsToExamine == null)))
                {
                    foreach (var col in readerFieldNames)
                    {
                        var val = dataReader[col]?.ToString();
                        var uvl = uniqueValList[col];
                        uvl.Add(val);
                    }
                    numRows++;
                }
            });

            var sqlTable = new SqlTableDefinition
            {
                TableName = outputTableName
            };

            foreach (var col in masterListOfFieldNames.ToArray())
            {
                sqlTable.ColumnDefinitions.Add(CreateTableSqlInternal.GetBestFitSqlColumnType(uniqueValList[col].Hashset, col));
            }

            return(CreateTableSqlInternal.FromSqlTableDefinition(sqlTable));
        }