/// <summary>
/// Reads up to <paramref name="amount"/> values of <paramref name="column"/> from the CSV file
/// that backs the column's table (<c>{Originalname}.csv</c> inside the database's connection string folder).
/// </summary>
/// <param name="column">Column to sample; its <c>Ordinal</c> selects the field within each parsed row.</param>
/// <param name="amount">Maximum number of values to return. Zero or a negative number yields an empty list.</param>
/// <returns>
/// The sampled values in file order. The first line is skipped when the table is registered as having headers,
/// and rows that have no field at the column's ordinal are skipped.
/// </returns>
public override async Task<List<string>> GetDataSampleAsync(Column column, int amount = 100)
{
    var table = column.Table;

    // Clamp the capacity hint: a negative amount or row count would make the List constructor throw.
    var values = new List<string>(Math.Max(0, Math.Min(amount, table.RowCount)));

    // Count down on a local instead of mutating the captured parameter inside the lambda.
    var remaining = amount;

    await Task.Run(() =>
    {
        var csvPath = Path.Combine(table.Database.ConnectionString, $"{table.Originalname}.csv");

        using (var parser = new TextFieldParser(csvPath))
        {
            parser.TextFieldType = FieldType.Delimited;

            // Use the configured delimiter so sampling splits rows exactly like schema inference does;
            // a hard-coded "," returns wrong fields for files that use another delimiter.
            parser.SetDelimiters(_configuration.RowDelimiter);
            parser.HasFieldsEnclosedInQuotes = true;

            // NOTE(review): schema inference sets TrimWhiteSpace = false while this parser keeps the
            // default - confirm whether sampled values are meant to be trimmed.
            while (!parser.EndOfData && remaining > 0)
            {
                // The header flag is only looked up while the parser is still on the first line.
                if (parser.LineNumber == 1 && TableHasHeaders[table.GetFullName()])
                {
                    parser.ReadLine();
                    continue;
                }

                var fields = parser.ReadFields();

                // Ragged row: there is no field at this ordinal, so skip it instead of
                // failing with an IndexOutOfRangeException.
                if (fields == null || column.Ordinal >= fields.Length)
                {
                    continue;
                }

                values.Add(fields[column.Ordinal]);
                remaining--;
            }
        }
    });

    return values;
}
// TODO: Only works for simple CSV files. If commas or quotes are used inside column values, it crashes.
// NOTE(review): rows are read through TextFieldParser with quoted-field support below, so this TODO
// may be outdated - confirm (ContainsHeaders is not visible from here).
/// <summary>
/// Builds a schema model containing one table per <c>*.csv</c> file found in the given folder.
/// Column data types are inferred from at most <c>_configuration.RowsToExamine</c> lines of each file;
/// the remaining lines are only counted.
/// </summary>
/// <param name="connectionstring">Path of the folder that is scanned for CSV files.</param>
/// <returns>A <c>CommonModel</c> holding the discovered tables.</returns>
protected override CommonModel GetSchemaForConnection(string connectionstring)
{
    var tables = new List<Table>();

    // Every csv file in the folder becomes one table.
    foreach (var filepath in Directory.GetFiles(connectionstring, "*.csv"))
    {
        var table = new Table(Path.GetFileNameWithoutExtension(filepath), "CSV", 0);
        table.Database = new Database(Path.GetDirectoryName(connectionstring), connectionstring, "CSV");

        // Find potential headers.
        string[] names;
        bool headersExists;
        ContainsHeaders(filepath, out headersExists, out names);

        // Indexer assignment instead of Add: extracting the schema for the same folder a second
        // time would otherwise throw on the already registered key.
        TableHasHeaders[table.GetFullName()] = headersExists;

        // Widest data types seen so far, one entry per column.
        DataType[] currDataTypes = null;
        int currentRow = 0;

        using (var parser = new TextFieldParser(filepath))
        {
            parser.TextFieldType = FieldType.Delimited;
            parser.SetDelimiters(_configuration.RowDelimiter);
            parser.HasFieldsEnclosedInQuotes = true;
            parser.TrimWhiteSpace = false;

            // Examine rows until the configured limit is reached.
            while (!parser.EndOfData && currentRow < _configuration.RowsToExamine)
            {
                // If headers exist, the first line carries no data.
                if (headersExists && currentRow == 0)
                {
                    parser.ReadLine();
                    currentRow++;
                    continue;
                }

                var columnValues = parser.ReadFields();

                // Derive a data type for every value of this row.
                var rowDataTypes = new DataType[columnValues.Length];
                for (int i = 0; i < columnValues.Length; i++)
                {
                    rowDataTypes[i] = CreateDatatype(columnValues[i]);
                }

                // Merge this row's types into the running result.
                currDataTypes = Widen(rowDataTypes, currDataTypes);
                currentRow++;
            }

            // Read through the rest of the file only to get the row count.
            while (!parser.EndOfData)
            {
                parser.ReadLine();
                currentRow++;
            }
        }

        // No data rows, but the file has headers: generate a VarWChar column per header.
        if (currDataTypes == null && headersExists && names != null)
        {
            currDataTypes = new DataType[names.Length];
            for (var i = 0; i < currDataTypes.Length; i++)
            {
                currDataTypes[i] = new DataType(OleDbType.VarWChar);
            }
        }

        // If any data types were found, add a column for each of them.
        if (currDataTypes != null)
        {
            for (var i = 0; i < currDataTypes.Length; i++)
            {
                var column = new Column(i, string.Empty);

                // A data row can be wider than the header row; columns beyond the last header
                // get a generated name instead of indexing past the end of names.
                if (headersExists && names != null && i < names.Length)
                {
                    column.AddNameCandidate(names[i], 1f);
                }
                else
                {
                    column.AddNameCandidate($"Column_{i + 1}", 0.01f);
                }

                column.AddDatatypeCandidate(currDataTypes[i], 0.5f);
                table.AddColumn(column);
            }
        }

        // The header line is not a data row, so leave it out of the row count.
        table.RowCount = headersExists ? currentRow - 1 : currentRow;
        tables.Add(table);
    }

    return new CommonModel(tables);
}