Пример #1
0
        /// <summary>
        /// Reads up to <paramref name="amount"/> values of <paramref name="column"/> from the CSV file
        /// that backs the column's table. The file is located by combining the database connection
        /// string (a folder path) with the table's original name and the <c>.csv</c> extension.
        /// </summary>
        /// <param name="column">Column to sample; its <c>Ordinal</c> selects the field within each row.</param>
        /// <param name="amount">Maximum number of values to return. Zero or a negative number yields an empty list.</param>
        /// <returns>The sampled values, in file order. Rows that have no field at the column's ordinal are skipped.</returns>
        public override async Task<List<string>> GetDataSampleAsync(Column column, int amount = 100)
        {
            var table   = column.Table;
            var ordinal = column.Ordinal;

            // Clamp the capacity hint: a negative amount (or row count) would otherwise make the
            // List<T> constructor throw ArgumentOutOfRangeException before any file is touched.
            int capacity = Math.Max(0, Math.Min(amount, table.RowCount));
            List<string> values = new List<string>(capacity);

            // Count down on a local copy instead of mutating the captured parameter.
            int remaining = amount;

            await Task.Run(() =>
            {
                string csvPath = Path.Combine(table.Database.ConnectionString, $"{table.Originalname}.csv");

                using (var parser = new TextFieldParser(csvPath))
                {
                    parser.TextFieldType = FieldType.Delimited;
                    // NOTE(review): GetSchemaForConnection parses with _configuration.RowDelimiter,
                    // while this method always uses a comma - confirm whether both should agree.
                    parser.SetDelimiters(",");
                    parser.HasFieldsEnclosedInQuotes = true;

                    // Skip the header line once, up front. The header flag is only looked up when
                    // there is something to read and something was requested.
                    if (remaining > 0 && !parser.EndOfData && TableHasHeaders[table.GetFullName()])
                    {
                        parser.ReadLine();
                    }

                    while (remaining > 0 && !parser.EndOfData)
                    {
                        var fields = parser.ReadFields();

                        // A ragged row without this column has no value to sample; skip it
                        // rather than failing the whole request with an index error.
                        if (fields == null || ordinal < 0 || ordinal >= fields.Length)
                        {
                            continue;
                        }

                        values.Add(fields[ordinal]);
                        remaining--;
                    }
                }
            });

            return values;
        }
Пример #2
0
        /// <summary>
        /// Builds a schema model from the <c>*.csv</c> files found directly in the folder named by
        /// <paramref name="connectionstring"/>. Each file becomes one table; column data types are
        /// inferred by widening the types detected in the first <c>_configuration.RowsToExamine</c>
        /// data rows, and the remaining lines are only counted.
        /// </summary>
        /// <remarks>
        /// TODO: This method was originally noted as only working for simple CSV files (no commas or
        /// quotes inside values). Rows are now parsed by TextFieldParser with quoted-field support,
        /// but header detection (ContainsHeaders) is not visible here - confirm it handles quoted
        /// values before removing this note.
        /// </remarks>
        /// <param name="connectionstring">Path of the folder that contains the CSV files.</param>
        /// <returns>A <c>CommonModel</c> holding one table per CSV file.</returns>
        protected override CommonModel GetSchemaForConnection(string connectionstring)
        {
            List<Table> tables = new List<Table>();

            // Get all csv files in folder
            foreach (var filepath in Directory.GetFiles(connectionstring, "*.csv"))
            {
                var table = new Table(Path.GetFileNameWithoutExtension(filepath), "CSV", 0);
                table.Database = new Database(Path.GetDirectoryName(connectionstring), connectionstring, "CSV");

                // Find potential headers
                string[] names;
                bool     headersExists;
                ContainsHeaders(filepath, out headersExists, out names);

                // Assign through the indexer so that loading the same folder a second time
                // replaces the entry instead of throwing on a duplicate key.
                TableHasHeaders[table.GetFullName()] = headersExists;

                // Best datatypes found so far (null until at least one data row was examined)
                DataType[] currDataTypes = null;

                int examinedRows = 0; // data rows used for type inference
                int dataRows     = 0; // all data rows in the file (header excluded)

                using (TextFieldParser streamReader = new TextFieldParser(filepath))
                {
                    streamReader.TextFieldType = FieldType.Delimited;
                    streamReader.SetDelimiters(_configuration.RowDelimiter);
                    streamReader.HasFieldsEnclosedInQuotes = true;
                    streamReader.TrimWhiteSpace            = false;

                    // Skip the header up front so it neither counts against RowsToExamine
                    // nor ends up in the row count.
                    if (headersExists && !streamReader.EndOfData)
                    {
                        streamReader.ReadLine();
                    }

                    // Iterate through rows to examine
                    while (!streamReader.EndOfData && examinedRows < _configuration.RowsToExamine)
                    {
                        var columnValues = streamReader.ReadFields();

                        // Create data types for all values of this row
                        var rowDataTypes = new DataType[columnValues.Length];
                        for (int i = 0; i < columnValues.Length; i++)
                        {
                            rowDataTypes[i] = CreateDatatype(columnValues[i]);
                        }

                        // Update currDataTypes with the widened version of this row's datatypes
                        currDataTypes = Widen(rowDataTypes, currDataTypes);

                        examinedRows++;
                        dataRows++;
                    }

                    // Iterate through the rest to get the row count.
                    // NOTE(review): this counts raw lines, so a quoted value containing a line
                    // break would be counted as more than one row - confirm this is acceptable.
                    while (!streamReader.EndOfData)
                    {
                        streamReader.ReadLine();
                        dataRows++;
                    }

                    // If no rows are available, but the file has headers.
                    if (currDataTypes == null && headersExists && names != null)
                    {
                        currDataTypes = new DataType[names.Length];

                        // Generate varwchar columns for each column
                        for (var i = 0; i < currDataTypes.Length; i++)
                        {
                            currDataTypes[i] = new DataType(OleDbType.VarWChar);
                        }
                    }

                    // If any datatypes are found, then add columns for them
                    if (currDataTypes != null)
                    {
                        for (var i = 0; i < currDataTypes.Length; i++)
                        {
                            Column column = new Column(i, string.Empty);

                            // A data row may carry more fields than the header line; such
                            // surplus columns fall back to a generated name.
                            if (headersExists && names != null && i < names.Length)
                            {
                                column.AddNameCandidate(names[i], 1f);
                            }
                            else
                            {
                                column.AddNameCandidate($"Column_{i + 1}", 0.01f);
                            }

                            column.AddDatatypeCandidate(currDataTypes[i], 0.5f);
                            table.AddColumn(column);
                        }
                    }
                }

                // The header was never counted, so no correction is needed here.
                table.RowCount = dataRows;
                tables.Add(table);
            }

            return new CommonModel(tables);
        }