/// <summary>
/// Creates a new <see cref="ColumnSchema"/> populated from this instance.
/// </summary>
/// <returns>
/// A separate <see cref="ColumnSchema"/> object whose ColumnName, DataType,
/// DataTypeName, ColumnSize, NumericPrecision, NumericScale and AllowNull
/// values are copied from this instance.
/// </returns>
public ColumnSchema Clone()
{
    // Inside the initializer the left-hand names bind to the new object,
    // while the right-hand names are read from the current instance.
    return new ColumnSchema
    {
        ColumnName = ColumnName,
        DataType = DataType,
        DataTypeName = DataTypeName,
        ColumnSize = ColumnSize,
        NumericPrecision = NumericPrecision,
        NumericScale = NumericScale,
        AllowNull = AllowNull
    };
}
/// <summary>
/// Opens a Parquet.NET reader over the underlying stream and builds the
/// column schema from the data fields declared in the parquet file.
/// </summary>
protected override void OnBeginReading()
{
    // Use Parquet.NET to do the heavy reading.
    _pqReader = new Parquet.ParquetReader(_reader.BaseStream);
    _pqDataFields = _pqReader.Schema.GetDataFields();

    // Describe each parquet data field as a ColumnSchema entry, in field order.
    for (int i = 0; i < _pqDataFields.Length; i++)
    {
        var field = _pqDataFields[i];

        // NOTE(review): only DataTypeName is populated here; DataType is left at
        // its default value. Size, precision and scale are all set to -1
        // (presumably meaning "unknown" - confirm against the schema consumers).
        var columnSchema = new ColumnSchema()
        {
            ColumnName = field.Name,
            DataTypeName = field.DataType.ToString(),
            ColumnSize = -1,
            NumericPrecision = -1,
            NumericScale = -1,
            AllowNull = field.HasNulls
        };

        _columnSchema.Add(columnSchema);
    }
}
/// <summary>
/// Reads the first line of the input and derives the column schema from it:
/// one string-typed column per regex match found on that line.
/// </summary>
/// <remarks>
/// When a header is present the column name is taken from capture group 2 of
/// each match; otherwise the zero-based column ordinal is used as the name.
/// If the input has no first line, no columns are added.
/// </remarks>
protected override void OnBeginReading()
{
    // Even if no header is set, we still need to know how many columns there are.
    _firstLine = _reader.ReadLine();

    // Presumably ReadLine returns null at end of input (as TextReader.ReadLine does).
    // Regex.Matches throws on a null input, so treat an empty source as having
    // no columns instead of failing here.
    if (_firstLine == null)
    {
        return;
    }

    var matches = Regex.Matches(_firstLine, _regexParseExpression, RegexOptions.IgnoreCase | RegexOptions.Multiline);

    // For each of the columns extracted from the first row
    for (var i = 0; i < matches.Count; i++)
    {
        // Even though we only support the string data type, we must generate a SchemaTable
        // to adhere to the IDataReader standard (and required by importers).
        var columnSchema = new ColumnSchema()
        {
            ColumnName = _header ? matches[i].Groups[2].Value : i.ToString(), // if no header, use the column ordinal
            ColumnSize = -1,
            DataType = typeof(System.String),
            DataTypeName = "STRING",
            AllowNull = true
        };

        _columnSchema.Add(columnSchema);
    }
}