/// <summary>
/// Builds the table schema from the header line of a separated-values file.
/// </summary>
/// <param name="fileName">Path of the file whose first usable line defines the columns.</param>
/// <param name="separator">Text that separates the fields within a line.</param>
/// <param name="hasHeader">
/// When true, column names are produced by passing each header field through
/// <c>SeparatedValuesHelper.MakeHeaderNameValidColumnName</c>; otherwise names are
/// generated from <c>SeparatedValuesHelper.AutoColumnName</c> and the 1-based column position.
/// </param>
/// <param name="skipLines">Number of leading lines that must not be considered as the header line.</param>
/// <remarks>
/// The header line is the first non-empty line whose zero-based index is at least
/// <paramref name="skipLines"/>. If the file ends before such a line is found,
/// <c>Columns</c> is set to an empty array instead of being derived from a skipped or blank line.
/// All columns are typed as <see cref="string"/>.
/// </remarks>
public SeparatedValuesTable(string fileName, string separator, bool hasHeader, int skipLines)
{
    var file = new FileInfo(fileName);
    string headerLine = null;

    using (var reader = new StreamReader(file.OpenRead()))
    {
        // currentLine is the zero-based index of the line being examined;
        // blank lines are counted too, exactly as skipped lines are.
        var currentLine = 0;

        while (!reader.EndOfStream)
        {
            var line = reader.ReadLine();

            if (!string.IsNullOrEmpty(line) && currentLine >= skipLines)
            {
                headerLine = line;
                break;
            }

            currentLine += 1;
        }
    }

    if (headerLine == null)
    {
        // The stream ended while still skipping: there is nothing to build a schema from.
        Columns = new ISchemaColumn[0];
        return;
    }

    var columns = headerLine.Split(new[] { separator }, StringSplitOptions.None);

    if (hasHeader)
    {
        Columns = columns
            .Select((header, i) => (ISchemaColumn) new SchemaColumn(SeparatedValuesHelper.MakeHeaderNameValidColumnName(header), i, typeof(string)))
            .ToArray();
    }
    else
    {
        // Only the number of fields matters here; names are auto-generated.
        Columns = columns
            .Select((f, i) => (ISchemaColumn) new SchemaColumn(string.Format(SeparatedValuesHelper.AutoColumnName, i + 1), i, typeof(string)))
            .ToArray();
    }
}
/// <summary>
/// Reads a separated-values file and publishes its rows to <paramref name="chunkedSource"/> in chunks.
/// </summary>
/// <param name="csvFile">Describes the file to read: its path, separator and whether it has a header row.</param>
/// <param name="chunkedSource">Collection that receives each chunk of resolved rows.</param>
/// <remarks>
/// The file is opened twice: once to read a single record that defines the column names,
/// and once to stream the rows. A file that does not exist, or that has no record left after
/// the skipped lines, produces a single empty chunk.
/// Chunk sizes are 1000, 1000, 900, 800, ... down to 100 rows, and stay at 100 afterwards.
/// </remarks>
private void ProcessFile(SeparatedValueFile csvFile, BlockingCollection<IReadOnlyList<DataSources.IObjectResolver>> chunkedSource)
{
    var file = new FileInfo(csvFile.FilePath);

    if (!file.Exists)
    {
        chunkedSource.Add(new List<EntityResolver<object[]>>());
        return;
    }

    var nameToIndexMap = new Dictionary<string, int>();
    var indexToMethodAccess = new Dictionary<int, Func<object[], object>>();
    var indexToNameMap = new Dictionary<int, string>();
    var endWorkToken = _context.EndWorkToken;

    // First pass: read one record to learn the column names (or just the column count).
    using (var stream = CreateStreamFromFile(file))
    {
        using (var reader = new StreamReader(stream, Encoding.UTF8))
        {
            SkipLines(reader, csvFile);

            using (var csvReader = new CsvReader(reader))
            {
                csvReader.Configuration.Delimiter = csvFile.Separator;

                var hasRecord = csvReader.Read();
                var header = csvReader.Context.Record;

                if (!hasRecord || header == null)
                {
                    // Nothing to read after the skipped lines: behave like a missing file
                    // instead of dereferencing a null record.
                    chunkedSource.Add(new List<EntityResolver<object[]>>(), endWorkToken);
                    return;
                }

                for (var i = 0; i < header.Length; ++i)
                {
                    var headerName = csvFile.HasHeader
                        ? SeparatedValuesHelper.MakeHeaderNameValidColumnName(header[i])
                        : string.Format(SeparatedValuesHelper.AutoColumnName, i + 1);

                    nameToIndexMap.Add(headerName, i);
                    indexToNameMap.Add(i, headerName);

                    // Copy the loop variable so each accessor captures its own index.
                    var columnIndex = i;
                    indexToMethodAccess.Add(i, row => row[columnIndex]);
                }
            }
        }
    }

    // Second pass: stream the rows and hand them over in chunks.
    using (var stream = CreateStreamFromFile(file))
    {
        using (var reader = new StreamReader(stream))
        {
            SkipLines(reader, csvFile);

            using (var csvReader = new CsvReader(reader))
            {
                // Bad data is deliberately ignored while reading rows.
                csvReader.Configuration.BadDataFound = context => { };
                csvReader.Configuration.Delimiter = csvFile.Separator;

                const int rowsToReadBase = 100;
                var rowsInChunk = 1;
                var sizeFactor = 11;
                var rowsToRead = 1000;
                var list = new List<EntityResolver<object[]>>(100);

                if (csvFile.HasHeader)
                {
                    csvReader.Read(); //skip header.
                }

                while (csvReader.Read())
                {
                    var rawRow = csvReader.Context.Record;
                    list.Add(new EntityResolver<object[]>(ParseRecords(rawRow, indexToNameMap), nameToIndexMap, indexToMethodAccess));

                    if (rowsInChunk < rowsToRead)
                    {
                        rowsInChunk += 1;
                        continue;
                    }

                    // Chunk is full: shrink the next chunk size (never below the base) and publish.
                    rowsInChunk = 1;

                    if (sizeFactor > 1)
                    {
                        sizeFactor -= 1;
                    }

                    rowsToRead = rowsToReadBase * sizeFactor;
                    chunkedSource.Add(list, endWorkToken);
                    list = new List<EntityResolver<object[]>>(rowsToRead);
                }

                // Publish whatever is left (possibly an empty list).
                chunkedSource.Add(list, endWorkToken);
            }
        }
    }
}