/// <summary>
/// Creates a CSV parser over the given stream, optionally restricted to a byte range
/// so one file can be split across multiple importing threads.
/// </summary>
/// <param name="tableName">Name of the destination table.</param>
/// <param name="streamReader">Reader over the CSV data; ownership stays with the caller.</param>
/// <param name="types">Column name to column type mapping; when null, types are guessed from the file (FileStream sources only).</param>
/// <param name="configuration">Parser configuration (column separator, encoding, etc.); when null, defaults are used.</param>
/// <param name="startBytePosition">Byte offset where reading starts.</param>
/// <param name="endBytePosition">Byte offset where reading stops; 0 means end of stream.</param>
/// <exception cref="ArgumentException">Thrown when no types were supplied and they cannot be guessed because the stream is not a FileStream.</exception>
public ParserCSV(string tableName, StreamReader streamReader, Dictionary<string, Type> types = null, Configuration configuration = null, long startBytePosition = 0, long endBytePosition = 0)
{
    this.configuration = configuration != null ? configuration : new Configuration();
    this.streamReader = streamReader;
    this.startBytePosition = startBytePosition;
    // endBytePosition == 0 is the sentinel for "read to the end of the stream".
    if (endBytePosition != 0)
    {
        this.endBytePosition = endBytePosition;
    }
    else
    {
        this.endBytePosition = this.streamReader.BaseStream.Length;
    }
    this.streamReader.BaseStream.Seek(startBytePosition, SeekOrigin.Begin);

    if (types != null)
    {
        this.types = types;
    }
    else if (streamReader.BaseStream is FileStream)
    {
        string file = (streamReader.BaseStream as FileStream).Name;
        // BUG FIX: guess with this.configuration (guaranteed non-null above), not the
        // 'configuration' parameter, which is null when the caller relied on defaults.
        this.types = GuessTypes(file, this.configuration.HasHeader, this.configuration.ColumnSeparator, this.configuration.Encoding);
    }

    if (this.types == null)
    {
        // BUG FIX: previously fell through to this.types.Keys.Count and crashed with an
        // uninformative NullReferenceException for non-file streams without explicit types.
        throw new ArgumentException("Column types were not supplied and cannot be guessed from a non-file stream.", nameof(types));
    }

    this.header = new string[this.types.Keys.Count];
    this.types.Keys.CopyTo(this.header, 0);

    this.tableData = new NetworkClient.ColumnarDataTable(tableName);
    foreach (var head in this.header)
    {
        tableData.AddColumn(head, this.types[head]);
    }

    dataParser = new CsvReader(this.streamReader).Parser;
    dataParser.Configuration.Delimiter = this.configuration.ColumnSeparator.ToString();
    dataParser.Configuration.BadDataFound = null;
    dataParser.Configuration.TrimOptions = CsvHelper.Configuration.TrimOptions.Trim;

    // Skip the header row at the beginning of the file; for a mid-file thread chunk,
    // skip the first (likely partial) line so parsing resumes at a record boundary.
    if (startBytePosition == 0 && this.configuration.HasHeader)
    {
        dataParser.Read();
    }
    else if (startBytePosition > 0)
    {
        dataParser.Read();
    }
}
/// <summary>
/// Parses a byte range of the CSV file in batches, bulk-imports each batch through the
/// shared client, and updates this thread's slot in the shared progress counters.
/// </summary>
/// <param name="threadId">Id of thread starting from 0.</param>
/// <param name="file">Name of the file with full path.</param>
/// <param name="configuration">Configuration for parser with specified parameters (column separator, encoding, etc.).</param>
/// <param name="types">Dictionary describing imported table with tuples (column name, column type).</param>
/// <param name="startBytePosition">Start reading position in file.</param>
/// <param name="endBytePosition">End reading position in file (0 = end of file).</param>
private void ParseAndImportBatch(int threadId, string file, ParserCSV.Configuration configuration, Dictionary<string, Type> types, long startBytePosition = 0, long endBytePosition = 0)
{
    // BUG FIX: the stream was only disposed on the success path (and the StreamReader
    // never); 'using' guarantees disposal even when parsing or importing throws.
    using (var stream = File.Open(file, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (var streamReader = new StreamReader(stream, configuration.Encoding))
    {
        ParserCSV parserCSV = new ParserCSV(streamReader: streamReader, tableName: tableName, configuration: configuration, types: types, startBytePosition: startBytePosition, endBytePosition: endBytePosition);
        long lines = 0;
        long errors = 0;
        while (true)
        {
            long batchImportedLines;
            long batchErrorLines;
            var outData = parserCSV.GetNextParsedDataBatch(out batchImportedLines, out batchErrorLines);
            if (outData == null)
            {
                break; // end of this thread's byte range
            }
            lines += batchImportedLines;
            errors += batchErrorLines;

            var colData = new NetworkClient.ColumnarDataTable(outData.GetColumnNames(), outData.GetColumnData(), outData.GetColumnTypes(), outData.GetColumnNames());
            colData.TableName = tableName;

            // Only one thread may use the client at a time; try/finally replaces the
            // duplicated release-on-catch / release-on-success pattern and cannot
            // leak the mutex on an exception.
            mutex.WaitOne();
            try
            {
                client.BulkImport(colData);
            }
            finally
            {
                mutex.ReleaseMutex();
            }

            linesImported[threadId] = lines;
            bytesImported[threadId] = parserCSV.GetStreamPosition();
            linesError[threadId] = errors;

            // Aggregate progress across all threads for the console status line.
            // (A per-thread error total was previously summed here too, but it was
            // never printed or used, so the dead computation was removed.)
            long totalLinesImported = 0;
            for (int i = 0; i < linesImported.Length; i++)
            {
                totalLinesImported += linesImported[i];
            }
            long totalBytesImported = 0;
            for (int i = 0; i < bytesImported.Length; i++)
            {
                totalBytesImported += bytesImported[i];
            }
            Console.Write("\rImported " + totalLinesImported + " records so far (" + Math.Min(Math.Round((float)totalBytesImported / streamLength * 100), 100) + "%)...");
        }
    }
}