/// <summary>
/// Imports a CSV file into the database, parsing and bulk-importing it with multiple threads.
/// </summary>
/// <param name="path">Path to csv file that should be imported</param>
/// <param name="databaseName">Database that is used</param>
/// <param name="tableName">Name of the target table; if empty, the file name (without extension) is used</param>
/// <param name="blockSize">Block size of table, if not specified default value of database server configuration will be used</param>
/// <param name="hasHeader">Specifies whether input csv has header, if not specified default value is true</param>
/// <param name="columnSeparator">Char representing column separator, if not specified default value will be guessed</param>
/// <param name="batchSize">Number of lines processed in one batch, if not specified default value is 65536</param>
/// <param name="threadsCount">Number of threads for processing csv, if not specified number of cores of the client CPU will be used</param>
/// <param name="encoding">Encoding of csv, if not specified it will be guessed</param>
public void Import(string path, string databaseName, string tableName = "", int blockSize = 0, bool hasHeader = true, char columnSeparator = '\0', int batchSize = 65536, int threadsCount = 0, Encoding encoding = null)
{
    this.databaseName = databaseName;
    try
    {
        // Table name from path or from argument if present
        if (tableName == "")
        {
            tableName = Path.GetFileNameWithoutExtension(path);
        }
        this.tableName = tableName;

        // Guess whatever the caller did not specify explicitly.
        if (encoding == null)
        {
            encoding = ParserCSV.GuessEncoding(path);
        }
        if (columnSeparator == '\0')
        {
            columnSeparator = ParserCSV.GuessSeparator(path, encoding);
        }
        var types = ParserCSV.GuessTypes(path, hasHeader, columnSeparator, encoding);
        streamLength = ParserCSV.GetStreamLength(path);

        client.Connect();
        client.UseDatabase(databaseName);
        CreateTable(databaseName, tableName, types, blockSize);

        ParserCSV.Configuration configuration = new ParserCSV.Configuration(batchSize: batchSize, encoding: encoding, columnSeparator: columnSeparator);

        if (threadsCount <= 0)
        {
            // Use more threads if file is bigger than 10MB
            threadsCount = streamLength > 10000000 ? Environment.ProcessorCount : 1;
        }
        Console.WriteLine("Importing started with " + threadsCount + " threads.");
        var startTime = DateTime.Now;

        long streamThreadLength = streamLength / threadsCount;
        long lines = 0;
        long errors = 0;
        linesImported = new long[threadsCount];
        bytesImported = new long[threadsCount];
        linesError = new long[threadsCount];
        Thread[] threads = new Thread[threadsCount];
        Exception[] threadExceptions = new Exception[threadsCount];

        // Each thread has its own instance of the Parser (because of different position of read)
        // and ColumnarDBClient (because of concurrent bulk import)
        for (int i = 0; i < threadsCount; i++)
        {
            long start = i * streamThreadLength;
            // BUGFIX: streamLength / threadsCount truncates, so the original
            // "start + streamThreadLength + 1" left the last chunk short of the
            // stream end whenever streamLength % threadsCount != 0 and trailing
            // lines could be skipped. The last thread now always reads to the end.
            long end = (i == threadsCount - 1) ? streamLength + 1 : start + streamThreadLength + 1;
            int index = i; // capture a per-iteration copy for the closure
            threadExceptions[index] = null;
            threads[index] = new Thread(() =>
            {
                try
                {
                    this.ParseAndImportBatch(index, path, configuration, types, start, end);
                }
                catch (Exception ex)
                {
                    // Remember the failure; it is rethrown on the caller's thread after Join.
                    threadExceptions[index] = ex;
                }
            });
            threads[index].Start();
        }
        for (int i = 0; i < threadsCount; i++)
        {
            threads[i].Join();
        }
        // Surface the first worker failure to the surrounding catch blocks.
        for (int i = 0; i < threadsCount; i++)
        {
            if (threadExceptions[i] != null)
            {
                throw threadExceptions[i];
            }
        }
        for (int i = 0; i < threadsCount; i++)
        {
            lines += linesImported[i];
            errors += linesError[i];
        }
        // NOTE(review): client is only disposed on the success path; after a thrown
        // exception the connection is left open. Confirm whether the caller expects
        // to reuse this.client before moving this into a finally block.
        client.Dispose();
        var endTime = DateTime.Now;
        Console.WriteLine();
        Console.WriteLine("Importing done (imported " + lines + " records, " + errors + " failed, " + (endTime - startTime).TotalSeconds.ToString() + "sec.).");
    }
    catch (FileNotFoundException)
    {
        Console.WriteLine(FileNotFound(path));
    }
    catch (IOException e)
    {
        Console.WriteLine(e.Message);
    }
    catch (QueryException e)
    {
        Console.WriteLine("Query Exception: " + e.Message);
    }
    catch (ParserCSV.ParserException e)
    {
        Console.WriteLine("Parser Exception: " + e.Message);
    }
    catch (Exception e)
    {
        Console.WriteLine(UnknownException() + e.Message);
    }
}
/// <summary>
/// Parses stream in batches and each batch is imported by the client.
/// Progress counters are published into the per-thread slots of the shared
/// linesImported/bytesImported/linesError arrays (each thread writes only its own index).
/// </summary>
/// <param name="threadId">Id of thread starting from 0</param>
/// <param name="file">Name of the file with full path</param>
/// <param name="configuration">Configuration for parser with specified parameters (column separator, encoding, etc.)</param>
/// <param name="types">Dictionary describing imported table with tuples (column name, column type)</param>
/// <param name="startBytePosition">Start reading position in file</param>
/// <param name="endBytePosition">End reading position in file</param>
private void ParseAndImportBatch(int threadId, string file, ParserCSV.Configuration configuration, Dictionary<string, Type> types, long startBytePosition = 0, long endBytePosition = 0)
{
    // BUGFIX: the stream was previously disposed only on the normal exit path, so a
    // parser or BulkImport exception leaked the file handle. The using blocks
    // guarantee disposal on every path (FileShare.Read lets sibling threads read too).
    using (var stream = File.Open(file, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (var streamReader = new StreamReader(stream, configuration.Encoding))
    {
        ParserCSV parserCSV = new ParserCSV(streamReader: streamReader, tableName: tableName, configuration: configuration, types: types, startBytePosition: startBytePosition, endBytePosition: endBytePosition);
        long lines = 0;
        long errors = 0;
        while (true)
        {
            long batchImportedLines;
            long batchErrorLines;
            var outData = parserCSV.GetNextParsedDataBatch(out batchImportedLines, out batchErrorLines);
            if (outData == null)
            {
                // Parser has reached the end of its assigned byte range.
                break;
            }
            lines += batchImportedLines;
            errors += batchErrorLines;

            var colData = new NetworkClient.ColumnarDataTable(outData.GetColumnNames(), outData.GetColumnData(), outData.GetColumnTypes(), outData.GetColumnNames());
            colData.TableName = tableName;

            // Bulk import must be serialized across threads; try/finally replaces the
            // original catch-release-rethrow so the mutex is released on all paths.
            mutex.WaitOne();
            try
            {
                client.BulkImport(colData);
            }
            finally
            {
                mutex.ReleaseMutex();
            }

            // Publish this thread's progress (its own slot only, no locking needed).
            linesImported[threadId] = lines;
            bytesImported[threadId] = parserCSV.GetStreamPosition();
            linesError[threadId] = errors;

            // Aggregate progress across all threads for the console status line.
            long totalLinesImported = 0;
            for (int i = 0; i < linesImported.Length; i++)
            {
                totalLinesImported += linesImported[i];
            }
            long totalBytesImported = 0;
            for (int i = 0; i < bytesImported.Length; i++)
            {
                totalBytesImported += bytesImported[i];
            }
            long totalLinesError = 0;
            for (int i = 0; i < linesError.Length; i++)
            {
                totalLinesError += linesError[i];
            }
            // Cap at 100%: per-thread ranges overlap by one byte, so the sum can exceed streamLength.
            Console.Write("\rImported " + totalLinesImported + " records so far (" + Math.Min(Math.Round((float)totalBytesImported / streamLength * 100), 100) + "%)...");
        }
    }
}