Ejemplo n.º 1
0
        /// <summary>
        /// Imports a CSV file into a table of the given database.
        /// The file is divided into byte ranges and every range is parsed and bulk-imported by its own thread.
        /// Failures are reported on the console and are not rethrown to the caller.
        /// </summary>
        /// <param name="path">Path to csv file that should be imported</param>
        /// <param name="databaseName">Database that is used</param>
        /// <param name="tableName">Name of the target table, if null or empty the file name without extension is used</param>
        /// <param name="blockSize">Block size of table, if not specified default value of database server configuration will be used</param>
        /// <param name="hasHeader">Specifies whether input csv has header, if not specified default value is true</param>
        /// <param name="columnSeparator">Char representing column separator, if not specified default value will be guessed</param>
        /// <param name="batchSize">Number of lines processed in one batch, if not specified default value is 65536</param>
        /// <param name="threadsCount">Number of threads for processing csv, if not specified one thread is used for files up to 10MB and the number of client CPU cores for bigger files</param>
        /// <param name="encoding">Encoding of csv, if not specified it will be guessed</param>
        public void Import(string path, string databaseName,
                           string tableName = "", int blockSize = 0, bool hasHeader = true, char columnSeparator = '\0', int batchSize = 65536, int threadsCount = 0, Encoding encoding = null)
        {
            this.databaseName = databaseName;

            try
            {
                long lines  = 0;
                long errors = 0;
                var  stopwatch = new System.Diagnostics.Stopwatch();

                // Becomes true once Connect() succeeded, so the client is released on failure paths as well
                bool connected = false;

                try
                {
                    // Table name from path or from argument if present
                    if (string.IsNullOrEmpty(tableName))
                    {
                        tableName = Path.GetFileNameWithoutExtension(path);
                    }

                    this.tableName = tableName;

                    if (encoding == null)
                    {
                        encoding = ParserCSV.GuessEncoding(path);
                    }
                    if (columnSeparator == '\0')
                    {
                        columnSeparator = ParserCSV.GuessSeparator(path, encoding);
                    }
                    var types = ParserCSV.GuessTypes(path, hasHeader, columnSeparator, encoding);
                    streamLength = ParserCSV.GetStreamLength(path);

                    client.Connect();
                    connected = true;
                    client.UseDatabase(databaseName);
                    CreateTable(databaseName, tableName, types, blockSize);

                    ParserCSV.Configuration configuration = new ParserCSV.Configuration(batchSize: batchSize, encoding: encoding, columnSeparator: columnSeparator);

                    if (threadsCount <= 0)
                    {
                        // Use more threads if file is bigger than 10MB
                        threadsCount = streamLength > 10000000 ? Environment.ProcessorCount : 1;
                    }

                    Console.WriteLine("Importing started with " + threadsCount + " threads.");
                    // Monotonic timer: unlike DateTime.Now it is not affected by system clock adjustments
                    stopwatch.Start();

                    long streamThreadLength = streamLength / threadsCount;
                    linesImported = new long[threadsCount];
                    bytesImported = new long[threadsCount];
                    linesError    = new long[threadsCount];
                    Thread[]    threads          = new Thread[threadsCount];
                    Exception[] threadExceptions = new Exception[threadsCount];

                    // Each thread parses its own byte range of the file
                    for (int i = 0; i < threadsCount; i++)
                    {
                        long start = i * streamThreadLength;
                        // The integer division above drops the remainder, so the last range is stretched
                        // to the end of the file; otherwise up to threadsCount - 1 trailing bytes would
                        // belong to no range. The +1 overlap of the ranges is kept as it was.
                        // NOTE(review): assumes ParserCSV treats endBytePosition as an upper bound - confirm
                        long end = (i == threadsCount - 1)
                            ? streamLength + 1
                            : start + streamThreadLength + 1;
                        int index = i; // per-iteration copy captured by the lambda
                        threads[index] = new Thread(() =>
                        {
                            try
                            {
                                this.ParseAndImportBatch(index, path, configuration, types, start, end);
                            }
                            catch (Exception ex)
                            {
                                // Stored here and rethrown on the calling thread after all workers finished
                                threadExceptions[index] = ex;
                            }
                        });
                        threads[index].Start();
                    }

                    for (int i = 0; i < threadsCount; i++)
                    {
                        threads[i].Join();
                    }

                    // Only the first recorded worker failure is reported
                    for (int i = 0; i < threadsCount; i++)
                    {
                        if (threadExceptions[i] != null)
                        {
                            throw threadExceptions[i];
                        }
                    }

                    for (int i = 0; i < threadsCount; i++)
                    {
                        lines  += linesImported[i];
                        errors += linesError[i];
                    }
                }
                finally
                {
                    // Nested inside the outer try so that a failing Dispose is still reported by the handlers below
                    if (connected)
                    {
                        client.Dispose();
                    }
                }

                Console.WriteLine();
                Console.WriteLine("Importing done (imported " + lines + " records, " + errors + " failed, " + stopwatch.Elapsed.TotalSeconds.ToString() + "sec.).");
            }
            catch (FileNotFoundException)
            {
                Console.WriteLine(FileNotFound(path));
            }
            catch (IOException e)
            {
                Console.WriteLine(e.Message);
            }
            catch (QueryException e)
            {
                Console.WriteLine("Query Exception: " + e.Message);
            }
            catch (ParserCSV.ParserException e)
            {
                Console.WriteLine("Parser Exception: " + e.Message);
            }
            catch (Exception e)
            {
                Console.WriteLine(UnknownException() + e.Message);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses the given byte range of the file in batches and bulk-imports each batch through the client.
        /// After every batch the counters of this thread are updated and the overall progress is written to the console.
        /// </summary>
        /// <param name="threadId">Id of thread starting from 0, used as index into the per-thread counters</param>
        /// <param name="file">Name of the file with full path</param>
        /// <param name="configuration">Configuration for parser with specified parameters (column separator, encoding, etc.)</param>
        /// <param name="types">Dictionary describing imported table with tuples (column name, column type)</param>
        /// <param name="startBytePosition">Start reading position in file</param>
        /// <param name="endBytePosition">End reading position in file</param>
        private void ParseAndImportBatch(int threadId, string file, ParserCSV.Configuration configuration, Dictionary <string, Type> types, long startBytePosition = 0, long endBytePosition = 0)
        {
            // using blocks release the file handle even when parsing or importing throws
            using (var stream = File.Open(file, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (var streamReader = new StreamReader(stream, configuration.Encoding))
            {
                ParserCSV parserCSV = new ParserCSV(streamReader: streamReader, tableName: tableName, configuration: configuration, types: types, startBytePosition: startBytePosition, endBytePosition: endBytePosition);

                long lines  = 0;
                long errors = 0;

                while (true)
                {
                    long batchImportedLines;
                    long batchErrorLines;

                    var outData = parserCSV.GetNextParsedDataBatch(out batchImportedLines, out batchErrorLines);

                    // A null batch ends the loop
                    if (outData == null)
                    {
                        break;
                    }
                    lines  += batchImportedLines;
                    errors += batchErrorLines;

                    var colData = new NetworkClient.ColumnarDataTable(outData.GetColumnNames(), outData.GetColumnData(), outData.GetColumnTypes(), outData.GetColumnNames());
                    colData.TableName = tableName;

                    // BulkImport calls are serialized by the mutex; finally releases it on success and on failure
                    mutex.WaitOne();
                    try
                    {
                        client.BulkImport(colData);
                    }
                    finally
                    {
                        mutex.ReleaseMutex();
                    }

                    linesImported[threadId] = lines;
                    bytesImported[threadId] = parserCSV.GetStreamPosition();
                    linesError[threadId]    = errors;

                    // Progress is summed over the counters of all threads
                    long totalLinesImported = 0;
                    foreach (long importedByThread in linesImported)
                    {
                        totalLinesImported += importedByThread;
                    }

                    long totalBytesImported = 0;
                    foreach (long bytesByThread in bytesImported)
                    {
                        totalBytesImported += bytesByThread;
                    }

                    // Percentage is capped at 100 because the summed positions can exceed the stream length
                    Console.Write("\rImported " + totalLinesImported + " records so far (" + Math.Min(Math.Round((float)totalBytesImported / streamLength * 100), 100) + "%)...");
                }
            }
        }