示例#1
0
        /// <summary>
        /// Creates a CSV parser that reads the given stream starting at <paramref name="startBytePosition"/>
        /// and prepares an empty columnar table whose columns match the supplied (or guessed) types.
        /// </summary>
        /// <param name="tableName">Name given to the columnar table that is created for the parsed data</param>
        /// <param name="streamReader">Reader over the CSV data; its base stream is repositioned by this constructor</param>
        /// <param name="types">Column name to column type mapping; when null the types are guessed from the underlying file</param>
        /// <param name="configuration">Parser configuration (column separator, encoding, header flag); when null a default instance is used</param>
        /// <param name="startBytePosition">Byte offset in the base stream at which reading starts</param>
        /// <param name="endBytePosition">Byte offset at which reading ends; 0 means the length of the base stream</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="streamReader"/> is null</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="types"/> is null and the reader is not backed by a <see cref="FileStream"/>, so the types cannot be guessed</exception>
        public ParserCSV(string tableName, StreamReader streamReader, Dictionary <string, Type> types = null, Configuration configuration = null, long startBytePosition = 0, long endBytePosition = 0)
        {
            if (streamReader == null)
            {
                throw new ArgumentNullException(nameof(streamReader));
            }

            // From here on always use this.configuration: the parameter may be null.
            this.configuration = configuration ?? new Configuration();

            this.streamReader      = streamReader;
            this.startBytePosition = startBytePosition;
            this.endBytePosition   = endBytePosition != 0 ? endBytePosition : this.streamReader.BaseStream.Length;

            this.streamReader.BaseStream.Seek(startBytePosition, SeekOrigin.Begin);
            // The reader keeps its own buffer; drop it so it cannot serve data
            // read before the base stream was repositioned.
            this.streamReader.DiscardBufferedData();

            if (types != null)
            {
                this.types = types;
            }
            else
            {
                // Types can only be guessed when the data comes from a file on disk,
                // because GuessTypes takes the file name.
                var fileStream = this.streamReader.BaseStream as FileStream;
                if (fileStream == null)
                {
                    throw new ArgumentException("Column types must be specified when the reader is not backed by a FileStream.", nameof(types));
                }

                this.types = GuessTypes(fileStream.Name, this.configuration.HasHeader, this.configuration.ColumnSeparator, this.configuration.Encoding);
            }

            // Column order of the table follows the key order of the types dictionary.
            this.header = new string[this.types.Keys.Count];
            this.types.Keys.CopyTo(this.header, 0);

            this.tableData = new NetworkClient.ColumnarDataTable(tableName);
            foreach (var columnName in this.header)
            {
                tableData.AddColumn(columnName, this.types[columnName]);
            }

            dataParser = new CsvReader(this.streamReader).Parser;
            dataParser.Configuration.Delimiter    = this.configuration.ColumnSeparator.ToString();
            dataParser.Configuration.BadDataFound = null;
            dataParser.Configuration.TrimOptions  = CsvHelper.Configuration.TrimOptions.Trim;

            // Skip one record up front in two cases:
            //  - at the very beginning of the file when it has a header line,
            //  - at the beginning of a thread chunk (start > 0); presumably the chunk starts
            //    in the middle of a line that belongs to the previous chunk - confirm against the caller.
            bool skipHeaderLine      = startBytePosition == 0 && this.configuration.HasHeader;
            bool skipChunkFirstLine  = startBytePosition > 0;
            if (skipHeaderLine || skipChunkFirstLine)
            {
                dataParser.Read();
            }
        }
示例#2
0
        /// <summary>
        /// Parses the given byte range of a CSV file in batches; each batch is imported by the client
        /// and the per-thread progress counters are updated and printed to the console.
        /// </summary>
        /// <param name="threadId">Id of thread starting from 0; used as index into the per-thread progress arrays</param>
        /// <param name="file">Name of the file with full path</param>
        /// <param name="configuration">Configuration for parser with specified parameters (column separator, encoding, etc.)</param>
        /// <param name="types">Dictionary describing imported table with tuples (column name, column type)</param>
        /// <param name="startBytePosition">Start reading position in file</param>
        /// <param name="endBytePosition">End reading position in file</param>
        /// <remarks>
        /// Exceptions thrown while opening the file, parsing or importing are propagated to the caller;
        /// the file handle and the import mutex are released in every case.
        /// </remarks>
        private void ParseAndImportBatch(int threadId, string file, ParserCSV.Configuration configuration, Dictionary <string, Type> types, long startBytePosition = 0, long endBytePosition = 0)
        {
            // using-statements guarantee the file handle is released even when parsing or importing throws.
            using (var stream = File.Open(file, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (var streamReader = new StreamReader(stream, configuration.Encoding))
            {
                ParserCSV parserCSV = new ParserCSV(streamReader: streamReader, tableName: tableName, configuration: configuration, types: types, startBytePosition: startBytePosition, endBytePosition: endBytePosition);

                long lines  = 0;
                long errors = 0;

                while (true)
                {
                    long batchImportedLines;
                    long batchErrorLines;

                    var outData = parserCSV.GetNextParsedDataBatch(out batchImportedLines, out batchErrorLines);

                    // A null batch signals that this chunk has been fully consumed.
                    if (outData == null)
                    {
                        break;
                    }
                    lines  += batchImportedLines;
                    errors += batchErrorLines;

                    var colData = new NetworkClient.ColumnarDataTable(outData.GetColumnNames(), outData.GetColumnData(), outData.GetColumnTypes(), outData.GetColumnNames());
                    colData.TableName = tableName;

                    // Only one thread at a time may use the client; finally releases
                    // the mutex on both the success and the failure path.
                    mutex.WaitOne();
                    try
                    {
                        client.BulkImport(colData);
                    }
                    finally
                    {
                        mutex.ReleaseMutex();
                    }

                    linesImported[threadId] = lines;
                    bytesImported[threadId] = parserCSV.GetStreamPosition();
                    linesError[threadId]    = errors;

                    long totalLinesImported = SumProgress(linesImported);
                    // NOTE(review): if GetStreamPosition returns an absolute file offset, the sum over
                    // threads overstates the progress and the Math.Min clamp below hides it - confirm.
                    long totalBytesImported = SumProgress(bytesImported);

                    Console.Write("\rImported " + totalLinesImported + " records so far (" + Math.Min(Math.Round((float)totalBytesImported / streamLength * 100), 100) + "%)...");
                }
            }
        }

        /// <summary>
        /// Adds up the per-thread progress counters.
        /// </summary>
        /// <param name="perThreadValues">Array with one counter per import thread</param>
        /// <returns>Sum of all counters</returns>
        private static long SumProgress(long[] perThreadValues)
        {
            long total = 0;
            for (int i = 0; i < perThreadValues.Length; i++)
            {
                total += perThreadValues[i];
            }
            return total;
        }