예제 #1
0
파일: Helpers.CSV.cs 프로젝트: kblc/Helpers
        /// <summary>
        /// Load a CSV table from text lines.
        /// </summary>
        /// <remarks>
        /// Blank (null, empty or whitespace-only) lines are dropped before parsing. Every column is
        /// created with type <see cref="string"/>. When a line has more fields than the table has
        /// columns the extra fields are ignored; when it has fewer, the remaining cells keep the
        /// column default.
        /// </remarks>
        /// <param name="lines">CSV file lines. The sequence is enumerated exactly once.</param>
        /// <param name="tableName">Result table name (null is treated as an empty string)</param>
        /// <param name="filePath">File path stored in the result (null is treated as an empty string)</param>
        /// <param name="hasColumns">True if the first non-blank line holds the column names; otherwise columns are generated as "column_N"</param>
        /// <param name="delimiter">Separator between fields</param>
        /// <param name="verboseLogAction">Callback that receives verbose progress messages</param>
        /// <param name="columnRenamer">Function that receives the lower-cased, trimmed header text and returns the column name to use. Blank results become "column"; duplicated names get an "_index" suffix.</param>
        /// <param name="tableValidator">Callback invoked with the table after the columns are created and before any row is added</param>
        /// <param name="rowFilter">Predicate that returns true for rows that must be skipped. Skipped rows are reported through <paramref name="verboseLogAction"/>. By default no row is skipped.</param>
        /// <returns>CSV file load info</returns>
        /// <exception cref="ArgumentNullException"><paramref name="lines"/> or <paramref name="delimiter"/> is null</exception>
        /// <exception cref="ArgumentException"><paramref name="delimiter"/> is empty or whitespace</exception>
        /// <exception cref="Exception">Any failure while reading the data (including input without a single non-blank line); the cause is available as the inner exception</exception>
        public static CSVFile Load(
            IEnumerable<string> lines,
            string tableName = "{virtual}",
            string filePath = "{virtual}",
            bool hasColumns = true,
            string delimiter = ";",
            Action<string> verboseLogAction = null,
            Func<string, string> columnRenamer = null,
            Action<DataTable> tableValidator = null,
            Expression<Func<DataRow, bool>> rowFilter = null)
        {
            if (lines == null)
                throw new ArgumentNullException("lines");

            if (delimiter == null)
                throw new ArgumentNullException("delimiter");

            if (string.IsNullOrWhiteSpace(delimiter))
                throw new ArgumentException("Delimiter must not be empty or whitespace.", "delimiter");

            verboseLogAction = verboseLogAction ?? new Action<string>((s) => { });
            columnRenamer = columnRenamer ?? new Func<string, string>((s) => s);
            tableValidator = tableValidator ?? new Action<DataTable>((table) => { });

            // Default filter: nothing is filtered out.
            Expression<Func<DataRow, bool>> defFilter = d => false;
            rowFilter = rowFilter ?? defFilter;

            // Materialize once: the input may be a lazy sequence (e.g. File.ReadLines) and must not be
            // re-enumerated for every Count()/ElementAt() call below.
            var sourceLines = lines.ToArray();

            verboseLogAction(string.Format("start load. Total lines in lines array: '{0}'", sourceLines.Length));

            var res = new CSVFile()
            {
                Table = new DataTable(tableName ?? string.Empty),
                FilePath = filePath ?? string.Empty,
                ProcessedRowCount = 0,
                TotalRowCount = sourceLines.Length
            };

            // Indexes used from here on refer to this blank-stripped array, not to the raw input.
            var linesArr = sourceLines.Where(l => !string.IsNullOrWhiteSpace(l)).ToArray();

            try
            {
                // Field splitting is the expensive part, so it runs in parallel; OrderBy restores the input order.
                var rows = Enumerable.Range(0, linesArr.Length)
                    .AsParallel()
                    .Select(i => new { Index = i, Fields = GetCsvFields(linesArr[i], delimiter).ToArray() })
                    .OrderBy(i => i.Index)
                    .ToArray();

                var firstRow = rows.FirstOrDefault();
                if (firstRow == null)
                    throw new Exception(Resource.Helpers_CSV_Load_NoOneRowFound);

                #region Columns
                if (hasColumns)
                {
                    verboseLogAction(string.Format("read columns"));
                    res.Table.Columns.AddRange(
                        Enumerable.Range(0, firstRow.Fields.Length)
                            .Select(i => new { ColumnName = firstRow.Fields[i], Index = i })
                            .Select(c => new { ColumnName = columnRenamer(c.ColumnName.ToLower().Trim()), c.Index })
                            .Select(c => new { ColumnName = string.IsNullOrWhiteSpace(c.ColumnName) ? "column" : c.ColumnName, c.Index })
                            // Names that occur more than once are made unique by appending the column index.
                            .GroupBy(c => c.ColumnName)
                            .SelectMany(g => g.Select(i => new { ColumnName = i.ColumnName + (g.Count() == 1 ? string.Empty : "_" + i.Index.ToString()), Index = i.Index }))
                            .OrderBy(i => i.Index)
                            .Select(c => new DataColumn(c.ColumnName, typeof(string)))
                            .ToArray()
                        );
                }
                else
                {
                    verboseLogAction(string.Format("generate columns"));
                    for (int i = 0; i < firstRow.Fields.Length; i++)
                        res.Table.Columns.Add(string.Format("column_{0}", i), typeof(string));
                }
                verboseLogAction(string.Format("read columns done. columns count: '{0}'", res.Table.Columns.Count));
                verboseLogAction("validate table");
                tableValidator(res.Table);
                verboseLogAction("table validation done");
                #endregion

                // Read after validation on purpose: the validator receives the table and may change its columns.
                var columnCount = res.Table.Columns.Count;

                var exprFilter = rowFilter.Compile();

                // Rows are created and filled sequentially: DataTable/DataRow write operations are not
                // thread-safe, and copying string references is too cheap to benefit from PLINQ.
                var dataRows = rows
                    .Skip(hasColumns ? 1 : 0)
                    .Select(item =>
                        {
                            var row = res.Table.NewRow();
                            var minLength = Math.Min(item.Fields.Length, columnCount);
                            for (int n = 0; n < minLength; n++)
                                row[n] = item.Fields[n];
                            return new { item.Index, Row = row, IsFiltered = exprFilter(row) };
                        })
                    .ToArray();

                // Index points into linesArr (blank lines already removed), so the text is taken from there.
                foreach (var dr in dataRows.Where(r => r.IsFiltered))
                    verboseLogAction(string.Format("column validation error on index: {0}, row: '{1}'", dr.Index, linesArr[dr.Index]));

                var validRows = dataRows.Where(r => !r.IsFiltered).ToArray();
                foreach (var dr in validRows)
                    res.Table.Rows.Add(dr.Row);

                // The header line counts as processed too.
                res.ProcessedRowCount = validRows.Length + (hasColumns ? 1 : 0);
            }
            catch (Exception ex)
            {
                var e = new Exception("Data read exception. See inner exception for details", ex);
                // NOTE: ProcessedRowCount is only assigned after all rows were added, so on failure it is
                // still 0 and the first input line is the one attached below.
                e.Data.Add("Exception thrown at line number", res.ProcessedRowCount);
                // Guarded so that empty input does not raise a second exception that hides the real cause.
                if (res.ProcessedRowCount >= 0 && res.ProcessedRowCount < sourceLines.Length)
                    e.Data.Add("Exception thrown at line", sourceLines[res.ProcessedRowCount]);
                throw e;
            }
            finally
            {
                verboseLogAction(string.Format("import end. Imported '{0}' from '{1}' rows.", res.Table.Rows.Count, res.TotalRowCount));
            }
            return res;
        }
예제 #2
0
파일: Helpers.CSV.cs 프로젝트: kblc/Helpers
        /// <summary>
        /// Save DataTable to a CSV file.
        /// </summary>
        /// <remarks>
        /// The CSV lines are produced by the line-based <c>Save</c> overload and then written to
        /// <paramref name="filePath"/>; an existing file is overwritten.
        /// </remarks>
        /// <param name="table">Table with data for CSV file</param>
        /// <param name="filePath">File path to save file</param>
        /// <param name="encoding">File encoding; <see cref="Encoding.Default"/> is used when null</param>
        /// <param name="hasColumns">Write column line</param>
        /// <param name="delimiter">Separator between data</param>
        /// <param name="verboseLogAction">Callback that receives verbose progress messages</param>
        /// <param name="columnRenamer">Function for renaming columns before save (passed through to the line-based overload)</param>
        /// <param name="excludeColumn">Function for excluding columns (passed through to the line-based overload)</param>
        /// <returns>CSV file save info: the target path, the source table and the row counters</returns>
        /// <exception cref="ArgumentNullException"><paramref name="table"/> or <paramref name="filePath"/> is null</exception>
        public static CSVFile Save(
            DataTable table, 
            string filePath, 
            Encoding encoding = null, 
            bool hasColumns = true, 
            string delimiter = ";", 
            Action<string> verboseLogAction = null, 
            Func<string, string> columnRenamer = null, 
            Func<DataColumn, bool> excludeColumn = null)
        {
            // Fail fast, before any line is generated, instead of failing late on table.Rows / FileStream.
            if (table == null)
                throw new ArgumentNullException("table");

            if (filePath == null)
                throw new ArgumentNullException("filePath");

            verboseLogAction = verboseLogAction ?? new Action<string>(s => { });
            encoding = encoding ?? Encoding.Default;

            verboseLogAction("get lines for export files...");

            // Materialized once: the sequence is needed both for counting and for writing, and a lazy
            // producer would otherwise do its work (and its logging) twice.
            var lines = Save(table: table, hasColumns: hasColumns, delimiter: delimiter, verboseLogAction: (s) => { verboseLogAction(string.Format("save to lines: {0}", s)); }, columnRenamer: columnRenamer, excludeColumn: excludeColumn).ToArray();

            verboseLogAction("get lines done");

            var res = new CSVFile()
            {
                FilePath = filePath,
                Table = table,
                TotalRowCount = table.Rows.Count,
                // NOTE(review): if the line-based overload already emits the header line, the header is
                // counted twice here - confirm against that overload.
                ProcessedRowCount = lines.Length + (hasColumns ? 1 : 0)
            };

            // Disposing the stream flushes it; an explicit Flush in a finally block could throw on a
            // failed stream and hide the exception that caused the failure.
            using (FileStream fs = new FileStream(filePath, FileMode.Create))
            {
                WritePreamble(fs, encoding);
                foreach (var line in lines)
                    AddLineToStream(fs, encoding, line);
            }

            return res;
        }