/// <summary>
/// Parses the deployed employee CSV sample, maps and validates every row as an Employee,
/// and asserts that at least two rows produced errors.
/// </summary>
public void ParseCsvFileTest()
{
    string csvPath = Path.Combine(this.TestContext.TestDeploymentDir, "Files\\1095_Import_Employees.csv");
    ICsvFileParser csvParser = new CsvFileParser(csvPath);
    IObjectValidator objectValidator = new ObjectValidator();

    // Keyed by row number; a row is only stored when it collected at least one error.
    IDictionary<int, IList<string>> errorsByRow = new Dictionary<int, IList<string>>();

    // Row numbering starts at whatever the parser reports as its starting row.
    int currentRow = csvParser.RowStart;
    foreach (dynamic row in csvParser.ParseFile())
    {
        List<string> rowErrors = new List<string>();

        // Mapping errors and validation errors are accumulated in the same list.
        Employee employee = FileIOUtilities.MapObject<Employee>(row, currentRow, objectValidator, null, ref rowErrors);
        objectValidator.TryValidate(employee, ref rowErrors);

        if (rowErrors.Count > 0)
        {
            errorsByRow.Add(currentRow, rowErrors);
        }

        currentRow++;
    }

    Assert.IsTrue(errorsByRow.Count >= 2);
}
/// <summary>
/// Creates a column definition from a header name and a value function supplied as a string.
/// </summary>
/// <param name="headerName">Header text for the column. It is not checked for null or blank here.</param>
/// <param name="valueFunction">String form of the value function. Must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="valueFunction"/> is null.</exception>
public ColumnInfoBase(string headerName, string valueFunction)
{
    // Only the value function is mandatory; no check is applied to the header name.
    if (valueFunction == null)
    {
        throw new ArgumentNullException("valueFunction");
    }

    this.HeaderName = headerName;
    this.ValueFunctionString = valueFunction;
    this.CellType = ExcelCellType.General;

    // Computed from the header that was just stored.
    this.CleanHeaderName = FileIOUtilities.FixupHeader(this.HeaderName);
}
/// <summary>
/// Sample: writes Company rows to an Excel file using a column map that controls the file layout,
/// with a database data reader as the data source.
/// </summary>
/// <returns>The path passed to the writer's WriteTo call.</returns>
public static string WriteExcelFile5()
{
    Console.WriteLine("Running WriteExcelFile5");

    // Converts a running column number into its Excel column letter so columns can be assigned in sequence.
    Func<int, string> toColumnLetter = (number) => FileIOUtilities.ConvertExcelNumberToLetter(number);
    var nextColumn = 1;

    // Examples of optional arguments that can be appended to an Add call:
    //   formatCode: "#,##0.00"
    //   cellType: ExcelCellType.Currency, formatCode: "[Blue]$#,##0.00; [Red]-$#,##0.00;"
    //   cellType: ExcelCellType.Percent, formatCode: "0.00%"
    //   formatCode: "000-00-0000"
    var columns = new ColumnInfoList<Company>();
    columns.Add(toColumnLetter(nextColumn++), "Company Id", (c) => c.CompanyId);

    // "Foo" is a blank column inserted at B; the next real column starts at C.
    columns.Add(toColumnLetter(nextColumn++), "Foo", (c) => null);
    columns.Add(toColumnLetter(nextColumn++), "Legal Name", (c) => c.LegalName);
    columns.Add(toColumnLetter(nextColumn++), "Doing Business As", (c) => c.DBAName, updateHeader: true);
    columns.Add(toColumnLetter(nextColumn++), "Change Date", (c) => c.ChangeDate, cellType: ExcelCellType.Date, formatCode: "mm-dd-yyyy");
    columns.Add(toColumnLetter(nextColumn++), "UserId", (c) => c.UserId);

    var outputPath = Common.GetFileName(Common.XLS_TYPE);

    using (var writer = new ExcelWriter(1, 3))
    {
        writer.CreateSheetIfNotFound = true;

        using (var connection = new SqlConnection(ConfigurationManager.ConnectionStrings["DataModel"].ConnectionString))
        {
            connection.Open();

            using (var command = connection.CreateCommand())
            {
                command.CommandText = "Select top (10000) * from Company";
                command.CommandType = CommandType.Text;

                // The file is written while the reader is still open.
                using (var dataReader = command.ExecuteReader())
                {
                    writer.WriteDataToSheet("Companies", dataReader, columns);
                    writer.WriteTo(outputPath);
                }
            }
        }
    }

    return outputPath;
}
/// <summary>
/// Reads the configured delimited or fixed-width text file and streams each non-blank row
/// back to the caller as a dynamic object built from the headers and the row's field values.
/// </summary>
/// <returns>A lazily evaluated sequence of dynamic row objects, each with a <c>RowId</c> member.</returns>
/// <exception cref="System.IO.FileNotFoundException">The file could not be found.</exception>
public IEnumerable<dynamic> ParseFile()
{
    int nextRowId = this.RowStart;

    // TextFieldParser lives in the Microsoft.VisualBasic.FileIO namespace.
    using (TextFieldParser reader = new TextFieldParser(_fileName))
    {
        reader.SetFieldWidths(_fixedWidths.Maybe().FirstOrDefault());
        reader.SetDelimiters(_delimiters.Maybe().DefaultIfEmpty(new string[] { ",", "\t" }).First());

        // Fixed-width mode is selected only when field widths ended up being set.
        if (reader.FieldWidths == null)
        {
            reader.TextFieldType = FieldType.Delimited;
        }
        else
        {
            reader.TextFieldType = FieldType.FixedWidth;
        }

        reader.TrimWhiteSpace = TrimWhiteSpace;
        reader.HasFieldsEnclosedInQuotes = HasQuotedFields;
        reader.CommentTokens = new string[] { "#" };

        string[] columnNames = null;
        if (FirstRowContainsHeaders)
        {
            columnNames = FileIOUtilities.FixupHeaders(reader.ReadFields());
        }

        // Skipped lines are consumed after the header row (if any) has been read.
        for (int skipped = 0; skipped < RowsToSkip; skipped++)
        {
            reader.ReadLine();
        }

        while (!reader.EndOfData)
        {
            string[] fields = reader.ReadFields();

            // Without a header row, names Col0, Col1, ... are generated from the first row's width.
            if (columnNames == null)
            {
                columnNames = Enumerable.Range(0, fields.Length).Select(index => "Col" + index).ToArray<string>();
            }

            // A row whose fields are all blank is not emitted and does not consume a row id.
            if (fields.All(field => String.IsNullOrWhiteSpace(field)))
            {
                continue;
            }

            dynamic rowObject = FileIOUtilities.RowToExpando(columnNames, fields);
            rowObject.RowId = nextRowId++;
            yield return rowObject;
        }

        reader.Close();
    }
}
/// <summary>
/// Creates a column definition from a header name and an expression that selects the column value.
/// </summary>
/// <param name="headerName">Header text for the column. It is not checked for null or blank here.</param>
/// <param name="valueFunction">Expression producing the value for a row object. Must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="valueFunction"/> is null.</exception>
public ColumnInfo(string headerName, Expression<Func<T, object>> valueFunction)
{
    // Only the value function is mandatory; no check is applied to the header name.
    if (valueFunction == null)
    {
        throw new ArgumentNullException("valueFunction");
    }

    this.HeaderName = headerName;
    this.CleanHeaderName = FileIOUtilities.FixupHeader(this.HeaderName);

    // The expression is compiled once here and the resulting delegate is stored.
    this.ValueFunction = valueFunction.Compile();

    // Store the underlying type when the expression's type is Nullable<>, otherwise the type itself.
    var expressionType = FileIOUtilities.GetExpressionType<T>(valueFunction);
    this.ValueFunctionType = Nullable.GetUnderlyingType(expressionType) ?? expressionType;

    this.CellType = ExcelCellType.General;
}
/// <summary>
/// Sample: reads Company rows from the CSV sample file, validates each one, and prints
/// every row error to the console once the whole file has been read.
/// </summary>
/// <returns>All mapped companies, including rows that reported errors.</returns>
public static IList<Company> ReadCsvFile1()
{
    Console.WriteLine("Running ReadCsvFile1");

    // parser.Delimiters can be set to adjust the delimiters; parser.FixedWidths enables fixed-width parsing.
    var parser = new CsvFileParser(Common.CsvDataPath);
    var validator = new ObjectValidator();
    var errorsByRow = new Dictionary<int, IList<string>>();
    var result = new List<Company>();

    var currentRow = parser.RowStart;
    foreach (dynamic row in parser.ParseFile())
    {
        var rowErrors = new List<string>();

        // Mapping errors and validation errors are accumulated in the same list.
        var company = FileIOUtilities.MapObject<Company>(row, currentRow, validator, null, ref rowErrors);
        validator.TryValidate(company, ref rowErrors);

        // The object is kept even when the row produced errors.
        result.Add(company);

        if (rowErrors.Count > 0)
        {
            errorsByRow.Add(currentRow, rowErrors);
        }

        currentRow++;
    }

    foreach (var entry in errorsByRow)
    {
        int lineNumber = entry.Key;
        foreach (var message in entry.Value)
        {
            Console.WriteLine("Line:{0}, Error: {1}", lineNumber, message);
        }
    }

    return result;
}
/// <summary>
/// Streams the source rows as <see cref="SqlDataRecord"/> instances built from the configured SQL metadata.
/// Each row is mapped to <typeparamref name="T"/> when the source is dynamic, then validated, then copied
/// into a record by the custom SQL mapper, by the row's own <see cref="ISqlRecordMapper"/> implementation,
/// or, failing both, by matching property names to column names.
/// </summary>
/// <returns>An enumerator that yields one record per source row; nothing when the source is null or empty.</returns>
public IEnumerator<SqlDataRecord> GetEnumerator()
{
    // NOTE(review): Any() here and the foreach below each enumerate _data, so a lazily
    // evaluated source is started twice.
    if (_data == null || !_data.Any())
    {
        yield break;
    }

    PropertyInfo[] properties = _importType.GetProperties();
    StringComparer comparer = StringComparer.InvariantCultureIgnoreCase;
    this._validator = this._validator ?? new ObjectValidator();
    bool? isDynamicType = null;
    int errorColumnOrdinal = -1;
    var sqlMetaArray = _sqlMetadata.ToArray();

    if (_sqlMetadata.Any(x => comparer.Equals(x.Name, _errorColumn)))
    {
        // GetOrdinal throws for a column that does not exist, hence the Any() check first.
        SqlDataRecord tempRecord = new SqlDataRecord(sqlMetaArray);
        errorColumnOrdinal = tempRecord.GetOrdinal(_errorColumn);
    }

    foreach (dynamic row in _data)
    {
        _rowIndex++;
        SqlDataRecord record = new SqlDataRecord(sqlMetaArray);
        List<string> errors = new List<string>();

        // Only the first row is inspected: rows that are already typed objects do not need the object mapper.
        if (!isDynamicType.HasValue)
        {
            isDynamicType = FileIOHelpers.IsDynamicType(row);
        }

        T rowObj = default(T);
        if (isDynamicType.Value)
        {
            try
            {
                rowObj = FileIOUtilities.MapObject<T>(row, _rowIndex, _validator, _fileValuesMapper, ref errors);
            }
            catch (Exception ex)
            {
                // A mapping failure is recorded against the row; rowObj keeps its default value.
                errors.Add(ex.ToString());
            }
        }
        else
        {
            rowObj = row;
        }

        try
        {
            // Built-in data annotation validation.
            this._validator.TryValidate(rowObj, ref errors);

            // Custom validation.
            if (_customValidator != null)
            {
                _customValidator.Invoke(rowObj, ref errors);
            }
        }
        catch (Exception ex)
        {
            errors.Add(ex.ToString());
        }

        ISqlRecordMapper mapperObj = null;

        // A custom mapper, when provided, takes precedence over the interface implementation.
        if (this._customSqlMapper != null)
        {
            this._customSqlMapper.Invoke(rowObj, record, _rowIndex, errors);
        }
        else if ((mapperObj = rowObj as ISqlRecordMapper) != null)
        {
            mapperObj.MapSqlRecord(record, _rowIndex, errors);
        }
        else
        {
            // Last resort: copy values by matching property names to column names.
            // When mapping failed above, rowObj may be null; reading an instance property from a
            // null target throws, which would abort the whole enumeration. In that case the
            // property copy is skipped so the record can still carry the collected errors.
            object target = rowObj;
            if (target != null)
            {
                foreach (SqlMetaData metaData in _sqlMetadata)
                {
                    string name = metaData.Name;
                    if (comparer.Equals(name, _errorColumn))
                    {
                        continue;
                    }

                    var prop = properties.FirstOrDefault(x => comparer.Equals(x.Name, name));
                    if (prop == null)
                    {
                        continue;
                    }

                    // Null property values are left unset on the record.
                    object val = prop.GetValue(target, null);
                    if (val != null)
                    {
                        record.SetValue(record.GetOrdinal(name), val);
                    }
                }
            }

            // If an error column is defined, store the row's import errors in it.
            if (errorColumnOrdinal != -1 && errors.Count != 0)
            {
                string errorMessage = FileIOHelpers.ErrorsToXml(errors, _rowIndex);
                record.SetString(errorColumnOrdinal, errorMessage);
            }
        }

        yield return record;
    }
}
/// <summary>
/// Sample: reads Company rows from the "Companies" sheet of an Excel file using a custom file mapper,
/// validates each one, and prints every row error to the console once the whole sheet has been read.
/// </summary>
/// <param name="path">Path of the workbook; when null or blank, Common.ExcelDataPath is used.</param>
/// <param name="headerRow">Header row number passed to the parser.</param>
/// <param name="dataRow">Data row number passed to the parser.</param>
/// <returns>All mapped companies, including rows that reported errors.</returns>
public static IList<Company> ReadExcelFile1(string path = null, int headerRow = 2, int dataRow = 3)
{
    /* Notes on header names:
     * 1) Each header name is used as the "property" name of the dynamic row object, and many characters
     *    are not valid in a C# property name. While the file is read, invalid characters in the header
     *    name are replaced with an empty string using the regex [^A-Za-z0-9_]*.
     *    If the result of that replacement is empty, the column is named "Col(i)", where (i) is the
     *    zero-based column index.
     *    Examples:
     *      Column Name---------------Property Name
     *      Company Id----------------row.CompanyId
     *      Aims Company Id-----------row.AimsCompanyId
     *      Some_Id_9-----------------row.Some_Id_9
     *      (@@@)---------------------row.Col1 - where 1 is the zero-based index of that column.
     *
     * 2) The case of the property names does not matter, as the sender could change the case indiscriminately.
     * 3) If a column you were expecting has been removed, the property returns empty and does not throw.
     */
    Console.WriteLine("Running ReadExcelFile1");

    // This example shows reading a Company class from a file.
    if (String.IsNullOrWhiteSpace(path))
    {
        path = Common.ExcelDataPath;
    }

    // A parser reads one sheet at a time. Another sheet needs a new parser, or the same parser
    // with its sheet name changed.
    IExcelFileParser parser = new ExcelFileParser(path, "Companies", headerRow, dataRow);

    // Parser errors collected per row while the file is parsed.
    var errorsByRow = new Dictionary<int, IList<string>>();

    // Validates column parsing and data attributes such as [Required], and invokes IValidatableObject.
    IObjectValidator validator = new ObjectValidator();

    var result = new List<Company>();

    // The row index may not start at row 1, so it is taken from the parser.
    int currentRow = parser.RowStart;
    foreach (dynamic row in parser.ParseFile())
    {
        List<string> rowErrors = new List<string>();

        // The custom mapper controls how a file row is mapped to the object; the interface is skipped.
        FileValuesMap<Company> mapper = Common.Company_FileMapper;

        // Maps the row to the object and runs column mapping validation.
        var company = FileIOUtilities.MapObject<Company>(row, currentRow, validator, mapper, ref rowErrors);

        // TryValidate runs data annotation validation and the IValidatableObject interface.
        validator.TryValidate(company, ref rowErrors);

        // The object is kept even when the row produced errors.
        result.Add(company);

        if (rowErrors.Count > 0)
        {
            errorsByRow.Add(currentRow, rowErrors);
        }

        currentRow++;
    }

    // Write all the file errors out to the console.
    foreach (var entry in errorsByRow)
    {
        int lineNumber = entry.Key;
        foreach (var message in entry.Value)
        {
            Console.WriteLine("Line:{0}, Error: {1}", lineNumber, message);
        }
    }

    return result;
}
/// <summary>
/// Streams the data rows of the configured worksheet as dynamic objects built from the header row
/// and each row's cell values. Rows whose cells are all blank are not emitted.
/// </summary>
/// <returns>
/// A lazily evaluated sequence of dynamic row objects, each with a <c>RowId</c> member set to the
/// worksheet row index. The sequence is empty when the sheet is not found, is not a worksheet,
/// or has no data row containing a value.
/// </returns>
/// <exception cref="System.IO.FileNotFoundException">The workbook file could not be found.</exception>
public IEnumerable<dynamic> ParseFile()
{
    var comparer = StringComparer.InvariantCultureIgnoreCase;
    Package spreadsheetPackage = null;
    var startColumnIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(this.StartColumnKey);
    var endColumnIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(this.EndColumnKey);

    try
    {
        spreadsheetPackage = Package.Open(_fileName, FileMode.Open, FileAccess.Read);
        using (var spreadsheetDocument = SpreadsheetDocument.Open(spreadsheetPackage))
        {
            var workbookPart = spreadsheetDocument.WorkbookPart;

            // Find the sheet with the matching name.
            var sheet = spreadsheetDocument.WorkbookPart.Workbook.Descendants<Sheet>().FirstOrDefault(x => comparer.Equals(x.Name, this.SheetName));
            if (sheet == null)
            {
                yield break;
            }

            // The part behind the sheet; it is null when the sheet is not a worksheet part.
            var worksheetPart = workbookPart.GetPartById(sheet.Id) as WorksheetPart;
            if (worksheetPart == null)
            {
                yield break;
            }

            var sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();

            // Bail out early unless at least one data row has a cell with a value.
            var hasDataRow = sheetData.Descendants<Row>().Any(r =>
                r.RowIndex >= this.DataRowStart &&
                r.Descendants<Cell>().Any(c => c.CellValue != null && !string.IsNullOrWhiteSpace(c.CellValue.Text)));
            if (!hasDataRow)
            {
                yield break;
            }

            // Needed to look up text values from cells.
            var sstpart = workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
            SharedStringTable sst = null;
            if (sstpart != null)
            {
                sst = sstpart.SharedStringTable;
            }

            var cellFormats = workbookPart.WorkbookStylesPart.Stylesheet.CellFormats;
            IList<NumberingFormat> numberingFormats = null;
            if (workbookPart.WorkbookStylesPart.Stylesheet.NumberingFormats != null)
            {
                numberingFormats = workbookPart.WorkbookStylesPart.Stylesheet.NumberingFormats.OfType<NumberingFormat>().ToList();
            }

            // The reader is disposed when enumeration finishes or is abandoned.
            using (var reader = OpenXmlReader.Create(worksheetPart))
            {
                // Keyed by the cell address minus the row number, so headers and values match up by key.
                IDictionary<string, string> headers = new Dictionary<string, string>();

                while (reader.Read())
                {
                    // Read until the first row element, then walk its siblings.
                    if (reader.ElementType != typeof(Row))
                    {
                        continue;
                    }

                    do
                    {
                        var row = (Row)reader.LoadCurrentElement();
                        uint rowIndex = row.RowIndex;

                        // Ignore everything before the data rows except the header row.
                        if (!(rowIndex == this.HeaderRow) && rowIndex < this.DataRowStart)
                        {
                            continue;
                        }

                        // If an end row was specified, stop once the row index exceeds it.
                        if (this.DataRowEnd >= this.DataRowStart && rowIndex > this.DataRowEnd)
                        {
                            break;
                        }

                        // Start every row with a fresh dictionary so a row without cells can never
                        // reuse (or dereference a missing) dictionary from an earlier row.
                        Dictionary<string, string> values = new Dictionary<string, string>();

                        if (row.HasChildren)
                        {
                            foreach (var cell in row.Descendants<Cell>())
                            {
                                var cellKey = FileIOUtilities.GetColumnKey(cell.CellReference.Value);
                                if (startColumnIndex != -1 || endColumnIndex != -1)
                                {
                                    var cellIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(cellKey);
                                    if (startColumnIndex >= 0 && cellIndex < startColumnIndex)
                                    {
                                        continue;
                                    }
                                    if (endColumnIndex >= 0 && cellIndex > endColumnIndex)
                                    {
                                        break;
                                    }
                                }

                                var value = ResolveCellText(cell, sst, cellFormats, numberingFormats);

                                if (rowIndex >= this.DataRowStart)
                                {
                                    values.Add(cellKey, (value ?? "").Trim());
                                }
                                else if (rowIndex == this.HeaderRow)
                                {
                                    headers.Add(cellKey, value);
                                }
                            }
                        }

                        if (rowIndex >= this.DataRowStart)
                        {
                            // Excel sometimes reports rows past the last one that has values; skip blank rows.
                            if (values.Any(x => !String.IsNullOrWhiteSpace(x.Value)))
                            {
                                dynamic retObj = FileIOUtilities.RowToExpando(headers, values, Convert.ToInt32(rowIndex));
                                retObj.RowId = Convert.ToInt32(rowIndex);

                                // Stream the data row back to the caller.
                                yield return retObj;
                            }
                        }
                        else if (rowIndex == this.HeaderRow)
                        {
                            // Remove all characters that are not allowed in .NET property names.
                            headers = FileIOUtilities.FixupHeaders(headers);
                        }
                    }
                    while (reader.ReadNextSibling());

                    // Rows are all done.
                    break;
                }
            }
        }
    }
    finally
    {
        // Package.Open may have thrown before the package was assigned; closing a null package
        // here would replace the original exception with a NullReferenceException.
        if (spreadsheetPackage != null)
        {
            spreadsheetPackage.Close();
        }
    }
}

/// <summary>
/// Resolves the text of one cell: blank cells give an empty string, shared strings are looked up
/// in the shared string table, and numbers in date-formatted cells are converted to an
/// invariant-culture date string. Anything else is returned as the raw cell text.
/// </summary>
private static string ResolveCellText(Cell cell, SharedStringTable sst, CellFormats cellFormats, IList<NumberingFormat> numberingFormats)
{
    if (cell.CellValue == null || String.IsNullOrWhiteSpace(cell.CellValue.Text))
    {
        return String.Empty;
    }

    string rawText = cell.CellValue.Text;

    if (cell.DataType != null && cell.DataType == CellValues.SharedString && sst != null)
    {
        // The cell value is an index into the shared string table.
        return sst.ChildElements[int.Parse(rawText, CultureInfo.InvariantCulture)].InnerText;
    }

    if (cell.StyleIndex == null)
    {
        return rawText;
    }

    // Deciding whether a cell holds a date requires inspecting the number format behind its style.
    var cellFormat = (CellFormat)cellFormats.ElementAt((int)cell.StyleIndex.Value);
    NumberingFormat numberingFormat = null;
    if (numberingFormats != null && cellFormat.NumberFormatId != null && cellFormat.NumberFormatId.HasValue)
    {
        numberingFormat = numberingFormats.FirstOrDefault(fmt => fmt.NumberFormatId.Value == cellFormat.NumberFormatId.Value);
    }

    bool isDate =
        (cell.DataType != null && cell.DataType == CellValues.Date)
        // Built-in date formats.
        || (cellFormat.NumberFormatId != null && (cellFormat.NumberFormatId >= 14 && cellFormat.NumberFormatId <= 22))
        // Custom date formats; codes containing "[" are excluded so colour sections such as [Red] do not match.
        || (numberingFormat != null
            && !numberingFormat.FormatCode.Value.Contains("[")
            && Regex.IsMatch(numberingFormat.FormatCode, "d|h|m|s|y", RegexOptions.IgnoreCase));

    if (!isDate)
    {
        return rawText;
    }

    // The serial number is parsed with the invariant culture so the current culture's decimal
    // separator cannot change the result. Text that is not a number is returned unchanged.
    double oaDate;
    if (!double.TryParse(rawText, NumberStyles.Float, CultureInfo.InvariantCulture, out oaDate))
    {
        return rawText;
    }

    return Convert.ToString(DateTime.FromOADate(oaDate), CultureInfo.InvariantCulture);
}
/// <summary>
/// Reads the header row of the configured worksheet.
/// </summary>
/// <param name="removeInvalidChars">
/// When true, the collected headers are passed through FileIOUtilities.FixupHeaders, which removes
/// characters that are not allowed in .NET property names.
/// </param>
/// <returns>
/// Header text keyed by column key (the cell address minus the row number). The dictionary is empty
/// when the sheet is not found, is not a worksheet, or has no header row.
/// </returns>
public IDictionary<string, string> GetHeaders(bool removeInvalidChars = false)
{
    var comparer = StringComparer.InvariantCultureIgnoreCase;

    // Keyed by the cell address minus the row number, so headers and values can match up by key.
    IDictionary<string, string> headers = new Dictionary<string, string>(comparer);
    Package spreadsheetPackage = null;
    var startColumnIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(this.StartColumnKey);
    var endColumnIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(this.EndColumnKey);

    try
    {
        spreadsheetPackage = Package.Open(_fileName, FileMode.Open, FileAccess.Read);
        using (var spreadsheetDocument = SpreadsheetDocument.Open(spreadsheetPackage))
        {
            var workbookPart = spreadsheetDocument.WorkbookPart;

            // Find the sheet with the matching name.
            var sheet = spreadsheetDocument.WorkbookPart.Workbook.Descendants<Sheet>().FirstOrDefault(x => comparer.Equals(x.Name, this.SheetName));
            if (sheet == null)
            {
                return headers;
            }

            // The part behind the sheet; it is null when the sheet is not a worksheet part.
            var worksheetPart = workbookPart.GetPartById(sheet.Id) as WorksheetPart;
            if (worksheetPart == null)
            {
                return headers;
            }

            // Needed to look up text values from cells.
            var sstpart = workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
            SharedStringTable sst = null;
            if (sstpart != null)
            {
                sst = sstpart.SharedStringTable;
            }

            // The reader is disposed as soon as the header row has been handled.
            using (var reader = OpenXmlReader.Create(worksheetPart))
            {
                while (reader.Read())
                {
                    // Read until the first row element, then walk its siblings.
                    if (reader.ElementType != typeof(Row))
                    {
                        continue;
                    }

                    do
                    {
                        var row = (Row)reader.LoadCurrentElement();
                        uint rowIndex = row.RowIndex;

                        // Past the header row: there is nothing more to find.
                        if (rowIndex > this.HeaderRow)
                        {
                            break;
                        }

                        // Not yet at the header row: move on to the next row.
                        if (rowIndex != this.HeaderRow)
                        {
                            continue;
                        }

                        if (row.HasChildren)
                        {
                            foreach (var cell in row.Descendants<Cell>())
                            {
                                var cellKey = FileIOUtilities.GetColumnKey(cell.CellReference.Value);
                                if (startColumnIndex != -1 || endColumnIndex != -1)
                                {
                                    int cellIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(cellKey);
                                    if (startColumnIndex >= 0 && cellIndex < startColumnIndex)
                                    {
                                        continue;
                                    }
                                    if (endColumnIndex >= 0 && cellIndex > endColumnIndex)
                                    {
                                        break;
                                    }
                                }

                                // Cells without a value do not contribute a header.
                                if (cell.CellValue == null)
                                {
                                    continue;
                                }

                                string value;
                                if (cell.DataType != null && cell.DataType == CellValues.SharedString && sst != null)
                                {
                                    // Read the text value out of the shared string table.
                                    value = sst.ChildElements[int.Parse(cell.CellValue.Text)].InnerText;
                                }
                                else
                                {
                                    value = cell.CellValue.Text;
                                }

                                headers.Add(cellKey, value);
                            }
                        }

                        if (removeInvalidChars)
                        {
                            // Remove all characters that are not allowed in .NET property names.
                            headers = FileIOUtilities.FixupHeaders(headers);
                        }

                        // The header row has been handled; later rows are not needed.
                        break;
                    }
                    while (reader.ReadNextSibling());

                    // Rows are all done.
                    break;
                }
            }
        }
    }
    finally
    {
        if (spreadsheetPackage != null)
        {
            spreadsheetPackage.Close();
        }
    }

    return headers;
}