/// <summary>
/// Opens the spreadsheet at <c>_fileName</c> read-only, finds the sheet whose name matches
/// <c>SheetName</c> (case-insensitive, invariant culture) and lazily yields one dynamic object
/// per data row that contains at least one non-blank value.
/// </summary>
/// <remarks>
/// Because this is an iterator, the file is not opened until the result is enumerated, and it is
/// closed when enumeration completes or the enumerator is disposed.
/// Rows are read between <c>DataRowStart</c> and <c>DataRowEnd</c> (the end bound is only applied
/// when it is not smaller than the start), and cells are limited to the columns between
/// <c>StartColumnKey</c> and <c>EndColumnKey</c> when those resolve to a non-negative index.
/// </remarks>
/// <returns>
/// A sequence of objects produced by <c>FileIOUtilities.RowToExpando</c>, each with its
/// <c>RowId</c> set to the worksheet row index. The sequence is empty when the sheet is missing,
/// is not a worksheet, or has no data rows with values.
/// </returns>
/// <exception cref="System.IO.FileNotFoundException">
/// Presumably raised by <c>Package.Open</c> when the file does not exist (confirm against the
/// packaging documentation); it is no longer masked by a failure in the cleanup code.
/// </exception>
public IEnumerable<dynamic> ParseFile()
{
    var comparer = StringComparer.InvariantCultureIgnoreCase;
    Package spreadsheetPackage = null;
    var startColumnIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(this.StartColumnKey);
    var endColumnIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(this.EndColumnKey);

    try
    {
        spreadsheetPackage = Package.Open(_fileName, FileMode.Open, FileAccess.Read);
        using (var spreadsheetDocument = SpreadsheetDocument.Open(spreadsheetPackage))
        {
            var workbookPart = spreadsheetDocument.WorkbookPart;

            // Find the sheet with the matching name.
            var sheet = workbookPart.Workbook.Descendants<Sheet>()
                .FirstOrDefault(x => comparer.Equals(x.Name, this.SheetName));
            if (sheet == null)
            {
                yield break;
            }

            // The part the reader streams from. The cast yields null when the named sheet is
            // not a worksheet part, in which case there is nothing to read.
            var worksheetPart = workbookPart.GetPartById(sheet.Id) as WorksheetPart;
            if (worksheetPart == null)
            {
                yield break;
            }

            var sheetData = worksheetPart.Worksheet.Elements<SheetData>().FirstOrDefault();
            if (sheetData == null)
            {
                yield break;
            }

            // Make sure there is at least one data row carrying a non-blank cell value.
            var hasDataRow = sheetData.Descendants<Row>().Any(row =>
                row.RowIndex >= this.DataRowStart
                && row.Descendants<Cell>().Any(cell =>
                    cell.CellValue != null && !string.IsNullOrWhiteSpace(cell.CellValue.Text)));
            if (!hasDataRow)
            {
                yield break;
            }

            // Needed to look up text values for shared-string cells.
            var sstpart = workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
            SharedStringTable sst = null;
            if (sstpart != null)
            {
                sst = sstpart.SharedStringTable;
            }

            // Style information drives date detection. Guard against a missing styles part so
            // that it degrades to "no date formatting" instead of a NullReferenceException.
            var stylesPart = workbookPart.WorkbookStylesPart;
            var stylesheet = stylesPart != null ? stylesPart.Stylesheet : null;
            var cellFormats = stylesheet != null ? stylesheet.CellFormats : null;
            IList<NumberingFormat> numberingFormats = null;
            if (stylesheet != null && stylesheet.NumberingFormats != null)
            {
                numberingFormats = stylesheet.NumberingFormats.OfType<NumberingFormat>().ToList();
            }

            // Headers are keyed by the cell address minus the row number so that header and
            // value dictionaries line up by column key.
            IDictionary<string, string> headers = new Dictionary<string, string>();

            // Dispose the reader deterministically, including when the caller stops enumerating early.
            using (var reader = OpenXmlReader.Create(worksheetPart))
            {
                while (reader.Read())
                {
                    // Read until the first row element, then walk its siblings.
                    if (reader.ElementType != typeof(Row))
                    {
                        continue;
                    }

                    do
                    {
                        var row = (Row)reader.LoadCurrentElement();
                        uint rowIndex = row.RowIndex;

                        // Skip anything that is neither the header row nor a data row.
                        if (rowIndex != this.HeaderRow && rowIndex < this.DataRowStart)
                        {
                            continue;
                        }

                        // If an end row was specified, stop once the row index exceeds it.
                        if (this.DataRowEnd >= this.DataRowStart && rowIndex > this.DataRowEnd)
                        {
                            break;
                        }

                        // Start every row with a fresh dictionary so that a row without cells
                        // can never reuse (or dereference a missing) previous row's values.
                        var values = new Dictionary<string, string>();

                        if (row.HasChildren)
                        {
                            foreach (var cell in row.Descendants<Cell>())
                            {
                                var cellKey = FileIOUtilities.GetColumnKey(cell.CellReference.Value);

                                if (startColumnIndex != -1 || endColumnIndex != -1)
                                {
                                    var cellIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(cellKey);
                                    if (startColumnIndex >= 0 && cellIndex < startColumnIndex)
                                    {
                                        continue;
                                    }

                                    if (endColumnIndex >= 0 && cellIndex > endColumnIndex)
                                    {
                                        break;
                                    }
                                }

                                var value = String.Empty;
                                if (cell.DataType != null
                                    && cell.DataType == CellValues.SharedString
                                    && sst != null
                                    && cell.CellValue != null)
                                {
                                    // The cell stores an index into the shared string table.
                                    value = sst.ChildElements[int.Parse(cell.CellValue.Text)].InnerText;
                                }
                                else if (cell.CellValue != null && !String.IsNullOrWhiteSpace(cell.CellValue.Text))
                                {
                                    value = cell.CellValue.Text;

                                    if (cell.StyleIndex != null && cellFormats != null)
                                    {
                                        // A date is only recognisable through the cell's number format.
                                        var cellFormat = (CellFormat)cellFormats.ElementAt((int)cell.StyleIndex.Value);
                                        NumberingFormat numberingFormat = null;
                                        if (numberingFormats != null
                                            && cellFormat.NumberFormatId != null
                                            && cellFormat.NumberFormatId.HasValue)
                                        {
                                            numberingFormat = numberingFormats.FirstOrDefault(
                                                fmt => fmt.NumberFormatId.Value == cellFormat.NumberFormatId.Value);
                                        }

                                        var isDate =
                                            // explicit date data type, just in case
                                            (cell.DataType != null && cell.DataType == CellValues.Date)
                                            // built-in date formats
                                            || (cellFormat.NumberFormatId != null
                                                && (cellFormat.NumberFormatId >= 14 && cellFormat.NumberFormatId <= 22))
                                            // custom date formats; the "[" test avoids matching e.g. [Red]
                                            || (numberingFormat != null
                                                && !numberingFormat.FormatCode.Value.Contains("[")
                                                && Regex.IsMatch(numberingFormat.FormatCode, "d|h|m|s|y", RegexOptions.IgnoreCase));

                                        if (isDate)
                                        {
                                            // Parse with the invariant culture so the stored serial
                                            // number is not misread under a culture that treats '.'
                                            // as a group separator.
                                            var serialDate = double.Parse(cell.CellValue.Text, CultureInfo.InvariantCulture);
                                            value = Convert.ToString(DateTime.FromOADate(serialDate), CultureInfo.InvariantCulture);
                                        }
                                    }
                                }

                                if (rowIndex >= this.DataRowStart)
                                {
                                    values.Add(cellKey, (value ?? "").Trim());
                                }
                                else if (rowIndex == this.HeaderRow)
                                {
                                    headers.Add(cellKey, value);
                                }
                            }
                        }

                        // Headers or values for this row are now accumulated; handle them.
                        if (rowIndex >= this.DataRowStart)
                        {
                            // Sometimes the sheet reports trailing rows that hold no values;
                            // skip those instead of emitting an empty object.
                            if (values.Any(x => !String.IsNullOrWhiteSpace(x.Value)))
                            {
                                dynamic retObj = FileIOUtilities.RowToExpando(headers, values, Convert.ToInt32(rowIndex));
                                retObj.RowId = Convert.ToInt32(rowIndex);

                                // Stream the data row back to the caller.
                                yield return retObj;
                            }
                        }
                        else if (rowIndex == this.HeaderRow)
                        {
                            // Remove all characters that are not allowed in .NET property names.
                            headers = FileIOUtilities.FixupHeaders(headers);
                        }
                    }
                    while (reader.ReadNextSibling());

                    // Rows are all done; leave the outer read loop.
                    break;
                }
            }
        }
    }
    finally
    {
        // Package.Open may have thrown before the assignment; closing a null reference here
        // would replace the original exception with a NullReferenceException.
        if (spreadsheetPackage != null)
        {
            spreadsheetPackage.Close();
        }
    }
}
/// <summary>
/// Opens the spreadsheet at <c>_fileName</c> read-only and collects the cell values of the row
/// whose index equals <c>HeaderRow</c> on the sheet named <c>SheetName</c>.
/// </summary>
/// <param name="removeInvalidChars">
/// When <c>true</c>, the collected headers are passed through
/// <c>FileIOUtilities.FixupHeaders</c> before being returned.
/// </param>
/// <returns>
/// A dictionary keyed by column key (as produced by <c>FileIOUtilities.GetColumnKey</c>) holding
/// the header text. Cells without a value are omitted. The dictionary is empty when the sheet
/// cannot be found, is not a worksheet, or has no header row.
/// </returns>
public IDictionary<string, string> GetHeaders(bool removeInvalidChars = false)
{
    var comparer = StringComparer.InvariantCultureIgnoreCase;

    // Keyed by the cell address minus the row number so headers and values can be matched by column.
    IDictionary<string, string> headers = new Dictionary<string, string>(comparer);
    Package spreadsheetPackage = null;
    var startColumnIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(this.StartColumnKey);
    var endColumnIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(this.EndColumnKey);

    try
    {
        spreadsheetPackage = Package.Open(_fileName, FileMode.Open, FileAccess.Read);
        using (var spreadsheetDocument = SpreadsheetDocument.Open(spreadsheetPackage))
        {
            var workbookPart = spreadsheetDocument.WorkbookPart;

            // Find the sheet with the matching name.
            var sheet = workbookPart.Workbook.Descendants<Sheet>()
                .FirstOrDefault(x => comparer.Equals(x.Name, this.SheetName));
            if (sheet == null)
            {
                return headers;
            }

            // The cast yields null when the named sheet is not a worksheet part; there are no
            // headers to read in that case.
            var worksheetPart = workbookPart.GetPartById(sheet.Id) as WorksheetPart;
            if (worksheetPart == null)
            {
                return headers;
            }

            // Needed to look up text values for shared-string cells.
            var sstpart = workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
            SharedStringTable sst = null;
            if (sstpart != null)
            {
                sst = sstpart.SharedStringTable;
            }

            // Dispose the reader deterministically once the header row has been handled.
            using (var reader = OpenXmlReader.Create(worksheetPart))
            {
                while (reader.Read())
                {
                    // Read until the first row element, then walk its siblings.
                    if (reader.ElementType != typeof(Row))
                    {
                        continue;
                    }

                    do
                    {
                        var row = (Row)reader.LoadCurrentElement();
                        uint rowIndex = row.RowIndex;

                        // Past the header row: nothing further to collect.
                        if (rowIndex > this.HeaderRow)
                        {
                            break;
                        }

                        // Before the header row: keep scanning.
                        if (rowIndex != this.HeaderRow)
                        {
                            continue;
                        }

                        if (row.HasChildren)
                        {
                            foreach (var cell in row.Descendants<Cell>())
                            {
                                var cellKey = FileIOUtilities.GetColumnKey(cell.CellReference.Value);

                                if (startColumnIndex != -1 || endColumnIndex != -1)
                                {
                                    int cellIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(cellKey);
                                    if (startColumnIndex >= 0 && cellIndex < startColumnIndex)
                                    {
                                        continue;
                                    }

                                    if (endColumnIndex >= 0 && cellIndex > endColumnIndex)
                                    {
                                        break;
                                    }
                                }

                                // Cells without a stored value contribute no header.
                                if (cell.CellValue == null)
                                {
                                    continue;
                                }

                                string value;
                                if (cell.DataType != null && cell.DataType == CellValues.SharedString && sst != null)
                                {
                                    // The cell stores an index into the shared string table.
                                    value = sst.ChildElements[int.Parse(cell.CellValue.Text)].InnerText;
                                }
                                else
                                {
                                    value = cell.CellValue.Text;
                                }

                                headers.Add(cellKey, value);
                            }
                        }

                        if (removeInvalidChars)
                        {
                            // Remove all characters that are not allowed in .NET property names.
                            headers = FileIOUtilities.FixupHeaders(headers);
                        }
                    }
                    while (reader.ReadNextSibling());

                    // Rows are all done; leave the outer read loop.
                    break;
                }
            }
        }
    }
    finally
    {
        if (spreadsheetPackage != null)
        {
            spreadsheetPackage.Close();
        }
    }

    return headers;
}