Beispiel #1
0
        /// <summary>
        /// Parses the file: streams one dynamic object per non-empty data row of the worksheet
        /// whose name matches <c>SheetName</c> (case-insensitive comparison).
        /// </summary>
        /// <remarks>
        /// This method is an iterator, so the file is not opened until enumeration starts, and the
        /// package is closed when enumeration completes or the enumerator is disposed.
        /// Rows before <c>DataRowStart</c> are skipped, except for <c>HeaderRow</c>, whose cell texts
        /// become the headers handed to <c>FileIOUtilities.RowToExpando</c>. When <c>DataRowEnd</c> is
        /// greater than or equal to <c>DataRowStart</c>, reading stops at the first row past it.
        /// Cells outside the <c>StartColumnKey</c>/<c>EndColumnKey</c> range are ignored; a bound that
        /// converts to -1 is treated as "no bound" (presumably the value returned for an unset key).
        /// Nothing is yielded when the sheet is missing, is not a worksheet, or has no data row
        /// containing a non-blank cell value.
        /// </remarks>
        /// <returns>
        /// IEnumerable&lt;dynamic&gt;. Each item is the object built by <c>FileIOUtilities.RowToExpando</c>
        /// for one row, with its <c>RowId</c> member set to the row index in the sheet.
        /// </returns>
        /// <exception cref="System.IO.FileNotFoundException">
        /// Raised by <c>Package.Open</c> during enumeration when the file does not exist.
        /// </exception>
        public IEnumerable <dynamic> ParseFile()
        {
            var comparer = StringComparer.InvariantCultureIgnoreCase;

            Package spreadsheetPackage = null;

            var startColumnIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(this.StartColumnKey);
            var endColumnIndex   = FileIOUtilities.ConvertExcelColumnNameToNumber(this.EndColumnKey);

            try
            {
                spreadsheetPackage = Package.Open(_fileName, FileMode.Open, FileAccess.Read);

                using (var spreadsheetDocument = SpreadsheetDocument.Open(spreadsheetPackage))
                {
                    var workbookPart = spreadsheetDocument.WorkbookPart;
                    //find the sheet with the matching name
                    var sheet = workbookPart.Workbook.Descendants<Sheet>().FirstOrDefault(x => comparer.Equals(x.Name, this.SheetName));
                    if (sheet == null)
                    {
                        yield break;
                    }

                    //this is used by the reader to load the sheet for processing
                    var worksheetPart = workbookPart.GetPartById(sheet.Id) as WorksheetPart;
                    if (worksheetPart == null)
                    {
                        //the name matched something that is not a worksheet (e.g. a chart sheet); treat it like a missing sheet
                        yield break;
                    }

                    var sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();

                    //check to ensure that we have any data rows at all, that have cell values
                    var hasDataRow = sheetData.Descendants<Row>().Any(row =>
                        row.RowIndex >= this.DataRowStart &&
                        row.Descendants<Cell>().Any(cell => cell.CellValue != null && !string.IsNullOrWhiteSpace(cell.CellValue.Text)));
                    if (!hasDataRow)
                    {
                        yield break;
                    }

                    //needed to look up text values from cells
                    var sstpart           = workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
                    SharedStringTable sst = null;
                    if (sstpart != null)
                    {
                        sst = sstpart.SharedStringTable;
                    }

                    //style information is optional in a workbook, so every level is null-checked
                    CellFormats cellFormats = null;
                    IList<NumberingFormat> numberingFormats = null;
                    var stylesPart = workbookPart.WorkbookStylesPart;
                    if (stylesPart != null && stylesPart.Stylesheet != null)
                    {
                        cellFormats = stylesPart.Stylesheet.CellFormats;
                        if (stylesPart.Stylesheet.NumberingFormats != null)
                        {
                            numberingFormats = stylesPart.Stylesheet.NumberingFormats.OfType<NumberingFormat>().ToList();
                        }
                    }

                    // dictionary for the headers, the key will end up being the cell address minus the row number so that the headers and values can match up by key
                    IDictionary<string, string> headers = new Dictionary<string, string>();

                    //open the reader from the part; it is disposable, so scope it
                    using (var reader = OpenXmlReader.Create(worksheetPart))
                    {
                        while (reader.Read())
                        {
                            //read until we find our rows, then loop through them
                            if (reader.ElementType != typeof(Row))
                            {
                                continue;
                            }

                            do
                            {
                                var  row      = (Row)reader.LoadCurrentElement();
                                uint rowIndex = row.RowIndex;

                                if (rowIndex != this.HeaderRow && rowIndex < this.DataRowStart)
                                {
                                    continue;
                                }
                                //if they have specified a end read row bail out if the rowindex exceeds that end value
                                if (this.DataRowEnd >= this.DataRowStart && rowIndex > this.DataRowEnd)
                                {
                                    break;
                                }

                                //fresh per row: a row without cells must not see (or re-emit) the previous row's values
                                var values = new Dictionary<string, string>();

                                //loop through all of the cells in the row, building a list of either header keys, or value keys depending on which row it is.
                                foreach (var cell in row.Descendants<Cell>())
                                {
                                    var cellKey = FileIOUtilities.GetColumnKey(cell.CellReference.Value);

                                    if (startColumnIndex != -1 || endColumnIndex != -1)
                                    {
                                        var cellIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(cellKey);
                                        if (startColumnIndex >= 0 && cellIndex < startColumnIndex)
                                        {
                                            continue;
                                        }
                                        if (endColumnIndex >= 0 && cellIndex > endColumnIndex)
                                        {
                                            break;
                                        }
                                    }

                                    var value   = String.Empty;
                                    var hasText = cell.CellValue != null && !String.IsNullOrWhiteSpace(cell.CellValue.Text);

                                    if (cell.DataType != null && cell.DataType == CellValues.SharedString && sst != null)
                                    {
                                        //read the text value out of the shared string table; a cell without an index stays empty
                                        if (hasText)
                                        {
                                            value = sst.ChildElements[int.Parse(cell.CellValue.Text, CultureInfo.InvariantCulture)].InnerText;
                                        }
                                    }
                                    else if (hasText)
                                    {
                                        var isDate = false;

                                        if (cell.StyleIndex != null && cellFormats != null)
                                        {
                                            //excel has no "is date" flag; it has to be inferred from the data type or the number format
                                            var             cellFormat      = (CellFormat)cellFormats.ElementAt((int)cell.StyleIndex.Value);
                                            NumberingFormat numberingFormat = null;
                                            if (numberingFormats != null && cellFormat.NumberFormatId != null && cellFormat.NumberFormatId.HasValue)
                                            {
                                                numberingFormat =
                                                    numberingFormats.FirstOrDefault(fmt => fmt.NumberFormatId.Value == cellFormat.NumberFormatId.Value);
                                            }

                                            isDate =
                                                (cell.DataType != null && cell.DataType == CellValues.Date) //just in case
                                                ||
                                                (cellFormat.NumberFormatId != null &&
                                                 (cellFormat.NumberFormatId >= 14 && cellFormat.NumberFormatId <= 22)) //built in date formats
                                                ||
                                                (numberingFormat != null &&
                                                 !numberingFormat.FormatCode.Value.Contains("[") && //so we dont match [Red] in numbering formats
                                                 Regex.IsMatch(numberingFormat.FormatCode, "d|h|m|s|y", RegexOptions.IgnoreCase)); //custom date formats
                                        }

                                        //cell values are stored culture-invariant, so parse them that way; text that is not a
                                        //serial date number (e.g. an ISO date or a string result) is passed through unchanged
                                        double oaDate;
                                        if (isDate && double.TryParse(cell.CellValue.Text, NumberStyles.Float, CultureInfo.InvariantCulture, out oaDate))
                                        {
                                            value = Convert.ToString(DateTime.FromOADate(oaDate), CultureInfo.InvariantCulture);
                                        }
                                        else
                                        {
                                            value = cell.CellValue.Text;
                                        }
                                    }

                                    if (rowIndex >= this.DataRowStart)
                                    {
                                        values.Add(cellKey, (value ?? "").Trim());
                                    }
                                    else if (rowIndex == this.HeaderRow)
                                    {
                                        headers.Add(cellKey, value);
                                    }
                                }

                                //we have accumulated either our headers or values for this row, so now we need to handle them
                                if (rowIndex >= this.DataRowStart)
                                {
                                    //sometimes excel reports the last row as higher than it actually has values, and we end up with an empty row.
                                    //skip the row if this happens. otherwise we can output a weird object value
                                    if (values.Any(x => !String.IsNullOrWhiteSpace(x.Value)))
                                    {
                                        dynamic retObj = FileIOUtilities.RowToExpando(headers, values, Convert.ToInt32(rowIndex));
                                        retObj.RowId = Convert.ToInt32(rowIndex);
                                        //stream the data row back to the caller
                                        yield return retObj;
                                    }
                                }
                                else if (rowIndex == this.HeaderRow)
                                {
                                    //remove all characters that are not allowed in .net property names
                                    headers = FileIOUtilities.FixupHeaders(headers);
                                }
                            } while (reader.ReadNextSibling());

                            //rows are all done, break out of the loop
                            break;
                        }
                    }
                }
            }
            finally
            {
                //Package.Open may have thrown before the package was assigned
                if (spreadsheetPackage != null)
                {
                    spreadsheetPackage.Close();
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads the header row (<c>HeaderRow</c>) of the worksheet whose name matches <c>SheetName</c>
        /// (case-insensitive comparison) and returns its cell texts keyed by column.
        /// </summary>
        /// <param name="removeInvalidChars">
        /// When true, the headers are passed through <c>FileIOUtilities.FixupHeaders</c>, which is used
        /// to remove characters that are not allowed in .NET property names.
        /// </param>
        /// <returns>
        /// A dictionary whose key is the cell address minus the row number and whose value is the header
        /// text. Cells without a value are omitted, as are cells outside the
        /// <c>StartColumnKey</c>/<c>EndColumnKey</c> range. The dictionary is empty when the sheet is
        /// missing, is not a worksheet, or has no row at <c>HeaderRow</c>.
        /// </returns>
        public IDictionary <string, string> GetHeaders(bool removeInvalidChars = false)
        {
            var comparer = StringComparer.InvariantCultureIgnoreCase;
            // dictionary for the headers, the key will end up being the cell address minus the row number so that the headers and values can match up by key
            IDictionary<string, string> headers = new Dictionary<string, string>(comparer);

            Package spreadsheetPackage = null;

            var startColumnIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(this.StartColumnKey);
            var endColumnIndex   = FileIOUtilities.ConvertExcelColumnNameToNumber(this.EndColumnKey);

            try
            {
                spreadsheetPackage = Package.Open(_fileName, FileMode.Open, FileAccess.Read);

                using (var spreadsheetDocument = SpreadsheetDocument.Open(spreadsheetPackage))
                {
                    var workbookPart = spreadsheetDocument.WorkbookPart;
                    //find the sheet with the matching name
                    var sheet = workbookPart.Workbook.Descendants<Sheet>().FirstOrDefault(x => comparer.Equals(x.Name, this.SheetName));
                    if (sheet == null)
                    {
                        return headers;
                    }

                    //this is used by the reader to load the sheet for processing
                    var worksheetPart = workbookPart.GetPartById(sheet.Id) as WorksheetPart;
                    if (worksheetPart == null)
                    {
                        //the name matched something that is not a worksheet (e.g. a chart sheet); treat it like a missing sheet
                        return headers;
                    }

                    //needed to look up text values from cells
                    var sstpart           = workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
                    SharedStringTable sst = null;
                    if (sstpart != null)
                    {
                        sst = sstpart.SharedStringTable;
                    }

                    //open the reader from the part; it is disposable, so scope it
                    using (var reader = OpenXmlReader.Create(worksheetPart))
                    {
                        while (reader.Read())
                        {
                            //read until we find our rows, then loop through them
                            if (reader.ElementType != typeof(Row))
                            {
                                continue;
                            }

                            do
                            {
                                var  row      = (Row)reader.LoadCurrentElement();
                                uint rowIndex = row.RowIndex;

                                if (rowIndex > this.HeaderRow)
                                {
                                    //already past where the header row would be
                                    break;
                                }
                                if (rowIndex != this.HeaderRow)
                                {
                                    continue;
                                }

                                foreach (var cell in row.Descendants<Cell>())
                                {
                                    var cellKey = FileIOUtilities.GetColumnKey(cell.CellReference.Value);
                                    if (startColumnIndex != -1 || endColumnIndex != -1)
                                    {
                                        int cellIndex = FileIOUtilities.ConvertExcelColumnNameToNumber(cellKey);
                                        if (startColumnIndex >= 0 && cellIndex < startColumnIndex)
                                        {
                                            continue;
                                        }
                                        if (endColumnIndex >= 0 && cellIndex > endColumnIndex)
                                        {
                                            break;
                                        }
                                    }

                                    if (cell.CellValue == null)
                                    {
                                        //cells without a value contribute no header
                                        continue;
                                    }

                                    string value;
                                    if (cell.DataType != null && cell.DataType == CellValues.SharedString && sst != null)
                                    {
                                        //read the text value out of the shared string table.
                                        value = sst.ChildElements[int.Parse(cell.CellValue.Text)].InnerText;
                                    }
                                    else
                                    {
                                        value = cell.CellValue.Text;
                                    }

                                    headers.Add(cellKey, value);
                                }

                                if (removeInvalidChars)
                                {
                                    //remove all characters that are not allowed in .net property names
                                    headers = FileIOUtilities.FixupHeaders(headers);
                                }

                                //the header row has been consumed; no need to load any further rows
                                break;
                            } while (reader.ReadNextSibling());

                            //rows are all done, break out of the loop
                            break;
                        }
                    }
                }
            }
            finally
            {
                if (spreadsheetPackage != null)
                {
                    spreadsheetPackage.Close();
                }
            }

            return headers;
        }