/// <summary>
/// Opens a BIFF stream over <paramref name="stream"/> and initialises the workbook from it.
/// </summary>
/// <param name="stream">Source stream; also stored in <see cref="Stream"/>.</param>
/// <param name="password">Password handed to the BIFF stream.</param>
/// <param name="fallbackEncoding">Encoding used when the BIFF version is not 8.</param>
/// <exception cref="ExcelReaderException">
/// The BIFF version is 0, or the BIFF type is neither workbook globals nor a worksheet.
/// </exception>
internal XlsWorkbook(Stream stream, string password, Encoding fallbackEncoding)
{
    Stream = stream;

    using (var biffStream = new XlsBiffStream(stream, 0, 0, password))
    {
        if (biffStream.BiffVersion == 0)
        {
            throw new ExcelReaderException(Errors.ErrorWorkbookGlobalsInvalidData);
        }

        BiffVersion = biffStream.BiffVersion;
        SecretKey = biffStream.SecretKey;
        Encryption = biffStream.Encryption;

        // BIFF8 is read as Unicode; every other version uses the caller's fallback.
        Encoding = biffStream.BiffVersion == 8 ? Encoding.Unicode : fallbackEncoding;

        switch (biffStream.BiffType)
        {
            case BIFFTYPE.WorkbookGlobals:
                ReadWorkbookGlobals(biffStream);
                break;

            case BIFFTYPE.Worksheet:
                // Set up a 'virtual' bound sheet pointing at this stream.
                Sheets.Add(new XlsBiffBoundSheet(0, XlsBiffBoundSheet.SheetType.Worksheet, XlsBiffBoundSheet.SheetVisibility.Visible, "Sheet"));
                break;

            default:
                throw new ExcelReaderException(Errors.ErrorWorkbookGlobalsInvalidData);
        }
    }
}
/// <summary>
/// Reads the cell records located between <paramref name="minOffset"/> and
/// <paramref name="maxOffset"/> and groups the resulting cells by row.
/// </summary>
/// <param name="biffStream">Stream to read from; its position is moved to <paramref name="minOffset"/>.</param>
/// <param name="startRow">Index of the first row of the block.</param>
/// <param name="rows">Number of rows in the block.</param>
/// <param name="minOffset">Stream offset to start at; <see cref="int.MaxValue"/> means there is nothing to read.</param>
/// <param name="maxOffset">Last stream offset at which a record may still start.</param>
/// <returns>A row block whose <c>Rows</c> dictionary is keyed by row index.</returns>
private XlsRowBlock ReadNextBlock(XlsBiffStream biffStream, int startRow, int rows, int minOffset, int maxOffset)
{
    var block = new XlsRowBlock { Rows = new Dictionary<int, Row>() };

    // Ensure rows with physical records are initialized with height,
    // i.e. every row of this block that has an entry in RowOffsetMap.
    for (var offset = 0; offset < rows; offset++)
    {
        var rowIndex = startRow + offset;
        if (RowOffsetMap.TryGetValue(rowIndex, out _))
        {
            EnsureRow(block, rowIndex);
        }
    }

    if (minOffset == int.MaxValue)
    {
        return block;
    }

    biffStream.Position = minOffset;

    XlsBiffRecord pendingIxfe = null;
    while (biffStream.Position <= maxOffset)
    {
        var record = biffStream.Read();
        if (record == null)
        {
            break;
        }

        if (record.Id == BIFFRECORDTYPE.IXFE)
        {
            // BIFF2: If cell.xformat == 63, this contains the actual XF index >= 63
            pendingIxfe = record;
        }

        if (!(record is XlsBiffBlankCell cell))
        {
            continue;
        }

        var targetRow = EnsureRow(block, cell.RowIndex);
        if (cell.Id == BIFFRECORDTYPE.MULRK)
        {
            targetRow.Cells.AddRange(ReadMultiCell(cell));
        }
        else
        {
            var xfIndex = GetXfIndexForCell(cell, pendingIxfe);
            targetRow.Cells.Add(ReadSingleCell(biffStream, cell, xfIndex));
        }

        // An IXFE record only applies to the cell record that follows it.
        pendingIxfe = null;
    }

    return block;
}
/// <summary>
/// Lazily enumerates the worksheet rows in index order. Whenever the next row read from the
/// stream has a higher index than expected, cell-less placeholder rows are yielded for the gap.
/// </summary>
/// <returns>The rows of the worksheet, including placeholders for skipped indexes.</returns>
public IEnumerable<Row> ReadRows()
{
    using (var biffStream = new XlsBiffStream(Stream, (int)DataOffset, Workbook.BiffVersion, null, Workbook.SecretKey, Workbook.Encryption))
    {
        var expectedIndex = 0;

        foreach (var row in ReadWorksheetRows(biffStream))
        {
            // Fill the gap before this row with empty rows built from the default row height.
            while (expectedIndex < row.RowIndex)
            {
                yield return new Row(expectedIndex, DefaultRowHeight / 20.0, new List<Cell>());
                ++expectedIndex;
            }

            expectedIndex++;
            yield return row;
        }
    }
}
/// <summary>
/// Reads the STRING record that should follow a formula record, skipping one SHAREDFMLA
/// record if it comes first.
/// </summary>
/// <param name="biffStream">Stream positioned right after the formula record.</param>
/// <param name="effectiveStyle">Cell style; its font index selects the byte-string encoding.</param>
/// <returns>The decoded string, or <c>null</c> when no STRING record was found.</returns>
private string TryGetFormulaString(XlsBiffStream biffStream, ExtendedFormat effectiveStyle)
{
    var next = biffStream.Read();
    if (next != null && next.Id == BIFFRECORDTYPE.SHAREDFMLA)
    {
        next = biffStream.Read();
    }

    if (next == null || next.Id != BIFFRECORDTYPE.STRING)
    {
        // Bad data - could not find a string following the formula
        return null;
    }

    var stringRecord = (XlsBiffFormulaString)next;

    // Prefer the encoding of the cell's font; fall back to the sheet-level encoding.
    var font = GetFont(effectiveStyle.FontIndex);
    var formulaEncoding = font?.ByteStringEncoding ?? Encoding;
    return stringRecord.GetValue(formulaEncoding);
}
/// <summary>
/// Extracts the cached result of a formula cell according to its formula value type.
/// </summary>
/// <param name="biffStream">Stream used to read a trailing string record for string results.</param>
/// <param name="formulaCell">The formula record.</param>
/// <param name="effectiveStyle">Cell style; supplies the number format index and font.</param>
/// <param name="error">Receives the cell error for error results, otherwise <c>null</c>.</param>
/// <returns>
/// The formula value, or <c>null</c> for error results and unrecognised value types.
/// </returns>
private object TryGetFormulaValue(XlsBiffStream biffStream, XlsBiffFormulaCell formulaCell, ExtendedFormat effectiveStyle, out CellError? error)
{
    error = null;
    object value = null;

    switch (formulaCell.FormulaType)
    {
        case XlsBiffFormulaCell.FormulaValueType.Boolean:
            value = formulaCell.BooleanValue;
            break;

        case XlsBiffFormulaCell.FormulaValueType.Error:
            error = (CellError)formulaCell.ErrorValue;
            break;

        case XlsBiffFormulaCell.FormulaValueType.EmptyString:
            value = string.Empty;
            break;

        case XlsBiffFormulaCell.FormulaValueType.Number:
            value = TryConvertOADateTime(formulaCell.XNumValue, effectiveStyle.NumberFormatIndex);
            break;

        case XlsBiffFormulaCell.FormulaValueType.String:
            value = TryGetFormulaString(biffStream, effectiveStyle);
            break;

        // Bad data or new formula value type: value stays null.
    }

    return value;
}
/// <summary>
/// Walks the worksheet block by block and yields every row that the block reader produced.
/// Row indexes without a row in the block are skipped.
/// </summary>
/// <param name="biffStream">Stream the blocks are read from.</param>
/// <returns>The rows found, in ascending row-index order within each block.</returns>
private IEnumerable<Row> ReadWorksheetRows(XlsBiffStream biffStream)
{
    for (var blockStart = 0; blockStart < RowCount;)
    {
        GetBlockSize(blockStart, out var blockRowCount, out var minOffset, out var maxOffset);
        var block = ReadNextBlock(biffStream, blockStart, blockRowCount, minOffset, maxOffset);

        for (var offset = 0; offset < blockRowCount; offset++)
        {
            if (block.Rows.TryGetValue(blockStart + offset, out var row))
            {
                yield return row;
            }
        }

        blockStart += blockRowCount;
    }
}
/// <summary>
/// Releases the reader's state. Does nothing when called a second time.
/// </summary>
/// <param name="disposing">
/// When <c>true</c>, the workbook data is disposed and the sheet list is cleared before the
/// references are dropped; when <c>false</c>, the references are only dropped.
/// </param>
private void Dispose(bool disposing)
{
    // Dispose has already run: nothing left to do.
    if (this.disposed)
    {
        return;
    }

    if (disposing)
    {
        if (m_workbookData != null)
        {
            m_workbookData.Dispose();
        }

        if (m_sheets != null)
        {
            m_sheets.Clear();
        }
    }

    m_workbookData = null;
    m_sheets = null;
    m_stream = null;
    m_globals = null;
    m_encoding = null;
    m_hdr = null;

    disposed = true;
}
/// <summary>
/// Puts the reader into its failed state: stores the message, marks the reader invalid,
/// closes the underlying file, marks the reader closed and drops all parsing state.
/// </summary>
/// <param name="message">Text stored as the reader's exception message.</param>
private void fail(string message)
{
    m_exceptionMessage = message;
    m_isValid = false;

    m_file.Close();
    m_isClosed = true;

    // Release everything that was built up while parsing.
    m_hdr = null;
    m_stream = null;
    m_globals = null;
    m_sheets = null;
    m_workbookData = null;
    m_encoding = null;
}
/// <summary>
/// Reads the file header, locates the workbook stream and collects the workbook-level records
/// (sheets, code page, fonts, formats, extended formats, shared strings) into the globals.
/// Problems found before the record loop are reported through <c>fail</c> instead of throwing.
/// </summary>
private void readWorkBookGlobals()
{
    // Read the header; header and format errors put the reader into the failed state.
    try
    {
        m_hdr = XlsHeader.ReadHeader(m_file);
    }
    catch (Exceptions.HeaderException headerError)
    {
        fail(headerError.Message);
        return;
    }
    catch (FormatException formatError)
    {
        fail(formatError.Message);
        return;
    }

    XlsRootDirectory rootDirectory = new XlsRootDirectory(m_hdr);

    // Look for the WORKBOOK entry first, then fall back to BOOK.
    XlsDirectoryEntry workbookEntry = rootDirectory.FindEntry(WORKBOOK);
    if (workbookEntry == null)
    {
        workbookEntry = rootDirectory.FindEntry(BOOK);
    }

    if (workbookEntry == null)
    {
        fail(Errors.ErrorStreamWorkbookNotFound);
        return;
    }

    if (workbookEntry.EntryType != STGTY.STGTY_STREAM)
    {
        fail(Errors.ErrorWorkbookIsNotStream);
        return;
    }

    m_stream = new XlsBiffStream(m_hdr, workbookEntry.StreamFirstSector);
    m_globals = new XlsWorkbookGlobals();
    m_stream.Seek(0, SeekOrigin.Begin);

    // The stream has to start with a workbook-globals BOF record.
    XlsBiffBOF bof = m_stream.Read() as XlsBiffBOF;
    if (bof == null || bof.Type != BIFFTYPE.WorkbookGlobals)
    {
        fail(Errors.ErrorWorkbookGlobalsInvalidData);
        return;
    }

    // True between an SST record and the following EXTSST record; only then are
    // CONTINUE records appended to the shared string table.
    bool insideSst = false;

    m_version = bof.Version;
    m_sheets = new List<XlsWorksheet>();

    for (XlsBiffRecord record = m_stream.Read(); record != null; record = m_stream.Read())
    {
        switch (record.ID)
        {
            case BIFFRECORDTYPE.INTERFACEHDR:
                m_globals.InterfaceHdr = (XlsBiffInterfaceHdr)record;
                break;

            case BIFFRECORDTYPE.BOUNDSHEET:
            {
                XlsBiffBoundSheet boundSheet = (XlsBiffBoundSheet)record;

                // Only worksheets are registered; other sheet types are ignored.
                if (boundSheet.Type == XlsBiffBoundSheet.SheetType.Worksheet)
                {
                    boundSheet.IsV8 = isV8();
                    boundSheet.UseEncoding = m_encoding;
                    m_sheets.Add(new XlsWorksheet(m_globals.Sheets.Count, boundSheet));
                    m_globals.Sheets.Add(boundSheet);
                }

                break;
            }

            case BIFFRECORDTYPE.MMS:
                m_globals.MMS = record;
                break;

            case BIFFRECORDTYPE.COUNTRY:
                m_globals.Country = record;
                break;

            case BIFFRECORDTYPE.CODEPAGE:
                m_globals.CodePage = (XlsBiffSimpleValueRecord)record;
                try
                {
                    m_encoding = Encoding.GetEncoding(m_globals.CodePage.Value);
                }
                catch (ArgumentException)
                {
                    // Warning - Password protection
                    // TODO: Attach to ILog
                }

                break;

            case BIFFRECORDTYPE.FONT:
            case BIFFRECORDTYPE.FONT_V34:
                m_globals.Fonts.Add(record);
                break;

            case BIFFRECORDTYPE.FORMAT:
            case BIFFRECORDTYPE.FORMAT_V23:
                m_globals.Formats.Add(record);
                break;

            case BIFFRECORDTYPE.XF:
            case BIFFRECORDTYPE.XF_V4:
            case BIFFRECORDTYPE.XF_V3:
            case BIFFRECORDTYPE.XF_V2:
                m_globals.ExtendedFormats.Add(record);
                break;

            case BIFFRECORDTYPE.SST:
                m_globals.SST = (XlsBiffSST)record;
                insideSst = true;
                break;

            case BIFFRECORDTYPE.CONTINUE:
                if (insideSst)
                {
                    XlsBiffContinue continuation = (XlsBiffContinue)record;
                    m_globals.SST.Append(continuation);
                }

                break;

            case BIFFRECORDTYPE.EXTSST:
                m_globals.ExtSST = record;
                insideSst = false;
                break;

            case BIFFRECORDTYPE.PROTECT:
            case BIFFRECORDTYPE.PASSWORD:
            case BIFFRECORDTYPE.PROT4REVPASSWORD:
                // IsProtected
                break;

            case BIFFRECORDTYPE.EOF:
                // End of the globals substream: decode the collected shared strings and stop.
                if (m_globals.SST != null)
                {
                    m_globals.SST.ReadStrings();
                }

                return;

            default:
                break;
        }
    }
}
/// <summary>
/// Parses a complete XLS file from <paramref name="fileStream"/> into <c>m_workbookData</c>.
/// The stream is stored in <c>m_file</c> and disposed when parsing ends, whether it succeeds
/// or throws.
/// </summary>
/// <param name="fileStream">Stream containing the XLS file; disposed by this method.</param>
/// <exception cref="FileNotFoundException">No WORKBOOK or BOOK directory entry exists.</exception>
/// <exception cref="FormatException">The workbook directory entry is not a stream.</exception>
private void ParseXlsStream(Stream fileStream)
{
    using (m_file = fileStream)
    {
        m_hdr = XlsHeader.ReadHeader(m_file);
        XlsRootDirectory dir = new XlsRootDirectory(m_hdr);
        XlsDirectoryEntry workbookEntry = dir.FindEntry(WORKBOOK) ?? dir.FindEntry(BOOK);

        if (workbookEntry == null)
        {
            throw new FileNotFoundException(Errors.ErrorStreamWorkbookNotFound);
        }

        if (workbookEntry.EntryType != STGTY.STGTY_STREAM)
        {
            throw new FormatException(Errors.ErrorWorkbookIsNotStream);
        }

        m_stream = new XlsBiffStream(m_hdr, workbookEntry.StreamFirstSector);

        ReadWorkbookGlobals();

        // Only sheets that were read successfully contribute a table to the result.
        m_workbookData = new DataSet();
        for (int i = 0; i < m_sheets.Count; i++)
        {
            if (ReadWorksheet(m_sheets[i]))
            {
                m_workbookData.Tables.Add(m_sheets[i].Data);
            }
        }

        // Drop the intermediate parsing state so it becomes eligible for collection.
        // No GC.Collect() here: forcing a full blocking collection from library code stalls the
        // host process and promotes surviving objects early; the runtime reclaims the released
        // structures on its own schedule.
        m_globals.SST = null;
        m_globals = null;
        m_sheets = null;
        m_stream = null;
        m_hdr = null;

        GC.SuppressFinalize(this);
    }
}
/// <summary>
/// Reads records from the workbook globals substream until EOF (or the end of the stream) and
/// stores the sheets, code page, date system, fonts, extended formats, shared strings and
/// number formats found there.
/// </summary>
/// <param name="biffStream">Stream positioned after the workbook-globals BOF record.</param>
private void ReadWorkbookGlobals(XlsBiffStream biffStream)
{
    // Format records are collected undecoded; see the note at the end of the method.
    var pendingFormats = new Dictionary<int, XlsBiffFormatString>();

    for (XlsBiffRecord rec = biffStream.Read(); rec != null && !(rec is XlsBiffEof); rec = biffStream.Read())
    {
        switch (rec)
        {
            case XlsBiffInterfaceHdr interfaceHdr:
                InterfaceHdr = interfaceHdr;
                break;

            case XlsBiffBoundSheet boundSheet:
                // Only worksheets are kept; other sheet types are ignored.
                if (boundSheet.Type == XlsBiffBoundSheet.SheetType.Worksheet)
                {
                    Sheets.Add(boundSheet);
                }

                break;

            case XlsBiffSimpleValueRecord codePageRecord when rec.Id == BIFFRECORDTYPE.CODEPAGE:
                // [MS-XLS 2.4.52 CodePage] An unsigned integer that specifies the workbook's code page.
                // The value MUST be one of the code page values specified in [CODEPG] or the special
                // value 1200, which means that the workbook is Unicode.
                CodePage = codePageRecord;
                Encoding = EncodingHelper.GetEncoding(CodePage.Value);
                break;

            case XlsBiffSimpleValueRecord dateSystem when rec.Id == BIFFRECORDTYPE.RECORD1904:
                IsDate1904 = dateSystem.Value == 1;
                break;

            case XlsBiffFont fontRecord:
                Fonts.Add(fontRecord);
                break;

            case XlsBiffFormatString sequentialFormat when rec.Id == BIFFRECORDTYPE.FORMAT_V23:
                // These records are keyed by the number of formats collected so far.
                pendingFormats.Add((ushort)pendingFormats.Count, sequentialFormat);
                break;

            case XlsBiffFormatString indexedFormat when rec.Id == BIFFRECORDTYPE.FORMAT:
                // The first record seen for an index wins; later duplicates are ignored.
                var formatIndex = indexedFormat.Index;
                if (!pendingFormats.ContainsKey(formatIndex))
                {
                    pendingFormats.Add(formatIndex, indexedFormat);
                }

                break;

            case XlsBiffXF extendedFormat:
                AddXf(extendedFormat);
                break;

            case XlsBiffSST sharedStrings:
                SST = sharedStrings;
                break;

            case XlsBiffContinue continuation:
                // CONTINUE records are only consumed once a shared string table has been seen.
                SST?.ReadContinueStrings(continuation);
                break;

            case XlsBiffRecord _ when rec.Id == BIFFRECORDTYPE.MMS:
                Mms = rec;
                break;

            case XlsBiffRecord _ when rec.Id == BIFFRECORDTYPE.COUNTRY:
                Country = rec;
                break;

            case XlsBiffRecord _ when rec.Id == BIFFRECORDTYPE.EXTSST:
                ExtSST = rec;
                break;

            // PROTECT, PROT4REVPASSWORD and PASSWORD records are not interpreted
            // and fall through to the default case with everything else.
            default:
                break;
        }
    }

    SST?.Flush();

    foreach (var entry in pendingFormats)
    {
        // We don't decode the value until here in case there are format records before the
        // codepage record.
        Formats.Add(entry.Key, new NumberFormatString(entry.Value.GetValue(Encoding)));
    }
}
/// <summary>
/// Scans the worksheet substream once, from <c>DataOffset</c> up to EOF, the next BOF or the end
/// of the stream, and records the sheet-level information found: dimensions, default row
/// height, date system, merged cells, column info, number formats, code page, header/footer,
/// code name and the row/offset bookkeeping used later when reading rows.
/// Returns without changes when the stream does not start with a worksheet BOF.
/// </summary>
private void ReadWorksheetGlobals()
{
    using (var biffStream = new XlsBiffStream(Stream, (int)DataOffset, Workbook.BiffVersion, null, Workbook.SecretKey, Workbook.Encryption))
    {
        // Check the expected BOF record was found in the BIFF stream
        if (biffStream.BiffVersion == 0 || biffStream.BiffType != BIFFTYPE.Worksheet)
        {
            return;
        }

        XlsBiffHeaderFooterString header = null;
        XlsBiffHeaderFooterString footer = null;

        // Offset of an IXFE record waiting for the cell record that follows it; -1 when none.
        var ixfeOffset = -1;

        int maxCellColumn = 0;
        int maxRowCount = 0; // number of rows with cell records
        int maxRowCountFromRowRecord = 0; // number of rows with row records

        var mergeCells = new List<CellRange>();
        var biffFormats = new Dictionary<ushort, XlsBiffFormatString>();
        var recordOffset = biffStream.Position;
        var rec = biffStream.Read();
        var columnWidths = new List<Column>();

        while (rec != null && !(rec is XlsBiffEof))
        {
            switch (rec)
            {
                case XlsBiffDimensions dims:
                    FieldCount = dims.LastColumn;
                    RowCount = (int)dims.LastRow;
                    break;
                case XlsBiffDefaultRowHeight defaultRowHeightRecord:
                    DefaultRowHeight = defaultRowHeightRecord.RowHeight;
                    break;
                case XlsBiffSimpleValueRecord is1904 when rec.Id == BIFFRECORDTYPE.RECORD1904:
                    IsDate1904 = is1904.Value == 1;
                    break;
                case XlsBiffXF xf when rec.Id == BIFFRECORDTYPE.XF_V2 || rec.Id == BIFFRECORDTYPE.XF_V3 || rec.Id == BIFFRECORDTYPE.XF_V4:
                    // NOTE: XF records should only occur in raw BIFF2-4 single worksheet documents without the workbook stream, or globally in the workbook stream.
                    // It is undefined behavior if multiple worksheets in a workbook declare XF records.
                    Workbook.AddXf(xf);
                    break;
                case XlsBiffMergeCells mc:
                    mergeCells.AddRange(mc.MergeCells);
                    break;
                case XlsBiffColInfo colInfo:
                    columnWidths.Add(colInfo.Value);
                    break;
                case XlsBiffFormatString fmt when rec.Id == BIFFRECORDTYPE.FORMAT:
                    if (Workbook.BiffVersion >= 5)
                    {
                        // fmt.Index exists on BIFF5+ only.
                        // A repeated index must not abort parsing with an ArgumentException from
                        // Dictionary.Add: keep the first record and ignore later duplicates.
                        if (!biffFormats.ContainsKey(fmt.Index))
                        {
                            biffFormats.Add(fmt.Index, fmt);
                        }
                    }
                    else
                    {
                        biffFormats.Add((ushort)biffFormats.Count, fmt);
                    }

                    break;
                case XlsBiffFormatString fmt23 when rec.Id == BIFFRECORDTYPE.FORMAT_V23:
                    biffFormats.Add((ushort)biffFormats.Count, fmt23);
                    break;
                case XlsBiffSimpleValueRecord codePage when rec.Id == BIFFRECORDTYPE.CODEPAGE:
                    Encoding = EncodingHelper.GetEncoding(codePage.Value);
                    break;
                case XlsBiffHeaderFooterString h when rec.Id == BIFFRECORDTYPE.HEADER && rec.RecordSize > 0:
                    header = h;
                    break;
                case XlsBiffHeaderFooterString f when rec.Id == BIFFRECORDTYPE.FOOTER && rec.RecordSize > 0:
                    footer = f;
                    break;
                case XlsBiffCodeName codeName:
                    CodeName = codeName.GetValue(Encoding);
                    break;
                case XlsBiffRow row:
                    SetMinMaxRow(row.RowIndex, row);

                    // Count rows by row records without affecting the overlap in OffsetMap
                    maxRowCountFromRowRecord = Math.Max(maxRowCountFromRowRecord, row.RowIndex + 1);
                    break;
                case XlsBiffBlankCell cell:
                    maxCellColumn = Math.Max(maxCellColumn, cell.ColumnIndex + 1);
                    maxRowCount = Math.Max(maxRowCount, cell.RowIndex + 1);

                    // A pending IXFE record belongs to this cell, so its offset is registered
                    // for the cell's row before the cell's own offset.
                    if (ixfeOffset != -1)
                    {
                        SetMinMaxRowOffset(cell.RowIndex, ixfeOffset, maxRowCount - 1);
                        ixfeOffset = -1;
                    }

                    SetMinMaxRowOffset(cell.RowIndex, recordOffset, maxRowCount - 1);
                    break;
                case XlsBiffRecord _ when rec.Id == BIFFRECORDTYPE.IXFE:
                    ixfeOffset = recordOffset;
                    break;
            }

            recordOffset = biffStream.Position;
            rec = biffStream.Read();

            // Stop if we find the start of a new substream. Not all files have the required EOF before a substream BOF.
            if (rec is XlsBiffBOF)
            {
                break;
            }
        }

        if (header != null || footer != null)
        {
            HeaderFooter = new HeaderFooter(footer?.GetValue(Encoding), header?.GetValue(Encoding));
        }

        // Format strings are decoded only now, with the encoding in effect after the whole scan.
        foreach (var biffFormat in biffFormats)
        {
            Workbook.AddNumberFormat(biffFormat.Key, biffFormat.Value.GetValue(Encoding));
        }

        if (mergeCells.Count > 0)
        {
            MergeCells = mergeCells.ToArray();
        }

        // The dimensions record may understate the sheet; widen to what the records showed.
        if (FieldCount < maxCellColumn)
        {
            FieldCount = maxCellColumn;
        }

        maxRowCount = Math.Max(maxRowCount, maxRowCountFromRowRecord);
        if (RowCount < maxRowCount)
        {
            RowCount = maxRowCount;
        }

        if (columnWidths.Count > 0)
        {
            ColumnWidths = columnWidths.ToArray();
        }
    }
}
/// <summary>
/// Converts one cell record into a <see cref="Cell"/>. Formula cells may consume further
/// records from the stream, because a string record might follow a formula result.
/// </summary>
/// <param name="biffStream">Stream the cell record came from; used for formula results.</param>
/// <param name="cell">The cell record to convert.</param>
/// <param name="xfIndex">Extended format index used to resolve the effective cell style.</param>
/// <returns>
/// A cell carrying the column index, the value (<c>null</c> for blank cells, error cells and
/// unhandled record types), the effective style and the error, if any.
/// </returns>
private Cell ReadSingleCell(XlsBiffStream biffStream, XlsBiffBlankCell cell, int xfIndex)
{
    LogManager.Log(this).Debug("ReadSingleCell {0}", cell.Id);

    var style = Workbook.GetEffectiveCellStyle(xfIndex, cell.Format);
    var formatIndex = style.NumberFormatIndex;

    object cellValue = null;
    CellError? cellError = null;

    switch (cell.Id)
    {
        case BIFFRECORDTYPE.BOOLERR:
        {
            // Byte 7 selects between boolean (0) and error; byte 6 carries the payload.
            var isError = cell.ReadByte(7) != 0;
            var payload = cell.ReadByte(6);
            if (isError)
            {
                cellError = (CellError)payload;
            }
            else
            {
                cellValue = payload != 0;
            }

            break;
        }

        case BIFFRECORDTYPE.BOOLERR_OLD:
        {
            // Same layout as BOOLERR, shifted by one byte.
            var isError = cell.ReadByte(8) != 0;
            var payload = cell.ReadByte(7);
            if (isError)
            {
                cellError = (CellError)payload;
            }
            else
            {
                cellValue = payload != 0;
            }

            break;
        }

        case BIFFRECORDTYPE.INTEGER:
        case BIFFRECORDTYPE.INTEGER_OLD:
            cellValue = TryConvertOADateTime(((XlsBiffIntegerCell)cell).Value, formatIndex);
            break;

        case BIFFRECORDTYPE.NUMBER:
        case BIFFRECORDTYPE.NUMBER_OLD:
            cellValue = TryConvertOADateTime(((XlsBiffNumberCell)cell).Value, formatIndex);
            break;

        case BIFFRECORDTYPE.RK:
            cellValue = TryConvertOADateTime(((XlsBiffRKCell)cell).Value, formatIndex);
            break;

        case BIFFRECORDTYPE.LABEL:
        case BIFFRECORDTYPE.LABEL_OLD:
        case BIFFRECORDTYPE.RSTRING:
            cellValue = GetLabelString((XlsBiffLabelCell)cell, style);
            break;

        case BIFFRECORDTYPE.LABELSST:
            cellValue = Workbook.SST.GetString(((XlsBiffLabelSSTCell)cell).SSTIndex, Encoding);
            break;

        case BIFFRECORDTYPE.FORMULA:
        case BIFFRECORDTYPE.FORMULA_V3:
        case BIFFRECORDTYPE.FORMULA_V4:
            cellValue = TryGetFormulaValue(biffStream, (XlsBiffFormulaCell)cell, style, out cellError);
            break;

        case BIFFRECORDTYPE.BLANK:
        case BIFFRECORDTYPE.BLANK_OLD:
        case BIFFRECORDTYPE.MULBLANK:
            // Blank cells carry no value; only their style is kept.
            break;
    }

    return new Cell(cell.ColumnIndex, cellValue, style, cellError);
}