internal XlsWorkbook(Stream stream, string password, Encoding fallbackEncoding) { Stream = stream; using (var biffStream = new XlsBiffStream(stream, 0, 0, password)) { if (biffStream.BiffVersion == 0) { throw new ExcelReaderException(Errors.ErrorWorkbookGlobalsInvalidData); } BiffVersion = biffStream.BiffVersion; SecretKey = biffStream.SecretKey; Encryption = biffStream.Encryption; Encoding = biffStream.BiffVersion == 8 ? Encoding.Unicode : fallbackEncoding; if (biffStream.BiffType == BIFFTYPE.WorkbookGlobals) { ReadWorkbookGlobals(biffStream); } else if (biffStream.BiffType == BIFFTYPE.Worksheet) { // set up 'virtual' bound sheet pointing at this Sheets.Add(new XlsBiffBoundSheet(0, XlsBiffBoundSheet.SheetType.Worksheet, XlsBiffBoundSheet.SheetVisibility.Visible, "Sheet")); } else { throw new ExcelReaderException(Errors.ErrorWorkbookGlobalsInvalidData); } } }
public XlsSSTReader(XlsBiffSST sst, XlsBiffStream biffStream) { Sst = sst; BiffStream = biffStream; CurrentRecord = Sst; CurrentRecordOffset = 4 + 8; // +4 skips BIFF header, +8 skips SST header }
public IEnumerable <object[]> ReadRows() { var rowIndex = 0; var biffStream = new XlsBiffStream(Bytes, (int)DataOffset, Workbook.BiffVersion); while (true) { var block = ReadNextBlock(biffStream); var maxRow = int.MinValue; foreach (var blockRowIndex in block.Rows.Keys) { maxRow = Math.Max(maxRow, blockRowIndex); } for (; rowIndex <= maxRow; rowIndex++) { if (block.Rows.TryGetValue(rowIndex, out var row)) { yield return(row); } else { row = new object[FieldCount]; yield return(row); } } if (block.EndOfSheet || block.Rows.Count == 0) { break; } } }
private XlsRowBlock ReadNextBlock(XlsBiffStream biffStream, int startRow, int rows, int minOffset, int maxOffset) { var result = new XlsRowBlock { Rows = new Dictionary <int, Row>() }; // Ensure rows with physical records are initialized with height for (var i = 0; i < rows; i++) { if (RowOffsetMap.TryGetValue(startRow + i, out _)) { EnsureRow(result, startRow + i); } } if (minOffset == int.MaxValue) { return(result); } biffStream.Position = minOffset; XlsBiffRecord rec; XlsBiffRecord ixfe = null; while (biffStream.Position <= maxOffset && (rec = biffStream.Read()) != null) { if (rec.Id == BIFFRECORDTYPE.IXFE) { // BIFF2: If cell.xformat == 63, this contains the actual XF index >= 63 ixfe = rec; } if (rec.IsCell) { var cell = (XlsBiffBlankCell)rec; var currentRow = EnsureRow(result, cell.RowIndex); if (cell.Id == BIFFRECORDTYPE.MULRK) { var cellValues = ReadMultiCell(cell); currentRow.Cells.AddRange(cellValues); } else { var xfIndex = GetXfIndexForCell(cell, ixfe); var cellValue = ReadSingleCell(biffStream, cell, xfIndex); currentRow.Cells.Add(cellValue); } ixfe = null; } } return(result); }
/// <summary> /// Parses strings out of the SST record and subsequent Continue records from the BIFF stream /// </summary> public void ReadStrings(XlsBiffStream biffStream) { var reader = new XlsSSTReader(this, biffStream); for (var i = 0; i < UniqueCount; i++) { var s = reader.ReadString(); _strings.Add(s); } }
private object TryGetFormulaValue(XlsBiffStream biffStream, XlsBiffFormulaCell formulaCell, ExtendedFormat effectiveStyle) { return(formulaCell.FormulaType switch { XlsBiffFormulaCell.FormulaValueType.Boolean => formulaCell.BooleanValue, XlsBiffFormulaCell.FormulaValueType.Error => null, XlsBiffFormulaCell.FormulaValueType.EmptyString => string.Empty, XlsBiffFormulaCell.FormulaValueType.Number => TryConvertOADateTime(formulaCell.XNumValue, effectiveStyle.NumberFormatIndex), XlsBiffFormulaCell.FormulaValueType.String => TryGetFormulaString(biffStream, effectiveStyle), // Bad data or new formula value type _ => null, });
public IEnumerable <Row> ReadRows() { var rowIndex = 0; using (var biffStream = new XlsBiffStream(Stream, (int)DataOffset, Workbook.BiffVersion, null, Workbook.SecretKey, Workbook.Encryption)) { foreach (var rowBlock in ReadWorksheetRows(biffStream)) { for (; rowIndex < rowBlock.RowIndex; ++rowIndex) { yield return(new Row(rowIndex, DefaultRowHeight / 20.0, new List <Cell>())); } rowIndex++; yield return(rowBlock); } } }
private string TryGetFormulaString(XlsBiffStream biffStream) { var rec = biffStream.Read(); if (rec != null && rec.Id == BIFFRECORDTYPE.SHAREDFMLA) { rec = biffStream.Read(); } if (rec != null && rec.Id == BIFFRECORDTYPE.STRING) { var stringRecord = (XlsBiffFormulaString)rec; return(stringRecord.GetValue(Encoding)); } // Bad data - could not find a string following the formula return(null); }
internal XlsWorkbook(Stream stream, Encoding fallbackEncoding) { var probe = new byte[8]; stream.Read(probe, 0, probe.Length); stream.Seek(0, SeekOrigin.Begin); if (IsCompoundDocument(probe)) { _bytes = ReadCompoundDocument(stream); } else if (IsRawBiffStream(probe)) { _bytes = ReadWorksheetDocument(stream); } else { throw new HeaderException(Errors.ErrorHeaderSignature); } var biffStream = new XlsBiffStream(_bytes); if (biffStream.BiffVersion == 0) { throw new ExcelReaderException(Errors.ErrorWorkbookGlobalsInvalidData); } BiffVersion = biffStream.BiffVersion; Encoding = biffStream.BiffVersion == 8 ? Encoding.Unicode : fallbackEncoding; if (biffStream.BiffType == BIFFTYPE.WorkbookGlobals) { ReadWorkbookGlobals(biffStream); } else if (biffStream.BiffType == BIFFTYPE.Worksheet) { // set up 'virtual' bound sheet pointing at this Sheets.Add(new XlsBiffBoundSheet(0, XlsBiffBoundSheet.SheetType.Worksheet, XlsBiffBoundSheet.SheetVisibility.Visible, "Sheet")); } else { throw new ExcelReaderException(Errors.ErrorWorkbookGlobalsInvalidData); } }
private IEnumerable <Row> ReadWorksheetRows(XlsBiffStream biffStream) { var rowIndex = 0; while (rowIndex < RowCount) { GetBlockSize(rowIndex, out var blockRowCount, out var minOffset, out var maxOffset); var block = ReadNextBlock(biffStream, rowIndex, blockRowCount, minOffset, maxOffset); for (var i = 0; i < blockRowCount; ++i) { if (block.Rows.TryGetValue(rowIndex + i, out var row)) { yield return(row); } } rowIndex += blockRowCount; } }
private IEnumerable <Row> ReadWorksheetRows(XlsBiffStream biffStream) { var rowIndex = 0; while (rowIndex < RowCount) { // Read up to 32 rows at a time var blockRowCount = Math.Min(32, RowCount - rowIndex); var block = ReadNextBlock(biffStream, rowIndex, blockRowCount); for (var i = 0; i < blockRowCount; ++i) { if (block.Rows.TryGetValue(rowIndex + i, out var row)) { yield return(row); } } rowIndex += blockRowCount; } }
private object TryGetFormulaValue(XlsBiffStream biffStream, XlsBiffFormulaCell formulaCell, ushort xFormat) { switch (formulaCell.FormulaType) { case XlsBiffFormulaCell.FormulaValueType.Boolean: return(formulaCell.BooleanValue); case XlsBiffFormulaCell.FormulaValueType.Error: return(null); case XlsBiffFormulaCell.FormulaValueType.EmptyString: return(string.Empty); case XlsBiffFormulaCell.FormulaValueType.Number: return(TryConvertOADateTime(formulaCell.XNumValue, xFormat)); case XlsBiffFormulaCell.FormulaValueType.String: return(TryGetFormulaString(biffStream)); } // Bad data or new formula value type return(null); }
private void ReadWorksheetGlobals() { using (var biffStream = new XlsBiffStream(Stream, (int)DataOffset, Workbook.BiffVersion, null, Workbook.SecretKey, Workbook.Encryption)) { // Check the expected BOF record was found in the BIFF stream if (biffStream.BiffVersion == 0 || biffStream.BiffType != BIFFTYPE.Worksheet) { return; } XlsBiffHeaderFooterString header = null; XlsBiffHeaderFooterString footer = null; // Handle when dimensions report less columns than used by cell records. int maxCellColumn = 0; int maxRowCount = 0; Dictionary <int, bool> previousBlocksObservedRows = new Dictionary <int, bool>(); Dictionary <int, bool> observedRows = new Dictionary <int, bool>(); var recordOffset = biffStream.Position; XlsBiffRecord rec = biffStream.Read(); while (rec != null && !(rec is XlsBiffEof)) { if (rec is XlsBiffDimensions dims) { FieldCount = dims.LastColumn; RowCount = (int)dims.LastRow; } if (rec.Id == BIFFRECORDTYPE.DEFAULTROWHEIGHT || rec.Id == BIFFRECORDTYPE.DEFAULTROWHEIGHT_V2) { var defaultRowHeightRecord = (XlsBiffDefaultRowHeight)rec; DefaultRowHeight = defaultRowHeightRecord.RowHeight; } if (rec.Id == BIFFRECORDTYPE.RECORD1904) { IsDate1904 = ((XlsBiffSimpleValueRecord)rec).Value == 1; } if (rec.Id == BIFFRECORDTYPE.XF_V2 || rec.Id == BIFFRECORDTYPE.XF_V3 || rec.Id == BIFFRECORDTYPE.XF_V4) { ExtendedFormats.Add(rec); } if (rec.Id == BIFFRECORDTYPE.FORMAT) { var fmt = (XlsBiffFormatString)rec; if (Workbook.BiffVersion >= 5) { // fmt.Index exists on BIFF5+ only Formats.Add(fmt.Index, fmt); } else { Formats.Add((ushort)Formats.Count, fmt); } } if (rec.Id == BIFFRECORDTYPE.FORMAT_V23) { var fmt = (XlsBiffFormatString)rec; Formats.Add((ushort)Formats.Count, fmt); } if (rec.Id == BIFFRECORDTYPE.CODEPAGE) { var codePage = (XlsBiffSimpleValueRecord)rec; Encoding = EncodingHelper.GetEncoding(codePage.Value); } if (rec.Id == BIFFRECORDTYPE.HEADER && rec.RecordSize > 0) { header = (XlsBiffHeaderFooterString)rec; } if (rec.Id == BIFFRECORDTYPE.FOOTER && rec.RecordSize > 0) { footer = (XlsBiffHeaderFooterString)rec; } if (rec.Id == BIFFRECORDTYPE.CODENAME) { var codeName = (XlsBiffCodeName)rec; CodeName = codeName.GetValue(Encoding); } if (rec.Id == BIFFRECORDTYPE.ROW) { var rowRecord = (XlsBiffRow)rec; SetMinMaxRowOffset(rowRecord.RowIndex, recordOffset); maxRowCount = Math.Max(maxRowCount, rowRecord.RowIndex + 1); } if (rec.IsCell) { var cell = (XlsBiffBlankCell)rec; SetMinMaxRowOffset(cell.RowIndex, recordOffset); maxCellColumn = Math.Max(maxCellColumn, cell.ColumnIndex + 1); maxRowCount = Math.Max(maxRowCount, cell.RowIndex + 1); } recordOffset = biffStream.Position; rec = biffStream.Read(); } if (header != null || footer != null) { HeaderFooter = new HeaderFooter(false, false) { OddHeader = header?.GetValue(Encoding), OddFooter = footer?.GetValue(Encoding), }; } if (FieldCount < maxCellColumn) { FieldCount = maxCellColumn; } if (RowCount < maxRowCount) { RowCount = maxRowCount; } } }
private void ReadWorksheetGlobals() { using (var biffStream = new XlsBiffStream(Stream, (int)DataOffset, Workbook.BiffVersion, null, Workbook.SecretKey, Workbook.Encryption)) { // Check the expected BOF record was found in the BIFF stream if (biffStream.BiffVersion == 0 || biffStream.BiffType != BIFFTYPE.Worksheet) { return; } XlsBiffHeaderFooterString header = null; XlsBiffHeaderFooterString footer = null; var ixfeOffset = -1; int maxCellColumn = 0; int maxRowCount = 0; // number of rows with cell records int maxRowCountFromRowRecord = 0; // number of rows with row records var mergeCells = new List <CellRange>(); var biffFormats = new Dictionary <ushort, XlsBiffFormatString>(); var recordOffset = biffStream.Position; var rec = biffStream.Read(); var columnWidths = new List <Col>(); while (rec != null && !(rec is XlsBiffEof)) { if (rec is XlsBiffDimensions dims) { FieldCount = dims.LastColumn; RowCount = (int)dims.LastRow; } if (rec.Id == BIFFRECORDTYPE.DEFAULTROWHEIGHT || rec.Id == BIFFRECORDTYPE.DEFAULTROWHEIGHT_V2) { var defaultRowHeightRecord = (XlsBiffDefaultRowHeight)rec; DefaultRowHeight = defaultRowHeightRecord.RowHeight; } if (rec.Id == BIFFRECORDTYPE.RECORD1904) { IsDate1904 = ((XlsBiffSimpleValueRecord)rec).Value == 1; } if (rec.Id == BIFFRECORDTYPE.XF_V2 || rec.Id == BIFFRECORDTYPE.XF_V3 || rec.Id == BIFFRECORDTYPE.XF_V4) { // NOTE: XF records should only occur in raw BIFF2-4 single worksheet documents without the workbook stream, or globally in the workbook stream. // It is undefined behavior if multiple worksheets in a workbook declare XF records. Workbook.AddExtendedFormat(-1, ((XlsBiffXF)rec).Format, true); } if (rec.Id == BIFFRECORDTYPE.MERGECELLS) { mergeCells.AddRange(((XlsBiffMergeCells)rec).MergeCells); } if (rec.Id == BIFFRECORDTYPE.COLINFO) { columnWidths.Add(((XlsBiffColInfo)rec).Value); } if (rec.Id == BIFFRECORDTYPE.FORMAT) { var fmt = (XlsBiffFormatString)rec; if (Workbook.BiffVersion >= 5) { // fmt.Index exists on BIFF5+ only biffFormats.Add(fmt.Index, fmt); } else { biffFormats.Add((ushort)biffFormats.Count, fmt); } } if (rec.Id == BIFFRECORDTYPE.FORMAT_V23) { var fmt = (XlsBiffFormatString)rec; biffFormats.Add((ushort)biffFormats.Count, fmt); } if (rec.Id == BIFFRECORDTYPE.CODEPAGE) { var codePage = (XlsBiffSimpleValueRecord)rec; Encoding = EncodingHelper.GetEncoding(codePage.Value); } if (rec.Id == BIFFRECORDTYPE.HEADER && rec.RecordSize > 0) { header = (XlsBiffHeaderFooterString)rec; } if (rec.Id == BIFFRECORDTYPE.FOOTER && rec.RecordSize > 0) { footer = (XlsBiffHeaderFooterString)rec; } if (rec.Id == BIFFRECORDTYPE.CODENAME) { var codeName = (XlsBiffCodeName)rec; CodeName = codeName.GetValue(Encoding); } if (rec.Id == BIFFRECORDTYPE.ROW || rec.Id == BIFFRECORDTYPE.ROW_V2) { var rowRecord = (XlsBiffRow)rec; SetMinMaxRow(rowRecord.RowIndex, rowRecord); // Count rows by row records without affecting the overlap in OffsetMap maxRowCountFromRowRecord = Math.Max(maxRowCountFromRowRecord, rowRecord.RowIndex + 1); } if (rec.Id == BIFFRECORDTYPE.IXFE) { ixfeOffset = recordOffset; } if (rec.IsCell) { var cell = (XlsBiffBlankCell)rec; maxCellColumn = Math.Max(maxCellColumn, cell.ColumnIndex + 1); maxRowCount = Math.Max(maxRowCount, cell.RowIndex + 1); if (ixfeOffset != -1) { SetMinMaxRowOffset(cell.RowIndex, ixfeOffset, maxRowCount - 1); ixfeOffset = -1; } SetMinMaxRowOffset(cell.RowIndex, recordOffset, maxRowCount - 1); } recordOffset = biffStream.Position; rec = biffStream.Read(); // Stop if we find the start out a new substream. Not always that files have the required EOF before a substream BOF. if (rec is XlsBiffBOF) { break; } } if (header != null || footer != null) { HeaderFooter = new HeaderFooter(false, false) { OddHeader = header?.GetValue(Encoding), OddFooter = footer?.GetValue(Encoding), }; } foreach (var biffFormat in biffFormats) { Workbook.AddNumberFormat(biffFormat.Key, biffFormat.Value.GetValue(Encoding)); } if (mergeCells.Count > 0) { MergeCells = mergeCells.ToArray(); } if (FieldCount < maxCellColumn) { FieldCount = maxCellColumn; } maxRowCount = Math.Max(maxRowCount, maxRowCountFromRowRecord); if (RowCount < maxRowCount) { RowCount = maxRowCount; } if (columnWidths.Count > 0) { ColumnWidths = columnWidths.ToArray(); } } }
/// <summary> /// Reads additional records if needed: a string record might follow a formula result /// </summary> private Cell ReadSingleCell(XlsBiffStream biffStream, XlsBiffBlankCell cell, ushort xFormat) { LogManager.Log(this).Debug("ReadSingleCell {0}", cell.Id); double doubleValue; int intValue; object objectValue; var result = new Cell() { ColumnIndex = cell.ColumnIndex }; switch (cell.Id) { case BIFFRECORDTYPE.BOOLERR: if (cell.ReadByte(7) == 0) { result.Value = cell.ReadByte(6) != 0; } break; case BIFFRECORDTYPE.BOOLERR_OLD: if (cell.ReadByte(8) == 0) { result.Value = cell.ReadByte(7) != 0; } break; case BIFFRECORDTYPE.INTEGER: case BIFFRECORDTYPE.INTEGER_OLD: intValue = ((XlsBiffIntegerCell)cell).Value; result.Value = TryConvertOADateTime(intValue, xFormat); break; case BIFFRECORDTYPE.NUMBER: case BIFFRECORDTYPE.NUMBER_OLD: doubleValue = ((XlsBiffNumberCell)cell).Value; result.Value = TryConvertOADateTime(doubleValue, xFormat); break; case BIFFRECORDTYPE.LABEL: case BIFFRECORDTYPE.LABEL_OLD: case BIFFRECORDTYPE.RSTRING: result.Value = ((XlsBiffLabelCell)cell).GetValue(Encoding); break; case BIFFRECORDTYPE.LABELSST: result.Value = Workbook.SST.GetString(((XlsBiffLabelSSTCell)cell).SSTIndex, Encoding); break; case BIFFRECORDTYPE.RK: doubleValue = ((XlsBiffRKCell)cell).Value; result.Value = TryConvertOADateTime(doubleValue, xFormat); break; case BIFFRECORDTYPE.BLANK: case BIFFRECORDTYPE.BLANK_OLD: case BIFFRECORDTYPE.MULBLANK: // Skip blank cells break; case BIFFRECORDTYPE.FORMULA: case BIFFRECORDTYPE.FORMULA_V3: case BIFFRECORDTYPE.FORMULA_V4: objectValue = TryGetFormulaValue(biffStream, (XlsBiffFormulaCell)cell, xFormat); result.Value = objectValue; break; } LogManager.Log(this).Debug("VALUE: {0}", result.Value); return(result); }
private XlsRowBlock ReadNextBlock(XlsBiffStream biffStream, int startRow, int rows) { var result = new XlsRowBlock { Rows = new Dictionary <int, Row>() }; XlsBiffRecord rec; XlsBiffRecord ixfe = null; if (!GetMinMaxOffsetsForRowBlock(startRow, rows, out var minOffset, out var maxOffset)) { return(result); } biffStream.Position = minOffset; while (biffStream.Position <= maxOffset && (rec = biffStream.Read()) != null) { if (rec.Id == BIFFRECORDTYPE.ROW || rec.Id == BIFFRECORDTYPE.ROW_V2) { var rowRecord = (XlsBiffRow)rec; var currentRow = EnsureRow(result, rowRecord.RowIndex); currentRow.Height = (rowRecord.UseDefaultRowHeight ? DefaultRowHeight : rowRecord.RowHeight) / 20.0; } if (rec.Id == BIFFRECORDTYPE.IXFE) { // BIFF2: If cell.xformat == 63, this contains the actual XF index >= 63 ixfe = rec; } if (rec.IsCell) { var cell = (XlsBiffBlankCell)rec; var currentRow = EnsureRow(result, cell.RowIndex); if (cell.Id == BIFFRECORDTYPE.MULRK) { var cellValues = ReadMultiCell(cell); currentRow.Cells.AddRange(cellValues); } else { ushort xFormat; if (Workbook.BiffVersion == 2 && cell.XFormat == 63 && ixfe != null) { xFormat = ixfe.ReadUInt16(0); } else { xFormat = cell.XFormat; } var cellValue = ReadSingleCell(biffStream, cell, xFormat); currentRow.Cells.Add(cellValue); } ixfe = null; } } return(result); }
private void ReadWorkbookGlobals(XlsBiffStream biffStream) { XlsBiffRecord rec; var biffFormats = new Dictionary <ushort, XlsBiffFormatString>(); while ((rec = biffStream.Read()) != null && rec.Id != BIFFRECORDTYPE.EOF) { switch (rec.Id) { case BIFFRECORDTYPE.INTERFACEHDR: InterfaceHdr = (XlsBiffInterfaceHdr)rec; break; case BIFFRECORDTYPE.BOUNDSHEET: XlsBiffBoundSheet sheet = (XlsBiffBoundSheet)rec; if (sheet.Type != XlsBiffBoundSheet.SheetType.Worksheet) { break; } Sheets.Add(sheet); break; case BIFFRECORDTYPE.MMS: Mms = rec; break; case BIFFRECORDTYPE.COUNTRY: Country = rec; break; case BIFFRECORDTYPE.CODEPAGE: // [MS-XLS 2.4.52 CodePage] An unsigned integer that specifies the workbook’s code page.The value MUST be one // of the code page values specified in [CODEPG] or the special value 1200, which means that the // workbook is Unicode. CodePage = (XlsBiffSimpleValueRecord)rec; Encoding = EncodingHelper.GetEncoding(CodePage.Value); break; case BIFFRECORDTYPE.FONT: case BIFFRECORDTYPE.FONT_V34: Fonts.Add(rec); break; case BIFFRECORDTYPE.FORMAT_V23: { var fmt = (XlsBiffFormatString)rec; biffFormats.Add((ushort)biffFormats.Count, fmt); } break; case BIFFRECORDTYPE.FORMAT: { var fmt = (XlsBiffFormatString)rec; biffFormats.Add(fmt.Index, fmt); } break; case BIFFRECORDTYPE.XF: case BIFFRECORDTYPE.XF_V4: case BIFFRECORDTYPE.XF_V3: case BIFFRECORDTYPE.XF_V2: ExtendedFormats.Add((XlsBiffXF)rec); break; case BIFFRECORDTYPE.SST: SST = (XlsBiffSST)rec; SST.ReadStrings(biffStream); break; case BIFFRECORDTYPE.CONTINUE: break; case BIFFRECORDTYPE.EXTSST: ExtSST = rec; break; case BIFFRECORDTYPE.PASSWORD: break; case BIFFRECORDTYPE.PROTECT: case BIFFRECORDTYPE.PROT4REVPASSWORD: // IsProtected break; case BIFFRECORDTYPE.RECORD1904: IsDate1904 = ((XlsBiffSimpleValueRecord)rec).Value == 1; break; default: break; } } foreach (var biffFormat in biffFormats) { var formatString = biffFormat.Value.GetValue(Encoding); Formats.Add(biffFormat.Key, new NumberFormatString(formatString)); } }
private void ReadWorksheetGlobals() { using (var biffStream = new XlsBiffStream(Stream, (int)DataOffset, Workbook.BiffVersion, null, Workbook.SecretKey, Workbook.Encryption)) { // Check the expected BOF record was found in the BIFF stream if (biffStream.BiffVersion == 0 || biffStream.BiffType != BIFFTYPE.Worksheet) { return; } XlsBiffHeaderFooterString header = null; XlsBiffHeaderFooterString footer = null; int maxCellColumn = 0; int maxRowCount = 0; var mergeCells = new List <CellRange>(); var biffFormats = new Dictionary <ushort, XlsBiffFormatString>(); var recordOffset = biffStream.Position; var rec = biffStream.Read(); while (rec != null && !(rec is XlsBiffEof)) { if (rec is XlsBiffDimensions dims) { FieldCount = dims.LastColumn; RowCount = (int)dims.LastRow; } if (rec.Id == BIFFRECORDTYPE.DEFAULTROWHEIGHT || rec.Id == BIFFRECORDTYPE.DEFAULTROWHEIGHT_V2) { var defaultRowHeightRecord = (XlsBiffDefaultRowHeight)rec; DefaultRowHeight = defaultRowHeightRecord.RowHeight; } if (rec.Id == BIFFRECORDTYPE.RECORD1904) { IsDate1904 = ((XlsBiffSimpleValueRecord)rec).Value == 1; } if (rec.Id == BIFFRECORDTYPE.XF_V2 || rec.Id == BIFFRECORDTYPE.XF_V3 || rec.Id == BIFFRECORDTYPE.XF_V4) { ExtendedFormats.Add((XlsBiffXF)rec); } if (rec.Id == BIFFRECORDTYPE.MERGECELLS) { mergeCells.AddRange(((XlsBiffMergeCells)rec).MergeCells); } if (rec.Id == BIFFRECORDTYPE.FORMAT) { var fmt = (XlsBiffFormatString)rec; if (Workbook.BiffVersion >= 5) { // fmt.Index exists on BIFF5+ only biffFormats.Add(fmt.Index, fmt); } else { biffFormats.Add((ushort)biffFormats.Count, fmt); } } if (rec.Id == BIFFRECORDTYPE.FORMAT_V23) { var fmt = (XlsBiffFormatString)rec; biffFormats.Add((ushort)biffFormats.Count, fmt); } if (rec.Id == BIFFRECORDTYPE.CODEPAGE) { var codePage = (XlsBiffSimpleValueRecord)rec; Encoding = EncodingHelper.GetEncoding(codePage.Value); } if (rec.Id == BIFFRECORDTYPE.HEADER && rec.RecordSize > 0) { header = (XlsBiffHeaderFooterString)rec; } if (rec.Id == BIFFRECORDTYPE.FOOTER && rec.RecordSize > 0) { footer = (XlsBiffHeaderFooterString)rec; } if (rec.Id == BIFFRECORDTYPE.CODENAME) { var codeName = (XlsBiffCodeName)rec; CodeName = codeName.GetValue(Encoding); } if (rec.Id == BIFFRECORDTYPE.ROW) { var rowRecord = (XlsBiffRow)rec; SetMinMaxRowOffset(rowRecord.RowIndex, recordOffset); maxRowCount = Math.Max(maxRowCount, rowRecord.RowIndex + 1); } if (rec.IsCell) { var cell = (XlsBiffBlankCell)rec; SetMinMaxRowOffset(cell.RowIndex, recordOffset); maxCellColumn = Math.Max(maxCellColumn, cell.ColumnIndex + 1); maxRowCount = Math.Max(maxRowCount, cell.RowIndex + 1); } recordOffset = biffStream.Position; rec = biffStream.Read(); } if (header != null || footer != null) { HeaderFooter = new HeaderFooter(false, false) { OddHeader = header?.GetValue(Encoding), OddFooter = footer?.GetValue(Encoding), }; } foreach (var biffFormat in biffFormats) { var formatString = biffFormat.Value.GetValue(Encoding); Formats.Add(biffFormat.Key, new NumberFormatString(formatString)); } if (mergeCells.Count > 0) { MergeCells = mergeCells.ToArray(); } if (FieldCount < maxCellColumn) { FieldCount = maxCellColumn; } if (RowCount < maxRowCount) { RowCount = maxRowCount; } } }
private void ReadWorksheetGlobals() { var biffStream = new XlsBiffStream(Bytes, (int)DataOffset, Workbook.BiffVersion); // Check the expected BOF record was found in the BIFF stream if (biffStream.BiffVersion == 0 || biffStream.BiffType != BIFFTYPE.Worksheet) { return; } XlsBiffHeaderFooterString header = null; XlsBiffHeaderFooterString footer = null; // Handle when dimensions report less columns than used by cell records. int maxCellColumn = 0; Dictionary <int, bool> previousBlocksObservedRows = new Dictionary <int, bool>(); Dictionary <int, bool> observedRows = new Dictionary <int, bool>(); XlsBiffRecord rec = biffStream.Read(); while (rec != null && !(rec is XlsBiffEof)) { if (rec is XlsBiffDimensions dims) { FieldCount = dims.LastColumn; } if (rec.Id == BIFFRECORDTYPE.RECORD1904) { IsDate1904 = ((XlsBiffSimpleValueRecord)rec).Value == 1; } if (rec.Id == BIFFRECORDTYPE.XF_V2 || rec.Id == BIFFRECORDTYPE.XF_V3 || rec.Id == BIFFRECORDTYPE.XF_V4) { ExtendedFormats.Add(rec); } if (rec.Id == BIFFRECORDTYPE.FORMAT) { var fmt = (XlsBiffFormatString)rec; if (Workbook.BiffVersion >= 5) { // fmt.Index exists on BIFF5+ only Formats.Add(fmt.Index, fmt); } else { Formats.Add((ushort)Formats.Count, fmt); } } if (rec.Id == BIFFRECORDTYPE.FORMAT_V23) { var fmt = (XlsBiffFormatString)rec; Formats.Add((ushort)Formats.Count, fmt); } if (rec.Id == BIFFRECORDTYPE.CODEPAGE) { var codePage = (XlsBiffSimpleValueRecord)rec; Encoding = EncodingHelper.GetEncoding(codePage.Value); } if (rec.Id == BIFFRECORDTYPE.HEADER && rec.RecordSize > 0) { header = (XlsBiffHeaderFooterString)rec; } if (rec.Id == BIFFRECORDTYPE.FOOTER && rec.RecordSize > 0) { footer = (XlsBiffHeaderFooterString)rec; } if (rec.Id == BIFFRECORDTYPE.CODENAME) { var codeName = (XlsBiffCodeName)rec; CodeName = codeName.GetValue(Encoding); } if (!RowContentInMultipleBlocks && rec is XlsBiffDbCell) { foreach (int row in observedRows.Keys) { previousBlocksObservedRows[row] = true; } observedRows.Clear(); } if (rec is XlsBiffBlankCell cell) { maxCellColumn = Math.Max(maxCellColumn, cell.ColumnIndex + 1); if (!RowContentInMultipleBlocks) { if (previousBlocksObservedRows.ContainsKey(cell.RowIndex)) { RowContentInMultipleBlocks = true; previousBlocksObservedRows.Clear(); observedRows.Clear(); } observedRows[cell.RowIndex] = true; } } rec = biffStream.Read(); } if (header != null || footer != null) { HeaderFooter = new HeaderFooter(false, false) { OddHeader = header?.GetValue(Encoding), OddFooter = footer?.GetValue(Encoding), }; } if (FieldCount < maxCellColumn) { FieldCount = maxCellColumn; } }
private XlsRowBlock ReadNextBlock(XlsBiffStream biffStream) { var result = new XlsRowBlock { Rows = new Dictionary <int, object[]>() }; var currentRowIndex = -1; object[] currentRow = null; XlsBiffRecord rec; XlsBiffRecord ixfe = null; while ((rec = biffStream.Read()) != null) { if (rec is XlsBiffEof) { result.EndOfSheet = true; break; } if (rec is XlsBiffMSODrawing || (!RowContentInMultipleBlocks && rec is XlsBiffDbCell)) { break; } if (rec.Id == BIFFRECORDTYPE.IXFE) { // BIFF2: If cell.xformat == 63, this contains the actual XF index >= 63 ixfe = rec; } if (rec is XlsBiffBlankCell cell) { // In most cases cells are grouped by row if (currentRowIndex != cell.RowIndex) { if (!result.Rows.TryGetValue(cell.RowIndex, out currentRow)) { currentRow = new object[FieldCount]; result.Rows.Add(cell.RowIndex, currentRow); } currentRowIndex = cell.RowIndex; } ushort xFormat; if (Workbook.BiffVersion == 2 && cell.XFormat == 63 && ixfe != null) { xFormat = ixfe.ReadUInt16(0); } else { xFormat = cell.XFormat; } var additionalRecords = new List <XlsBiffRecord>(); while (!PushCellValue(currentRow, cell, xFormat, additionalRecords)) { var additionalRecord = biffStream.Read(); additionalRecords.Add(additionalRecord); } ixfe = null; } } return(result); }
private void ReadWorkbookGlobals(XlsBiffStream biffStream) { bool sst = false; XlsBiffRecord rec; while ((rec = biffStream.Read()) != null) { switch (rec.Id) { case BIFFRECORDTYPE.INTERFACEHDR: InterfaceHdr = (XlsBiffInterfaceHdr)rec; break; case BIFFRECORDTYPE.BOUNDSHEET: XlsBiffBoundSheet sheet = (XlsBiffBoundSheet)rec; if (sheet.Type != XlsBiffBoundSheet.SheetType.Worksheet) { break; } Sheets.Add(sheet); break; case BIFFRECORDTYPE.MMS: Mms = rec; break; case BIFFRECORDTYPE.COUNTRY: Country = rec; break; case BIFFRECORDTYPE.CODEPAGE: // [MS-XLS 2.4.52 CodePage] An unsigned integer that specifies the workbook’s code page.The value MUST be one // of the code page values specified in [CODEPG] or the special value 1200, which means that the // workbook is Unicode. CodePage = (XlsBiffSimpleValueRecord)rec; Encoding = EncodingHelper.GetEncoding(CodePage.Value); break; case BIFFRECORDTYPE.FONT: case BIFFRECORDTYPE.FONT_V34: Fonts.Add(rec); break; case BIFFRECORDTYPE.FORMAT_V23: { var fmt = (XlsBiffFormatString)rec; Formats.Add((ushort)Formats.Count, fmt); } break; case BIFFRECORDTYPE.FORMAT: { var fmt = (XlsBiffFormatString)rec; Formats.Add(fmt.Index, fmt); } break; case BIFFRECORDTYPE.XF: case BIFFRECORDTYPE.XF_V4: case BIFFRECORDTYPE.XF_V3: case BIFFRECORDTYPE.XF_V2: ExtendedFormats.Add(rec); break; case BIFFRECORDTYPE.SST: SST = (XlsBiffSST)rec; sst = true; break; case BIFFRECORDTYPE.CONTINUE: if (!sst) { break; } XlsBiffContinue contSST = (XlsBiffContinue)rec; SST.Append(contSST); break; case BIFFRECORDTYPE.EXTSST: ExtSST = rec; sst = false; break; case BIFFRECORDTYPE.PASSWORD: break; case BIFFRECORDTYPE.PROTECT: case BIFFRECORDTYPE.PROT4REVPASSWORD: // IsProtected break; case BIFFRECORDTYPE.RECORD1904: IsDate1904 = ((XlsBiffSimpleValueRecord)rec).Value == 1; break; case BIFFRECORDTYPE.EOF: SST?.ReadStrings(); return; default: continue; } } }
/// <summary> /// Reads additional records if needed: a string record might follow a formula result /// </summary> private Cell ReadSingleCell(XlsBiffStream biffStream, XlsBiffBlankCell cell, int xfIndex) { LogManager.Log(this).Debug("ReadSingleCell {0}", cell.Id); var effectiveStyle = Workbook.GetEffectiveCellStyle(xfIndex, cell.Format); var numberFormatIndex = effectiveStyle.NumberFormatIndex; object value = null; switch (cell.Id) { case BIFFRECORDTYPE.BOOLERR: if (cell.ReadByte(7) == 0) { value = cell.ReadByte(6) != 0; } break; case BIFFRECORDTYPE.BOOLERR_OLD: if (cell.ReadByte(8) == 0) { value = cell.ReadByte(7) != 0; } break; case BIFFRECORDTYPE.INTEGER: case BIFFRECORDTYPE.INTEGER_OLD: value = TryConvertOADateTime(((XlsBiffIntegerCell)cell).Value, numberFormatIndex); break; case BIFFRECORDTYPE.NUMBER: case BIFFRECORDTYPE.NUMBER_OLD: value = TryConvertOADateTime(((XlsBiffNumberCell)cell).Value, numberFormatIndex); break; case BIFFRECORDTYPE.LABEL: case BIFFRECORDTYPE.LABEL_OLD: case BIFFRECORDTYPE.RSTRING: value = GetLabelString((XlsBiffLabelCell)cell, effectiveStyle); break; case BIFFRECORDTYPE.LABELSST: value = Workbook.SST.GetString(((XlsBiffLabelSSTCell)cell).SSTIndex, Encoding); break; case BIFFRECORDTYPE.RK: value = TryConvertOADateTime(((XlsBiffRKCell)cell).Value, numberFormatIndex); break; case BIFFRECORDTYPE.BLANK: case BIFFRECORDTYPE.BLANK_OLD: case BIFFRECORDTYPE.MULBLANK: // Skip blank cells break; case BIFFRECORDTYPE.FORMULA: case BIFFRECORDTYPE.FORMULA_V3: case BIFFRECORDTYPE.FORMULA_V4: value = TryGetFormulaValue(biffStream, (XlsBiffFormulaCell)cell, effectiveStyle); break; } return(new Cell(cell.ColumnIndex, value, effectiveStyle)); }