Ejemplo n.º 1
0
        /// <summary>
        /// Creates a workbook from the BIFF records found in <paramref name="stream"/>.
        /// </summary>
        /// <param name="stream">Source stream; stored in <see cref="Stream"/> for later reads.</param>
        /// <param name="password">Password passed through to the <see cref="XlsBiffStream"/> constructor.</param>
        /// <param name="fallbackEncoding">Encoding used whenever the stream does not report BIFF version 8.</param>
        /// <exception cref="ExcelReaderException">
        /// The stream reports BIFF version 0, or its BIFF type is neither
        /// <c>WorkbookGlobals</c> nor <c>Worksheet</c>.
        /// </exception>
        internal XlsWorkbook(Stream stream, string password, Encoding fallbackEncoding)
        {
            Stream = stream;

            using (var reader = new XlsBiffStream(stream, 0, 0, password))
            {
                var version = reader.BiffVersion;
                if (version == 0)
                {
                    throw new ExcelReaderException(Errors.ErrorWorkbookGlobalsInvalidData);
                }

                BiffVersion = version;
                SecretKey = reader.SecretKey;
                Encryption = reader.Encryption;

                // BIFF8 always uses Unicode; everything else relies on the caller's fallback.
                if (version == 8)
                {
                    Encoding = Encoding.Unicode;
                }
                else
                {
                    Encoding = fallbackEncoding;
                }

                switch (reader.BiffType)
                {
                    case BIFFTYPE.WorkbookGlobals:
                        ReadWorkbookGlobals(reader);
                        break;

                    case BIFFTYPE.Worksheet:
                        // The stream starts directly with a worksheet: register a single
                        // 'virtual' bound sheet at offset 0 that points at it.
                        Sheets.Add(new XlsBiffBoundSheet(0, XlsBiffBoundSheet.SheetType.Worksheet, XlsBiffBoundSheet.SheetVisibility.Visible, "Sheet"));
                        break;

                    default:
                        throw new ExcelReaderException(Errors.ErrorWorkbookGlobalsInvalidData);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds a block of rows by reading the cell records located between
        /// <paramref name="minOffset"/> and <paramref name="maxOffset"/> in the BIFF stream.
        /// </summary>
        /// <param name="biffStream">Stream to read records from; it is repositioned to <paramref name="minOffset"/>.</param>
        /// <param name="startRow">Index of the first row covered by the block.</param>
        /// <param name="rows">Number of consecutive rows covered by the block.</param>
        /// <param name="minOffset">Stream offset of the first record to read; <see cref="int.MaxValue"/> skips reading entirely.</param>
        /// <param name="maxOffset">Reading stops once the stream position is past this offset.</param>
        /// <returns>The block with one entry per row that was created while reading.</returns>
        private XlsRowBlock ReadNextBlock(XlsBiffStream biffStream, int startRow, int rows, int minOffset, int maxOffset)
        {
            var block = new XlsRowBlock();
            block.Rows = new Dictionary<int, Row>();

            // Rows that appear in the offset map get an entry up front, even when
            // no cell record ends up being read for them.
            for (var offset = 0; offset < rows; ++offset)
            {
                var rowNumber = startRow + offset;
                if (RowOffsetMap.TryGetValue(rowNumber, out _))
                {
                    EnsureRow(block, rowNumber);
                }
            }

            // NOTE(review): int.MaxValue looks like the "no cell records" sentinel
            // produced by the caller's offset computation - confirm.
            if (minOffset != int.MaxValue)
            {
                biffStream.Position = minOffset;

                XlsBiffRecord pendingIxfe = null;

                while (biffStream.Position <= maxOffset)
                {
                    var record = biffStream.Read();
                    if (record == null)
                    {
                        break;
                    }

                    if (record.Id == BIFFRECORDTYPE.IXFE)
                    {
                        // BIFF2: If cell.xformat == 63, this contains the actual XF index >= 63
                        pendingIxfe = record;
                    }

                    if (!(record is XlsBiffBlankCell cell))
                    {
                        continue;
                    }

                    var targetRow = EnsureRow(block, cell.RowIndex);

                    if (cell.Id == BIFFRECORDTYPE.MULRK)
                    {
                        var expanded = ReadMultiCell(cell);
                        targetRow.Cells.AddRange(expanded);
                    }
                    else
                    {
                        var xfIndex = GetXfIndexForCell(cell, pendingIxfe);
                        var parsed = ReadSingleCell(biffStream, cell, xfIndex);
                        targetRow.Cells.Add(parsed);
                    }

                    // An IXFE record only applies to the cell record that follows it.
                    pendingIxfe = null;
                }
            }

            return block;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Lazily enumerates the worksheet rows in index order, inserting an empty row
        /// for every index that has no row of its own before the next stored row.
        /// </summary>
        /// <returns>
        /// Stored rows interleaved with empty placeholder rows. Placeholders use
        /// <c>DefaultRowHeight / 20.0</c> as height and an empty cell list.
        /// </returns>
        public IEnumerable<Row> ReadRows()
        {
            var nextIndex = 0;

            using (var reader = new XlsBiffStream(Stream, (int)DataOffset, Workbook.BiffVersion, null, Workbook.SecretKey, Workbook.Encryption))
            {
                foreach (var storedRow in ReadWorksheetRows(reader))
                {
                    // Fill the gap between the previously returned row and this one.
                    while (nextIndex < storedRow.RowIndex)
                    {
                        yield return new Row(nextIndex, DefaultRowHeight / 20.0, new List<Cell>());
                        ++nextIndex;
                    }

                    nextIndex++;
                    yield return storedRow;
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads the STRING record expected after a formula cell and decodes its text.
        /// A single SHAREDFMLA record in between is skipped.
        /// </summary>
        /// <param name="biffStream">Stream positioned right after the formula record.</param>
        /// <param name="effectiveStyle">Style whose font index selects the byte-string encoding.</param>
        /// <returns>
        /// The decoded string, or <c>null</c> when the next relevant record is missing
        /// or is not a STRING record.
        /// </returns>
        private string TryGetFormulaString(XlsBiffStream biffStream, ExtendedFormat effectiveStyle)
        {
            var next = biffStream.Read();

            if (next?.Id == BIFFRECORDTYPE.SHAREDFMLA)
            {
                next = biffStream.Read();
            }

            if (next == null || next.Id != BIFFRECORDTYPE.STRING)
            {
                // Bad data - could not find a string following the formula
                return null;
            }

            var stringRecord = (XlsBiffFormulaString)next;

            // Prefer the encoding of the cell's font; fall back to the sheet encoding.
            var font = GetFont(effectiveStyle.FontIndex);
            var textEncoding = font?.ByteStringEncoding ?? Encoding;

            return stringRecord.GetValue(textEncoding);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Converts the cached result of a formula cell into a cell value.
        /// </summary>
        /// <param name="biffStream">Stream used to read the trailing string record for string results.</param>
        /// <param name="formulaCell">Formula record holding the cached result.</param>
        /// <param name="effectiveStyle">Style providing the number format and font for conversion.</param>
        /// <param name="error">Set to the cell error for error results; <c>null</c> otherwise.</param>
        /// <returns>
        /// The boolean, string or converted numeric value; <c>null</c> for error results
        /// and for unrecognized result types.
        /// </returns>
        private object TryGetFormulaValue(XlsBiffStream biffStream, XlsBiffFormulaCell formulaCell, ExtendedFormat effectiveStyle, out CellError? error)
        {
            error = null;

            // Stays null for error results and for bad data / unknown formula value types.
            object result = null;

            switch (formulaCell.FormulaType)
            {
                case XlsBiffFormulaCell.FormulaValueType.Boolean:
                    result = formulaCell.BooleanValue;
                    break;

                case XlsBiffFormulaCell.FormulaValueType.Error:
                    error = (CellError)formulaCell.ErrorValue;
                    break;

                case XlsBiffFormulaCell.FormulaValueType.EmptyString:
                    result = string.Empty;
                    break;

                case XlsBiffFormulaCell.FormulaValueType.Number:
                    result = TryConvertOADateTime(formulaCell.XNumValue, effectiveStyle.NumberFormatIndex);
                    break;

                case XlsBiffFormulaCell.FormulaValueType.String:
                    result = TryGetFormulaString(biffStream, effectiveStyle);
                    break;
            }

            return result;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Lazily enumerates the rows of the worksheet, reading the stream one block of rows at a time.
        /// </summary>
        /// <param name="biffStream">Stream the row blocks are read from.</param>
        /// <returns>The rows present in each block, in ascending row index order within a block.</returns>
        private IEnumerable<Row> ReadWorksheetRows(XlsBiffStream biffStream)
        {
            // blockLength is filled in by GetBlockSize and doubles as the loop step.
            for (int blockStart = 0, blockLength = 0; blockStart < RowCount; blockStart += blockLength)
            {
                GetBlockSize(blockStart, out blockLength, out var minOffset, out var maxOffset);

                var block = ReadNextBlock(biffStream, blockStart, blockLength, minOffset, maxOffset);

                for (var offset = 0; offset < blockLength; offset++)
                {
                    // Row indexes without an entry in the block are simply skipped.
                    if (!block.Rows.TryGetValue(blockStart + offset, out var row))
                    {
                        continue;
                    }

                    yield return row;
                }
            }
        }
		/// <summary>
		/// Releases the reader's state. Calls after the first one are no-ops.
		/// </summary>
		/// <param name="disposing">
		/// When <c>true</c>, the workbook data set is disposed and the sheet list is cleared
		/// before the references are dropped; when <c>false</c>, the references are only dropped.
		/// </param>
		private void Dispose(bool disposing)
		{
			// Nothing to do when Dispose has already run.
			if (this.disposed)
			{
				return;
			}

			if (disposing)
			{
				if (m_workbookData != null)
				{
					m_workbookData.Dispose();
				}

				if (m_sheets != null)
				{
					m_sheets.Clear();
				}
			}

			// Drop every reference held by the reader.
			m_workbookData = null;
			m_sheets = null;
			m_stream = null;
			m_globals = null;
			m_encoding = null;
			m_hdr = null;

			disposed = true;
		}
		/// <summary>
		/// Puts the reader into the failed state: stores the message, marks the reader
		/// invalid, closes the underlying file and drops all parsing state.
		/// </summary>
		/// <param name="message">Text stored as the reader's exception message.</param>
		private void fail(string message)
		{
			// Record the failure first so it is visible even if closing the file throws.
			m_exceptionMessage = message;
			m_isValid = false;

			m_file.Close();
			m_isClosed = true;

			// Release everything that was built up while parsing.
			m_hdr = null;
			m_stream = null;
			m_globals = null;
			m_encoding = null;
			m_sheets = null;
			m_workbookData = null;
		}
		/// <summary>
		/// Reads the file header, locates the workbook stream and loads the workbook
		/// globals records (sheets, code page, fonts, formats, extended formats, SST)
		/// into <c>m_globals</c> and <c>m_sheets</c>.
		/// </summary>
		/// <remarks>
		/// Structural problems (unreadable header, missing workbook entry, entry that is not
		/// a stream, missing or wrong BOF record) are reported through <c>fail</c> and end the
		/// method early. The shared string table is materialized only when the EOF record is reached.
		/// </remarks>
		private void readWorkBookGlobals()
		{
			// Read header
			try
			{
				m_hdr = XlsHeader.ReadHeader(m_file);
			}
			catch (Exceptions.HeaderException ex)
			{
				fail(ex.Message);
				return;
			}
			catch (FormatException ex)
			{
				fail(ex.Message);
				return;
			}

			XlsRootDirectory dir = new XlsRootDirectory(m_hdr);

			// Try the WORKBOOK entry first and fall back to the BOOK entry.
			XlsDirectoryEntry workbookEntry = dir.FindEntry(WORKBOOK) ?? dir.FindEntry(BOOK);

			if (workbookEntry == null)
			{ fail(Errors.ErrorStreamWorkbookNotFound); return; }

			if (workbookEntry.EntryType != STGTY.STGTY_STREAM)
			{ fail(Errors.ErrorWorkbookIsNotStream); return; }

			m_stream = new XlsBiffStream(m_hdr, workbookEntry.StreamFirstSector);

			m_globals = new XlsWorkbookGlobals();

			m_stream.Seek(0, SeekOrigin.Begin);

			// The first record must be a BOF announcing the workbook globals substream.
			XlsBiffRecord rec = m_stream.Read();
			XlsBiffBOF bof = rec as XlsBiffBOF;

			if (bof == null || bof.Type != BIFFTYPE.WorkbookGlobals)
			{ fail(Errors.ErrorWorkbookGlobalsInvalidData); return; }

			// True while CONTINUE records belong to the shared string table
			// (between the SST record and the EXTSST record).
			bool sst = false;

			m_version = bof.Version;
			m_sheets = new List<XlsWorksheet>();

			// NOTE(review): if the stream ends without an EOF record, the loop exits
			// without SST.ReadStrings() having been called - confirm that is intended.
			while (null != (rec = m_stream.Read()))
			{
				switch (rec.ID)
				{
					case BIFFRECORDTYPE.INTERFACEHDR:
						m_globals.InterfaceHdr = (XlsBiffInterfaceHdr)rec;
						break;
					case BIFFRECORDTYPE.BOUNDSHEET:
						XlsBiffBoundSheet sheet = (XlsBiffBoundSheet)rec;

						// Only plain worksheets are exposed.
						if (sheet.Type != XlsBiffBoundSheet.SheetType.Worksheet) break;

						sheet.IsV8 = isV8();
						sheet.UseEncoding = m_encoding;

						m_sheets.Add(new XlsWorksheet(m_globals.Sheets.Count, sheet));
						m_globals.Sheets.Add(sheet);

						break;
					case BIFFRECORDTYPE.MMS:
						m_globals.MMS = rec;
						break;
					case BIFFRECORDTYPE.COUNTRY:
						m_globals.Country = rec;
						break;
					case BIFFRECORDTYPE.CODEPAGE:

						m_globals.CodePage = (XlsBiffSimpleValueRecord)rec;

						try
						{
							m_encoding = Encoding.GetEncoding(m_globals.CodePage.Value);
						}
						catch (ArgumentException)
						{
							// Warning - Password protection
							// TODO: Attach to ILog
						}
						catch (NotSupportedException)
						{
							// The code page is valid but not available on this platform.
							// Keep the current encoding, exactly as for an invalid code page,
							// instead of aborting the whole parse.
						}

						break;
					case BIFFRECORDTYPE.FONT:
					case BIFFRECORDTYPE.FONT_V34:
						m_globals.Fonts.Add(rec);
						break;
					case BIFFRECORDTYPE.FORMAT:
					case BIFFRECORDTYPE.FORMAT_V23:
						m_globals.Formats.Add(rec);
						break;
					case BIFFRECORDTYPE.XF:
					case BIFFRECORDTYPE.XF_V4:
					case BIFFRECORDTYPE.XF_V3:
					case BIFFRECORDTYPE.XF_V2:
						m_globals.ExtendedFormats.Add(rec);
						break;
					case BIFFRECORDTYPE.SST:
						m_globals.SST = (XlsBiffSST)rec;
						sst = true;
						break;
					case BIFFRECORDTYPE.CONTINUE:
						// CONTINUE records outside the SST section are ignored.
						if (!sst) break;
						XlsBiffContinue contSST = (XlsBiffContinue)rec;
						m_globals.SST.Append(contSST);
						break;
					case BIFFRECORDTYPE.EXTSST:
						m_globals.ExtSST = rec;
						sst = false;
						break;
					case BIFFRECORDTYPE.PROTECT:
					case BIFFRECORDTYPE.PASSWORD:
					case BIFFRECORDTYPE.PROT4REVPASSWORD:
						//IsProtected
						break;
					case BIFFRECORDTYPE.EOF:
						// End of the globals substream: materialize the shared strings and stop.
						if (m_globals.SST != null)
							m_globals.SST.ReadStrings();
						return;

					default:
						continue;
				}
			}
		}
Ejemplo n.º 10
0
		/// <summary>
		/// Parses a complete XLS file from <paramref name="fileStream"/> into <c>m_workbookData</c>,
		/// adding one table per worksheet that is read successfully.
		/// </summary>
		/// <param name="fileStream">
		/// Stream holding the XLS file. It is stored in <c>m_file</c> and disposed when parsing
		/// finishes or fails.
		/// </param>
		/// <exception cref="FileNotFoundException">Neither a WORKBOOK nor a BOOK entry exists in the file.</exception>
		/// <exception cref="FormatException">The workbook entry is not a stream.</exception>
		private void ParseXlsStream(Stream fileStream)
		{
			using (m_file = fileStream)
			{
				m_hdr = XlsHeader.ReadHeader(m_file);
				XlsRootDirectory dir = new XlsRootDirectory(m_hdr);

				// Try the WORKBOOK entry first and fall back to the BOOK entry.
				XlsDirectoryEntry workbookEntry = dir.FindEntry(WORKBOOK) ?? dir.FindEntry(BOOK);

				if (workbookEntry == null)
					throw new FileNotFoundException(Errors.ErrorStreamWorkbookNotFound);
				if (workbookEntry.EntryType != STGTY.STGTY_STREAM)
					throw new FormatException(Errors.ErrorWorkbookIsNotStream);

				m_stream = new XlsBiffStream(m_hdr, workbookEntry.StreamFirstSector);

				ReadWorkbookGlobals();

				m_workbookData = new DataSet();

				for (int i = 0; i < m_sheets.Count; i++)
				{
					if (ReadWorksheet(m_sheets[i]))
						m_workbookData.Tables.Add(m_sheets[i].Data);
				}

				// Everything needed now lives in m_workbookData; drop the parsing state.
				// Clearing the references is enough to make it collectable, so no
				// collection is forced here - an induced full GC blocks the whole
				// process and is left to the runtime to schedule.
				m_globals.SST = null;
				m_globals = null;
				m_sheets = null;
				m_stream = null;
				m_hdr = null;

				GC.SuppressFinalize(this);
			}
		}
Ejemplo n.º 11
0
        /// <summary>
        /// Reads the workbook globals substream up to its EOF record (or the end of the stream)
        /// and fills in sheets, code page, date system, fonts, extended formats, the shared
        /// string table and the number formats.
        /// </summary>
        /// <param name="biffStream">Stream positioned at the first record after the globals BOF.</param>
        private void ReadWorkbookGlobals(XlsBiffStream biffStream)
        {
            // Format records are collected undecoded; see the loop at the end of the method.
            var pendingFormats = new Dictionary<int, XlsBiffFormatString>();

            for (var record = biffStream.Read(); record != null && !(record is XlsBiffEof); record = biffStream.Read())
            {
                switch (record)
                {
                    case XlsBiffInterfaceHdr interfaceHdr:
                        InterfaceHdr = interfaceHdr;
                        break;

                    case XlsBiffBoundSheet boundSheet:
                        // Only plain worksheets are registered.
                        if (boundSheet.Type == XlsBiffBoundSheet.SheetType.Worksheet)
                        {
                            Sheets.Add(boundSheet);
                        }

                        break;

                    case XlsBiffSimpleValueRecord codePageRecord when record.Id == BIFFRECORDTYPE.CODEPAGE:
                        // [MS-XLS 2.4.52 CodePage] An unsigned integer that specifies the workbook's code page.
                        // The value MUST be one of the code page values specified in [CODEPG] or the special
                        // value 1200, which means that the workbook is Unicode.
                        CodePage = codePageRecord;
                        Encoding = EncodingHelper.GetEncoding(CodePage.Value);
                        break;

                    case XlsBiffSimpleValueRecord dateSystem when record.Id == BIFFRECORDTYPE.RECORD1904:
                        IsDate1904 = dateSystem.Value == 1;
                        break;

                    case XlsBiffFont fontRecord:
                        Fonts.Add(fontRecord);
                        break;

                    case XlsBiffFormatString sequentialFormat when record.Id == BIFFRECORDTYPE.FORMAT_V23:
                        // These records are keyed by their order of appearance.
                        pendingFormats.Add((ushort)pendingFormats.Count, sequentialFormat);
                        break;

                    case XlsBiffFormatString indexedFormat when record.Id == BIFFRECORDTYPE.FORMAT:
                        // The first record seen for an index wins; later duplicates are ignored.
                        if (!pendingFormats.ContainsKey(indexedFormat.Index))
                        {
                            pendingFormats.Add(indexedFormat.Index, indexedFormat);
                        }

                        break;

                    case XlsBiffXF extendedFormat:
                        AddXf(extendedFormat);
                        break;

                    case XlsBiffSST sharedStrings:
                        SST = sharedStrings;
                        break;

                    case XlsBiffContinue continuation:
                        // Continue records are fed to the shared string table once one exists.
                        SST?.ReadContinueStrings(continuation);
                        break;

                    case XlsBiffRecord _ when record.Id == BIFFRECORDTYPE.MMS:
                        Mms = record;
                        break;

                    case XlsBiffRecord _ when record.Id == BIFFRECORDTYPE.COUNTRY:
                        Country = record;
                        break;

                    case XlsBiffRecord _ when record.Id == BIFFRECORDTYPE.EXTSST:
                        ExtSST = record;
                        break;

                    // Protection-related records (PROTECT, PROT4REVPASSWORD, PASSWORD)
                    // get no dedicated handling here.
                    default:
                        break;
                }
            }

            SST?.Flush();

            // Format strings are decoded only now, in case format records appeared
            // before the code page record that selects the encoding.
            foreach (var pending in pendingFormats)
            {
                Formats.Add(pending.Key, new NumberFormatString(pending.Value.GetValue(Encoding)));
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Scans the worksheet substream once to collect sheet-level metadata: dimensions,
        /// default row height, date system, merged cells, column widths, number formats,
        /// header/footer, code name and the stream offsets of the cell records of each row.
        /// </summary>
        /// <remarks>
        /// Returns without changing anything when the stream does not start with a worksheet BOF.
        /// Scanning stops at the EOF record, at the end of the stream, or at the BOF of a
        /// following substream.
        /// </remarks>
        private void ReadWorksheetGlobals()
        {
            using (var biffStream = new XlsBiffStream(Stream, (int)DataOffset, Workbook.BiffVersion, null, Workbook.SecretKey, Workbook.Encryption))
            {
                // Check the expected BOF record was found in the BIFF stream
                if (biffStream.BiffVersion == 0 || biffStream.BiffType != BIFFTYPE.Worksheet)
                {
                    return;
                }

                XlsBiffHeaderFooterString header = null;
                XlsBiffHeaderFooterString footer = null;

                // Offset of an IXFE record waiting for the cell record that follows it; -1 when none.
                var ixfeOffset = -1;

                int maxCellColumn            = 0;
                int maxRowCount              = 0; // number of rows with cell records
                int maxRowCountFromRowRecord = 0; // number of rows with row records

                var mergeCells   = new List <CellRange>();
                var biffFormats  = new Dictionary <ushort, XlsBiffFormatString>();
                var recordOffset = biffStream.Position;
                var rec          = biffStream.Read();
                var columnWidths = new List <Column>();

                while (rec != null && !(rec is XlsBiffEof))
                {
                    switch (rec)
                    {
                    case XlsBiffDimensions dims:
                        FieldCount = dims.LastColumn;
                        RowCount   = (int)dims.LastRow;
                        break;

                    case XlsBiffDefaultRowHeight defaultRowHeightRecord:
                        DefaultRowHeight = defaultRowHeightRecord.RowHeight;
                        break;

                    case XlsBiffSimpleValueRecord is1904 when rec.Id == BIFFRECORDTYPE.RECORD1904:
                        IsDate1904 = is1904.Value == 1;
                        break;

                    case XlsBiffXF xf when rec.Id == BIFFRECORDTYPE.XF_V2 || rec.Id == BIFFRECORDTYPE.XF_V3 || rec.Id == BIFFRECORDTYPE.XF_V4:
                        // NOTE: XF records should only occur in raw BIFF2-4 single worksheet documents without the workbook stream, or globally in the workbook stream.
                        // It is undefined behavior if multiple worksheets in a workbook declare XF records.
                        Workbook.AddXf(xf);
                        break;

                    case XlsBiffMergeCells mc:
                        mergeCells.AddRange(mc.MergeCells);
                        break;

                    case XlsBiffColInfo colInfo:
                        columnWidths.Add(colInfo.Value);
                        break;

                    case XlsBiffFormatString fmt when rec.Id == BIFFRECORDTYPE.FORMAT:
                        if (Workbook.BiffVersion >= 5)
                        {
                            // fmt.Index exists on BIFF5+ only.
                            // A file may repeat a format index; keep the first record and
                            // ignore later ones rather than letting Dictionary.Add throw.
                            if (!biffFormats.ContainsKey(fmt.Index))
                            {
                                biffFormats.Add(fmt.Index, fmt);
                            }
                        }
                        else
                        {
                            // Older versions carry no index: key by order of appearance.
                            biffFormats.Add((ushort)biffFormats.Count, fmt);
                        }

                        break;

                    case XlsBiffFormatString fmt23 when rec.Id == BIFFRECORDTYPE.FORMAT_V23:
                        biffFormats.Add((ushort)biffFormats.Count, fmt23);
                        break;

                    case XlsBiffSimpleValueRecord codePage when rec.Id == BIFFRECORDTYPE.CODEPAGE:
                        Encoding = EncodingHelper.GetEncoding(codePage.Value);
                        break;

                    case XlsBiffHeaderFooterString h when rec.Id == BIFFRECORDTYPE.HEADER && rec.RecordSize > 0:
                        header = h;
                        break;

                    case XlsBiffHeaderFooterString f when rec.Id == BIFFRECORDTYPE.FOOTER && rec.RecordSize > 0:
                        footer = f;
                        break;

                    case XlsBiffCodeName codeName:
                        CodeName = codeName.GetValue(Encoding);
                        break;

                    case XlsBiffRow row:
                        SetMinMaxRow(row.RowIndex, row);

                        // Count rows by row records without affecting the overlap in OffsetMap
                        maxRowCountFromRowRecord = Math.Max(maxRowCountFromRowRecord, row.RowIndex + 1);
                        break;

                    case XlsBiffBlankCell cell:
                        maxCellColumn = Math.Max(maxCellColumn, cell.ColumnIndex + 1);
                        maxRowCount   = Math.Max(maxRowCount, cell.RowIndex + 1);

                        // An IXFE record seen just before this cell is recorded for the
                        // cell's row as well, then the pending offset is cleared.
                        if (ixfeOffset != -1)
                        {
                            SetMinMaxRowOffset(cell.RowIndex, ixfeOffset, maxRowCount - 1);
                            ixfeOffset = -1;
                        }

                        SetMinMaxRowOffset(cell.RowIndex, recordOffset, maxRowCount - 1);
                        break;

                    case XlsBiffRecord _ when rec.Id == BIFFRECORDTYPE.IXFE:
                        ixfeOffset = recordOffset;
                        break;
                    }

                    // Remember where the next record starts before reading it.
                    recordOffset = biffStream.Position;
                    rec          = biffStream.Read();

                    // Stop if we find the start of a new substream. Files do not always have the required EOF before a substream BOF.
                    if (rec is XlsBiffBOF)
                    {
                        break;
                    }
                }

                if (header != null || footer != null)
                {
                    HeaderFooter = new HeaderFooter(footer?.GetValue(Encoding), header?.GetValue(Encoding));
                }

                // Format strings are decoded after the scan, using the encoding in effect at its end.
                foreach (var biffFormat in biffFormats)
                {
                    Workbook.AddNumberFormat(biffFormat.Key, biffFormat.Value.GetValue(Encoding));
                }

                if (mergeCells.Count > 0)
                {
                    MergeCells = mergeCells.ToArray();
                }

                // The dimensions record may understate the sheet: widen to what the cell records show.
                if (FieldCount < maxCellColumn)
                {
                    FieldCount = maxCellColumn;
                }

                maxRowCount = Math.Max(maxRowCount, maxRowCountFromRowRecord);
                if (RowCount < maxRowCount)
                {
                    RowCount = maxRowCount;
                }

                if (columnWidths.Count > 0)
                {
                    ColumnWidths = columnWidths.ToArray();
                }
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Converts a single cell record into a <see cref="Cell"/>. For formula cells with a string
        /// result, further records may be read from <paramref name="biffStream"/>.
        /// </summary>
        /// <param name="biffStream">Stream the cell record came from; used for formula string results.</param>
        /// <param name="cell">Cell record to convert.</param>
        /// <param name="xfIndex">Extended format index used to resolve the effective cell style.</param>
        /// <returns>
        /// A cell carrying the column index, the effective style and either a value or an error.
        /// Both are <c>null</c> for blank cells and for record types not handled here.
        /// </returns>
        private Cell ReadSingleCell(XlsBiffStream biffStream, XlsBiffBlankCell cell, int xfIndex)
        {
            LogManager.Log(this).Debug("ReadSingleCell {0}", cell.Id);

            var effectiveStyle = Workbook.GetEffectiveCellStyle(xfIndex, cell.Format);
            var numberFormatIndex = effectiveStyle.NumberFormatIndex;

            object cellValue = null;
            CellError? cellError = null;

            switch (cell.Id)
            {
                case BIFFRECORDTYPE.BOOLERR:
                case BIFFRECORDTYPE.BOOLERR_OLD:
                {
                    // Both layouts hold the payload byte followed by an "is error" flag byte;
                    // the old layout sits one byte further into the record.
                    var payloadOffset = cell.Id == BIFFRECORDTYPE.BOOLERR ? 6 : 7;

                    if (cell.ReadByte(payloadOffset + 1) == 0)
                    {
                        cellValue = cell.ReadByte(payloadOffset) != 0;
                    }
                    else
                    {
                        cellError = (CellError)cell.ReadByte(payloadOffset);
                    }

                    break;
                }

                case BIFFRECORDTYPE.INTEGER:
                case BIFFRECORDTYPE.INTEGER_OLD:
                {
                    var integerCell = (XlsBiffIntegerCell)cell;
                    cellValue = TryConvertOADateTime(integerCell.Value, numberFormatIndex);
                    break;
                }

                case BIFFRECORDTYPE.NUMBER:
                case BIFFRECORDTYPE.NUMBER_OLD:
                {
                    var numberCell = (XlsBiffNumberCell)cell;
                    cellValue = TryConvertOADateTime(numberCell.Value, numberFormatIndex);
                    break;
                }

                case BIFFRECORDTYPE.LABEL:
                case BIFFRECORDTYPE.LABEL_OLD:
                case BIFFRECORDTYPE.RSTRING:
                    cellValue = GetLabelString((XlsBiffLabelCell)cell, effectiveStyle);
                    break;

                case BIFFRECORDTYPE.LABELSST:
                {
                    // The record only stores an index into the workbook's shared string table.
                    var sstCell = (XlsBiffLabelSSTCell)cell;
                    cellValue = Workbook.SST.GetString(sstCell.SSTIndex, Encoding);
                    break;
                }

                case BIFFRECORDTYPE.RK:
                {
                    var rkCell = (XlsBiffRKCell)cell;
                    cellValue = TryConvertOADateTime(rkCell.Value, numberFormatIndex);
                    break;
                }

                case BIFFRECORDTYPE.BLANK:
                case BIFFRECORDTYPE.BLANK_OLD:
                case BIFFRECORDTYPE.MULBLANK:
                    // Blank cells carry a style but no value.
                    break;

                case BIFFRECORDTYPE.FORMULA:
                case BIFFRECORDTYPE.FORMULA_V3:
                case BIFFRECORDTYPE.FORMULA_V4:
                    cellValue = TryGetFormulaValue(biffStream, (XlsBiffFormulaCell)cell, effectiveStyle, out cellError);
                    break;
            }

            return new Cell(cell.ColumnIndex, cellValue, effectiveStyle, cellError);
        }