/// <summary>
/// When an Excel document is embedded in, for example, a Word document the workbook
/// is marked as hidden. To undo this the leading BOF record of the WorkBook stream is
/// skipped and the records that follow are scanned for the first WINDOW1 record (0x3D).
/// That record contains a grbit field whose lowest bit controls the visibility of the
/// workbook; when this bit is set (hidden) it is cleared and the stream is written back.
/// Only the first WINDOW1 record that is found is inspected.
/// </summary>
/// <param name="rootStorage">The <see cref="CFStorage">Root storage</see> of a <see cref="CompoundFile"/></param>
/// <exception cref="OEFileIsCorrupt">Raised when the <paramref name="rootStorage"/> does not have a
/// WorkBook stream, when the stream does not start with a BOF record, or when any other error
/// occurs while reading or updating the stream (for example a truncated record)</exception>
public static void SetWorkbookVisibility(CFStorage rootStorage)
{
    if (!rootStorage.ExistsStream("WorkBook"))
    {
        throw new OEFileIsCorrupt("Could not check workbook visibility because the WorkBook stream is not present");
    }

    try
    {
        var stream = rootStorage.GetStream("WorkBook") as CFStream;
        if (stream == null)
        {
            return;
        }

        var bytes = stream.GetData();

        // Tracks whether the buffer was changed so the stream is only rewritten when needed
        var modified = false;

        using (var memoryStream = new MemoryStream(bytes))
        using (var binaryReader = new BinaryReader(memoryStream))
        {
            // Every record starts with a 16 bit type followed by a 16 bit payload length.
            // At the beginning of the stream the record should always be the BOF
            var recordType = binaryReader.ReadUInt16();
            var recordLength = binaryReader.ReadUInt16();

            // Something seems to be wrong, we would expect a BOF but for some reason it isn't
            if (recordType != 0x809)
            {
                throw new OEFileIsCorrupt("The file is corrupt");
            }

            // Skip the payload of the BOF record
            memoryStream.Position += recordLength;

            while (memoryStream.Position < memoryStream.Length)
            {
                recordType = binaryReader.ReadUInt16();
                recordLength = binaryReader.ReadUInt16();

                // Window1 record (0x3D)
                if (recordType == 0x3D)
                {
                    // Skip the four 16 bit fields (xWn, yWn, dxWn, dyWn) that precede the grbit
                    memoryStream.Position += 4 * sizeof(ushort);

                    // Remember where the grbit starts so the change lands on exactly that byte.
                    // ReadUInt16 throws when the record is truncated, so a short read can never
                    // lead to a write at a wrong offset
                    var grbitOffset = (int) memoryStream.Position;
                    var grbit = binaryReader.ReadUInt16();

                    // The lowest bit of the grbit hides the workbook, when it is set then unset it.
                    // The value is stored little endian, so this bit lives in the first byte
                    if ((grbit & 0x0001) != 0)
                    {
                        bytes[grbitOffset] &= 0xFE;
                        modified = true;
                    }

                    break;
                }

                // Not the record we are looking for, skip its payload
                memoryStream.Position += recordLength;
            }
        }

        if (modified)
        {
            stream.SetData(bytes);
        }
    }
    catch (Exception exception)
    {
        throw new OEFileIsCorrupt(
            "Could not check workbook visibility because the file seems to be corrupt", exception);
    }
}
/// <summary>
/// This method will extract and save the data from the given <paramref name="storage"/> node
/// to the <paramref name="outputFolder"/>. The kind of embedded object is detected by probing
/// for well known stream names, in this order: CONTENTS, Package, EmbeddedOdf, Ole10Native,
/// WordDocument, Workbook and PowerPoint Document.
/// </summary>
/// <param name="storage">The <see cref="CFStorage"/> node</param>
/// <param name="outputFolder">The outputFolder</param>
/// <param name="fileName">The fileName to use, null when the fileName is unknown. It is ignored
/// for Ole10Native objects, these use the file name that is stored inside the object</param>
/// <returns>The result of the save helper that was used (presumably the path of the written file),
/// or null when the matching stream is empty, when an Ole10Native object is not of the
/// <see cref="OleFormat.File"/> format or when none of the known streams is present</returns>
/// <exception cref="OEFileIsPasswordProtected">Raised when a WordDocument, WorkBook or PowerPoint Document stream is password protected</exception>
public static string SaveFromStorageNode(CFStorage storage, string outputFolder, string fileName)
{
    // These streams hold the embedded file as-is, so they are all handled the same way.
    // The order of the names is the order in which they are probed
    var rawStreamNames = new[] { "CONTENTS", "Package", "EmbeddedOdf" };

    foreach (var rawStreamName in rawStreamNames)
    {
        if (!storage.ExistsStream(rawStreamName))
        {
            continue;
        }

        var rawStream = storage.GetStream(rawStreamName);
        if (rawStream.Size <= 0)
        {
            return null;
        }

        if (string.IsNullOrWhiteSpace(fileName))
        {
            fileName = DefaultEmbeddedObjectName;
        }

        return SaveByteArrayToFile(rawStream.GetData(), Path.Combine(outputFolder, fileName));
    }

    if (storage.ExistsStream("\x0001Ole10Native"))
    {
        var ole10Native = new Ole10Native(storage);
        if (ole10Native.Format != OleFormat.File)
        {
            return null;
        }

        // The file name is read from the embedded object and can't be trusted. Path.Combine
        // returns its second argument unchanged when that is a rooted path, so only the last
        // segment is kept to make sure that the file always ends up inside the output folder
        var embeddedFileName = ole10Native.FileName;
        if (!string.IsNullOrWhiteSpace(embeddedFileName))
        {
            embeddedFileName = Path.GetFileName(embeddedFileName);
        }

        // Fall back to the default name when nothing usable is left
        if (string.IsNullOrWhiteSpace(embeddedFileName) || embeddedFileName == "." || embeddedFileName == "..")
        {
            embeddedFileName = DefaultEmbeddedObjectName;
        }

        return SaveByteArrayToFile(ole10Native.NativeData, Path.Combine(outputFolder, embeddedFileName));
    }

    if (storage.ExistsStream("WordDocument"))
    {
        // The embedded object is a Word file
        if (string.IsNullOrWhiteSpace(fileName))
        {
            fileName = "Embedded Word document.doc";
        }

        return SaveStorageTreeToCompoundFile(storage, Path.Combine(outputFolder, fileName));
    }

    if (storage.ExistsStream("Workbook"))
    {
        // The embedded object is an Excel file
        if (string.IsNullOrWhiteSpace(fileName))
        {
            fileName = "Embedded Excel document.xls";
        }

        // Embedded workbooks are marked as hidden, make the workbook visible before saving it
        Excel.SetWorkbookVisibility(storage);
        return SaveStorageTreeToCompoundFile(storage, Path.Combine(outputFolder, fileName));
    }

    if (storage.ExistsStream("PowerPoint Document"))
    {
        // The embedded object is a PowerPoint file
        if (string.IsNullOrWhiteSpace(fileName))
        {
            fileName = "Embedded PowerPoint document.ppt";
        }

        return SaveStorageTreeToCompoundFile(storage, Path.Combine(outputFolder, fileName));
    }

    // None of the known streams is present
    return null;
}
/// <summary>
/// This method will extract and save the data from the given <paramref name="storage"/> node
/// to the <paramref name="outputFolder"/>. The streams CONTENTS, Package, EmbeddedOdf,
/// Ole10Native, WordDocument, Workbook and PowerPoint Document are probed in that order and
/// the first one that exists decides how the embedded object is saved.
/// </summary>
/// <param name="storage">The <see cref="CFStorage"/> node</param>
/// <param name="outputFolder">The outputFolder</param>
/// <param name="fileName">The fileName to use, null when the fileName is unknown. Not used for
/// Ole10Native objects because these carry their own file name</param>
/// <returns>Whatever the used save helper returns (presumably the path of the written file),
/// or null when the matching stream is empty, when an Ole10Native object is not of the
/// <see cref="OleFormat.File"/> format or when no known stream is found</returns>
/// <exception cref="OEFileIsPasswordProtected">Raised when a WordDocument, WorkBook or PowerPoint Document stream is password protected</exception>
public static string SaveFromStorageNode(CFStorage storage, string outputFolder, string fileName)
{
    // CONTENTS, Package and EmbeddedOdf (an embedded ODF file) all contain the raw bytes
    // of the embedded file, so one loop takes care of the three of them
    foreach (var rawStreamName in new[] { "CONTENTS", "Package", "EmbeddedOdf" })
    {
        if (!storage.ExistsStream(rawStreamName))
            continue;

        var rawStream = storage.GetStream(rawStreamName);
        if (rawStream.Size <= 0)
            return null;

        if (string.IsNullOrWhiteSpace(fileName))
            fileName = DefaultEmbeddedObjectName;

        return SaveByteArrayToFile(rawStream.GetData(), Path.Combine(outputFolder, fileName));
    }

    if (storage.ExistsStream("\x0001Ole10Native"))
    {
        var ole10Native = new Ole10Native(storage);
        if (ole10Native.Format != OleFormat.File)
            return null;

        // The name comes out of the embedded object itself and must not be trusted: a rooted
        // path would make Path.Combine ignore the output folder. Strip any directory part so
        // that the file is always written inside the output folder
        var embeddedFileName = string.IsNullOrWhiteSpace(ole10Native.FileName)
            ? null
            : Path.GetFileName(ole10Native.FileName);

        // Use the default name when there is no usable name left
        if (string.IsNullOrWhiteSpace(embeddedFileName) || embeddedFileName == "." || embeddedFileName == "..")
            embeddedFileName = DefaultEmbeddedObjectName;

        return SaveByteArrayToFile(ole10Native.NativeData, Path.Combine(outputFolder, embeddedFileName));
    }

    if (storage.ExistsStream("WordDocument"))
    {
        // The embedded object is a Word file
        if (string.IsNullOrWhiteSpace(fileName))
            fileName = "Embedded Word document.doc";

        return SaveStorageTreeToCompoundFile(storage, Path.Combine(outputFolder, fileName));
    }

    if (storage.ExistsStream("Workbook"))
    {
        // The embedded object is an Excel file
        if (string.IsNullOrWhiteSpace(fileName))
            fileName = "Embedded Excel document.xls";

        // An embedded workbook is marked as hidden, undo that before the file is saved
        Excel.SetWorkbookVisibility(storage);
        return SaveStorageTreeToCompoundFile(storage, Path.Combine(outputFolder, fileName));
    }

    if (storage.ExistsStream("PowerPoint Document"))
    {
        // The embedded object is a PowerPoint file
        if (string.IsNullOrWhiteSpace(fileName))
            fileName = "Embedded PowerPoint document.ppt";

        return SaveStorageTreeToCompoundFile(storage, Path.Combine(outputFolder, fileName));
    }

    // No known stream found
    return null;
}
/// <summary>
/// When an Excel document is embedded in, for example, a Word document the workbook
/// is set to hidden. To solve this problem we skip the BOF record at the start of the
/// WorkBook stream and then seek the first WINDOW1 record (0x3D). In there a grbit
/// structure is located. The first bit in this structure controls the visibility of the
/// workbook, so we check if this bit is set to 1 (hidden) and if so set it to 0 and write
/// the stream back. Only the first WINDOW1 record that is found is inspected.
/// </summary>
/// <param name="rootStorage">The <see cref="CFStorage">Root storage</see> of a <see cref="CompoundFile"/></param>
/// <exception cref="OEFileIsCorrupt">Raised when the <paramref name="rootStorage"/> does not have a
/// WorkBook stream, when the stream does not begin with a BOF record, or when reading or
/// updating the stream fails for any other reason (for example because a record is truncated)</exception>
public static void SetWorkbookVisibility(CFStorage rootStorage)
{
    if (!rootStorage.ExistsStream("WorkBook"))
        throw new OEFileIsCorrupt("Could not check workbook visibility because the WorkBook stream is not present");

    try
    {
        var stream = rootStorage.GetStream("WorkBook") as CFStream;
        if (stream == null)
            return;

        var bytes = stream.GetData();

        // Set as soon as the buffer is changed, the stream is only rewritten in that case
        var hiddenBitCleared = false;

        using (var memoryStream = new MemoryStream(bytes))
        using (var binaryReader = new BinaryReader(memoryStream))
        {
            // Get the record type, at the beginning of the stream this should always be the BOF
            var recordType = binaryReader.ReadUInt16();
            var recordLength = binaryReader.ReadUInt16();

            // Something seems to be wrong, we would expect a BOF but for some reason it isn't
            if (recordType != 0x809)
                throw new OEFileIsCorrupt("The file is corrupt");

            // Jump over the payload of the BOF record
            memoryStream.Position += recordLength;

            while (memoryStream.Position < memoryStream.Length)
            {
                recordType = binaryReader.ReadUInt16();
                recordLength = binaryReader.ReadUInt16();

                // Any record other than Window1 (0x3D) is skipped
                if (recordType != 0x3D)
                {
                    memoryStream.Position += recordLength;
                    continue;
                }

                // The grbit is preceded by four 16 bit fields (xWn, yWn, dxWn and dyWn)
                memoryStream.Position += 4 * sizeof(ushort);

                // Capture the position of the grbit before it is read, so that the write below
                // hits exactly that byte. ReadUInt16 throws on a truncated record, which rules
                // out a partial read followed by a write at the wrong position
                var grbitOffset = (int) memoryStream.Position;
                var grbit = binaryReader.ReadUInt16();

                // The lowest bit of the grbit hides the workbook. The value is stored little
                // endian, so clearing the lowest bit of the first byte is sufficient
                if ((grbit & 0x0001) != 0)
                {
                    bytes[grbitOffset] &= 0xFE;
                    hiddenBitCleared = true;
                }

                break;
            }
        }

        if (hiddenBitCleared)
            stream.SetData(bytes);
    }
    catch (Exception exception)
    {
        throw new OEFileIsCorrupt(
            "Could not check workbook visibility because the file seems to be corrupt", exception);
    }
}