/// <summary>
/// Adds a directory, along with all files and subdirectories, to the ZipStream.
/// </summary>
/// <param name="directoryToCompress">The directory to add recursively</param>
/// <param name="zipStream">The ZipStream to which the files and directories will be added</param>
/// <param name="dirNameOffset">This number of characters will be removed from the full directory or file name
/// before creating the zip entry name</param>
/// <param name="dirNamePrefix">string to prefix to the zip entry name</param>
/// <param name="depthFromCollection">int with the number of folders away it is from the collection folder. The collection folder itself is 0,
/// a book is 1, a subfolder of the book is 2, etc.</param>
/// <param name="forReaderTools">If True, then some pre-processing will be done to the contents of decodable
/// and leveled readers before they are added to the ZipStream</param>
/// <param name="excludeAudio">If true, the contents of the audio directory will not be included</param>
/// <param name="reduceImages">If true, image files are reduced in size to no larger than the max size before saving</param>
/// <param name="omitMetaJson">If true, meta.json is excluded (typically for HTML readers).
/// This flag is not forwarded to subdirectories.</param>
/// <param name="pathToFileForSha">If not null, and the book's HTML file in this directory is being rewritten
/// because reduceImages is set, an extra "version.txt" entry is added whose content is the hash computed by
/// Book.ComputeHashForAllBookRelatedFiles for this path, and LastVersionCode is set to that hash.
/// This value is not forwarded to subdirectories.</param>
private static void CompressDirectory(string directoryToCompress, ZipOutputStream zipStream, int dirNameOffset, string dirNamePrefix,
    int depthFromCollection, bool forReaderTools, bool excludeAudio, bool reduceImages, bool omitMetaJson = false, string pathToFileForSha = null)
{
    if (excludeAudio && Path.GetFileName(directoryToCompress).ToLowerInvariant() == "audio")
    {
        return;
    }

    var files = Directory.GetFiles(directoryToCompress);

    // Don't get distracted by HTML files in any folder other than the book folder.
    // These HTML files in other locations aren't generated by Bloom. They may not have the format Bloom expects,
    // causing needless parsing errors to be thrown if we attempt to read them using Bloom code.
    bool shouldScanHtml = depthFromCollection == 1; // 1 means 1 level below the collection level, i.e. this is the book level
    var bookFile = shouldScanHtml ? BookStorage.FindBookHtmlInFolder(directoryToCompress) : null;

    XmlDocument dom = null;
    List<string> imagesToGiveTransparentBackgrounds;
    List<string> imagesToPreserveResolution;

    // Tests can also result in bookFile being null.
    if (!String.IsNullOrEmpty(bookFile))
    {
        var originalContent = File.ReadAllText(bookFile, Encoding.UTF8);
        dom = XmlHtmlConverter.GetXmlDomFromHtml(originalContent);
        var fullScreenAttr = dom.GetElementsByTagName("body").Cast<XmlElement>().First().Attributes["data-bffullscreenpicture"]?.Value;
        if (fullScreenAttr != null && fullScreenAttr.IndexOf("bloomReader", StringComparison.InvariantCulture) >= 0)
        {
            // This feature (currently used for motion books in landscape mode) triggers an all-black background,
            // due to a rule in bookFeatures.less.
            // Making white pixels transparent on an all-black background makes line-art disappear,
            // which is bad (BL-6564), so just make an empty list in this case.
            imagesToGiveTransparentBackgrounds = new List<string>();
        }
        else
        {
            imagesToGiveTransparentBackgrounds = FindCoverImages(dom);
        }
        imagesToPreserveResolution = FindImagesToPreserveResolution(dom);
        FindBackgroundAudioFiles(dom);
    }
    else
    {
        imagesToGiveTransparentBackgrounds = new List<string>();
        imagesToPreserveResolution = new List<string>();
    }

    // Some of the knowledge about ExcludedFileExtensions might one day move into this method.
    // But we'd have to check carefully the other places it is used.
    var localOnlyFiles = BookStorage.LocalOnlyFiles(directoryToCompress);
    foreach (var filePath in files)
    {
        // Computed once per file; used for the exclusion, image and .bloomcollection checks below.
        var lowerExtension = Path.GetExtension(filePath.ToLowerInvariant());
        if (ExcludedFileExtensionsLowerCase.Contains(lowerExtension))
        {
            continue; // BL-2246: skip putting this one into the BloomPack
        }
        if (IsUnneededWaveFile(filePath, depthFromCollection))
        {
            continue;
        }
        if (localOnlyFiles.Contains(filePath))
        {
            continue;
        }

        // fileName keeps the on-disk capitalization (needed to match names found in the DOM);
        // lowerFileName is used for the case-insensitive checks against well-known names.
        var fileName = Path.GetFileName(filePath);
        var lowerFileName = fileName.ToLowerInvariant();

        // Explicit ordinal, case-insensitive comparison: this is a file-name prefix, not linguistic text,
        // and the match must not depend on the current culture or on the capitalization of the constant.
        if (lowerFileName.StartsWith(BookStorage.PrefixForCorruptHtmFiles, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // Various stuff we keep in the book folder that is useful for editing or bloom library
        // or displaying collections but not needed by the reader. The most important is probably
        // eliminating the pdf, which can be very large. Note that we do NOT eliminate the
        // basic thumbnail.png, as we want eventually to extract that to use in the Reader UI.
        if (lowerFileName == "thumbnail-70.png" || lowerFileName == "thumbnail-256.png")
        {
            continue;
        }
        if (lowerFileName == "meta.json" && omitMetaJson)
        {
            continue;
        }

        FileInfo fi = new FileInfo(filePath);
        var entryName = dirNamePrefix + filePath.Substring(dirNameOffset); // Makes the name in zip based on the folder
        entryName = ZipEntry.CleanName(entryName); // Removes drive from name and fixes slash direction
        ZipEntry newEntry = new ZipEntry(entryName)
        {
            DateTime = fi.LastWriteTime,
            IsUnicodeText = true // encode filename and comment in UTF8
        };

        // null means "no in-memory replacement: stream the file from disk unchanged".
        // A non-null value (even an empty array) is exactly what gets written for this entry, so the
        // declared entry size always agrees with the bytes actually written.
        byte[] modifiedContent = null;

        // if this is a ReaderTools book, call GetBookReplacedWithTemplate() to get the contents
        if (forReaderTools && (bookFile == filePath))
        {
            modifiedContent = Encoding.UTF8.GetBytes(GetBookReplacedWithTemplate(filePath));
        }
        else if (forReaderTools && (fileName == "meta.json"))
        {
            modifiedContent = Encoding.UTF8.GetBytes(GetMetaJsonModfiedForTemplate(filePath));
        }
        else if (reduceImages && ImageFileExtensions.Contains(lowerExtension))
        {
            if (imagesToPreserveResolution.Contains(fileName))
            {
                modifiedContent = RobustFile.ReadAllBytes(filePath);
            }
            else
            {
                // Cover images should be transparent if possible. Others don't need to be.
                var makeBackgroundTransparent = imagesToGiveTransparentBackgrounds.Contains(fileName);
                modifiedContent = GetImageBytesForElectronicPub(filePath, makeBackgroundTransparent);
            }
        }
        else if (lowerExtension == ".bloomcollection")
        {
            modifiedContent = Encoding.UTF8.GetBytes(GetBloomCollectionModifiedForTemplate(filePath));
        }
        // CompressBookForDevice is always called with reduceImages set.
        else if (reduceImages && bookFile == filePath)
        {
            // dom is non-null here: bookFile is non-empty, so the DOM was loaded above.
            SignLanguageApi.ProcessVideos(HtmlDom.SelectChildVideoElements(dom.DocumentElement).Cast<XmlElement>(), directoryToCompress);
            var newContent = XmlHtmlConverter.ConvertDomToHtml5(dom);
            modifiedContent = Encoding.UTF8.GetBytes(newContent);

            if (pathToFileForSha != null)
            {
                // Make an extra entry containing the sha.
                // This must happen before PutNextEntry() below starts the entry for the book file itself.
                var sha = Book.ComputeHashForAllBookRelatedFiles(pathToFileForSha);
                var name = "version.txt"; // must match what BloomReader is looking for in NewBookListenerService.IsBookUpToDate()
                MakeExtraEntry(zipStream, name, sha);
                LastVersionCode = sha;
            }
        }

        newEntry.Size = modifiedContent != null ? modifiedContent.Length : fi.Length;

        zipStream.PutNextEntry(newEntry);
        if (modifiedContent != null)
        {
            // The content is already fully in memory; write it directly rather than copying it
            // through a MemoryStream and a second, equally large, buffer.
            zipStream.Write(modifiedContent, 0, modifiedContent.Length);
        }
        else
        {
            // Zip the file in buffered chunks
            byte[] buffer = new byte[4096];
            using (var streamReader = RobustFile.OpenRead(filePath))
            {
                StreamUtils.Copy(streamReader, zipStream, buffer);
            }
        }
        zipStream.CloseEntry();
    }

    var folders = Directory.GetDirectories(directoryToCompress);
    foreach (var folder in folders)
    {
        var dirName = Path.GetFileName(folder);
        if ((dirName == null) || (dirName.ToLowerInvariant() == "sample texts"))
        {
            continue; // Don't want to bundle these up
        }

        // omitMetaJson and pathToFileForSha are not passed on, so subdirectories get their default values.
        CompressDirectory(folder, zipStream, dirNameOffset, dirNamePrefix, depthFromCollection + 1, forReaderTools, excludeAudio, reduceImages);
    }
}