/// <summary>
/// Builds the schema of an EPUB archive: resolves the root file path, derives the
/// content directory from it, then reads the package and the navigation.
/// </summary>
/// <param name="epubArchive">Archive the schema is read from.</param>
/// <returns>Schema with ContentDirectoryPath, Package and Navigation assigned.</returns>
public static EpubSchema ReadSchema(ZipArchive epubArchive)
{
    EpubSchema schema = new EpubSchema();

    // The root file path is needed twice: its directory part becomes the content
    // directory, and the package is read from the path itself.
    string packageFilePath = RootFilePathReader.GetRootFilePath(epubArchive);
    string contentDirectory = ZipPathUtils.GetDirectoryPath(packageFilePath);
    schema.ContentDirectoryPath = contentDirectory;

    EpubPackage epubPackage = PackageReader.ReadPackage(epubArchive, packageFilePath);
    schema.Package = epubPackage;

    // Navigation is read last because it needs both the content directory and the package.
    schema.Navigation = NavigationReader.ReadNavigation(epubArchive, contentDirectory, epubPackage);

    return schema;
}
/// <summary>
/// Resolves the archive entry of this content file by combining the schema's content
/// directory path with <c>FileName</c>.
/// </summary>
/// <returns>The archive entry of the content file.</returns>
/// <exception cref="Exception">
/// Thrown when <c>FileName</c> is null or empty, when the entry does not exist in the
/// archive, or when the entry length exceeds <c>Int32.MaxValue</c>.
/// </exception>
private ZipArchiveEntry GetContentFileEntry()
{
    // Fail early with a precise message; otherwise an empty name is combined into a
    // path and reported as a confusing "file not found" error.
    if (String.IsNullOrEmpty(FileName))
    {
        throw new Exception("EPUB parsing error: file name of the specified content file is empty.");
    }

    string contentFilePath = ZipPathUtils.Combine(epubBookRef.Schema.ContentDirectoryPath, FileName);
    ZipArchiveEntry contentFileEntry = epubBookRef.EpubArchive.GetEntry(contentFilePath);
    if (contentFileEntry == null)
    {
        throw new Exception(String.Format("EPUB parsing error: file {0} not found in archive.", contentFilePath));
    }

    // Entries whose length does not fit in an Int32 are rejected.
    if (contentFileEntry.Length > Int32.MaxValue)
    {
        throw new Exception(String.Format("EPUB parsing error: file {0} is bigger than 2 Gb.", contentFilePath));
    }

    return contentFileEntry;
}
/// <summary>
/// Asynchronously builds the schema of an EPUB archive: resolves the root file path,
/// derives the content directory from it, then reads the package and the navigation.
/// </summary>
/// <param name="epubArchive">Archive the schema is read from.</param>
/// <returns>Schema with ContentDirectoryPath, Package and Navigation assigned.</returns>
public static async Task<EpubSchema> ReadSchemaAsync(ZipArchive epubArchive)
{
    var schema = new EpubSchema();

    // The root file path is needed twice: its directory part becomes the content
    // directory, and the package is read from the path itself.
    var packageFilePath = await RootFilePathReader.GetRootFilePathAsync(epubArchive).ConfigureAwait(false);
    var contentDirectory = ZipPathUtils.GetDirectoryPath(packageFilePath);
    schema.ContentDirectoryPath = contentDirectory;

    var epubPackage = await PackageReader.ReadPackageAsync(epubArchive, packageFilePath).ConfigureAwait(false);
    schema.Package = epubPackage;

    // Navigation is read last because it needs both the content directory and the package.
    schema.Navigation = await NavigationReader
        .ReadNavigationAsync(epubArchive, contentDirectory, epubPackage)
        .ConfigureAwait(false);

    return schema;
}
/// <summary>
/// Resolves the archive entry of this content file by combining the schema's content
/// directory path with <c>FileName</c>.
/// </summary>
/// <returns>The archive entry of the content file.</returns>
/// <exception cref="Exception">
/// Thrown when <c>FileName</c> is null or empty, when the entry does not exist in the
/// archive, or when the entry length exceeds <c>int.MaxValue</c>.
/// </exception>
private ZipArchiveEntry GetContentFileEntry()
{
    // Fail early with a precise message; otherwise an empty name is combined into a
    // path and reported as a confusing "file not found" error.
    if (string.IsNullOrEmpty(FileName))
    {
        throw new Exception("EPUB parsing error: file name of the specified content file is empty.");
    }

    var contentFilePath = ZipPathUtils.Combine(_epubBookRef.Schema.ContentDirectoryPath, FileName);
    var contentFileEntry = _epubBookRef.EpubArchive.GetEntry(contentFilePath);
    if (contentFileEntry is null)
    {
        throw new Exception($"EPUB parsing error: file {contentFilePath} not found in archive.");
    }

    // Entries whose length does not fit in an int are rejected.
    if (contentFileEntry.Length > int.MaxValue)
    {
        throw new Exception($"EPUB parsing error: file {contentFilePath} is bigger than 2 Gb.");
    }

    return contentFileEntry;
}
/// <summary>
/// Finds the archive entry that backs this content file.
/// </summary>
/// <returns>The entry located at the content directory path combined with <c>FileName</c>.</returns>
/// <exception cref="Exception">
/// Thrown when <c>FileName</c> is null or empty, when no such entry exists in the
/// archive, or when the entry length exceeds <c>Int32.MaxValue</c>.
/// </exception>
private ZipArchiveEntry GetContentFileEntry()
{
    if (string.IsNullOrEmpty(FileName))
    {
        throw new Exception("EPUB parsing error: file name of the specified content file is empty.");
    }

    string contentFilePath = ZipPathUtils.Combine(epubBookRef.Schema.ContentDirectoryPath, FileName);

    ZipArchiveEntry entry = epubBookRef.EpubArchive.GetEntry(contentFilePath);
    if (entry == null)
    {
        throw new Exception($"EPUB parsing error: file \"{contentFilePath}\" was not found in the archive.");
    }

    // Upper bound on the entry length this reader accepts.
    const long maxSupportedLength = Int32.MaxValue;
    if (entry.Length > maxSupportedLength)
    {
        throw new Exception($"EPUB parsing error: file \"{contentFilePath}\" is larger than 2 Gb.");
    }

    return entry;
}
/// <summary>
/// Asynchronously builds the schema of an EPUB archive: resolves the root file path,
/// derives the content directory from it, then reads the package and the navigation.
/// </summary>
/// <param name="epubArchive">Archive the schema is read from.</param>
/// <returns>Schema with ContentDirectoryPath, Package and Navigation assigned.</returns>
public static async Task<EpubSchema> ReadSchemaAsync(ZipArchive epubArchive)
{
    EpubSchema result = new EpubSchema();

    // Resolve the path of the root file (per the original note, this is obtained by
    // reading META-INF/container.xml).
    // ConfigureAwait(false): this method touches no context-bound state, so it does not
    // need to resume on the caller's synchronization context.
    string rootFilePath = await RootFilePathReader.GetRootFilePathAsync(epubArchive).ConfigureAwait(false);

    // Directory part of the root file path; it is passed on below as the base directory
    // for reading the navigation.
    string contentDirectoryPath = ZipPathUtils.GetDirectoryPath(rootFilePath);
    result.ContentDirectoryPath = contentDirectoryPath;

    // Read the package from the root file (the original note refers to it as content.opf).
    EpubPackage package = await PackageReader.ReadPackageAsync(epubArchive, rootFilePath).ConfigureAwait(false);
    result.Package = package;

    EpubNavigation navigation = await NavigationReader
        .ReadNavigationAsync(epubArchive, contentDirectoryPath, package)
        .ConfigureAwait(false);
    result.Navigation = navigation;

    return result;
}
/// <summary>
/// Extracts all files of the EPUB archive to disk, into a uniquely named sub-folder
/// derived from the first non-empty identifier in the book's package metadata.
/// </summary>
/// <param name="epubArchive">Archive to unzip.</param>
/// <param name="book">Book whose metadata identifiers supply the sub-folder name.</param>
/// <returns>A task that completes when the archive has been unzipped.</returns>
/// <exception cref="Exception">
/// Thrown when unzipping fails; the original failure is preserved as <c>InnerException</c>.
/// </exception>
public static async Task ExtractContentFilesToDiskAsync(ZipArchive epubArchive, EpubBook book)
{
    string bookFolder = string.Empty;

    // NOTE(review): the destination is the app's installed location; confirm this
    // folder is writable on the target platform.
    StorageFolder destinationFolder = Windows.ApplicationModel.Package.Current.InstalledLocation;

    // Use the first identifier with a non-empty value to name the book folder.
    foreach (EpubMetadataIdentifier id in book.Schema.Package.Metadata.Identifiers)
    {
        if (!string.IsNullOrEmpty(id.Identifier))
        {
            bookFolder = ZipPathUtils.verifyPathName(string.Format("{0}_{1}", id.Identifier, id.Id));
            break;
        }
    }

    // NOTE(review): if no identifier has a value, bookFolder is still empty here;
    // confirm how CreateFolderAsync behaves for an empty name.
    // GenerateUniqueName avoids clobbering a folder left by a previous extraction.
    StorageFolder unzipFolder = await destinationFolder.CreateFolderAsync(bookFolder, CreationCollisionOption.GenerateUniqueName);

    try
    {
        await ZipUtils.UnZipFileAsync(epubArchive, unzipFolder);
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so its type and stack trace
        // are not lost; the message is unchanged.
        throw new Exception(string.Format("Failed to unzip file ...{0}", ex.Message), ex);
    }
}
/// <summary>
/// Asynchronously reads the NCX table of contents referenced by the package spine and
/// converts it into an <see cref="EpubNavigation"/>.
/// </summary>
/// <param name="epubArchive">Archive that contains the TOC file.</param>
/// <param name="contentDirectoryPath">Directory the manifest hrefs are combined with.</param>
/// <param name="package">Package supplying the spine TOC id and the manifest.</param>
/// <returns>Navigation with head, title, authors, nav map, optional page list and nav lists.</returns>
/// <exception cref="Exception">
/// Thrown when the TOC id is empty, the TOC item or file cannot be found, the file
/// length exceeds <c>Int32.MaxValue</c>, or a required ncx/head/docTitle/navMap element is missing.
/// </exception>
public static async Task<EpubNavigation> ReadNavigationAsync(ZipArchive epubArchive, string contentDirectoryPath, EpubPackage package)
{
    EpubNavigation navigation = new EpubNavigation();

    string tocId = package.Spine.Toc;
    if (String.IsNullOrEmpty(tocId))
    {
        throw new Exception("EPUB parsing error: TOC ID is empty.");
    }

    // The spine references the TOC by manifest id; ids are matched case-insensitively.
    EpubManifestItem tocItem = package.Manifest.FirstOrDefault(
        candidate => String.Equals(candidate.Id, tocId, StringComparison.OrdinalIgnoreCase));
    if (tocItem == null)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC item {0} not found in EPUB manifest.", tocId));
    }

    string tocPath = ZipPathUtils.Combine(contentDirectoryPath, tocItem.Href);
    ZipArchiveEntry tocEntry = epubArchive.GetEntry(tocPath);
    if (tocEntry == null)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC file {0} not found in archive.", tocPath));
    }
    if (tocEntry.Length > Int32.MaxValue)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC file {0} is bigger than 2 Gb.", tocPath));
    }

    XDocument ncxDocument;
    using (Stream tocStream = tocEntry.Open())
    {
        ncxDocument = await XmlUtils.LoadDocumentAsync(tocStream).ConfigureAwait(false);
    }

    XNamespace ncx = "http://www.daisy.org/z3986/2005/ncx/";
    XElement ncxRoot = ncxDocument.Element(ncx + "ncx");
    if (ncxRoot == null)
    {
        throw new Exception("EPUB parsing error: TOC file does not contain ncx element.");
    }

    // Required sections are validated and parsed one after another, so the first
    // missing or malformed section is the one that gets reported.
    XElement headElement = ncxRoot.Element(ncx + "head");
    if (headElement == null)
    {
        throw new Exception("EPUB parsing error: TOC file does not contain head element.");
    }
    navigation.Head = ReadNavigationHead(headElement);

    XElement docTitleElement = ncxRoot.Element(ncx + "docTitle");
    if (docTitleElement == null)
    {
        throw new Exception("EPUB parsing error: TOC file does not contain docTitle element.");
    }
    navigation.DocTitle = ReadNavigationDocTitle(docTitleElement);

    // Zero or more docAuthor elements are collected.
    List<EpubNavigationDocAuthor> authors = new List<EpubNavigationDocAuthor>();
    navigation.DocAuthors = authors;
    foreach (XElement docAuthorElement in ncxRoot.Elements(ncx + "docAuthor"))
    {
        authors.Add(ReadNavigationDocAuthor(docAuthorElement));
    }

    XElement navMapElement = ncxRoot.Element(ncx + "navMap");
    if (navMapElement == null)
    {
        throw new Exception("EPUB parsing error: TOC file does not contain navMap element.");
    }
    navigation.NavMap = ReadNavigationMap(navMapElement);

    // pageList is optional: PageList is only assigned when the element is present.
    XElement pageListElement = ncxRoot.Element(ncx + "pageList");
    if (pageListElement != null)
    {
        navigation.PageList = ReadNavigationPageList(pageListElement);
    }

    // Zero or more navList elements are collected.
    List<EpubNavigationList> navLists = new List<EpubNavigationList>();
    navigation.NavLists = navLists;
    foreach (XElement navListElement in ncxRoot.Elements(ncx + "navList"))
    {
        navLists.Add(ReadNavigationList(navListElement));
    }

    return navigation;
}
/// <summary>
/// Asynchronously reads the TOC file referenced by the package spine with a forward-only
/// <see cref="XmlReader"/> and converts it into an <see cref="EpubNavigation"/>.
/// </summary>
/// <param name="epubArchive">Archive that contains the TOC file.</param>
/// <param name="contentDirectoryPath">Directory the manifest hrefs are combined with.</param>
/// <param name="package">Package supplying the spine TOC id and the manifest.</param>
/// <returns>
/// Navigation with head, title, authors and nav map read from the file. NavLists and
/// PageList are always returned empty because their parsing is not implemented.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the TOC id is empty, the TOC item or file cannot be found, or the file
/// length exceeds <c>Int32.MaxValue</c>.
/// </exception>
public static async Task<EpubNavigation> ReadNavigationAsync(ZipArchive epubArchive, string contentDirectoryPath, EpubPackage package)
{
    EpubNavigation result = new EpubNavigation();
    string tocId = package.Spine.Toc;
    if (String.IsNullOrEmpty(tocId))
    {
        throw new Exception("EPUB parsing error: TOC ID is empty.");
    }

    // Check that the TOC id referenced in the spine exists in the manifest
    // (ids are matched case-insensitively).
    EpubManifestItem tocManifestItem = package.Manifest.FirstOrDefault(item => String.Compare(item.Id, tocId, StringComparison.OrdinalIgnoreCase) == 0);
    if (tocManifestItem == null)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC item {0} not found in EPUB manifest.", tocId));
    }

    // Locate the TOC file in the archive through the href stored in the manifest.
    string tocFileEntryPath = ZipPathUtils.Combine(contentDirectoryPath, tocManifestItem.Href);
    ZipArchiveEntry tocFileEntry = epubArchive.GetEntry(tocFileEntryPath);
    if (tocFileEntry == null)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC file {0} not found in archive.", tocFileEntryPath));
    }
    if (tocFileEntry.Length > Int32.MaxValue)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC file {0} is bigger than 2 Gb.", tocFileEntryPath));
    }

    // Async must be enabled for the asynchronous reader methods; DTDs are ignored
    // rather than processed.
    XmlReaderSettings xmlReaderSettings = new XmlReaderSettings
    {
        Async = true,
        DtdProcessing = DtdProcessing.Ignore
    };

    // The reader is forward-only, so the sections are read in this fixed order.
    // ConfigureAwait(false): nothing here depends on the caller's synchronization context.
    using (Stream containerStream = tocFileEntry.Open())
    using (XmlReader xmlReader = XmlReader.Create(containerStream, xmlReaderSettings))
    {
        result.Head = await ReadNavigationHeadAsync(xmlReader).ConfigureAwait(false);
        result.DocTitle = await ReadNavigationDocTitleAsync(xmlReader).ConfigureAwait(false);
        result.DocAuthors = await ReadNavigationAuthorsAsync(xmlReader).ConfigureAwait(false);
        result.NavMap = await ReadNavigationMapAsync(xmlReader).ConfigureAwait(false);

        // TODO: implement navList parsing. It is a secondary navigation system for
        // supplied book info - schemes, figures, diagrams, illustrations etc.
        result.NavLists = new List<EpubNavigationList>();

        // TODO: implement pageList parsing. Needed to tie positions inside the EPUB to
        // the actual pages of the printed book.
        result.PageList = new EpubNavigationPageList();
    }

    return result;
}
/// <summary>
/// Loads every file listed in the book's manifest from the archive into an in-memory
/// <see cref="EpubContent"/> structure.
/// </summary>
/// <param name="epubArchive">Archive the content files are read from.</param>
/// <param name="book">Book supplying the manifest and the content directory path.</param>
/// <returns>
/// Content with each file registered in AllFiles and, depending on its content type,
/// also in Html, Css, Images or Fonts. All dictionaries are keyed by the manifest href.
/// </returns>
/// <exception cref="Exception">
/// Thrown when a manifest file is missing from the archive or its length exceeds
/// <c>Int32.MaxValue</c>.
/// </exception>
public static EpubContent ReadContentFilesToMemory(ZipArchive epubArchive, EpubBook book)
{
    EpubContent content = new EpubContent
    {
        Html = new Dictionary<string, EpubTextContentFile>(),
        Css = new Dictionary<string, EpubTextContentFile>(),
        Images = new Dictionary<string, EpubByteContentFile>(),
        Fonts = new Dictionary<string, EpubByteContentFile>(),
        AllFiles = new Dictionary<string, EpubContentFile>()
    };

    foreach (EpubManifestItem item in book.Schema.Package.Manifest)
    {
        string entryPath = ZipPathUtils.Combine(book.Schema.ContentDirectoryPath, item.Href);
        ZipArchiveEntry entry = epubArchive.GetEntry(entryPath);
        if (entry == null)
        {
            throw new Exception(String.Format("EPUB parsing error: file {0} not found in archive.", entryPath));
        }
        if (entry.Length > Int32.MaxValue)
        {
            throw new Exception(String.Format("EPUB parsing error: file {0} is bigger than 2 Gb.", entryPath));
        }

        string fileName = item.Href;
        string mimeType = item.MediaType;
        EpubContentType contentType = GetContentTypeByContentMimeType(mimeType);

        switch (contentType)
        {
            // Text-based content types are read as strings.
            case EpubContentType.XHTML_1_1:
            case EpubContentType.CSS:
            case EpubContentType.OEB1_DOCUMENT:
            case EpubContentType.OEB1_CSS:
            case EpubContentType.XHTML_1_1XML:
            case EpubContentType.DTBOOK:
            case EpubContentType.DTBOOK_NCX:
            {
                EpubTextContentFile textFile = new EpubTextContentFile
                {
                    FileName = fileName,
                    ContentMimeType = mimeType,
                    ContentType = contentType
                };
                using (Stream entryStream = entry.Open())
                {
                    if (entryStream == null)
                    {
                        throw new Exception(String.Format("Incorrect EPUB file: content file \"{0}\" specified in manifest is not found", fileName));
                    }
                    using (StreamReader reader = new StreamReader(entryStream))
                    {
                        textFile.Content = reader.ReadToEnd();
                    }
                }

                // Only XHTML 1.1 and CSS get a dedicated dictionary; the remaining text
                // types are reachable through AllFiles only.
                if (contentType == EpubContentType.XHTML_1_1)
                {
                    content.Html.Add(fileName, textFile);
                }
                else if (contentType == EpubContentType.CSS)
                {
                    content.Css.Add(fileName, textFile);
                }

                // Original note (translated from Russian): "At the moment the content does
                // not get into AllFiles because there is no conversion from
                // EpubTextContentFile to EpubContentFile, namely no conversion from string
                // to byte[]."
                // NOTE(review): the statement below does add the text file to AllFiles;
                // confirm whether the note above is stale.
                content.AllFiles.Add(fileName, textFile);
                break;
            }

            // Everything else is read as raw bytes.
            default:
            {
                EpubByteContentFile byteFile = new EpubByteContentFile
                {
                    FileName = fileName,
                    ContentMimeType = mimeType,
                    ContentType = contentType
                };
                using (Stream entryStream = entry.Open())
                {
                    if (entryStream == null)
                    {
                        throw new Exception(String.Format("Incorrect EPUB file: content file \"{0}\" specified in manifest is not found", fileName));
                    }
                    // The buffer is pre-sized from the entry length, which was checked
                    // above to fit in an Int32.
                    using (MemoryStream buffer = new MemoryStream((int)entry.Length))
                    {
                        entryStream.CopyTo(buffer);
                        byteFile.Content = buffer.ToArray();
                    }
                }

                bool isImage = contentType == EpubContentType.IMAGE_GIF
                    || contentType == EpubContentType.IMAGE_JPEG
                    || contentType == EpubContentType.IMAGE_PNG
                    || contentType == EpubContentType.IMAGE_SVG;
                bool isFont = contentType == EpubContentType.FONT_TRUETYPE
                    || contentType == EpubContentType.FONT_OPENTYPE;
                if (isImage)
                {
                    content.Images.Add(fileName, byteFile);
                }
                else if (isFont)
                {
                    content.Fonts.Add(fileName, byteFile);
                }

                content.AllFiles.Add(fileName, byteFile);
                break;
            }
        }
    }

    return content;
}
/// <summary>
/// Reads the NCX table of contents referenced by the package spine and converts it
/// into an <see cref="EpubNavigation"/>.
/// </summary>
/// <param name="epubArchive">Archive that contains the TOC file.</param>
/// <param name="contentDirectoryPath">Directory the manifest hrefs are combined with.</param>
/// <param name="package">Package supplying the spine TOC id and the manifest.</param>
/// <returns>Navigation with head, title, authors, nav map, optional page list and nav lists.</returns>
/// <exception cref="Exception">
/// Thrown when the TOC id is empty, the TOC item or file cannot be found, the file
/// length exceeds <c>Int32.MaxValue</c>, or a required head/docTitle/navMap element is missing.
/// </exception>
public static EpubNavigation ReadNavigation(ZipArchive epubArchive, string contentDirectoryPath, EpubPackage package)
{
    EpubNavigation navigation = new EpubNavigation();

    string tocId = package.Spine.Toc;
    if (String.IsNullOrEmpty(tocId))
    {
        throw new Exception("EPUB parsing error: TOC ID is empty.");
    }

    // The spine references the TOC by manifest id; ids are matched case-insensitively.
    EpubManifestItem tocItem = package.Manifest.FirstOrDefault(
        candidate => String.Equals(candidate.Id, tocId, StringComparison.OrdinalIgnoreCase));
    if (tocItem == null)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC item {0} not found in EPUB manifest.", tocId));
    }

    string tocPath = ZipPathUtils.Combine(contentDirectoryPath, tocItem.Href);
    ZipArchiveEntry tocEntry = epubArchive.GetEntry(tocPath);
    if (tocEntry == null)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC file {0} not found in archive.", tocPath));
    }
    if (tocEntry.Length > Int32.MaxValue)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC file {0} is bigger than 2 Gb.", tocPath));
    }

    XmlDocument ncxDocument;
    using (Stream tocStream = tocEntry.Open())
    {
        ncxDocument = XmlUtils.LoadDocument(tocStream);
    }

    // XPath queries need the "ncx" prefix bound to the NCX namespace.
    XmlNamespaceManager namespaces = new XmlNamespaceManager(ncxDocument.NameTable);
    namespaces.AddNamespace("ncx", "http://www.daisy.org/z3986/2005/ncx/");

    // All lookups below are relative to the document's root element.
    XmlElement ncxRoot = ncxDocument.DocumentElement;

    // Required sections are validated and parsed one after another, so the first
    // missing or malformed section is the one that gets reported.
    XmlNode headNode = ncxRoot.SelectSingleNode("ncx:head", namespaces);
    if (headNode == null)
    {
        throw new Exception("EPUB parsing error: TOC file does not contain head element");
    }
    navigation.Head = ReadNavigationHead(headNode);

    XmlNode docTitleNode = ncxRoot.SelectSingleNode("ncx:docTitle", namespaces);
    if (docTitleNode == null)
    {
        throw new Exception("EPUB parsing error: TOC file does not contain docTitle element");
    }
    navigation.DocTitle = ReadNavigationDocTitle(docTitleNode);

    // Zero or more docAuthor elements are collected.
    List<EpubNavigationDocAuthor> authors = new List<EpubNavigationDocAuthor>();
    navigation.DocAuthors = authors;
    foreach (XmlNode docAuthorNode in ncxRoot.SelectNodes("ncx:docAuthor", namespaces))
    {
        authors.Add(ReadNavigationDocAuthor(docAuthorNode));
    }

    XmlNode navMapNode = ncxRoot.SelectSingleNode("ncx:navMap", namespaces);
    if (navMapNode == null)
    {
        throw new Exception("EPUB parsing error: TOC file does not contain navMap element");
    }
    navigation.NavMap = ReadNavigationMap(navMapNode);

    // pageList is optional: PageList is only assigned when the element is present.
    XmlNode pageListNode = ncxRoot.SelectSingleNode("ncx:pageList", namespaces);
    if (pageListNode != null)
    {
        navigation.PageList = ReadNavigationPageList(pageListNode);
    }

    // Zero or more navList elements are collected.
    List<EpubNavigationList> navLists = new List<EpubNavigationList>();
    navigation.NavLists = navLists;
    foreach (XmlNode navListNode in ncxRoot.SelectNodes("ncx:navList", namespaces))
    {
        navLists.Add(ReadNavigationList(navListNode));
    }

    return navigation;
}