/// <summary>
/// Loads the NCX table-of-contents file referenced by the package spine and
/// converts it into an <see cref="EpubNavigation"/> object.
/// </summary>
/// <param name="epubArchive">Archive from which the TOC entry is read.</param>
/// <param name="contentDirectoryPath">Directory inside the archive that the manifest href is combined with.</param>
/// <param name="package">Parsed package; supplies the spine TOC id and the manifest.</param>
/// <returns>
/// Navigation data containing head, docTitle, docAuthors, navMap, all navLists and,
/// when the element is present, the pageList.
/// </returns>
/// <exception cref="Exception">
/// The TOC id is empty, the TOC item or file cannot be found, the file is larger than
/// <see cref="Int32.MaxValue"/> bytes, or one of the head, docTitle or navMap elements is missing.
/// </exception>
public static EpubNavigation ReadNavigation(ZipArchive epubArchive, string contentDirectoryPath, EpubPackage package)
{
    EpubNavigation navigation = new EpubNavigation();

    // The spine names the manifest item that holds the table of contents.
    string tocId = package.Spine.Toc;
    if (String.IsNullOrEmpty(tocId))
    {
        throw new Exception("EPUB parsing error: TOC ID is empty.");
    }

    // Manifest ids are matched case-insensitively.
    EpubManifestItem tocItem = package.Manifest.FirstOrDefault(
        item => String.Equals(item.Id, tocId, StringComparison.OrdinalIgnoreCase));
    if (tocItem == null)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC item {0} not found in EPUB manifest.", tocId));
    }

    // Locate the TOC file inside the archive.
    string tocPath = ZipPathUtils.Combine(contentDirectoryPath, tocItem.Href);
    ZipArchiveEntry tocEntry = epubArchive.GetEntry(tocPath);
    if (tocEntry == null)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC file {0} not found in archive.", tocPath));
    }
    if (tocEntry.Length > Int32.MaxValue)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC file {0} is bigger than 2 Gb.", tocPath));
    }

    XmlDocument tocDocument;
    using (Stream tocStream = tocEntry.Open())
    {
        tocDocument = XmlUtils.LoadDocument(tocStream);
    }

    XmlNamespaceManager namespaces = new XmlNamespaceManager(tocDocument.NameTable);
    namespaces.AddNamespace("ncx", "http://www.daisy.org/z3986/2005/ncx/");
    var root = tocDocument.DocumentElement;

    // head, docTitle and navMap are treated as mandatory.
    XmlNode headNode = root.SelectSingleNode("ncx:head", namespaces);
    if (headNode == null)
    {
        throw new Exception("EPUB parsing error: TOC file does not contain head element");
    }
    navigation.Head = ReadNavigationHead(headNode);

    XmlNode docTitleNode = root.SelectSingleNode("ncx:docTitle", namespaces);
    if (docTitleNode == null)
    {
        throw new Exception("EPUB parsing error: TOC file does not contain docTitle element");
    }
    navigation.DocTitle = ReadNavigationDocTitle(docTitleNode);

    // Zero or more docAuthor elements.
    navigation.DocAuthors = new List<EpubNavigationDocAuthor>();
    foreach (XmlNode authorNode in root.SelectNodes("ncx:docAuthor", namespaces))
    {
        navigation.DocAuthors.Add(ReadNavigationDocAuthor(authorNode));
    }

    XmlNode navMapNode = root.SelectSingleNode("ncx:navMap", namespaces);
    if (navMapNode == null)
    {
        throw new Exception("EPUB parsing error: TOC file does not contain navMap element");
    }
    navigation.NavMap = ReadNavigationMap(navMapNode);

    // pageList is optional; PageList is left untouched when the element is absent.
    XmlNode pageListNode = root.SelectSingleNode("ncx:pageList", namespaces);
    if (pageListNode != null)
    {
        navigation.PageList = ReadNavigationPageList(pageListNode);
    }

    // Zero or more navList elements.
    navigation.NavLists = new List<EpubNavigationList>();
    foreach (XmlNode navListNode in root.SelectNodes("ncx:navList", namespaces))
    {
        navigation.NavLists.Add(ReadNavigationList(navListNode));
    }

    return navigation;
}
/// <summary>
/// Asynchronously parses the package document (metadata, manifest, spine and the
/// optional guide) found at <paramref name="rootFilePath"/> inside the archive.
/// </summary>
/// <param name="epubArchive">Archive from which the root file entry is read.</param>
/// <param name="rootFilePath">Path of the package (content.opf) entry inside the archive.</param>
/// <returns>The parsed package; <c>Guide</c> is only assigned when a guide element is found.</returns>
/// <exception cref="Exception">
/// The root file is missing from the archive, the package element or its version
/// attribute cannot be found, or the version is neither "2.0" nor "3.0".
/// </exception>
public static async Task<EpubPackage> ReadPackageAsync(ZipArchive epubArchive, string rootFilePath)
{
    EpubPackage result = new EpubPackage();
    XmlReaderSettings xmlReaderSettings = new XmlReaderSettings
    {
        // XmlResolver = null,
        Async = true,
        DtdProcessing = DtdProcessing.Ignore
    };
    ZipArchiveEntry rootFileEntry = epubArchive.GetEntry(rootFilePath);
    if (rootFileEntry == null)
        throw new Exception(string.Format("EPUB parsing error: {0} file not found in archive.", rootFilePath));
    // Starting content.opf parsing...
    using (Stream containerStream = rootFileEntry.Open())
    using (XmlReader xmlReader = XmlReader.Create(containerStream, xmlReaderSettings))
    {
        // Fail early with a precise message when there is no <package> element;
        // otherwise the version check below would report an empty, misleading version.
        bool isPackagePresent = await xmlReader.ReadToFollowingAsync("package", "http://www.idpf.org/2007/opf");
        if (!isPackagePresent)
            throw new Exception("EPUB parsing error: package element not found in the root file.");
        // Only the EPUB version is needed from <package version=...>;
        // unique-identifier is not read here.
        if (!xmlReader.MoveToAttribute("version"))
            throw new Exception("EPUB parsing error: EPUB version is not specified in the package.");
        string epubVersionValue = xmlReader.Value;
        if (epubVersionValue == "2.0")
            result.EpubVersion = EpubVersion.EPUB_2;
        else if (epubVersionValue == "3.0")
            result.EpubVersion = EpubVersion.EPUB_3;
        else
            throw new Exception(String.Format("Unsupported EPUB version: {0}.", epubVersionValue));
        // The section readers share the same forward-only reader, so they are called in this fixed order.
        // Reading metadata
        EpubMetadata metadata = await ReadMetadataAsync(xmlReader, result.EpubVersion);
        result.Metadata = metadata;
        // Reading manifest
        EpubManifest manifest = await ReadManifestAsync(xmlReader);
        result.Manifest = manifest;
        // Reading spine
        EpubSpine spine = await ReadSpineAsync(xmlReader);
        result.Spine = spine;
        // Reading guide. It is optional, so its absence is not an error.
        bool isGuidePresent = await xmlReader.ReadToFollowingAsync("guide", "http://www.idpf.org/2007/opf");
        if (isGuidePresent)
        {
            EpubGuide guide = await ReadGuideAsync(xmlReader);
            result.Guide = guide;
        }
    }
    return result;
}
/// <summary>
/// Parses the package document (metadata, manifest, spine and the optional guide)
/// found at <paramref name="rootFilePath"/> inside the archive.
/// </summary>
/// <param name="epubArchive">Archive from which the root file entry is read.</param>
/// <param name="rootFilePath">Path of the package (content.opf) entry inside the archive.</param>
/// <returns>The parsed package; <c>Guide</c> is only assigned when a guide element is present.</returns>
/// <exception cref="Exception">
/// The root file is missing from the archive, the package element or its version attribute
/// is missing, the version is neither "2.0" nor "3.0", or the metadata, manifest or spine
/// element is missing.
/// </exception>
public static EpubPackage ReadPackage(ZipArchive epubArchive, string rootFilePath)
{
    ZipArchiveEntry rootFileEntry = epubArchive.GetEntry(rootFilePath);
    if (rootFileEntry == null)
        throw new Exception("EPUB parsing error: root file not found in archive.");
    XmlDocument containerDocument;
    using (Stream containerStream = rootFileEntry.Open())
        containerDocument = XmlUtils.LoadDocument(containerStream);
    XmlNamespaceManager xmlNamespaceManager = new XmlNamespaceManager(containerDocument.NameTable);
    xmlNamespaceManager.AddNamespace("opf", "http://www.idpf.org/2007/opf");
    XmlNode packageNode = containerDocument.DocumentElement.SelectSingleNode("/opf:package", xmlNamespaceManager);
    // A root element that is not opf:package yields null here; report it as a
    // parsing error instead of failing with a NullReferenceException below.
    if (packageNode == null)
        throw new Exception("EPUB parsing error: package element not found in the root file.");
    EpubPackage result = new EpubPackage();
    // The attribute indexer returns null when the attribute is absent.
    var versionAttribute = packageNode.Attributes["version"];
    if (versionAttribute == null)
        throw new Exception("EPUB parsing error: EPUB version is not specified in the package.");
    string epubVersionValue = versionAttribute.Value;
    if (epubVersionValue == "2.0")
        result.EpubVersion = EpubVersion.EPUB_2;
    else if (epubVersionValue == "3.0")
        result.EpubVersion = EpubVersion.EPUB_3;
    else
        throw new Exception(String.Format("Unsupported EPUB version: {0}.", epubVersionValue));
    // metadata, manifest and spine are mandatory.
    XmlNode metadataNode = packageNode.SelectSingleNode("opf:metadata", xmlNamespaceManager);
    if (metadataNode == null)
        throw new Exception("EPUB parsing error: metadata not found in the package.");
    EpubMetadata metadata = ReadMetadata(metadataNode, result.EpubVersion);
    result.Metadata = metadata;
    XmlNode manifestNode = packageNode.SelectSingleNode("opf:manifest", xmlNamespaceManager);
    if (manifestNode == null)
        throw new Exception("EPUB parsing error: manifest not found in the package.");
    EpubManifest manifest = ReadManifest(manifestNode);
    result.Manifest = manifest;
    XmlNode spineNode = packageNode.SelectSingleNode("opf:spine", xmlNamespaceManager);
    if (spineNode == null)
        throw new Exception("EPUB parsing error: spine not found in the package.");
    EpubSpine spine = ReadSpine(spineNode);
    result.Spine = spine;
    // guide is optional.
    XmlNode guideNode = packageNode.SelectSingleNode("opf:guide", xmlNamespaceManager);
    if (guideNode != null)
    {
        EpubGuide guide = ReadGuide(guideNode);
        result.Guide = guide;
    }
    return result;
}
/// <summary>
/// Asynchronously reads the NCX table-of-contents file referenced by the package spine
/// using a streaming <see cref="XmlReader"/>.
/// </summary>
/// <param name="epubArchive">Archive from which the TOC entry is read.</param>
/// <param name="contentDirectoryPath">Directory inside the archive that the manifest href is combined with.</param>
/// <param name="package">Parsed package; supplies the spine TOC id and the manifest.</param>
/// <returns>
/// Navigation data with head, docTitle, docAuthors and navMap populated.
/// <c>NavLists</c> and <c>PageList</c> are assigned empty instances because their
/// parsing is not implemented in this method.
/// </returns>
/// <exception cref="Exception">
/// The TOC id is empty, the TOC item or file cannot be found, or the file is larger
/// than <see cref="Int32.MaxValue"/> bytes.
/// </exception>
public static async Task<EpubNavigation> ReadNavigationAsync(ZipArchive epubArchive, string contentDirectoryPath, EpubPackage package)
{
    EpubNavigation navigation = new EpubNavigation();

    string tocId = package.Spine.Toc;
    if (String.IsNullOrEmpty(tocId))
    {
        throw new Exception("EPUB parsing error: TOC ID is empty.");
    }

    // Check that the TOC id referenced in the spine exists in the manifest (case-insensitive match).
    EpubManifestItem tocItem = package.Manifest.FirstOrDefault(
        item => String.Equals(item.Id, tocId, StringComparison.OrdinalIgnoreCase));
    if (tocItem == null)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC item {0} not found in EPUB manifest.", tocId));
    }

    // Open the TOC file in the archive using the href reference from the manifest.
    string tocPath = ZipPathUtils.Combine(contentDirectoryPath, tocItem.Href);
    ZipArchiveEntry tocEntry = epubArchive.GetEntry(tocPath);
    if (tocEntry == null)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC file {0} not found in archive.", tocPath));
    }
    if (tocEntry.Length > Int32.MaxValue)
    {
        throw new Exception(String.Format("EPUB parsing error: TOC file {0} is bigger than 2 Gb.", tocPath));
    }

    XmlReaderSettings readerSettings = new XmlReaderSettings
    {
        // XmlResolver = null,
        Async = true,
        DtdProcessing = DtdProcessing.Ignore
    };

    // Actual parsing starts here. The section readers share one forward-only reader,
    // so they are awaited in this fixed order.
    using (Stream tocStream = tocEntry.Open())
    using (XmlReader tocReader = XmlReader.Create(tocStream, readerSettings))
    {
        navigation.Head = await ReadNavigationHeadAsync(tocReader);
        navigation.DocTitle = await ReadNavigationDocTitleAsync(tocReader);
        navigation.DocAuthors = await ReadNavigationAuthorsAsync(tocReader);
        navigation.NavMap = await ReadNavigationMapAsync(tocReader);

        // TODO: implement navList parsing. It is a secondary navigation system for
        // supplied book info - schemes, figures, diagrams, illustrations etc.
        navigation.NavLists = new List<EpubNavigationList>(); // Empty, because not implemented

        // TODO: implement pageList parsing. Needed to tie a position inside the EPUB
        // to the actual pages of the printed book.
        navigation.PageList = new EpubNavigationPageList(); // Empty, because not implemented
    }

    // The synchronous ReadNavigation method contains an XmlDocument-based version of this
    // parsing, including the pageList and navList sections that are still missing here.
    return navigation;
}