// Checks that EpubMetadata.GetOpfPath joins the book folder with the
// rootfile "full-path" taken from the container XML, for both a
// Unix-style and a Windows-style book folder.
public void TestEpubMetadataGetOpfPath()
{
    // Case 1: Unix-style folder, package document stored under "content".
    var unixBookFolder = "/home/steve/test/epubs/This is a test";
    var unixContainerXml = @"<?xml version=""1.0"" encoding=""UTF-8""?> <container xmlns=""urn:oasis:names:tc:opendocument:xmlns:container"" version=""1.0""> <rootfiles> <rootfile full-path=""content/book.opf"" media-type=""application/oebps-package+xml""/> </rootfiles> </container>";
    var expectedUnixOpf = Path.Combine(unixBookFolder, "content", "book.opf");

    // SUT
    var actualUnixOpf = EpubMetadata.GetOpfPath(unixBookFolder, unixContainerXml);

    Assert.That(actualUnixOpf, Is.EqualTo(expectedUnixOpf));

    // Case 2: Windows-style folder, package document stored under "OEBPS".
    // The container attributes appear in a different order than in case 1.
    var windowsBookFolder = @"C:\Users\steve\Documents\epubs\Testing Away";
    var windowsContainerXml = @"<?xml version=""1.0"" encoding=""UTF-8""?> <container version=""1.0"" xmlns=""urn:oasis:names:tc:opendocument:xmlns:container""> <rootfiles> <rootfile full-path=""OEBPS/package.opf"" media-type=""application/oebps-package+xml""/> </rootfiles> </container>";
    var expectedWindowsOpf = Path.Combine(windowsBookFolder, "OEBPS", "package.opf");

    // SUT
    var actualWindowsOpf = EpubMetadata.GetOpfPath(windowsBookFolder, windowsContainerXml);

    Assert.That(actualWindowsOpf, Is.EqualTo(expectedWindowsOpf));
}
// Builds an EpubMetadata from the shared _opfXml fixture and verifies the
// creators, descriptive fields, identifier, modification stamp and the
// image/page file lists it exposes.
public void TestEpubMetadataLoading()
{
    var bookFolder = "/home/steve/test/epubs/Test";
    var packageFile = "/home/steve/test/epubs/Test/content/book.opf";

    // Expected values that need computing are named up front.
    var expectedModified = DateTime.Parse("2020-02-11T11:03:04Z");
    var expectedFirstImage = Path.Combine(bookFolder, "content", "c7b42f14c72ad4a3b3488c4377b70d94.jpg");
    var expectedSeventhPage = Path.Combine(bookFolder, "content", "chapter-7.xhtml");

    // SUT
    var loaded = new EpubMetadata(bookFolder, packageFile, _opfXml);

    // People credited on the book.
    Assert.That(loaded.Authors.Count, Is.EqualTo(1));
    Assert.That(loaded.Authors[0], Is.EqualTo("Alice Nakasango"));
    Assert.That(loaded.Illustrators.Count, Is.EqualTo(1));
    Assert.That(loaded.Illustrators[0], Is.EqualTo("Marleen Visser"));
    Assert.That(loaded.OtherCreators.Count, Is.EqualTo(0));
    Assert.That(loaded.OtherContributors.Count, Is.EqualTo(0));

    // Descriptive fields.
    Assert.That(loaded.Description, Is.EqualTo("Now you know why goats are so stubborn!"));
    Assert.That(loaded.Title, Is.EqualTo("Goat, The False King"));
    Assert.That(loaded.LanguageCode, Is.EqualTo("en"));
    Assert.That(loaded.Identifier, Is.EqualTo("4f513a80-8f36-46c5-a73f-3169420c5c24"));
    Assert.That(loaded.Modified, Is.EqualTo(expectedModified));

    // Files resolved relative to the package document's folder.
    Assert.That(loaded.ImageFiles.Count, Is.EqualTo(21));
    Assert.That(loaded.ImageFiles[0], Is.EqualTo(expectedFirstImage));
    Assert.That(loaded.PageFiles.Count, Is.EqualTo(20));
    Assert.That(loaded.PageFiles[6], Is.EqualTo(expectedSeventhPage));
}
/// <summary>
/// Reads the OPF package document stored at <paramref name="rootFilePath"/> inside
/// <paramref name="epubArchive"/> and parses its version, metadata, manifest, spine
/// and (when present) guide.
/// </summary>
/// <param name="epubArchive">Opened EPUB archive.</param>
/// <param name="rootFilePath">Archive entry name of the package document.</param>
/// <returns>The parsed package; <c>Guide</c> is left unset when there is no guide element.</returns>
/// <exception cref="Exception">
/// The entry, the package element, its version attribute, or one of the mandatory
/// sections is missing, or the version is neither "2.0" nor "3.0".
/// </exception>
public static EpubPackage ReadPackage(ZipArchive epubArchive, string rootFilePath)
{
    ZipArchiveEntry rootFileEntry = epubArchive.GetEntry(rootFilePath);
    if (rootFileEntry == null)
    {
        throw new Exception("EPUB parsing error: root file not found in archive.");
    }

    XmlDocument containerDocument;
    using (Stream containerStream = rootFileEntry.Open())
    {
        containerDocument = XmlUtils.LoadDocument(containerStream);
    }

    XmlNamespaceManager xmlNamespaceManager = new XmlNamespaceManager(containerDocument.NameTable);
    xmlNamespaceManager.AddNamespace("opf", "http://www.idpf.org/2007/opf");

    // The XPath is absolute, so it can be evaluated from the document itself;
    // this also avoids dereferencing DocumentElement.
    XmlNode packageNode = containerDocument.SelectSingleNode("/opf:package", xmlNamespaceManager);
    if (packageNode == null)
    {
        // Previously this surfaced as a NullReferenceException.
        throw new Exception("EPUB parsing error: package not found in the root file.");
    }

    XmlAttribute versionAttribute = packageNode.Attributes["version"];
    if (versionAttribute == null)
    {
        // Previously this surfaced as a NullReferenceException.
        throw new Exception("EPUB parsing error: EPUB version not specified in the package.");
    }

    EpubPackage result = new EpubPackage();
    string epubVersionValue = versionAttribute.Value;
    switch (epubVersionValue)
    {
        case "2.0":
            result.EpubVersion = EpubVersion.EPUB_2;
            break;
        case "3.0":
            result.EpubVersion = EpubVersion.EPUB_3;
            break;
        default:
            throw new Exception(String.Format("Unsupported EPUB version: {0}.", epubVersionValue));
    }

    XmlNode metadataNode = packageNode.SelectSingleNode("opf:metadata", xmlNamespaceManager);
    if (metadataNode == null)
    {
        throw new Exception("EPUB parsing error: metadata not found in the package.");
    }
    result.Metadata = ReadMetadata(metadataNode, result.EpubVersion);

    XmlNode manifestNode = packageNode.SelectSingleNode("opf:manifest", xmlNamespaceManager);
    if (manifestNode == null)
    {
        throw new Exception("EPUB parsing error: manifest not found in the package.");
    }
    result.Manifest = ReadManifest(manifestNode);

    XmlNode spineNode = packageNode.SelectSingleNode("opf:spine", xmlNamespaceManager);
    if (spineNode == null)
    {
        throw new Exception("EPUB parsing error: spine not found in the package.");
    }
    result.Spine = ReadSpine(spineNode);

    // The guide is optional; its absence is not an error.
    XmlNode guideNode = packageNode.SelectSingleNode("opf:guide", xmlNamespaceManager);
    if (guideNode != null)
    {
        result.Guide = ReadGuide(guideNode);
    }

    return result;
}
/// <summary>
/// Builds an <see cref="EpubMetadata"/> from the child nodes of the OPF metadata node.
/// Child names are matched on their lower-cased local name; unrecognised children are ignored.
/// </summary>
/// <param name="metadataNode">The metadata node whose children are read.</param>
/// <param name="epubVersion">Selects which reader is used for <c>meta</c> children.</param>
/// <returns>The populated metadata; every list is non-null even when empty.</returns>
private static EpubMetadata ReadMetadata(XmlNode metadataNode, EpubVersion epubVersion)
{
    EpubMetadata parsed = new EpubMetadata
    {
        Titles = new List<string>(),
        Creators = new List<EpubMetadataCreator>(),
        Subjects = new List<string>(),
        Publishers = new List<string>(),
        Contributors = new List<EpubMetadataContributor>(),
        Dates = new List<EpubMetadataDate>(),
        Types = new List<string>(),
        Formats = new List<string>(),
        Identifiers = new List<EpubMetadataIdentifier>(),
        Sources = new List<string>(),
        Languages = new List<string>(),
        Relations = new List<string>(),
        Coverages = new List<string>(),
        Rights = new List<string>(),
        MetaItems = new List<EpubMetadataMeta>()
    };

    foreach (XmlNode child in metadataNode.ChildNodes)
    {
        string text = child.InnerText;
        string childName = child.LocalName.ToLowerInvariant();
        switch (childName)
        {
            // Plain-text entries.
            case "title":
                parsed.Titles.Add(text);
                break;
            case "subject":
                parsed.Subjects.Add(text);
                break;
            case "description":
                // Single-valued: a later description replaces an earlier one.
                parsed.Description = text;
                break;
            case "publisher":
                parsed.Publishers.Add(text);
                break;
            case "type":
                parsed.Types.Add(text);
                break;
            case "format":
                parsed.Formats.Add(text);
                break;
            case "source":
                parsed.Sources.Add(text);
                break;
            case "language":
                parsed.Languages.Add(text);
                break;
            case "relation":
                parsed.Relations.Add(text);
                break;
            case "coverage":
                parsed.Coverages.Add(text);
                break;
            case "rights":
                parsed.Rights.Add(text);
                break;

            // Entries parsed by dedicated readers.
            case "creator":
                parsed.Creators.Add(ReadMetadataCreator(child));
                break;
            case "contributor":
                parsed.Contributors.Add(ReadMetadataContributor(child));
                break;
            case "date":
                parsed.Dates.Add(ReadMetadataDate(child));
                break;
            case "identifier":
                parsed.Identifiers.Add(ReadMetadataIdentifier(child));
                break;

            // "meta" is read by a version-specific reader.
            case "meta":
                switch (epubVersion)
                {
                    case EpubVersion.EPUB_2:
                        parsed.MetaItems.Add(ReadMetadataMetaVersion2(child));
                        break;
                    case EpubVersion.EPUB_3:
                        parsed.MetaItems.Add(ReadMetadataMetaVersion3(child));
                        break;
                }
                break;
        }
    }

    return parsed;
}
/// <summary>
/// Builds an <see cref="EpubMetadata"/> from the child elements of the OPF metadata element.
/// Children are matched on their lower-cased local name; unrecognised children are ignored.
/// </summary>
/// <param name="metadataNode">The metadata element whose child elements are read.</param>
/// <param name="epubVersion">Selects which reader is used for <c>meta</c> children.</param>
/// <returns>The populated metadata; every list is non-null even when empty.</returns>
private static EpubMetadata ReadMetadata(XElement metadataNode, EpubVersion epubVersion)
{
    EpubMetadata parsed = new EpubMetadata();
    parsed.Titles = new List<string>();
    parsed.Creators = new List<EpubMetadataCreator>();
    parsed.Subjects = new List<string>();
    parsed.Publishers = new List<string>();
    parsed.Contributors = new List<EpubMetadataContributor>();
    parsed.Dates = new List<EpubMetadataDate>();
    parsed.Types = new List<string>();
    parsed.Formats = new List<string>();
    parsed.Identifiers = new List<EpubMetadataIdentifier>();
    parsed.Sources = new List<string>();
    parsed.Languages = new List<string>();
    parsed.Relations = new List<string>();
    parsed.Coverages = new List<string>();
    parsed.Rights = new List<string>();
    parsed.MetaItems = new List<EpubMetadataMeta>();

    foreach (XElement child in metadataNode.Elements())
    {
        string text = child.Value;
        switch (child.GetLowerCaseLocalName())
        {
            // Plain-text entries.
            case "title":
                parsed.Titles.Add(text);
                break;
            case "subject":
                parsed.Subjects.Add(text);
                break;
            case "description":
                // Single-valued: a later description replaces an earlier one.
                parsed.Description = text;
                break;
            case "publisher":
                parsed.Publishers.Add(text);
                break;
            case "type":
                parsed.Types.Add(text);
                break;
            case "format":
                parsed.Formats.Add(text);
                break;
            case "source":
                parsed.Sources.Add(text);
                break;
            case "language":
                parsed.Languages.Add(text);
                break;
            case "relation":
                parsed.Relations.Add(text);
                break;
            case "coverage":
                parsed.Coverages.Add(text);
                break;
            case "rights":
                parsed.Rights.Add(text);
                break;

            // Entries parsed by dedicated readers.
            case "creator":
                parsed.Creators.Add(ReadMetadataCreator(child));
                break;
            case "contributor":
                parsed.Contributors.Add(ReadMetadataContributor(child));
                break;
            case "date":
                parsed.Dates.Add(ReadMetadataDate(child));
                break;
            case "identifier":
                parsed.Identifiers.Add(ReadMetadataIdentifier(child));
                break;

            // "meta" is read by a version-specific reader; both 3.x versions share one.
            case "meta":
                switch (epubVersion)
                {
                    case EpubVersion.EPUB_2:
                        parsed.MetaItems.Add(ReadMetadataMetaVersion2(child));
                        break;
                    case EpubVersion.EPUB_3_0:
                    case EpubVersion.EPUB_3_1:
                        parsed.MetaItems.Add(ReadMetadataMetaVersion3(child));
                        break;
                }
                break;
        }
    }

    return parsed;
}
/// <summary>
/// Asynchronously loads the OPF package document stored at <paramref name="rootFilePath"/>
/// inside <paramref name="epubArchive"/> and parses its version, metadata, manifest, spine
/// and (when present) guide.
/// </summary>
/// <param name="epubArchive">Opened EPUB archive.</param>
/// <param name="rootFilePath">Archive entry name of the package document.</param>
/// <returns>The parsed package; <c>Guide</c> is left unset when there is no guide element.</returns>
/// <exception cref="Exception">
/// The entry, the package element, its version attribute, or one of the mandatory
/// sections is missing, or the version is neither "2.0" nor "3.0".
/// </exception>
public static async Task <EpubPackage> ReadPackageAsync(ZipArchive epubArchive, string rootFilePath)
{
    ZipArchiveEntry rootFileEntry = epubArchive.GetEntry(rootFilePath);
    if (rootFileEntry == null)
    {
        throw new Exception("EPUB parsing error: root file not found in archive.");
    }

    XDocument containerDocument;
    using (Stream containerStream = rootFileEntry.Open())
    {
        containerDocument = await XmlUtils.LoadDocumentAsync(containerStream).ConfigureAwait(false);
    }

    XNamespace opfNamespace = "http://www.idpf.org/2007/opf";

    // Element() returns null when the root is not an OPF <package>;
    // previously that surfaced as a NullReferenceException.
    XElement packageNode = containerDocument.Element(opfNamespace + "package");
    if (packageNode == null)
    {
        throw new Exception("EPUB parsing error: package not found in the root file.");
    }

    // Attribute() returns null when the attribute is absent;
    // previously that surfaced as a NullReferenceException.
    XAttribute versionAttribute = packageNode.Attribute("version");
    if (versionAttribute == null)
    {
        throw new Exception("EPUB parsing error: EPUB version not specified in the package.");
    }

    EpubPackage result = new EpubPackage();
    string epubVersionValue = versionAttribute.Value;
    switch (epubVersionValue)
    {
        case "2.0":
            result.EpubVersion = EpubVersion.EPUB_2;
            break;
        case "3.0":
            result.EpubVersion = EpubVersion.EPUB_3;
            break;
        default:
            throw new Exception(String.Format("Unsupported EPUB version: {0}.", epubVersionValue));
    }

    XElement metadataNode = packageNode.Element(opfNamespace + "metadata");
    if (metadataNode == null)
    {
        throw new Exception("EPUB parsing error: metadata not found in the package.");
    }
    result.Metadata = ReadMetadata(metadataNode, result.EpubVersion);

    XElement manifestNode = packageNode.Element(opfNamespace + "manifest");
    if (manifestNode == null)
    {
        throw new Exception("EPUB parsing error: manifest not found in the package.");
    }
    result.Manifest = ReadManifest(manifestNode);

    XElement spineNode = packageNode.Element(opfNamespace + "spine");
    if (spineNode == null)
    {
        throw new Exception("EPUB parsing error: spine not found in the package.");
    }
    result.Spine = ReadSpine(spineNode);

    // The guide is optional; its absence is not an error.
    XElement guideNode = packageNode.Element(opfNamespace + "guide");
    if (guideNode != null)
    {
        result.Guide = ReadGuide(guideNode);
    }

    return result;
}
/// <summary>
/// Advances <paramref name="reader"/> to the OPF metadata element and reads its
/// children into a new <see cref="EpubMetadata"/>. Reading stops at the closing
/// metadata tag.
/// </summary>
/// <param name="reader">Async-enabled reader positioned before the metadata element.</param>
/// <param name="epubVersion">Selects how <c>meta</c> children are interpreted.</param>
/// <returns>The populated metadata; every list is non-null even when empty.</returns>
/// <exception cref="Exception">No metadata element follows the reader's current position.</exception>
private static async System.Threading.Tasks.Task <EpubMetadata> ReadMetadataAsync(XmlReader reader, EpubVersion epubVersion)
{
    EpubMetadata result = new EpubMetadata();
    result.Titles = new List <string>();
    result.Creators = new List <EpubMetadataCreator>();
    result.Subjects = new List <string>();
    result.Publishers = new List <string>();
    result.Contributors = new List <EpubMetadataContributor>();
    result.Dates = new List <EpubMetadataDate>();
    result.Types = new List <string>();
    result.Formats = new List <string>();
    result.Identifiers = new List <EpubMetadataIdentifier>();
    result.Sources = new List <string>();
    result.Languages = new List <string>();
    result.Relations = new List <string>();
    result.Coverages = new List <string>();
    result.Rights = new List <string>();
    result.MetaItems = new List <EpubMetadataMeta>();

    // Parse everything inside <metadata> and store it in the EpubMetadata instance.
    // The nodes are visited sequentially; each element's local name is lower-cased
    // before matching, so a document that spells an element name in capitals does
    // not break the reader.
    bool isMetadataAvailable = await reader.ReadToFollowingAsync("metadata", "http://www.idpf.org/2007/opf");
    if (!isMetadataAvailable)
    {
        throw new Exception("EPUB parsing error: metadata not found in the package.");
    }

    // A self-closing <metadata/> has no end tag to stop on; without this check
    // the loop below would run to the end of the document.
    if (reader.IsEmptyElement)
    {
        return result;
    }

    bool hasNode = await reader.ReadAsync();
    while (hasNode && !(reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "metadata"))
    {
        bool alreadyAdvanced = false;
        if (reader.NodeType == XmlNodeType.Element)
        {
            alreadyAdvanced = await ReadMetadataItemAsync(reader, result, epubVersion);
        }

        if (alreadyAdvanced)
        {
            // Reading element content leaves the reader on the node AFTER the end tag.
            // Calling ReadAsync again here would skip that node: the next sibling element
            // when there is no whitespace between elements, or </metadata> itself.
            hasNode = !reader.EOF;
        }
        else
        {
            hasNode = await reader.ReadAsync();
        }
    }

    return result;
}

// Stores the metadata element the reader is currently positioned on into result.
// Returns true when the element's content was read, which moves the reader past the
// element's end tag; returns false when the reader is still on the element.
private static async System.Threading.Tasks.Task <bool> ReadMetadataItemAsync(XmlReader reader, EpubMetadata result, EpubVersion epubVersion)
{
    // Attributes must be fetched before the content is read, because reading the
    // content moves the reader off the element.
    switch (reader.LocalName.ToLowerInvariant())
    {
        case "title":
            result.Titles.Add(await reader.ReadElementContentAsStringAsync());
            return true;
        case "creator":
            EpubMetadataCreator creator = new EpubMetadataCreator();
            creator.Role = reader.GetAttribute("opf:role");
            creator.FileAs = reader.GetAttribute("opf:file-as");
            creator.Creator = await reader.ReadElementContentAsStringAsync();
            result.Creators.Add(creator);
            return true;
        case "subject":
            result.Subjects.Add(await reader.ReadElementContentAsStringAsync());
            return true;
        case "description":
            result.Description = await reader.ReadElementContentAsStringAsync();
            return true;
        case "publisher":
            result.Publishers.Add(await reader.ReadElementContentAsStringAsync());
            return true;
        case "contributor":
            EpubMetadataContributor contributor = new EpubMetadataContributor();
            contributor.Role = reader.GetAttribute("opf:role");
            contributor.FileAs = reader.GetAttribute("opf:file-as");
            contributor.Contributor = await reader.ReadElementContentAsStringAsync();
            result.Contributors.Add(contributor);
            return true;
        case "date":
            EpubMetadataDate date = new EpubMetadataDate();
            date.Event = reader.GetAttribute("opf:event");
            date.Date = await reader.ReadElementContentAsStringAsync();
            result.Dates.Add(date);
            return true;
        case "type":
            result.Types.Add(await reader.ReadElementContentAsStringAsync());
            return true;
        case "format":
            result.Formats.Add(await reader.ReadElementContentAsStringAsync());
            return true;
        case "identifier":
            EpubMetadataIdentifier identifier = new EpubMetadataIdentifier();
            identifier.Id = reader.GetAttribute("id");
            identifier.Scheme = reader.GetAttribute("opf:scheme");
            identifier.Identifier = await reader.ReadElementContentAsStringAsync();
            result.Identifiers.Add(identifier);
            return true;
        case "source":
            result.Sources.Add(await reader.ReadElementContentAsStringAsync());
            return true;
        case "language":
            result.Languages.Add(await reader.ReadElementContentAsStringAsync());
            return true;
        case "relation":
            result.Relations.Add(await reader.ReadElementContentAsStringAsync());
            return true;
        case "coverage":
            result.Coverages.Add(await reader.ReadElementContentAsStringAsync());
            return true;
        case "rights":
            result.Rights.Add(await reader.ReadElementContentAsStringAsync());
            return true;
        // Looks like there is an optional refining node "meta" and it is present in EPUB3.
        case "meta":
            if (epubVersion == EpubVersion.EPUB_2)
            {
                // EPUB 2 meta carries its data in attributes only; the reader stays on the element.
                EpubMetadataMeta meta = new EpubMetadataMeta();
                meta.Name = reader.GetAttribute("name");
                meta.Content = reader.GetAttribute("content");
                result.MetaItems.Add(meta);
                return false;
            }
            if (epubVersion == EpubVersion.EPUB_3)
            {
                EpubMetadataMeta meta = new EpubMetadataMeta();
                meta.Id = reader.GetAttribute("id");
                meta.Refines = reader.GetAttribute("refines");
                meta.Property = reader.GetAttribute("property");
                meta.Scheme = reader.GetAttribute("scheme");
                meta.Content = await reader.ReadElementContentAsStringAsync();
                result.MetaItems.Add(meta);
                return true;
            }
            return false;
        default:
            // Unrecognised element: leave it for the caller to step over.
            return false;
    }
}
/// <summary>
/// Parses the package document (metadata, manifest, spine and optional guide) stored at
/// <paramref name="rootFilePath"/> inside <paramref name="epubArchive"/>, streaming it
/// through a forward-only <see cref="XmlReader"/>.
/// </summary>
/// <param name="epubArchive">Opened EPUB archive.</param>
/// <param name="rootFilePath">Archive entry name of the package document.</param>
/// <returns>The parsed package; <c>Guide</c> is left unset when there is no guide element.</returns>
/// <exception cref="Exception">
/// The entry or the package element is missing, or the version is neither "2.0" nor "3.0".
/// </exception>
public static async Task <EpubPackage> ReadPackageAsync(ZipArchive epubArchive, string rootFilePath)
{
    EpubPackage result = new EpubPackage();
    XmlReaderSettings xmlReaderSettings = new XmlReaderSettings
    {
        // XmlResolver = null,
        Async = true,
        DtdProcessing = DtdProcessing.Ignore
    };

    ZipArchiveEntry rootFileEntry = epubArchive.GetEntry(rootFilePath);
    if (rootFileEntry == null)
    {
        throw new Exception(string.Format("EPUB parsing error: {0} file not found in archive.", rootFilePath));
    }

    // Starting content.opf parsing...
    using (Stream containerStream = rootFileEntry.Open())
    {
        using (XmlReader xmlReader = XmlReader.Create(containerStream, xmlReaderSettings))
        {
            // Without this check a document with no <package> element was reported
            // as "Unsupported EPUB version: ." instead of a missing package.
            bool isPackagePresent = await xmlReader.ReadToFollowingAsync("package", "http://www.idpf.org/2007/opf");
            if (!isPackagePresent)
            {
                throw new Exception("EPUB parsing error: package not found in the root file.");
            }

            // Only the EPUB version is needed from <package version=...>; unique-identifier is not used.
            // GetAttribute reads the value without moving the reader off the element.
            string epubVersionValue = xmlReader.GetAttribute("version");
            if (epubVersionValue == "2.0")
            {
                result.EpubVersion = EpubVersion.EPUB_2;
            }
            else if (epubVersionValue == "3.0")
            {
                result.EpubVersion = EpubVersion.EPUB_3;
            }
            else
            {
                // Also reached when the version attribute is absent (the value is then null).
                throw new Exception(String.Format("Unsupported EPUB version: {0}.", epubVersionValue));
            }

            // The sections are consumed in document order from the same forward-only reader.
            // Reading metadata
            EpubMetadata metadata = await ReadMetadataAsync(xmlReader, result.EpubVersion);
            result.Metadata = metadata;

            // Reading manifest
            EpubManifest manifest = await ReadManifestAsync(xmlReader);
            result.Manifest = manifest;

            // Reading spine
            EpubSpine spine = await ReadSpineAsync(xmlReader);
            result.Spine = spine;

            // Reading guide. It is optional, so its absence is not an error.
            bool isGuidePresent = await xmlReader.ReadToFollowingAsync("guide", "http://www.idpf.org/2007/opf");
            if (isGuidePresent)
            {
                EpubGuide guide = await ReadGuideAsync(xmlReader);
                result.Guide = guide;
            }
        }
    }

    return result;
}
/// <summary>
/// Replaces <c>metadata</c> with a fresh <c>EpubMetadata</c> and fills each of its
/// entries with a named <c>Metadata</c> wrapper around the matching value of <c>epub</c>.
/// </summary>
private void LoadAllMetadata()
{
    metadata = new EpubMetadata();

    metadata.Contributor = new Metadata
    {
        Name = "Contributor",
        Value = epub.Contributer
    };
    metadata.Coverage = new Metadata
    {
        Name = "Coverage",
        Value = epub.Coverage
    };
    metadata.Author = new Metadata
    {
        Name = "Author",
        Value = epub.Creator
    };

    // Only date entries typed "publication" are exposed as the publish date.
    // The query is left unevaluated, exactly as a Where/Select chain would be.
    var publicationDates =
        from entry in epub.Date
        where entry.Type == "publication"
        select entry.Date;
    metadata.PublishDate = new Metadata
    {
        Name = "PublishDate",
        Value = publicationDates
    };

    metadata.Description = new Metadata
    {
        Name = "Description",
        Value = epub.Description
    };
    metadata.Format = new Metadata
    {
        Name = "Format",
        Value = epub.Format
    };
    metadata.ID = new Metadata
    {
        Name = "ID",
        Value = epub.ID
    };
    metadata.Language = new Metadata
    {
        Name = "Language",
        Value = epub.Language
    };
    metadata.Publisher = new Metadata
    {
        Name = "Publisher",
        Value = epub.Publisher
    };
    metadata.Relation = new Metadata
    {
        Name = "Relation",
        Value = epub.Relation
    };
    metadata.Rights = new Metadata
    {
        Name = "Rights",
        Value = epub.Rights
    };
    metadata.Source = new Metadata
    {
        Name = "Source",
        Value = epub.Source
    };
    metadata.Subject = new Metadata
    {
        Name = "Subject",
        Value = epub.Subject
    };
    metadata.Title = new Metadata
    {
        Name = "Title",
        Value = epub.Title
    };
    metadata.Type = new Metadata
    {
        Name = "Type",
        Value = epub.Type
    };

    // UUID is a single value, so it is wrapped in a one-element array.
    metadata.UUID = new Metadata
    {
        Name = "UUID",
        Value = new[] { epub.UUID }
    };
}