Пример #1
0
        /// <summary>
        /// Checks that <c>EpubMetadata.GetOpfPath</c> returns the EPUB folder combined with the
        /// rootfile <c>full-path</c> declared in the container XML, for a Unix-style and a
        /// Windows-style folder.
        /// </summary>
        public void TestEpubMetadataGetOpfPath()
        {
            // Case 1: Unix-style folder, package document declared as "content/book.opf".
            var unixEpubFolder   = "/home/steve/test/epubs/This is a test";
            var unixContainerXml = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<container xmlns=""urn:oasis:names:tc:opendocument:xmlns:container"" version=""1.0"">
   <rootfiles>
      <rootfile full-path=""content/book.opf"" media-type=""application/oebps-package+xml""/>
   </rootfiles>
</container>";
            var expectedUnixOpf = Path.Combine(unixEpubFolder, "content", "book.opf");

            // SUT
            var actualUnixOpf = EpubMetadata.GetOpfPath(unixEpubFolder, unixContainerXml);

            Assert.That(actualUnixOpf, Is.EqualTo(expectedUnixOpf));

            // Case 2: Windows-style folder, package document declared as "OEBPS/package.opf".
            var windowsEpubFolder   = "C:\\Users\\steve\\Documents\\epubs\\Testing Away";
            var windowsContainerXml = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<container version=""1.0"" xmlns=""urn:oasis:names:tc:opendocument:xmlns:container"">
  <rootfiles>
    <rootfile full-path=""OEBPS/package.opf"" media-type=""application/oebps-package+xml""/>
  </rootfiles>
</container>";
            var expectedWindowsOpf = Path.Combine(windowsEpubFolder, "OEBPS", "package.opf");

            // SUT
            var actualWindowsOpf = EpubMetadata.GetOpfPath(windowsEpubFolder, windowsContainerXml);

            Assert.That(actualWindowsOpf, Is.EqualTo(expectedWindowsOpf));
        }
Пример #2
0
        /// <summary>
        /// Builds an <c>EpubMetadata</c> from the shared <c>_opfXml</c> fixture and checks the
        /// contributors, descriptive fields, identifier, modification time and file lists it exposes.
        /// </summary>
        public void TestEpubMetadataLoading()
        {
            var epubFolder    = "/home/steve/test/epubs/Test";
            var opfFile       = "/home/steve/test/epubs/Test/content/book.opf";
            var contentFolder = Path.Combine(epubFolder, "content");

            // SUT
            var loaded = new EpubMetadata(epubFolder, opfFile, _opfXml);

            // People credited in the package.
            Assert.That(loaded.Authors.Count, Is.EqualTo(1));
            Assert.That(loaded.Authors[0], Is.EqualTo("Alice Nakasango"));
            Assert.That(loaded.Illustrators.Count, Is.EqualTo(1));
            Assert.That(loaded.Illustrators[0], Is.EqualTo("Marleen Visser"));
            Assert.That(loaded.OtherCreators.Count, Is.EqualTo(0));
            Assert.That(loaded.OtherContributors.Count, Is.EqualTo(0));

            // Descriptive fields.
            Assert.That(loaded.Description, Is.EqualTo("Now you know why goats are so stubborn!"));
            Assert.That(loaded.Title, Is.EqualTo("Goat, The False King"));
            Assert.That(loaded.LanguageCode, Is.EqualTo("en"));
            Assert.That(loaded.Identifier, Is.EqualTo("4f513a80-8f36-46c5-a73f-3169420c5c24"));
            Assert.That(loaded.Modified, Is.EqualTo(DateTime.Parse("2020-02-11T11:03:04Z")));

            // File lists are expected to be rooted in the folder that holds the package document.
            Assert.That(loaded.ImageFiles.Count, Is.EqualTo(21));
            Assert.That(loaded.ImageFiles[0], Is.EqualTo(Path.Combine(contentFolder, "c7b42f14c72ad4a3b3488c4377b70d94.jpg")));
            Assert.That(loaded.PageFiles.Count, Is.EqualTo(20));
            Assert.That(loaded.PageFiles[6], Is.EqualTo(Path.Combine(contentFolder, "chapter-7.xhtml")));
        }
Пример #3
0
        /// <summary>
        /// Loads the OPF package document stored at <paramref name="rootFilePath"/> in the archive and
        /// parses its version, metadata, manifest, spine and (when present) guide.
        /// </summary>
        /// <param name="epubArchive">Opened EPUB archive to read the package document from.</param>
        /// <param name="rootFilePath">Entry name of the package document inside the archive.</param>
        /// <returns>The parsed package; <c>Guide</c> is only assigned when a guide element exists.</returns>
        /// <exception cref="Exception">
        /// The archive entry, the package element, its version attribute, or the metadata, manifest or
        /// spine element is missing, or the version is neither "2.0" nor "3.0".
        /// </exception>
        public static EpubPackage ReadPackage(ZipArchive epubArchive, string rootFilePath)
        {
            ZipArchiveEntry rootFileEntry = epubArchive.GetEntry(rootFilePath);

            if (rootFileEntry == null)
            {
                throw new Exception("EPUB parsing error: root file not found in archive.");
            }
            XmlDocument containerDocument;

            using (Stream containerStream = rootFileEntry.Open())
            {
                containerDocument = XmlUtils.LoadDocument(containerStream);
            }
            XmlNamespaceManager xmlNamespaceManager = new XmlNamespaceManager(containerDocument.NameTable);

            xmlNamespaceManager.AddNamespace("opf", "http://www.idpf.org/2007/opf");

            // "/opf:package" is an absolute path, so it can be evaluated from the document itself;
            // this avoids dereferencing DocumentElement.
            XmlNode packageNode = containerDocument.SelectSingleNode("/opf:package", xmlNamespaceManager);

            // Report a root element that is not an OPF package as a parsing error
            // rather than letting it surface as a NullReferenceException below.
            if (packageNode == null)
            {
                throw new Exception("EPUB parsing error: package not found in the root file.");
            }
            XmlNode versionAttribute = packageNode.Attributes["version"];

            if (versionAttribute == null)
            {
                throw new Exception("EPUB parsing error: EPUB version is not specified in the package.");
            }
            EpubPackage result           = new EpubPackage();
            string      epubVersionValue = versionAttribute.Value;

            if (epubVersionValue == "2.0")
            {
                result.EpubVersion = EpubVersion.EPUB_2;
            }
            else
            if (epubVersionValue == "3.0")
            {
                result.EpubVersion = EpubVersion.EPUB_3;
            }
            else
            {
                throw new Exception(String.Format("Unsupported EPUB version: {0}.", epubVersionValue));
            }
            XmlNode metadataNode = packageNode.SelectSingleNode("opf:metadata", xmlNamespaceManager);

            if (metadataNode == null)
            {
                throw new Exception("EPUB parsing error: metadata not found in the package.");
            }
            result.Metadata = ReadMetadata(metadataNode, result.EpubVersion);
            XmlNode manifestNode = packageNode.SelectSingleNode("opf:manifest", xmlNamespaceManager);

            if (manifestNode == null)
            {
                throw new Exception("EPUB parsing error: manifest not found in the package.");
            }
            result.Manifest = ReadManifest(manifestNode);
            XmlNode spineNode = packageNode.SelectSingleNode("opf:spine", xmlNamespaceManager);

            if (spineNode == null)
            {
                throw new Exception("EPUB parsing error: spine not found in the package.");
            }
            result.Spine = ReadSpine(spineNode);

            // The guide is optional: a package without one is still valid for this reader.
            XmlNode guideNode = packageNode.SelectSingleNode("opf:guide", xmlNamespaceManager);

            if (guideNode != null)
            {
                result.Guide = ReadGuide(guideNode);
            }
            return(result);
        }
Пример #4
0
        /// <summary>
        /// Collects the child nodes of an OPF metadata node into an <c>EpubMetadata</c> instance.
        /// </summary>
        /// <param name="metadataNode">The metadata node whose children are read.</param>
        /// <param name="epubVersion">Selects which helper parses <c>meta</c> children.</param>
        /// <returns>
        /// Metadata with every list initialised; children whose lower-cased local name is not
        /// handled below are ignored.
        /// </returns>
        private static EpubMetadata ReadMetadata(XmlNode metadataNode, EpubVersion epubVersion)
        {
            EpubMetadata metadata = new EpubMetadata
            {
                Titles       = new List <string>(),
                Creators     = new List <EpubMetadataCreator>(),
                Subjects     = new List <string>(),
                Publishers   = new List <string>(),
                Contributors = new List <EpubMetadataContributor>(),
                Dates        = new List <EpubMetadataDate>(),
                Types        = new List <string>(),
                Formats      = new List <string>(),
                Identifiers  = new List <EpubMetadataIdentifier>(),
                Sources      = new List <string>(),
                Languages    = new List <string>(),
                Relations    = new List <string>(),
                Coverages    = new List <string>(),
                Rights       = new List <string>(),
                MetaItems    = new List <EpubMetadataMeta>()
            };

            foreach (XmlNode childNode in metadataNode.ChildNodes)
            {
                string text = childNode.InnerText;

                // Names are lower-cased so that differently cased elements still match.
                string childName = childNode.LocalName.ToLowerInvariant();

                switch (childName)
                {
                case "title":
                    metadata.Titles.Add(text);
                    break;

                case "creator":
                    metadata.Creators.Add(ReadMetadataCreator(childNode));
                    break;

                case "subject":
                    metadata.Subjects.Add(text);
                    break;

                case "description":
                    // Single-valued: a later description replaces an earlier one.
                    metadata.Description = text;
                    break;

                case "publisher":
                    metadata.Publishers.Add(text);
                    break;

                case "contributor":
                    metadata.Contributors.Add(ReadMetadataContributor(childNode));
                    break;

                case "date":
                    metadata.Dates.Add(ReadMetadataDate(childNode));
                    break;

                case "type":
                    metadata.Types.Add(text);
                    break;

                case "format":
                    metadata.Formats.Add(text);
                    break;

                case "identifier":
                    metadata.Identifiers.Add(ReadMetadataIdentifier(childNode));
                    break;

                case "source":
                    metadata.Sources.Add(text);
                    break;

                case "language":
                    metadata.Languages.Add(text);
                    break;

                case "relation":
                    metadata.Relations.Add(text);
                    break;

                case "coverage":
                    metadata.Coverages.Add(text);
                    break;

                case "rights":
                    metadata.Rights.Add(text);
                    break;

                case "meta":
                    // The meta element is parsed by a version-specific helper;
                    // any other version leaves the node unread.
                    if (epubVersion == EpubVersion.EPUB_2)
                    {
                        metadata.MetaItems.Add(ReadMetadataMetaVersion2(childNode));
                    }
                    else if (epubVersion == EpubVersion.EPUB_3)
                    {
                        metadata.MetaItems.Add(ReadMetadataMetaVersion3(childNode));
                    }
                    break;
                }
            }

            return metadata;
        }
Пример #5
0
        /// <summary>
        /// Collects the child elements of an OPF metadata element into an <c>EpubMetadata</c> instance.
        /// </summary>
        /// <param name="metadataNode">The metadata element whose child elements are read.</param>
        /// <param name="epubVersion">Selects which helper parses <c>meta</c> children.</param>
        /// <returns>
        /// Metadata with every list initialised; child elements whose lower-cased local name is not
        /// handled below are ignored.
        /// </returns>
        private static EpubMetadata ReadMetadata(XElement metadataNode, EpubVersion epubVersion)
        {
            EpubMetadata metadata = new EpubMetadata
            {
                Titles = new List <string>(),
                Creators = new List <EpubMetadataCreator>(),
                Subjects = new List <string>(),
                Publishers = new List <string>(),
                Contributors = new List <EpubMetadataContributor>(),
                Dates = new List <EpubMetadataDate>(),
                Types = new List <string>(),
                Formats = new List <string>(),
                Identifiers = new List <EpubMetadataIdentifier>(),
                Sources = new List <string>(),
                Languages = new List <string>(),
                Relations = new List <string>(),
                Coverages = new List <string>(),
                Rights = new List <string>(),
                MetaItems = new List <EpubMetadataMeta>()
            };

            foreach (XElement child in metadataNode.Elements())
            {
                string text = child.Value;

                switch (child.GetLowerCaseLocalName())
                {
                case "title":
                    metadata.Titles.Add(text);
                    break;

                case "creator":
                    metadata.Creators.Add(ReadMetadataCreator(child));
                    break;

                case "subject":
                    metadata.Subjects.Add(text);
                    break;

                case "description":
                    // Single-valued: a later description replaces an earlier one.
                    metadata.Description = text;
                    break;

                case "publisher":
                    metadata.Publishers.Add(text);
                    break;

                case "contributor":
                    metadata.Contributors.Add(ReadMetadataContributor(child));
                    break;

                case "date":
                    metadata.Dates.Add(ReadMetadataDate(child));
                    break;

                case "type":
                    metadata.Types.Add(text);
                    break;

                case "format":
                    metadata.Formats.Add(text);
                    break;

                case "identifier":
                    metadata.Identifiers.Add(ReadMetadataIdentifier(child));
                    break;

                case "source":
                    metadata.Sources.Add(text);
                    break;

                case "language":
                    metadata.Languages.Add(text);
                    break;

                case "relation":
                    metadata.Relations.Add(text);
                    break;

                case "coverage":
                    metadata.Coverages.Add(text);
                    break;

                case "rights":
                    metadata.Rights.Add(text);
                    break;

                case "meta":
                    // EPUB 2 and EPUB 3.x meta elements are parsed by different helpers;
                    // any other version leaves the element unread.
                    if (epubVersion == EpubVersion.EPUB_2)
                    {
                        metadata.MetaItems.Add(ReadMetadataMetaVersion2(child));
                        break;
                    }

                    bool isVersion3 = epubVersion == EpubVersion.EPUB_3_0 || epubVersion == EpubVersion.EPUB_3_1;
                    if (isVersion3)
                    {
                        metadata.MetaItems.Add(ReadMetadataMetaVersion3(child));
                    }
                    break;
                }
            }

            return metadata;
        }
Пример #6
0
        /// <summary>
        /// Asynchronously loads the OPF package document stored at <paramref name="rootFilePath"/> in the
        /// archive and parses its version, metadata, manifest, spine and (when present) guide.
        /// </summary>
        /// <param name="epubArchive">Opened EPUB archive to read the package document from.</param>
        /// <param name="rootFilePath">Entry name of the package document inside the archive.</param>
        /// <returns>The parsed package; <c>Guide</c> is only assigned when a guide element exists.</returns>
        /// <exception cref="Exception">
        /// The archive entry, the package element, its version attribute, or the metadata, manifest or
        /// spine element is missing, or the version is neither "2.0" nor "3.0".
        /// </exception>
        public static async Task <EpubPackage> ReadPackageAsync(ZipArchive epubArchive, string rootFilePath)
        {
            ZipArchiveEntry rootFileEntry = epubArchive.GetEntry(rootFilePath);

            if (rootFileEntry == null)
            {
                throw new Exception("EPUB parsing error: root file not found in archive.");
            }
            XDocument containerDocument;

            using (Stream containerStream = rootFileEntry.Open())
            {
                containerDocument = await XmlUtils.LoadDocumentAsync(containerStream).ConfigureAwait(false);
            }
            XNamespace opfNamespace = "http://www.idpf.org/2007/opf";
            XElement   packageNode  = containerDocument.Element(opfNamespace + "package");

            // Element() returns null when the root is not an OPF package; report that as a parsing
            // error rather than letting it surface as a NullReferenceException below.
            if (packageNode == null)
            {
                throw new Exception("EPUB parsing error: package not found in the root file.");
            }

            // The explicit XAttribute-to-string conversion yields null for a missing attribute.
            string epubVersionValue = (string)packageNode.Attribute("version");

            if (epubVersionValue == null)
            {
                throw new Exception("EPUB parsing error: EPUB version is not specified in the package.");
            }
            EpubPackage result = new EpubPackage();

            if (epubVersionValue == "2.0")
            {
                result.EpubVersion = EpubVersion.EPUB_2;
            }
            else
            if (epubVersionValue == "3.0")
            {
                result.EpubVersion = EpubVersion.EPUB_3;
            }
            else
            {
                throw new Exception(String.Format("Unsupported EPUB version: {0}.", epubVersionValue));
            }
            XElement metadataNode = packageNode.Element(opfNamespace + "metadata");

            if (metadataNode == null)
            {
                throw new Exception("EPUB parsing error: metadata not found in the package.");
            }
            result.Metadata = ReadMetadata(metadataNode, result.EpubVersion);
            XElement manifestNode = packageNode.Element(opfNamespace + "manifest");

            if (manifestNode == null)
            {
                throw new Exception("EPUB parsing error: manifest not found in the package.");
            }
            result.Manifest = ReadManifest(manifestNode);
            XElement spineNode = packageNode.Element(opfNamespace + "spine");

            if (spineNode == null)
            {
                throw new Exception("EPUB parsing error: spine not found in the package.");
            }
            result.Spine = ReadSpine(spineNode);

            // The guide is optional: a package without one is still valid for this reader.
            XElement guideNode = packageNode.Element(opfNamespace + "guide");

            if (guideNode != null)
            {
                result.Guide = ReadGuide(guideNode);
            }
            return(result);
        }
Пример #7
0
        /// <summary>
        /// Advances <paramref name="reader"/> to the OPF metadata element and reads its children into
        /// an <c>EpubMetadata</c> instance.
        /// </summary>
        /// <param name="reader">Reader over the package document; it is advanced by this call.</param>
        /// <param name="epubVersion">Selects which attributes of <c>meta</c> elements are read.</param>
        /// <returns>
        /// Metadata with every list initialised; elements whose lower-cased local name is not
        /// handled below are ignored.
        /// </returns>
        /// <exception cref="Exception">No metadata element follows the reader's current position.</exception>
        private static async System.Threading.Tasks.Task <EpubMetadata> ReadMetadataAsync(XmlReader reader, EpubVersion epubVersion)
        {
            EpubMetadata result = new EpubMetadata();

            result.Titles       = new List <string>();
            result.Creators     = new List <EpubMetadataCreator>();
            result.Subjects     = new List <string>();
            result.Publishers   = new List <string>();
            result.Contributors = new List <EpubMetadataContributor>();
            result.Dates        = new List <EpubMetadataDate>();
            result.Types        = new List <string>();
            result.Formats      = new List <string>();
            result.Identifiers  = new List <EpubMetadataIdentifier>();
            result.Sources      = new List <string>();
            result.Languages    = new List <string>();
            result.Relations    = new List <string>();
            result.Coverages    = new List <string>();
            result.Rights       = new List <string>();
            result.MetaItems    = new List <EpubMetadataMeta>();

            // Walk every node inside the metadata element in document order and store the known ones
            // in the EpubMetadata instance. Each element name is lower-cased before it is matched, so
            // a package that spells its element names in upper case does not break the reader.
            bool isMetadataAvailable = await reader.ReadToFollowingAsync("metadata", "http://www.idpf.org/2007/opf");

            if (!isMetadataAvailable)
            {
                throw new Exception("EPUB parsing error: metadata not found in the package.");
            }

            // A self-closing <metadata/> has no end tag to stop at; scanning for one would
            // consume the rest of the document.
            if (reader.IsEmptyElement)
            {
                return(result);
            }

            bool hasNode = await reader.ReadAsync();

            while (hasNode && !(reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "metadata"))
            {
                // ReadElementContentAsString() leaves the reader on the node AFTER the element's end
                // tag. Calling ReadAsync() again at that point would skip that node: an adjacent
                // sibling element, or the closing </metadata> tag itself. Track whether the reader
                // has already been moved forward during this iteration.
                bool alreadyOnNextNode = false;

                if (reader.NodeType == XmlNodeType.Element)
                {
                    // Every case below reads the element content, except where it resets the flag.
                    alreadyOnNextNode = true;

                    switch (reader.LocalName.ToLowerInvariant())
                    {
                    case "title":
                        result.Titles.Add(reader.ReadElementContentAsString());
                        break;

                    case "creator":
                        // Attributes must be read before the content: reading the content moves the reader.
                        EpubMetadataCreator creator = new EpubMetadataCreator();
                        creator.Role    = reader.GetAttribute("opf:role");
                        creator.FileAs  = reader.GetAttribute("opf:file-as");
                        creator.Creator = reader.ReadElementContentAsString();
                        result.Creators.Add(creator);
                        break;

                    case "subject":
                        result.Subjects.Add(reader.ReadElementContentAsString());
                        break;

                    case "description":
                        result.Description = reader.ReadElementContentAsString();
                        break;

                    case "publisher":
                        result.Publishers.Add(reader.ReadElementContentAsString());
                        break;

                    case "contributor":
                        EpubMetadataContributor contributor = new EpubMetadataContributor();
                        contributor.Role        = reader.GetAttribute("opf:role");
                        contributor.FileAs      = reader.GetAttribute("opf:file-as");
                        contributor.Contributor = reader.ReadElementContentAsString();
                        result.Contributors.Add(contributor);
                        break;

                    case "date":
                        EpubMetadataDate date = new EpubMetadataDate();
                        date.Event = reader.GetAttribute("opf:event");
                        date.Date  = reader.ReadElementContentAsString();
                        result.Dates.Add(date);
                        break;

                    case "type":
                        result.Types.Add(reader.ReadElementContentAsString());
                        break;

                    case "format":
                        result.Formats.Add(reader.ReadElementContentAsString());
                        break;

                    case "identifier":
                        EpubMetadataIdentifier identifier = new EpubMetadataIdentifier();
                        identifier.Id         = reader.GetAttribute("id");
                        identifier.Scheme     = reader.GetAttribute("opf:scheme");
                        identifier.Identifier = reader.ReadElementContentAsString();
                        result.Identifiers.Add(identifier);
                        break;

                    case "source":
                        result.Sources.Add(reader.ReadElementContentAsString());
                        break;

                    case "language":
                        result.Languages.Add(reader.ReadElementContentAsString());
                        break;

                    case "relation":
                        result.Relations.Add(reader.ReadElementContentAsString());
                        break;

                    case "coverage":
                        result.Coverages.Add(reader.ReadElementContentAsString());
                        break;

                    case "rights":
                        result.Rights.Add(reader.ReadElementContentAsString());
                        break;

                    // "meta" is read from attributes only for EPUB 2, and from attributes plus
                    // element content for EPUB 3.
                    case "meta":
                        if (epubVersion == EpubVersion.EPUB_2)
                        {
                            EpubMetadataMeta meta = new EpubMetadataMeta();
                            meta.Name    = reader.GetAttribute("name");
                            meta.Content = reader.GetAttribute("content");
                            result.MetaItems.Add(meta);

                            // Only attributes were read; the reader is still on this element.
                            alreadyOnNextNode = false;
                        }
                        else
                        if (epubVersion == EpubVersion.EPUB_3)
                        {
                            EpubMetadataMeta meta = new EpubMetadataMeta();
                            meta.Id       = reader.GetAttribute("id");
                            meta.Refines  = reader.GetAttribute("refines");
                            meta.Property = reader.GetAttribute("property");
                            meta.Scheme   = reader.GetAttribute("scheme");
                            meta.Content  = reader.ReadElementContentAsString();
                            result.MetaItems.Add(meta);
                        }
                        else
                        {
                            alreadyOnNextNode = false;
                        }
                        break;

                    default:
                        // Unknown element: nothing was read, so the reader has not moved.
                        alreadyOnNextNode = false;
                        break;
                    }
                }

                hasNode = alreadyOnNextNode ? !reader.EOF : await reader.ReadAsync();
            }

            return(result);
        }
Пример #8
0
        /// <summary>
        /// Streams the package document stored at <paramref name="rootFilePath"/> in the archive and
        /// parses, in document order, its version, metadata, manifest, spine and (when present) guide.
        /// </summary>
        /// <param name="epubArchive">Opened EPUB archive to read the package document from.</param>
        /// <param name="rootFilePath">Entry name of the package document inside the archive.</param>
        /// <returns>The parsed package; <c>Guide</c> is only assigned when a guide element exists.</returns>
        /// <exception cref="Exception">
        /// The archive entry, the package element or its version attribute is missing, or the version
        /// is neither "2.0" nor "3.0". The section readers called from here may throw as well.
        /// </exception>
        public static async Task <EpubPackage> ReadPackageAsync(ZipArchive epubArchive, string rootFilePath)
        {
            EpubPackage result = new EpubPackage();

            XmlReaderSettings xmlReaderSettings = new XmlReaderSettings
            {
                // XmlResolver = null,
                Async         = true,
                DtdProcessing = DtdProcessing.Ignore
            };

            ZipArchiveEntry rootFileEntry = epubArchive.GetEntry(rootFilePath);

            if (rootFileEntry == null)
            {
                throw new Exception(string.Format("EPUB parsing error: {0} file not found in archive.", rootFilePath));
            }
            // Start parsing the package document (content.opf).
            using (Stream containerStream = rootFileEntry.Open())
            {
                using (XmlReader xmlReader = XmlReader.Create(containerStream, xmlReaderSettings))
                {
                    bool isPackagePresent = await xmlReader.ReadToFollowingAsync("package", "http://www.idpf.org/2007/opf");

                    // Without this check a document that has no package element would be reported
                    // below as "Unsupported EPUB version: ." because the reader's value is empty.
                    if (!isPackagePresent)
                    {
                        throw new Exception("EPUB parsing error: package not found in the root file.");
                    }

                    // Only the version attribute of <package> is needed here; unique-identifier is not read.
                    if (!xmlReader.MoveToAttribute("version"))
                    {
                        throw new Exception("EPUB parsing error: EPUB version is not specified in the package.");
                    }
                    string epubVersionValue = xmlReader.Value;
                    if (epubVersionValue == "2.0")
                    {
                        result.EpubVersion = EpubVersion.EPUB_2;
                    }
                    else
                    if (epubVersionValue == "3.0")
                    {
                        result.EpubVersion = EpubVersion.EPUB_3;
                    }
                    else
                    {
                        throw new Exception(String.Format("Unsupported EPUB version: {0}.", epubVersionValue));
                    }

                    // The sections are read with the same forward-only reader, so the call order
                    // below follows the document order: metadata, manifest, spine, guide.
                    result.Metadata = await ReadMetadataAsync(xmlReader, result.EpubVersion);
                    result.Manifest = await ReadManifestAsync(xmlReader);
                    result.Spine    = await ReadSpineAsync(xmlReader);

                    // The guide is optional; its absence is not an error.
                    bool isGuidePresent = await xmlReader.ReadToFollowingAsync("guide", "http://www.idpf.org/2007/opf");

                    if (isGuidePresent)
                    {
                        result.Guide = await ReadGuideAsync(xmlReader);
                    }
                }
            }

            return(result);
        }
Пример #9
0
 /// <summary>
 /// Replaces <c>metadata</c> with a new <c>EpubMetadata</c> whose entries are named
 /// <c>Metadata</c> wrappers around the values exposed by <c>epub</c>.
 /// </summary>
 private void LoadAllMetadata()
 {
     // The new instance is stored first, so the assignments below fill it in place.
     metadata = new EpubMetadata();
     // NOTE(review): "Contributer" is the spelling of the member on epub — presumably an
     // external API name; confirm before renaming.
     metadata.Contributor = new Metadata { Name = "Contributor", Value = epub.Contributer };
     metadata.Coverage = new Metadata { Name = "Coverage", Value = epub.Coverage};
     // The creator list is published under the name "Author".
     metadata.Author = new Metadata { Name = "Author", Value = epub.Creator };
     // Only date entries whose Type is "publication" are kept, projected to their Date value.
     metadata.PublishDate = new Metadata { Name = "PublishDate", Value = epub.Date.Where(d => d.Type == "publication").Select(d => d.Date) };
     metadata.Description = new Metadata { Name = "Description", Value = epub.Description };
     metadata.Format = new Metadata { Name = "Format", Value = epub.Format };
     metadata.ID = new Metadata { Name = "ID", Value = epub.ID };
     metadata.Language = new Metadata { Name = "Language", Value = epub.Language };
     metadata.Publisher = new Metadata { Name = "Publisher", Value = epub.Publisher };
     metadata.Relation = new Metadata { Name = "Relation", Value = epub.Relation };
     metadata.Rights = new Metadata { Name = "Rights", Value = epub.Rights };
     metadata.Source = new Metadata { Name = "Source", Value = epub.Source };
     metadata.Subject = new Metadata { Name = "Subject", Value = epub.Subject };
     metadata.Title = new Metadata { Name = "Title", Value = epub.Title };
     metadata.Type = new Metadata { Name = "Type", Value = epub.Type };
     // UUID is wrapped in a one-element array — presumably because Value expects a
     // collection like the other entries; confirm against the Metadata type.
     metadata.UUID = new Metadata { Name = "UUID", Value = new[] { epub.UUID } };
 }