/// <summary>
/// Reads the OPF <c>metadata</c> section and collects its entries into an <see cref="EpubMetadata"/> instance.
/// </summary>
/// <param name="reader">Reader over the package document. It is advanced to the <c>metadata</c> element and then through its content.</param>
/// <param name="epubVersion">
/// Selects how <c>meta</c> elements are read: for EPUB 2 the <c>name</c>/<c>content</c> attributes are used,
/// for EPUB 3 the <c>id</c>/<c>refines</c>/<c>property</c>/<c>scheme</c> attributes plus the element text.
/// </param>
/// <returns>The collected metadata. Every list property is initialized, even when no matching element exists.</returns>
/// <exception cref="Exception">Thrown when no <c>metadata</c> element is found.</exception>
private static async System.Threading.Tasks.Task<EpubMetadata> ReadMetadataAsync(XmlReader reader, EpubVersion epubVersion)
{
    EpubMetadata result = new EpubMetadata();
    result.Titles = new List<string>();
    result.Creators = new List<EpubMetadataCreator>();
    result.Subjects = new List<string>();
    result.Publishers = new List<string>();
    result.Contributors = new List<EpubMetadataContributor>();
    result.Dates = new List<EpubMetadataDate>();
    result.Types = new List<string>();
    result.Formats = new List<string>();
    result.Identifiers = new List<EpubMetadataIdentifier>();
    result.Sources = new List<string>();
    result.Languages = new List<string>();
    result.Relations = new List<string>();
    result.Coverages = new List<string>();
    result.Rights = new List<string>();
    result.MetaItems = new List<EpubMetadataMeta>();

    // Walk every node inside <metadata> in document order and store the recognized ones in the result.
    // Element names are lower-cased before matching so that unusual capitalization in a package file
    // does not cause an entry to be missed.
    bool isMetadataAvailable = await reader.ReadToFollowingAsync("metadata", "http://www.idpf.org/2007/opf");
    if (!isMetadataAvailable)
    {
        throw new Exception("EPUB parsing error: metadata not found in the package.");
    }

    bool hasNode = await reader.ReadAsync();
    while (hasNode && !(reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "metadata"))
    {
        // Reading element content leaves the reader on the node that FOLLOWS the closing tag.
        // In that case the loop must not advance again, otherwise the next sibling (or the closing
        // </metadata> tag itself) is skipped when the file has no whitespace between elements.
        bool contentConsumed = false;

        if (reader.NodeType == XmlNodeType.Element)
        {
            contentConsumed = true;
            switch (reader.LocalName.ToLowerInvariant())
            {
                case "title":
                    result.Titles.Add(await reader.ReadElementContentAsStringAsync());
                    break;
                case "creator":
                    // Attributes must be fetched before the content read moves the reader off the element.
                    EpubMetadataCreator creator = new EpubMetadataCreator();
                    creator.Role = reader.GetAttribute("opf:role");
                    creator.FileAs = reader.GetAttribute("opf:file-as");
                    creator.Creator = await reader.ReadElementContentAsStringAsync();
                    result.Creators.Add(creator);
                    break;
                case "subject":
                    result.Subjects.Add(await reader.ReadElementContentAsStringAsync());
                    break;
                case "description":
                    result.Description = await reader.ReadElementContentAsStringAsync();
                    break;
                case "publisher":
                    result.Publishers.Add(await reader.ReadElementContentAsStringAsync());
                    break;
                case "contributor":
                    EpubMetadataContributor contributor = new EpubMetadataContributor();
                    contributor.Role = reader.GetAttribute("opf:role");
                    contributor.FileAs = reader.GetAttribute("opf:file-as");
                    contributor.Contributor = await reader.ReadElementContentAsStringAsync();
                    result.Contributors.Add(contributor);
                    break;
                case "date":
                    EpubMetadataDate date = new EpubMetadataDate();
                    date.Event = reader.GetAttribute("opf:event");
                    date.Date = await reader.ReadElementContentAsStringAsync();
                    result.Dates.Add(date);
                    break;
                case "type":
                    result.Types.Add(await reader.ReadElementContentAsStringAsync());
                    break;
                case "format":
                    result.Formats.Add(await reader.ReadElementContentAsStringAsync());
                    break;
                case "identifier":
                    EpubMetadataIdentifier identifier = new EpubMetadataIdentifier();
                    identifier.Id = reader.GetAttribute("id");
                    identifier.Scheme = reader.GetAttribute("opf:scheme");
                    identifier.Identifier = await reader.ReadElementContentAsStringAsync();
                    result.Identifiers.Add(identifier);
                    break;
                case "source":
                    result.Sources.Add(await reader.ReadElementContentAsStringAsync());
                    break;
                case "language":
                    result.Languages.Add(await reader.ReadElementContentAsStringAsync());
                    break;
                case "relation":
                    result.Relations.Add(await reader.ReadElementContentAsStringAsync());
                    break;
                case "coverage":
                    result.Coverages.Add(await reader.ReadElementContentAsStringAsync());
                    break;
                case "rights":
                    result.Rights.Add(await reader.ReadElementContentAsStringAsync());
                    break;
                // "meta" is an optional node; its shape depends on the EPUB version.
                case "meta":
                    if (epubVersion == EpubVersion.EPUB_2)
                    {
                        // EPUB 2: everything is carried by attributes, the element content is not read.
                        EpubMetadataMeta meta = new EpubMetadataMeta();
                        meta.Name = reader.GetAttribute("name");
                        meta.Content = reader.GetAttribute("content");
                        result.MetaItems.Add(meta);
                        contentConsumed = false;
                    }
                    else if (epubVersion == EpubVersion.EPUB_3)
                    {
                        // EPUB 3: the value is the element text, refined by the attributes.
                        EpubMetadataMeta meta = new EpubMetadataMeta();
                        meta.Id = reader.GetAttribute("id");
                        meta.Refines = reader.GetAttribute("refines");
                        meta.Property = reader.GetAttribute("property");
                        meta.Scheme = reader.GetAttribute("scheme");
                        meta.Content = await reader.ReadElementContentAsStringAsync();
                        result.MetaItems.Add(meta);
                    }
                    else
                    {
                        contentConsumed = false;
                    }
                    break;
                default:
                    // Unrecognized element: step into/over it node by node.
                    contentConsumed = false;
                    break;
            }
        }

        if (contentConsumed)
        {
            hasNode = !reader.EOF;
        }
        else
        {
            hasNode = await reader.ReadAsync();
        }
    }

    return result;
}
/// <summary>
/// Reads the OPF <c>manifest</c> section and returns its <c>item</c> entries.
/// </summary>
/// <param name="reader">Reader over the package document. It is advanced to the <c>manifest</c> element and then through its content.</param>
/// <returns>The manifest containing one entry per <c>item</c> element.</returns>
/// <exception cref="Exception">
/// Thrown when no <c>manifest</c> element is found, or when an item lacks an id, href or media type.
/// </exception>
private static async System.Threading.Tasks.Task<EpubManifest> ReadManifestAsync(XmlReader reader)
{
    EpubManifest result = new EpubManifest();

    bool isManifestFound = await reader.ReadToFollowingAsync("manifest", "http://www.idpf.org/2007/opf");
    if (!isManifestFound)
    {
        throw new Exception("EPUB parsing error: manifest declarations not found in the package.");
    }

    while (await reader.ReadAsync() && !(reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "manifest"))
    {
        // Only the start tag of an <item> describes a manifest entry. Closing tags of
        // <item ...></item>, processing instructions and foreign elements also have a
        // non-empty local name, and must not be mistaken for an (empty, invalid) item.
        if (reader.NodeType != XmlNodeType.Element || reader.LocalName.ToLowerInvariant() != "item")
        {
            continue;
        }

        EpubManifestItem manifestItem = new EpubManifestItem();
        while (reader.MoveToNextAttribute())
        {
            switch (reader.LocalName.ToLowerInvariant())
            {
                case "id":
                    manifestItem.Id = reader.Value;
                    break;
                case "href":
                    manifestItem.Href = reader.Value;
                    break;
                case "media-type":
                    manifestItem.MediaType = reader.Value;
                    break;
                case "required-namespace":
                    manifestItem.RequiredNamespace = reader.Value;
                    break;
                case "required-modules":
                    manifestItem.RequiredModules = reader.Value;
                    break;
                case "fallback":
                    manifestItem.Fallback = reader.Value;
                    break;
                case "fallback-style":
                    manifestItem.FallbackStyle = reader.Value;
                    break;
            }
        }

        // Return from the last attribute to the owning element before continuing.
        reader.MoveToElement();

        if (String.IsNullOrWhiteSpace(manifestItem.Id))
        {
            throw new Exception("Incorrect EPUB manifest: item ID is missing");
        }
        if (String.IsNullOrWhiteSpace(manifestItem.Href))
        {
            throw new Exception("Incorrect EPUB manifest: item href is missing");
        }
        if (String.IsNullOrWhiteSpace(manifestItem.MediaType))
        {
            throw new Exception("Incorrect EPUB manifest: item media type is missing");
        }

        result.Add(manifestItem);
    }

    return result;
}
/// <summary>
/// Reads the OPF <c>spine</c> section: its <c>toc</c> attribute and the <c>itemref</c> entries.
/// </summary>
/// <param name="reader">Reader over the package document. It is advanced to the <c>spine</c> element and then through its content.</param>
/// <returns>The spine with <c>Toc</c> set and one entry per <c>itemref</c> element.</returns>
/// <exception cref="Exception">
/// Thrown when no <c>spine</c> element is found, or when its <c>toc</c> attribute is missing or blank.
/// </exception>
private static async Task<EpubSpine> ReadSpineAsync(XmlReader reader)
{
    EpubSpine result = new EpubSpine();

    bool spineFound = await reader.ReadToFollowingAsync("spine", "http://www.idpf.org/2007/opf");
    if (!spineFound)
    {
        throw new Exception("EPUB parsing error: spine declarations not found in the package.");
    }

    string toc = reader.GetAttribute("toc");
    if (String.IsNullOrWhiteSpace(toc))
    {
        throw new Exception("Incorrect EPUB spine: TOC attribute is missing or empty");
    }
    result.Toc = toc;

    while (await reader.ReadAsync() && !(reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "spine"))
    {
        // Match start tags only: the closing tag of <itemref ...></itemref> carries the same
        // local name and would otherwise add a second, empty item reference.
        if (reader.NodeType != XmlNodeType.Element || reader.LocalName.ToLowerInvariant() != "itemref")
        {
            continue;
        }

        EpubSpineItemRef spineItemRef = new EpubSpineItemRef();
        // An item is linear unless it is explicitly marked linear="no".
        spineItemRef.IsLinear = true;
        while (reader.MoveToNextAttribute())
        {
            switch (reader.LocalName.ToLowerInvariant())
            {
                case "idref":
                    spineItemRef.IdRef = reader.Value;
                    break;
                case "linear":
                    if (String.Equals(reader.Value, "no", StringComparison.OrdinalIgnoreCase))
                    {
                        spineItemRef.IsLinear = false;
                    }
                    break;
            }
        }

        // Return from the last attribute to the owning element before continuing.
        reader.MoveToElement();
        result.Add(spineItemRef);
    }

    return result;
}
/// <summary>
/// Reads the NCX <c>head</c> section and returns its <c>meta</c> entries.
/// </summary>
/// <param name="reader">Reader over the NCX document. It is advanced to the <c>head</c> element and then through its content.</param>
/// <returns>The navigation head containing one entry per <c>meta</c> element.</returns>
/// <exception cref="Exception">
/// Thrown when no <c>head</c> element is found, when a meta has a missing or blank name,
/// or when a meta has no <c>content</c> attribute.
/// </exception>
private static async Task<EpubNavigationHead> ReadNavigationHeadAsync(XmlReader reader)
{
    EpubNavigationHead result = new EpubNavigationHead();

    // "ncx:head" is the starting point.
    bool headFound = await reader.ReadToFollowingAsync("head", "http://www.daisy.org/z3986/2005/ncx/");
    if (!headFound)
    {
        throw new Exception("EPUB parsing error: head section not found in the .toc file.");
    }

    while (await reader.ReadAsync() && !(reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "head"))
    {
        // Match start tags only: the closing tag of <meta ...></meta> carries the same local
        // name and would otherwise be validated as an empty meta and rejected.
        if (reader.NodeType != XmlNodeType.Element || reader.LocalName.ToLowerInvariant() != "meta")
        {
            continue;
        }

        EpubNavigationHeadMeta meta = new EpubNavigationHeadMeta();
        while (reader.MoveToNextAttribute())
        {
            switch (reader.LocalName.ToLowerInvariant())
            {
                case "name":
                    meta.Name = reader.Value;
                    break;
                case "content":
                    meta.Content = reader.Value;
                    break;
                case "scheme":
                    meta.Scheme = reader.Value;
                    break;
            }
        }

        // Return from the last attribute to the owning element before continuing.
        reader.MoveToElement();

        if (String.IsNullOrWhiteSpace(meta.Name))
        {
            throw new Exception("Incorrect EPUB navigation meta: meta name is missing");
        }
        // An empty content value is accepted; only a missing attribute is an error.
        if (meta.Content == null)
        {
            throw new Exception("Incorrect EPUB navigation meta: meta content is missing");
        }

        result.Add(meta);
    }

    return result;
}
/// <summary>
/// Reads the NCX <c>docTitle</c> section and returns the content of its <c>text</c> elements.
/// </summary>
/// <param name="reader">Reader over the NCX document. It is advanced to the <c>docTitle</c> element and then through its content.</param>
/// <returns>The document title containing one entry per <c>text</c> element.</returns>
/// <exception cref="Exception">Thrown when no <c>docTitle</c> element is found.</exception>
private static async Task<EpubNavigationDocTitle> ReadNavigationDocTitleAsync(XmlReader reader)
{
    EpubNavigationDocTitle result = new EpubNavigationDocTitle();

    bool titleFound = await reader.ReadToFollowingAsync("docTitle", "http://www.daisy.org/z3986/2005/ncx/");
    if (!titleFound)
    {
        throw new Exception("EPUB parsing error: title section not found in the .toc file.");
    }

    bool hasNode = await reader.ReadAsync();
    while (hasNode && !(reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "docTitle"))
    {
        if (reader.NodeType == XmlNodeType.Element && reader.LocalName.ToLowerInvariant() == "text")
        {
            // Reading the content leaves the reader on the node AFTER </text>. Advancing again
            // here would step over </docTitle> (or the next <text>) when the file has no
            // whitespace between tags, so the current node is re-examined instead.
            result.Add(await reader.ReadElementContentAsStringAsync());
            hasNode = !reader.EOF;
        }
        else
        {
            hasNode = await reader.ReadAsync();
        }
    }

    return result;
}
/// <summary>
/// Collects the text nodes found between the first NCX <c>docAuthor</c> element and the
/// start of the <c>navMap</c> element, one author entry per text node.
/// </summary>
/// <param name="reader">Reader over the NCX document. It is advanced to the first <c>docAuthor</c> element and then up to <c>navMap</c> (or the end of the document).</param>
/// <returns>The collected authors; empty when no text node is encountered.</returns>
private static async Task<List<EpubNavigationDocAuthor>> ReadNavigationAuthorsAsync(XmlReader reader)
{
    List<EpubNavigationDocAuthor> result = new List<EpubNavigationDocAuthor>();

    // A missing docAuthor is not an error, so the outcome of the search is intentionally ignored.
    // (A warning could be logged here if a log were available.)
    await reader.ReadToFollowingAsync("docAuthor", "http://www.daisy.org/z3986/2005/ncx/");

    // TODO: This code is weak: there is no reliable way here to extract exactly the docAuthor
    // nodes and parse them. It relies on the basic structure that requires at least one navMap
    // node with all docAuthor nodes placed before it. Consider rewriting with LINQ to XML.
    //
    // NOTE: IsStartElement() moves the reader forward to the next content node as a side effect,
    // so the loop condition must stay exactly in this order.
    while (await reader.ReadAsync() && !(reader.IsStartElement() && reader.LocalName == "navMap"))
    {
        if (reader.NodeType != XmlNodeType.Text)
        {
            continue;
        }

        EpubNavigationDocAuthor author = new EpubNavigationDocAuthor();
        author.Add(reader.Value);
        result.Add(author);
    }

    return result;
}
/// <summary>
/// Moves to the next NCX <c>content</c> element and reads its <c>id</c> and <c>src</c> attributes.
/// </summary>
/// <param name="reader">Reader positioned before the <c>content</c> element to read.</param>
/// <returns>The navigation content with <c>Id</c> and <c>Source</c> taken from the attributes.</returns>
/// <exception cref="Exception">Thrown when no non-blank <c>src</c> value was read.</exception>
private static async Task<EpubNavigationContent> ReadNavigationContentAsync(XmlReader reader)
{
    EpubNavigationContent content = new EpubNavigationContent();

    // The outcome of the search is not inspected: when nothing is found there are no
    // attributes to visit and the source check below fails.
    await reader.ReadToFollowingAsync("content", "http://www.daisy.org/z3986/2005/ncx/");

    while (reader.MoveToNextAttribute())
    {
        string attributeName = reader.LocalName.ToLowerInvariant();
        if (attributeName == "id")
        {
            content.Id = reader.Value;
        }
        else if (attributeName == "src")
        {
            content.Source = reader.Value;
        }
    }

    if (String.IsNullOrWhiteSpace(content.Source))
    {
        throw new Exception("Incorrect EPUB navigation content: content source is missing");
    }

    // Step back from the last attribute onto the element that owns it.
    reader.MoveToElement();
    return content;
}
/// <summary>
/// Reads the NCX navigation map, starting from the <c>navMap</c> node.
/// </summary>
/// <param name="reader">Reader over the NCX document. It is advanced to the <c>navMap</c> element and then through its content.</param>
/// <returns>The navigation map with one entry per <c>navPoint</c> encountered by this loop.</returns>
/// <exception cref="Exception">Thrown when no <c>navMap</c> element is found.</exception>
private static async Task<EpubNavigationMap> ReadNavigationMapAsync(XmlReader reader)
{
    EpubNavigationMap result = new EpubNavigationMap();

    bool mapFound = await reader.ReadToFollowingAsync("navMap", "http://www.daisy.org/z3986/2005/ncx/");
    if (!mapFound)
    {
        throw new Exception("EPUB parsing error: navMap section not found in the .toc file.");
    }

    // Read until the closing </navMap> tag appears.
    while (await reader.ReadAsync() && !(reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "navMap"))
    {
        // Look for <navPoint> start tags; each one may contain any depth of nesting.
        if ((reader.LocalName == "navPoint") && (reader.NodeType != XmlNodeType.EndElement))
        {
            // A subtree reader limits the scope to this single navPoint. The using block
            // guarantees it is disposed even if parsing the navPoint throws; disposing it
            // also leaves the parent reader at the end of this navPoint.
            using (XmlReader subReader = reader.ReadSubtree())
            {
                EpubNavigationPoint navigationPoint = await ReadNavigationPointAsync(subReader);
                result.Add(navigationPoint);
            }
        }
    }

    return result;
}