/// <summary>
/// Builds one plain-text string from the book's HTML files in reading order,
/// leaving out every file whose name matches one of the ignore rules.
/// </summary>
/// <param name="epubBook">Book whose <c>SpecialResources.HtmlInReadingOrder</c> entries are converted.</param>
/// <param name="ignoredFiles">
/// Name fragments; a file is skipped when its lower-cased name contains one of them.
/// Entries are compared as given, so they should be lower-case. <c>null</c> means "ignore nothing".
/// </param>
/// <param name="ignoredFileEndings">
/// Name suffixes; a file is skipped when its lower-cased name ends with one of them.
/// Entries are compared as given, so they should be lower-case. <c>null</c> means "ignore nothing".
/// </param>
/// <returns>
/// The output of <c>HtmlProcessor.GetContentAsPlainText</c> for each kept file, each followed by
/// a '\n', with leading and trailing whitespace of the final string trimmed.
/// </returns>
public static string PayloadToPlainText(this EpubBook epubBook, List<string> ignoredFiles, List<string> ignoredFileEndings)
{
    var builder = new StringBuilder();

    foreach (var epubTextFile in epubBook.SpecialResources.HtmlInReadingOrder)
    {
        // Lower-case once per file, culture-invariantly: file names are identifiers, not
        // linguistic text, and ToLower() maps "I" differently under e.g. Turkish cultures.
        var fileName = epubTextFile.FileName.ToLowerInvariant();

        var matchesIgnoredName = ignoredFiles != null
            && ignoredFiles.Any(ignoredFile => fileName.Contains(ignoredFile));

        // EndsWith(string) without a comparison is culture-sensitive; use ordinal matching so
        // the suffix test behaves the same as the (ordinal) Contains test above.
        var matchesIgnoredEnding = ignoredFileEndings != null
            && ignoredFileEndings.Any(ending => fileName.EndsWith(ending, System.StringComparison.Ordinal));

        if (matchesIgnoredName || matchesIgnoredEnding)
        {
            continue;
        }

        var contentAsPlainText = HtmlProcessor.GetContentAsPlainText(WebUtility.HtmlDecode(epubTextFile.TextContent));
        builder.Append(contentAsPlainText);
        builder.Append('\n');
    }

    return builder.ToString().Trim();
}
/// <summary>
/// Builds the chapter list for a book, preferring the navigation document's "toc" nav
/// and falling back to the NCX nav map.
/// </summary>
/// <param name="book">Book whose <c>Format</c> has already been populated.</param>
/// <returns>
/// Chapters from the toc nav when a nav document with a toc nav exists; otherwise chapters
/// from the NCX when present; otherwise an empty list.
/// </returns>
private static List<EpubChapter> LoadChapters(EpubBook book)
{
    var navDocument = book.Format.Nav;
    if (navDocument != null)
    {
        // SingleOrDefault: at most one toc nav is expected; more than one throws.
        var toc = navDocument.Body.Navs.SingleOrDefault(nav => nav.Type == NavNav.Attributes.TypeValues.Toc);
        if (toc != null)
        {
            return LoadChaptersFromNav(book.Format.Paths.NavAbsolutePath, toc.Dom);
        }
    }

    if (book.Format.Ncx == null)
    {
        return new List<EpubChapter>();
    }

    return LoadChaptersFromNcx(book.Format.Paths.NcxAbsolutePath, book.Format.Ncx.NavMap.NavPoints);
}
/// <summary>
/// Looks up the bytes of the book's cover image.
/// </summary>
/// <param name="book">Book with <c>Format</c> and <c>Resources</c> already loaded.</param>
/// <returns>
/// The content of the image resource whose file name equals the cover path reported by the
/// OPF, or <c>null</c> when the OPF reports no cover path or no image has that file name.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="book"/> is <c>null</c>, or its <c>Format</c> is <c>null</c>.
/// </exception>
private static byte[] LoadCoverImage(EpubBook book)
{
    if (book == null)
    {
        throw new ArgumentNullException(nameof(book));
    }

    if (book.Format == null)
    {
        throw new ArgumentNullException(nameof(book.Format));
    }

    var coverFileName = book.Format.Opf.FindCoverPath();
    if (coverFileName != null)
    {
        // SingleOrDefault: throws if several images share the cover's file name.
        var matchingImage = book.Resources.Images.SingleOrDefault(image => image.FileName == coverFileName);
        if (matchingImage != null)
        {
            return matchingImage.Content;
        }
    }

    return null;
}
/// <summary>
/// Returns the path of a JPEG cover for the book at <c>FullPath</c>, creating the file in the
/// "Library\Covers" folder under the application base directory when necessary.
/// </summary>
/// <returns>
/// When the book has a cover image: the path of a newly saved JPEG named
/// "{Title} {0..49}.jpg" (characters that are invalid in file names are replaced by '_').
/// Otherwise: the path of the shared "defoltCover.jpg", copied from the "images" folder
/// on first use.
/// </returns>
private string GetCoverPath()
{
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
    string coversDirectory = Path.Combine(baseDirectory, "Library", "Covers");

    // Saving or copying into a missing folder fails; CreateDirectory does nothing if it exists.
    Directory.CreateDirectory(coversDirectory);

    EpubSharp.EpubBook book = EpubReader.Read(FullPath);
    var cover = book.CoverImage;

    if (cover != null)
    {
        // Book titles routinely contain ':', '?', '/' etc., which are not legal in file names.
        string safeTitle = string.Join("_", (Title ?? string.Empty).Split(Path.GetInvalidFileNameChars()));

        // NOTE(review): the suffix has only 50 possible values, so two covers for the same
        // title can still collide and overwrite each other — kept to preserve the naming scheme.
        Random rnd = new Random();
        string coverName = string.Format("{0} {1}.jpg", safeTitle, Convert.ToString(rnd.Next(50)));
        string coverPath = Path.Combine(coversDirectory, coverName);

        // Image wraps native GDI+ resources; release them as soon as the file is written.
        using (Image image = ByteArrayToImage(cover))
        {
            image.Save(coverPath, System.Drawing.Imaging.ImageFormat.Jpeg);
        }

        return coverPath;
    }

    string defaultCoverPath = Path.Combine(coversDirectory, "defoltCover.jpg");
    if (!File.Exists(defaultCoverPath))
    {
        File.Copy(Path.Combine(baseDirectory, "images", "defoltCover.jpg"), defaultCoverPath);
    }

    return defaultCoverPath;
}
/// <summary>
/// Collects the book's special resources: the OCF file, the OPF file, and the
/// XHTML files listed by the spine, in spine order.
/// </summary>
/// <param name="epubArchive">Archive from which the OCF and OPF bytes are loaded.</param>
/// <param name="book">Book whose <c>Format</c> and <c>Resources</c> are already populated.</param>
/// <returns>The populated <c>EpubSpecialResources</c>.</returns>
private static EpubSpecialResources LoadSpecialResources(ZipArchive epubArchive, EpubBook book)
{
    var ocfFile = new EpubTextFile
    {
        FileName = Constants.OcfPath,
        ContentType = EpubContentType.Xml,
        MimeType = ContentType.ContentTypeToMimeType[EpubContentType.Xml],
        Content = epubArchive.LoadBytes(Constants.OcfPath)
    };

    var opfFile = new EpubTextFile
    {
        FileName = book.Format.Ocf.RootFilePath,
        ContentType = EpubContentType.Xml,
        MimeType = ContentType.ContentTypeToMimeType[EpubContentType.Xml],
        Content = epubArchive.LoadBytes(book.Format.Ocf.RootFilePath)
    };

    var result = new EpubSpecialResources
    {
        Ocf = ocfFile,
        Opf = opfFile,
        HtmlInReadingOrder = new List<EpubTextFile>()
    };

    // Manifest id -> href, restricted to items whose media type maps to XHTML 1.1.
    var xhtmlHrefsById = book.Format.Opf.Manifest.Items
        .Where(manifestItem =>
        {
            EpubContentType mappedType;
            return ContentType.MimeTypeToContentType.TryGetValue(manifestItem.MediaType, out mappedType)
                && mappedType == EpubContentType.Xhtml11;
        })
        .ToDictionary(manifestItem => manifestItem.Id, manifestItem => manifestItem.Href);

    foreach (var spineRef in book.Format.Opf.Spine.ItemRefs)
    {
        string href;
        if (xhtmlHrefsById.TryGetValue(spineRef.IdRef, out href))
        {
            // Spine entries without a matching loaded HTML resource are silently skipped.
            var htmlFile = book.Resources.Html.SingleOrDefault(candidate => candidate.FileName == href);
            if (htmlFile != null)
            {
                result.HtmlInReadingOrder.Add(htmlFile);
            }
        }
    }

    return result;
}
/// <summary>
/// Loads every file listed in the OPF manifest from the archive and sorts it into the
/// matching bucket of an <c>EpubResources</c> (Html, Css, Images, Fonts or Other).
/// </summary>
/// <param name="epubArchive">Archive that contains the manifest's files.</param>
/// <param name="book">Book whose <c>Format.Opf</c> and <c>Format.Ocf</c> are already populated.</param>
/// <returns>The populated <c>EpubResources</c>.</returns>
/// <exception cref="EpubParseException">
/// A manifest item is missing from the archive, or its entry is longer than <c>int.MaxValue</c> bytes.
/// </exception>
private static EpubResources LoadResources(ZipArchive epubArchive, EpubBook book)
{
    var loaded = new EpubResources();

    foreach (var manifestItem in book.Format.Opf.Manifest.Items)
    {
        // Manifest hrefs are resolved against the directory of the OPF root file.
        var archivePath = PathExt.Combine(Path.GetDirectoryName(book.Format.Ocf.RootFilePath), manifestItem.Href);
        var archiveEntry = epubArchive.GetEntryImproved(archivePath);
        if (archiveEntry == null)
        {
            throw new EpubParseException($"file {archivePath} not found in archive.");
        }

        if (archiveEntry.Length > int.MaxValue)
        {
            throw new EpubParseException($"file {archivePath} is bigger than 2 Gb.");
        }

        var href = manifestItem.Href;
        var mediaType = manifestItem.MediaType;

        // Media types without a known mapping are treated as Other.
        EpubContentType kind;
        if (!ContentType.MimeTypeToContentType.TryGetValue(mediaType, out kind))
        {
            kind = EpubContentType.Other;
        }

        bool isTextual;
        switch (kind)
        {
            case EpubContentType.Xhtml11:
            case EpubContentType.Css:
            case EpubContentType.Oeb1Document:
            case EpubContentType.Oeb1Css:
            case EpubContentType.Xml:
            case EpubContentType.Dtbook:
            case EpubContentType.DtbookNcx:
                isTextual = true;
                break;
            default:
                isTextual = false;
                break;
        }

        if (isTextual)
        {
            var textFile = new EpubTextFile
            {
                FileName = href,
                MimeType = mediaType,
                ContentType = kind
            };

            using (var entryStream = archiveEntry.Open())
            {
                textFile.Content = entryStream.ReadToEnd();
            }

            if (kind == EpubContentType.Xhtml11)
            {
                loaded.Html.Add(textFile);
            }
            else if (kind == EpubContentType.Css)
            {
                loaded.Css.Add(textFile);
            }
            else
            {
                loaded.Other.Add(textFile);
            }
        }
        else
        {
            var binaryFile = new EpubByteFile
            {
                FileName = href,
                MimeType = mediaType,
                ContentType = kind
            };

            using (var entryStream = archiveEntry.Open())
            {
                if (entryStream == null)
                {
                    throw new EpubException($"Incorrect EPUB file: content file \"{href}\" specified in manifest is not found");
                }

                // The size check above guarantees the length fits in an int capacity.
                using (var buffer = new MemoryStream((int)archiveEntry.Length))
                {
                    entryStream.CopyTo(buffer);
                    binaryFile.Content = buffer.ToArray();
                }
            }

            switch (kind)
            {
                case EpubContentType.ImageGif:
                case EpubContentType.ImageJpeg:
                case EpubContentType.ImagePng:
                case EpubContentType.ImageSvg:
                    loaded.Images.Add(binaryFile);
                    break;
                case EpubContentType.FontTruetype:
                case EpubContentType.FontOpentype:
                    loaded.Fonts.Add(binaryFile);
                    break;
                default:
                    loaded.Other.Add(binaryFile);
                    break;
            }
        }
    }

    return loaded;
}
/// <summary>
/// Reads an EPUB from a stream: parses the OCF, the OPF and, when present, the Nav and NCX
/// documents, then loads resources, special resources, the cover image and the chapters.
/// </summary>
/// <param name="stream">Stream containing the EPUB archive.</param>
/// <param name="password">Password forwarded to <c>GetText</c> and the resource loaders.</param>
/// <returns>The populated <c>EpubBook</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
/// <exception cref="EpubParseException">
/// The OCF entry is missing, the OCF names no root file, or the root file is not in the archive.
/// </exception>
public static EpubBook Read(Stream stream, string password)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    using (var zip = ZipFile.Read(stream))
    {
        // OCF: the container file that names the OPF root file.
        var ocfEntry = zip.Entries.SingleOrDefault(candidate => candidate.FileName.Equals(Constants.OcfPath));
        if (ocfEntry == null)
        {
            throw new EpubParseException("Epub OCF doesn't specify a root file.");
        }

        var ocfText = GetText(ocfEntry, password);
        var format = new EpubFormat { Ocf = OcfReader.Read(XDocument.Parse(ocfText)) };

        var opfPath = format.Ocf.RootFilePath;
        if (opfPath == null)
        {
            throw new EpubParseException("Epub OCF doesn't specify a root file.");
        }

        // OPF: the package document.
        var opfEntry = zip.Entries.SingleOrDefault(candidate => candidate.FileName.Equals(opfPath));
        if (opfEntry == null)
        {
            throw new EpubParseException("Epub OPF doesn't specify a root file.");
        }

        var opfText = GetText(opfEntry, password);
        format.Opf = OpfReader.Read(XDocument.Parse(opfText));

        // Nav: optional; skipped when the OPF names none or the entry is absent.
        var navRelativePath = format.Opf.FindNavPath();
        if (navRelativePath != null)
        {
            var navArchivePath = PathExt.Combine(PathExt.GetDirectoryPath(opfPath), navRelativePath);
            var navEntry = zip.Entries.SingleOrDefault(candidate => candidate.FileName.Equals(navArchivePath));
            if (navEntry != null)
            {
                var navText = GetText(navEntry, password);
                format.Nav = NavReader.Read(XDocument.Parse(navText));
            }
        }

        // NCX: optional; handled the same way as Nav.
        var ncxRelativePath = format.Opf.FindNcxPath();
        if (ncxRelativePath != null)
        {
            var ncxArchivePath = PathExt.Combine(PathExt.GetDirectoryPath(opfPath), ncxRelativePath);
            var ncxEntry = zip.Entries.SingleOrDefault(candidate => candidate.FileName.Equals(ncxArchivePath));
            if (ncxEntry != null)
            {
                var ncxText = GetText(ncxEntry, password);
                format.Ncx = NcxReader.Read(XDocument.Parse(ncxText));
            }
        }

        // Each step below receives the book as filled in by the previous ones, so order matters.
        var result = new EpubBook { Format = format };
        result.Resources = LoadResources(zip, result, password);
        result.SpecialResources = LoadSpecialResources(zip, result, password);
        result.CoverImage = LoadCoverImage(result);
        result.TableOfContents = new TableOfContents { EpubChapters = LoadChapters(result) };

        return result;
    }
}