/// <summary>
/// Wraps plain text in a minimal HTML page and packages it as an in-memory
/// book containing a single HTML resource and a single table-of-contents entry.
/// </summary>
/// <param name="fileString">Raw text; it is HTML-encoded before being embedded.</param>
/// <returns>A book whose only chapter points at the <c>START_OF_FILE</c> anchor of <c>Contents.html</c>.</returns>
public static EpubBookExt TextToEpub(string fileString)
{
    // Encode first so that '<', '&', etc. in the raw text cannot be parsed as markup.
    fileString = HtmlEncoder.Default.Encode(fileString);

    // A regular (non-verbatim) literal is required here: inside @"..." the
    // sequence \n is a literal backslash + 'n', which would end up as visible
    // text in the page instead of a line break.
    fileString = "<html>\n<head>\n</head>\n<body>\n<pre id='START_OF_FILE'>\n" + fileString + "</pre></body></html>";

    var epubBook = new EpubBookExt(null)
    {
        Resources = new EpubResources()
    };

    var file = new EpubTextFile()
    {
        TextContent = fileString,
        AbsolutePath = "./Contents.html",
        Href = "Contents.html",
        // Content type is declared as Xml even though the MIME type is text/html
        // (the original author's note on this was simply "why not?").
        ContentType = EpubSharp.Format.EpubContentType.Xml,
        MimeType = "text/html",
        Content = System.Text.Encoding.UTF8.GetBytes(fileString),
    };
    epubBook.Resources.Html.Add(file);

    // Single chapter pointing at the <pre> element that holds the whole text.
    var bookChapter = new EpubChapter()
    {
        Title = "Entire Contents",
        AbsolutePath = "./Contents.html",
        HashLocation = "START_OF_FILE",
    };
    epubBook.TableOfContents.Add(bookChapter);

    // NOTE(review): presumably rebuilds the reading-order list from Resources.Html — confirm.
    epubBook.FixupHtmlOrdered();
    return epubBook;
}
/// <summary>
/// Gets the content of the element with the given ID inside a chapter file,
/// typically a footnote or annotation (EPUB only).
/// </summary>
/// <param name="id">ID of the element to look up.</param>
/// <param name="fileName">File name; when empty, the current chapter is used.</param>
/// <returns>
/// A <see cref="Tip"/> carrying the ID. When the element is found, Title is the
/// element's trimmed inner text and Description is the parent's inner text with
/// the title removed; otherwise only Id is set.
/// </returns>
/// <exception cref="NotSupportedException">The reader is not in EPUB mode.</exception>
/// <exception cref="InvalidOperationException">The EPUB view or content is not loaded.</exception>
/// <exception cref="ArgumentException">The ID is empty, or the file name does not contain ".html".</exception>
/// <exception cref="FileNotFoundException">No file in the reading order matches the file name.</exception>
public Tip GetSpecificIdContent(string id, string fileName = "")
{
    if (ReaderType != Enums.ReaderType.Epub)
    {
        throw new NotSupportedException("This method can only use in epub view");
    }
    else if (_epubView == null)
    {
        throw new InvalidOperationException("Epub view not loaded");
    }
    else if (_epubContent == null)
    {
        throw new InvalidOperationException("Epub content not loaded");
    }
    else if (string.IsNullOrEmpty(id))
    {
        throw new ArgumentException("Invalid Id");
    }
    else if (!string.IsNullOrEmpty(fileName) && !fileName.Contains(".html", StringComparison.OrdinalIgnoreCase))
    {
        throw new ArgumentException("Invalid File name");
    }

    EpubTextFile info = null;
    var orders = _epubContent.SpecialResources.HtmlInReadingOrder;
    if (!string.IsNullOrEmpty(fileName))
    {
        // First file whose path contains the requested name (case-insensitive).
        info = orders.FirstOrDefault(p => p.AbsolutePath.Contains(fileName, StringComparison.OrdinalIgnoreCase));
    }
    else
    {
        info = orders[_tempEpubChapterIndex];
    }

    if (info == null)
    {
        throw new FileNotFoundException("File not found");
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(info.TextContent);
    var node = doc.GetElementbyId(id);

    var tip = new Tip();
    tip.Id = id;
    if (node != null)
    {
        string title = node.InnerText;
        string desc = node.ParentNode.InnerText;
        if (!string.IsNullOrEmpty(title))
        {
            // Strings are immutable: Replace returns a new string, so the result
            // must be assigned back or the title stays in the description.
            desc = desc.Replace(title, "");
        }

        tip.Title = title.Trim();
        tip.Description = desc.Trim();
    }

    return tip;
}
/// <summary>
/// Converts the text content of an EPUB text file with <c>Html.ConvertHtml</c>
/// and returns the result with leading and trailing whitespace trimmed.
/// </summary>
/// <param name="epubTextFile">File whose <c>TextContent</c> is converted.</param>
/// <returns>The converted, trimmed text.</returns>
public static string ToPlainText(this EpubTextFile epubTextFile)
{
    string converted = new StringBuilder()
        .Append(Html.ConvertHtml(epubTextFile.TextContent))
        .ToString();

    return converted.Trim();
}
/// <summary>
/// Splits the plain-text form of an EPUB text file on line breaks and returns
/// the non-blank pieces in their original order.
/// </summary>
/// <param name="epubTextFile">File to split into paragraphs.</param>
/// <returns>A list of lines that are not null, empty, or whitespace-only.</returns>
public static ICollection<string> ToParagraphs(this EpubTextFile epubTextFile)
{
    // Matches any line-break style: CRLF, LFCR, lone CR, or lone LF.
    const string lineBreakPattern = "\r\n|\n\r|\r|\n";

    string plainText = epubTextFile.ToPlainText();

    return Regex.Split(plainText, lineBreakPattern)
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .ToList();
}
/// <summary>
/// Returns the part of the file's <c>AbsolutePath</c> after the last '/'.
/// </summary>
/// <param name="epub">File whose path is inspected.</param>
/// <returns>
/// The text following the last '/', or the whole path when it contains no '/'.
/// </returns>
public static string FileName(this EpubTextFile epub)
{
    var fullPath = epub.AbsolutePath;
    var slashAt = fullPath.LastIndexOf('/');

    return slashAt < 0
        ? fullPath
        : fullPath.Substring(slashAt + 1);
}
/// <summary>
/// Decides whether an HTML file is the one that a given id refers to.
/// </summary>
/// <param name="html">Candidate HTML file.</param>
/// <param name="str">HTML text that is searched for the id.</param>
/// <param name="id">Id (or, in some books, an html file name) being looked up.</param>
/// <returns>
/// <c>true</c> when the file's name equals <paramref name="id"/>, or when
/// <c>HtmlStringIdIndexOf</c> reports a non-negative index for it; otherwise <c>false</c>.
/// </returns>
private static bool FindHtmlContainingIdHelper(EpubTextFile html, string str, string id)
{
    // Some books just work like this: the "id" is the name of the html file itself.
    if (html.FileName() == id)
    {
        return true;
    }

    // Apart from the file-name match above, "*.html" values are never treated as
    // real ids. Ordinal comparison: this is an identifier, not linguistic text.
    if (id.EndsWith(".html", StringComparison.Ordinal))
    {
        return false;
    }

    // Only a hit reported by HtmlStringIdIndexOf counts; the id merely appearing
    // somewhere else in the text is deliberately not treated as a match.
    return HtmlStringIdIndexOf(str, id) >= 0;
}
/// <summary>
/// Finds the last entry of <c>Chapters</c> whose linked file sits at or before
/// the given file in the EPUB reading order.
/// </summary>
/// <param name="chapter">File whose position in the reading order is used as the reference.</param>
/// <returns>
/// The matching chapter entry, or <c>null</c> when no entry qualifies. Entries whose
/// <c>Link</c> matches no file in the reading order are skipped; the scan stops at
/// the first entry whose file comes after <paramref name="chapter"/>.
/// </returns>
private Chapter GetLastEpubChapter(EpubTextFile chapter)
{
    var orders = _epubContent.SpecialResources.HtmlInReadingOrder;

    // The reference position does not change while scanning, so look it up once
    // instead of once per chapter entry.
    int currentIndex = orders.IndexOf(chapter);

    Chapter lastChapter = null;
    foreach (var header in Chapters)
    {
        var corr = orders.FirstOrDefault(p => p.AbsolutePath == header.Link);
        if (corr == null)
        {
            // This entry does not map to any file in the reading order.
            continue;
        }

        if (currentIndex < orders.IndexOf(corr))
        {
            // This entry's file comes after the reference file: stop scanning.
            break;
        }

        lastChapter = header;
    }

    return lastChapter;
}
/// <summary>
/// Gets the parsed HTML document of the file with the specified name.
/// </summary>
/// <param name="fileName">
/// File name; the first file in reading order whose path contains it
/// (case-insensitive) is used. When empty, the file at the current chapter index is used.
/// </param>
/// <returns>An <see cref="HtmlDocument"/> loaded from the file's text content.</returns>
public HtmlDocument GetSpecificFileDocument(string fileName = "")
{
    if (ReaderType != ReaderType.Epub)
    {
        throw new NotSupportedException("This method can only use in epub view");
    }

    if (_readerView == null)
    {
        throw new InvalidOperationException("Epub view not loaded");
    }

    if (_epubContent == null)
    {
        throw new InvalidOperationException("Epub content not loaded");
    }

    var readingOrder = _epubContent.SpecialResources.HtmlInReadingOrder;

    EpubTextFile target = string.IsNullOrEmpty(fileName)
        ? readingOrder[_tempEpubChapterIndex]
        : readingOrder.FirstOrDefault(f => f.AbsolutePath.Contains(fileName, StringComparison.OrdinalIgnoreCase));

    if (target == null)
    {
        throw new FileNotFoundException("File not found");
    }

    var document = new HtmlDocument();
    document.LoadHtml(target.TextContent);
    return document;
}
/// <summary>
/// Reads every item listed in the OPF manifest from the book folder and sorts
/// the loaded files into the buckets of an <see cref="EpubResources"/>.
/// </summary>
/// <param name="format">Parsed EPUB format data; its manifest and OPF path are read.</param>
/// <returns>
/// Resources where every file is in <c>All</c> and additionally in exactly one of
/// <c>Html</c>, <c>Css</c>, <c>Images</c>, <c>Fonts</c> or <c>Other</c>.
/// </returns>
/// <exception cref="EpubParseException">
/// A manifest item is missing from the book folder or is larger than <c>int.MaxValue</c> bytes.
/// </exception>
/// <exception cref="EpubException">A binary item's stream could not be opened.</exception>
private EpubResources LoadResources(EpubFormat format)
{
    var loaded = new EpubResources();

    foreach (var manifestItem in format.Opf.Manifest.Items)
    {
        var path = manifestItem.Href.ToAbsolutePath(format.Paths.OpfAbsolutePath);

        FileInfo source = _bookFolder.GetFile("." + path);
        if (source == null)
        {
            throw new EpubParseException($"file {path} not found in archive.");
        }

        if (source.Length > int.MaxValue)
        {
            throw new EpubParseException($"file {path} is bigger than 2 Gb.");
        }

        var href = manifestItem.Href;
        var mimeType = manifestItem.MediaType;

        // Unknown MIME types fall back to Other.
        EpubContentType contentType;
        if (!ContentType.MimeTypeToContentType.TryGetValue(mimeType, out contentType))
        {
            contentType = EpubContentType.Other;
        }

        switch (contentType)
        {
            // Text-based formats are loaded as EpubTextFile.
            case EpubContentType.Xhtml11:
            case EpubContentType.Css:
            case EpubContentType.Oeb1Document:
            case EpubContentType.Oeb1Css:
            case EpubContentType.Xml:
            case EpubContentType.Dtbook:
            case EpubContentType.DtbookNcx:
            {
                var textFile = new EpubTextFile
                {
                    AbsolutePath = path,
                    Href = href,
                    MimeType = mimeType,
                    ContentType = contentType
                };
                loaded.All.Add(textFile);

                using (var input = source.OpenRead())
                {
                    textFile.Content = input.ReadToEnd();
                }

                if (contentType == EpubContentType.Xhtml11)
                {
                    loaded.Html.Add(textFile);
                }
                else if (contentType == EpubContentType.Css)
                {
                    loaded.Css.Add(textFile);
                }
                else
                {
                    loaded.Other.Add(textFile);
                }

                break;
            }

            // Everything else is kept as raw bytes.
            default:
            {
                var byteFile = new EpubByteFile
                {
                    AbsolutePath = path,
                    Href = href,
                    MimeType = mimeType,
                    ContentType = contentType
                };
                loaded.All.Add(byteFile);

                using (var input = source.OpenRead())
                {
                    if (input == null)
                    {
                        throw new EpubException(
                            $"Incorrect EPUB file: content file \"{href}\" specified in manifest is not found");
                    }

                    // Pre-size the buffer to the file length (checked above to fit in an int).
                    using (var buffer = new MemoryStream((int)source.Length))
                    {
                        input.CopyTo(buffer);
                        byteFile.Content = buffer.ToArray();
                    }
                }

                switch (contentType)
                {
                    case EpubContentType.ImageGif:
                    case EpubContentType.ImageJpeg:
                    case EpubContentType.ImagePng:
                    case EpubContentType.ImageSvg:
                        loaded.Images.Add(byteFile);
                        break;

                    case EpubContentType.FontTruetype:
                    case EpubContentType.FontOpentype:
                        loaded.Fonts.Add(byteFile);
                        break;

                    default:
                        loaded.Other.Add(byteFile);
                        break;
                }

                break;
            }
        }
    }

    return loaded;
}