/// <summary>
/// Wraps a plain-text document in a minimal, single-chapter book so that it can be
/// handled like any other epub.
/// </summary>
/// <param name="fileString">Raw text of the document. It is HTML-encoded before being embedded.</param>
/// <returns>
/// A book containing one HTML resource (Contents.html) holding the text inside a
/// <c>pre</c> element, and one table-of-contents entry pointing at that element.
/// </returns>
public static EpubBookExt TextToEpub(string fileString)
{
    const string contentsHref = "Contents.html";
    const string contentsPath = "./Contents.html";
    const string startAnchor = "START_OF_FILE";

    // Encode first so that '<', '&' etc. in the text cannot be interpreted as markup.
    var encodedText = HtmlEncoder.Default.Encode(fileString);

    // This must be a regular (non-verbatim) string literal: inside @"..." the sequence \n is
    // NOT an escape and would end up in the page as a literal backslash followed by 'n'.
    var html = "<html>\n<head>\n</head>\n<body>\n<pre id='" + startAnchor + "'>\n"
        + encodedText
        + "</pre></body></html>";

    var epubBook = new EpubBookExt(null) { Resources = new EpubResources() };

    var file = new EpubTextFile()
    {
        TextContent = html,
        AbsolutePath = contentsPath,
        Href = contentsHref,
        ContentType = EpubSharp.Format.EpubContentType.Xml,
        MimeType = "text/html",
        Content = System.Text.Encoding.UTF8.GetBytes(html),
    };
    epubBook.Resources.Html.Add(file);

    // A single chapter covering the whole file, anchored at the <pre> element.
    var bookChapter = new EpubChapter()
    {
        Title = "Entire Contents",
        AbsolutePath = contentsPath,
        HashLocation = startAnchor,
    };
    epubBook.TableOfContents.Add(bookChapter);

    epubBook.FixupHtmlOrdered();
    return epubBook;
}
/// <summary>
/// Looks up an image or CSS resource of the book by the name used to request it.
/// </summary>
/// <param name="epubBook">Book whose image and CSS resources are searched.</param>
/// <param name="requestFileName">Requested name; compared against each file's FileName() (with and without a leading '/') and its Href.</param>
/// <returns>The matching file, or null (after logging an error) when nothing matches.</returns>
public static EpubFile GetInternalFileByName(EpubBookExt epubBook, string requestFileName)
{
    const string parentPrefix = "../";

    if (requestFileName == null)
    {
        // Nothing can match a null name; report it like any other miss instead of crashing.
        App.Error($"MainEpubReader: GetInternalFileByName({requestFileName}) can't find requested file");
        return null;
    }

    // FAIL: BAEN 2013 short stories. The cover is requested as "cover.jpeg" from a top-level file. It's listed
    // in the Images as "/cover.jpeg" which then doesn't match anything.
    var requestWithSlash = "/" + requestFileName;

    bool IsRequested(string fileName, string href)
    {
        return fileName == requestFileName
            || fileName == requestWithSlash
            || href == requestFileName;
    }

    foreach (var imageFile in epubBook.Resources.Images)
    {
        if (IsRequested(imageFile.FileName(), imageFile.Href))
        {
            return imageFile;
        }
    }

    foreach (var cssFile in epubBook.Resources.Css)
    {
        if (IsRequested(cssFile.FileName(), cssFile.Href))
        {
            return cssFile;
        }
    }

    // FAIL: e.g. UN epub from https://www.unescap.org/publications/accessibility-all-good-practices-accessibility-asia-and-pacific-promote-disability
    // the epub looks for ../Text/FrontCover.html but the index include Text/FrontCover.html
    // Paths are not linguistic text, so compare ordinally.
    if (requestFileName.StartsWith(parentPrefix, StringComparison.Ordinal))
    {
        return GetInternalFileByName(epubBook, requestFileName.Substring(parentPrefix.Length));
    }

    App.Error($"MainEpubReader: GetInternalFileByName({requestFileName}) can't find requested file");
    return null;
}
/// <summary>
/// Returns a count of the number of Html sections in the ebook.
/// </summary>
/// <param name="epubBook">Book whose ordered HTML resources are counted.</param>
/// <returns>The number of entries in <c>ResourcesHtmlOrdered</c>.</returns>
public static int NHtmlIndex(EpubBookExt epubBook)
{
    // The collection already knows its size; no need to walk it.
    return epubBook.ResourcesHtmlOrdered.Count;
}
/// <summary>
/// Returns the UTF-8 decoded content of the HTML section at a given position.
/// </summary>
/// <param name="epubBook">Book whose ordered HTML resources are walked.</param>
/// <param name="searchIndex">Zero-based position of the wanted section.</param>
/// <returns>
/// The decoded section. When no section has that position an error is logged and the
/// content of the last section walked is returned instead, or null if there is none.
/// </returns>
public static string FindHtmlByIndex(EpubBookExt epubBook, int searchIndex)
{
    byte[] mostRecentBytes = null;
    var position = 0;

    foreach (var section in epubBook.ResourcesHtmlOrdered)
    {
        mostRecentBytes = section.Content;
        if (position == searchIndex)
        {
            return System.Text.Encoding.UTF8.GetString(section.Content);
        }
        position++;
    }

    App.Error($"MainEpubReader: FindHtmlByIndex: can't find html index {searchIndex}");

    // Fall back to whatever section was seen last rather than returning nothing.
    return mostRecentBytes == null
        ? null
        : System.Text.Encoding.UTF8.GetString(mostRecentBytes);
}
/// <summary>
/// Reads the file at <paramref name="fullFilePath"/>, loads it as an epub (or, failing that,
/// as plain text), pushes chapters and images to the registered receivers and navigates to
/// the requested location.
/// </summary>
/// <param name="fullFilePath">Path of the book file to read.</param>
/// <param name="location">Location to navigate to; when null, (0, 0) is used if the book has any HTML sections.</param>
/// <returns>
/// <c>OpenResult.OK</c> on success; <c>OpenResult.RedownloadableError</c> when the file bytes
/// could not be read; <c>OpenResult.OtherError</c> when any exception occurs. Both failure
/// cases clear <c>EpubBook</c> and show a message dialog.
/// </returns>
private async Task<OpenResult> OpenFile(string fullFilePath, BookLocation location)
{
    OpenResult retval; // default is = OpenResult.OtherError;
    try
    {
        SetScreenToLoading();
        Logger.Log($"MainEpubReader: about to load book {fullFilePath}");

        var fileContents = await FileMethods.ReadBytesAsync(fullFilePath);
        bool isZip = fullFilePath.EndsWith(".zip", StringComparison.OrdinalIgnoreCase);

        if (fileContents == null)
        {
            // Failure of some sort, but just kind of punt it.
            retval = OpenResult.RedownloadableError;
            EpubBook = null;
            App.Error($"ERROR: book: unable to load file {fullFilePath}");
            SetScreenToLoading(LoadFailureScreen);
            var md = new MessageDialog($"Error: book file is missing: {BookData.Title} ")
            {
                Title = "Atttempting to re-download book"
            };
            await md.ShowAsync();
            return retval;
        }

        Logger.Log($"MainEpubReader: read raw array {fileContents.Length}");

        // There's a chance that the file is really a text file, not an epub.
        bool isEpub = false;
        Exception epubException = null;
        try
        {
            // All epub files start with PK\3\4 (because they are zip files).
            // If it's not that, then it must be a text or html file.
            if (EpubWizard.IsEpub(fileContents) && !isZip)
            {
                var inner = EpubReader.Read(fileContents);
                EpubBook = new EpubBookExt(inner);
                isEpub = inner != null;
            }
        }
        catch (Exception ex)
        {
            isEpub = false;
            epubException = ex;
        }

        if (!isEpub)
        {
            // epubException is null whenever the epub parse was never attempted (or returned
            // nothing), so never throw it blindly: "throw null" surfaces as an unhelpful
            // NullReferenceException in the dialog below.
            if (isZip)
            {
                throw epubException
                    ?? new NotSupportedException("zip files cannot be opened as a book");
            }

            var fileString = System.Text.Encoding.UTF8.GetString(fileContents);
            if (fileString.IndexOf("<html", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                // We only understand text file and epub, nothing else.
                throw epubException
                    ?? new NotSupportedException("the file is neither an epub nor a plain text file");
            }

            // NOTE(review): this result is overwritten by OpenResult.OK below — confirm that
            // a failure reported by OpenFileAsText is meant to be ignored.
            retval = await OpenFileAsText(fileString, location);
        }

        Logger.Log($"MainEpubReader: read book length {fileContents.Length}");

        SetChapters?.SetChapters(EpubBook, EpubBook.TableOfContents);
        if (SetChapters == null)
        {
            App.Error($"ISSUE: got new book but SetChapters is null for {fullFilePath}");
        }

        // "await x?.FooAsync()" awaits null (and throws) when x is null, so test explicitly.
        if (SetImages != null)
        {
            await SetImages.SetImagesAsync(EpubBook.Resources.Images);
        }
        if (SetImages2 != null)
        {
            await SetImages2.SetImagesAsync(EpubBook.Resources.Images);
        }
        if (SetImages3 != null)
        {
            await SetImages3.SetImagesAsync(EpubBook.Resources.Images);
        }
        if (SetImages == null)
        {
            App.Error($"ISSUE: got new book but SetImages is null for {fullFilePath}");
        }

        Logger.Log($"MainEpubReader: about to navigate");

        if (location == null)
        {
            // Old way: go to the first item in table of contents. New way is to go to file=0 percent=0
            // but only if there's any actual files.
            // Often the first item is the cover page which isn't in the table of contents.
            // FAIL: BAEN likes to have file-per-chapter
            if (EpubBook.ResourcesHtmlOrdered.Count > 0)
            {
                location = new BookLocation(0, 0);
            }
        }

        if (location != null)
        {
            if (Logger.LogExtraTiming)
            {
                Logger.Log($"MainEpubReader: OpenFile: About to move to location as needed. {location}");
            }
            UserNavigatedToArgument = location;
            NavigateTo(ControlId, location); // We won't get a hairpin navigation callback
        }

        retval = OpenResult.OK;
    }
    catch (Exception ex)
    {
        // Simple error recovery: keep the downloaded data, but report it as
        // not actually downloaded.
        retval = OpenResult.OtherError;
        EpubBook = null;
        App.Error($"ERROR: book: exception {ex.Message} unable to load file {fullFilePath}");
        SetScreenToLoading(LoadFailureScreen);
        var md = new MessageDialog($"Error: unable to open that book. Internal error {ex.Message}")
        {
            Title = "Unable to open book"
        };
        await md.ShowAsync();
    }
    return retval;
}
/// <summary>
/// Stores the new book, empties the current <c>Chapters</c> collection and then hands the
/// new chapter list to <c>SetChaptersHelper</c>.
/// </summary>
/// <param name="book">Book that the chapters belong to; stored in <c>Book</c>.</param>
/// <param name="chapters">Chapter list passed on to <c>SetChaptersHelper</c>.</param>
public void SetChapters(EpubBookExt book, IList<EpubChapter> chapters)
{
    Book = book;

    // Drop whatever belonged to the previous book before the list is rebuilt.
    Chapters.Clear();

    // NOTE(review): the literal 1 is presumably the starting nesting level — confirm against SetChaptersHelper.
    SetChaptersHelper(chapters, 1);
}
/// <summary>
/// Finds the HTML section that contains one of the given ids.
/// </summary>
/// <remarks>
/// Ids are tried in list order. For each id the preferred section (if valid) is checked
/// first, then every section in reading order.
/// </remarks>
/// <param name="EpubBook">Book whose ordered HTML resources are searched.</param>
/// <param name="idList">Candidate ids, in priority order.</param>
/// <param name="preferredHtmlIndex">Index of the section to try first; ignored when negative or past the end of the list.</param>
/// <returns>
/// The decoded HTML, its index, its file name and the id that matched; or
/// (null, -1, null, null) when no section contains any of the ids.
/// </returns>
public static (string value, int index, string filename, string foundId) FindHtmlContainingId(EpubBookExt EpubBook, List<string> idList, int preferredHtmlIndex)
{
    if (idList == null || idList.Count == 0)
    {
        App.Error("ERROR: unable to find html containing id: no ids were supplied");
        return (null, -1, null, null);
    }

    var sections = EpubBook.ResourcesHtmlOrdered;

    // Guard both ends: an index past the end used to throw from the indexer.
    var hasPreferred = preferredHtmlIndex >= 0 && preferredHtmlIndex < sections.Count;

    // Decode the preferred section once rather than once per id.
    var preferredHtml = hasPreferred ? sections[preferredHtmlIndex] : null;
    var preferredText = hasPreferred
        ? System.Text.Encoding.UTF8.GetString(preferredHtml.Content)
        : null;

    foreach (var id in idList)
    {
        if (hasPreferred && FindHtmlContainingIdHelper(preferredHtml, preferredText, id))
        {
            return (preferredText, preferredHtmlIndex, preferredHtml.FileName(), id);
        }

        var index = 0;
        foreach (var html in sections)
        {
            var str = System.Text.Encoding.UTF8.GetString(html.Content);
            if (FindHtmlContainingIdHelper(html, str, id))
            {
                return (str, index, html.FileName(), id);
            }
            index++;
        }
    }

    // "uiLog" is deliberately not reported when it cannot be found.
    if (idList[0] != "uiLog")
    {
        App.Error($"ERROR: unable to find html containing id={idList[0]} in the ebook");
    }
    return (null, -1, null, null);
}
/// <summary>
/// Finds the HTML section whose file name matches the requested name or one of the
/// variants produced by <c>MakeHtmlFileNameVariants</c>.
/// </summary>
/// <param name="epubBook">Book whose ordered HTML resources are searched.</param>
/// <param name="htmlFileName">File name being looked for.</param>
/// <returns>
/// The decoded HTML, its index and its file name; or (null, -1, null) after logging an
/// error when no section matches.
/// </returns>
public static (string value, int index, string filename) FindHtmlContainingHtmlFileName(EpubBookExt epubBook, string htmlFileName)
{
    // FAIL: Might be encoded: we get file%20space.xhml but need to find file<sp>space.xhml
    // FAIL: e.g. UN epub from https://www.unescap.org/publications/accessibility-all-good-practices-accessibility-asia-and-pacific-promote-disability
    // the epub looks for ../Text/FrontCover.html but the index include Text/FrontCover.html
    var candidateNames = MakeHtmlFileNameVariants(htmlFileName);

    var position = 0;
    foreach (var section in epubBook.ResourcesHtmlOrdered)
    {
        if (!FileNameMatches(section, candidateNames))
        {
            position++;
            continue;
        }

        var text = System.Text.Encoding.UTF8.GetString(section.Content);
        return (text, position, section.FileName());
    }

    App.Error($"MainEpubReader: FindHtmlContainingHtmlFileName: can't find {htmlFileName}");
    return (null, -1, null);
}
/// <summary>
/// Returns the id of the chapter that contains an anchor. This is used, for example, when
/// selecting an image and wanting to shift the chapter display.
/// </summary>
/// <param name="epubBook">Book whose HTML sections and table of contents are searched.</param>
/// <param name="id">Anchor id to locate. When null or empty, the first chapter is used.</param>
/// <param name="preferredHtmlIndex">Index of the HTML section to try first; passed through to <c>FindHtmlContainingId</c>.</param>
/// <returns>
/// The chapter's hash location, or the chapter's file name when the chapter could only be
/// matched by file name. Returns "" when <paramref name="id"/> is empty and there is no
/// first chapter (or it has no hash location), and null when nothing could be matched.
/// </returns>
public static string GetChapterContainingId(EpubBookExt epubBook, string id, int preferredHtmlIndex)
{
    if (string.IsNullOrEmpty(id))
    {
        // Just return the first chapter. A book may have no chapters at all, so the
        // first chapter itself has to be null-checked, not only its HashLocation.
        return GetFirstChapter(epubBook.TableOfContents)?.HashLocation ?? "";
    }

    // Step one: find the html with the id
    var idList = EpubWizard.GetIdVariants(id);
    var (foundHtml, foundIndex, foundHtmlName, foundId) = EpubWizard.FindHtmlContainingId(epubBook, idList, preferredHtmlIndex);
    if (foundHtml == null)
    {
        if (id != "uiLog") // uiLog isn't always findable for ... reasons
        {
            App.Error($"IMPOSSIBLE ERROR: completely unable to find id {id} ");
        }
        return null;
    }

    // NOTE(review): the search above may have matched a variant (foundId) rather than id
    // itself, yet the position is looked up with the original id — confirm this is intended.
    var pos = EpubWizard.HtmlStringIdIndexOf(foundHtml, id);
    string closest = null;
    FindClosestAnchorHelper(foundHtml, pos, epubBook.TableOfContents, 0, 3, ref closest);

    // Fixup #1: try the TOC directly
    // FAIL: the order of the fixups is really important. BAEN 2013 short stories doesn't include chapter id values
    // and they have one story with nested sections AND they have duplicate ID values (calibre_pb_1 etc.) AND each story is
    // in its own HTML page. If you select the first story after the story with sub-stories, then we really want to find
    // the story by chapter and don't want the previous story.
    if (closest == null)
    {
        // FAIL: All of me a small town romance: the chapters don't have any anchors at all.
        // Instead of looking for the chapter by id, look for it based on a matching
        // filename. If it matches, return the Filename as the id.
        // No, it's not quite an id, but it is close enough to work :-)
        foreach (var chapter in epubBook.TableOfContents)
        {
            // The chapters here might have names like ../TextFiles/chapter.xhml
            // while we're looking for plain TextFiles/chapter.xhml
            // We have to return the raw chapter name because we'll use it later on.
            // The variants must be built from the chapter's name, not from foundHtmlName.
            var htmlFileNameVariants = MakeHtmlFileNameVariants(chapter.FileName());
            foreach (var fname in htmlFileNameVariants)
            {
                if (fname == foundHtmlName)
                {
                    // NOTE(review): no break, so the last matching chapter wins — confirm intended.
                    closest = chapter.FileName();
                }
            }
        }
    }

    // Fixup #2: maybe try the previous HTML
    if (closest == null)
    {
        // Didn't find one; that's probably because we're in a gap. We need to find the same thing for the
        // previous chapter, but with closest set to the end of the html.
        if (foundIndex > 0)
        {
            foundHtml = FindHtmlByIndex(epubBook, foundIndex - 1);
            FindClosestAnchorHelper(foundHtml, int.MaxValue, epubBook.TableOfContents, 0, 3, ref closest);
        }
        // First html, and still can't find anything? Give up, we're not going to find anything.
    }

    // All the fixups failed
    if (closest == null)
    {
        App.Error($"ERROR: when asked for matching chapter, can't find it for {id}. Possibly the chapters have no anchors.");
    }
    return closest;
}