/// <summary>
        /// Wraps plain text in a minimal HTML page and packages that page as an in-memory book.
        /// </summary>
        /// <param name="fileString">Raw text to display; it is HTML-encoded before being embedded in a &lt;pre&gt; element.</param>
        /// <returns>
        /// A book whose resources contain the generated Contents.html and whose table of contents
        /// has an "Entire Contents" chapter pointing at the START_OF_FILE anchor.
        /// </returns>
        public static EpubBookExt TextToEpub(string fileString)
        {
            fileString = HtmlEncoder.Default.Encode(fileString);
            // Regular (non-verbatim) literal on purpose: in a verbatim @"..." string the sequence \n is the
            // two characters backslash + n, which would end up as visible text in the generated page.
            fileString = "<html>\n<head>\n</head>\n<body>\n<pre id='START_OF_FILE'>\n" + fileString + "</pre></body></html>";

            var epubBook = new EpubBookExt(null)
            {
                Resources = new EpubResources()
            };
            var file = new EpubTextFile()
            {
                TextContent = fileString,
                AbsolutePath = "./Contents.html",
                Href = "Contents.html",
                ContentType = EpubSharp.Format.EpubContentType.Xml,
                MimeType = "text/html",
                // Byte form of the same page; other helpers decode Content as UTF-8.
                Content = System.Text.Encoding.UTF8.GetBytes(fileString),
            };

            epubBook.Resources.Html.Add(file);
            var bookChapter = new EpubChapter()
            {
                Title = "Entire Contents",
                AbsolutePath = "./Contents.html",
                HashLocation = "START_OF_FILE",
            };
            epubBook.TableOfContents.Add(bookChapter);
            // NOTE(review): presumably rebuilds ResourcesHtmlOrdered from Resources.Html -- confirm.
            epubBook.FixupHtmlOrdered();
            return epubBook;
        }
Beispiel #2
0
        /// <summary>
        /// Finds an image or CSS resource of the book by the name it was requested under.
        /// </summary>
        /// <param name="epubBook">Book whose Resources.Images and Resources.Css are searched (images first).</param>
        /// <param name="requestFileName">Requested name; leading "../" segments are stripped one at a time when no match is found.</param>
        /// <returns>The first matching file, or null (after logging an error) when nothing matches.</returns>
        public static EpubFile GetInternalFileByName(EpubBookExt epubBook, string requestFileName)
        {
            if (requestFileName == null)
            {
                // Previously this surfaced as a NullReferenceException; report it like any other miss.
                App.Error($"MainEpubReader: GetInternalFileByName({requestFileName}) can't find requested file");
                return null;
            }

            // BAEN 2013 short stories: the cover is requested as "cover.jpeg" from a top-level file
            // but is listed in the images as "/cover.jpeg", so the slash-prefixed form is tried too.
            var requestWithSlash = "/" + requestFileName;

            foreach (var imageFile in epubBook.Resources.Images)
            {
                var fname = imageFile.FileName();
                if (fname == requestFileName || fname == requestWithSlash || imageFile.Href == requestFileName)
                {
                    return imageFile;
                }
            }
            foreach (var otherFile in epubBook.Resources.Css)
            {
                var fname = otherFile.FileName();
                if (fname == requestFileName || fname == requestWithSlash || otherFile.Href == requestFileName)
                {
                    return otherFile;
                }
            }

            // e.g. UN epub from https://www.unescap.org/publications/accessibility-all-good-practices-accessibility-asia-and-pacific-promote-disability
            // asks for ../Text/FrontCover.html while the index lists Text/FrontCover.html.
            // Ordinal comparison: this is a path prefix, not linguistic text.
            if (requestFileName.StartsWith("../", System.StringComparison.Ordinal))
            {
                return GetInternalFileByName(epubBook, requestFileName.Substring("../".Length));
            }

            App.Error($"MainEpubReader: GetInternalFileByName({requestFileName}) can't find requested file");
            return null;
        }
Beispiel #3
0
        /// <summary>
        /// Returns a count of the number of Html sections in the ebook.
        /// </summary>
        /// <param name="epubBook">Book whose ordered HTML resources are counted.</param>
        /// <returns>The number of entries in the book's ResourcesHtmlOrdered collection.</returns>
        public static int NHtmlIndex(EpubBookExt epubBook)
        {
            // The collection already knows its size; no need to walk it.
            return epubBook.ResourcesHtmlOrdered.Count;
        }
Beispiel #4
0
        /// <summary>
        /// Returns the UTF-8 decoded text of the HTML resource at the given position in reading order.
        /// </summary>
        /// <param name="epubBook">Book whose ResourcesHtmlOrdered list is consulted.</param>
        /// <param name="searchIndex">Zero-based position of the wanted HTML resource.</param>
        /// <returns>
        /// The decoded HTML at <paramref name="searchIndex"/>. When the index is not present an error is
        /// logged and the last resource's text is returned instead, or null when there is no content at all.
        /// </returns>
        public static string FindHtmlByIndex(EpubBookExt epubBook, int searchIndex)
        {
            var pages = epubBook.ResourcesHtmlOrdered;
            byte[] mostRecent = null;

            for (var position = 0; position < pages.Count; position++)
            {
                mostRecent = pages[position].Content;
                if (position == searchIndex)
                {
                    return System.Text.Encoding.UTF8.GetString(mostRecent);
                }
            }

            // Not found: complain, then fall back to whatever came last so the caller still has something to show.
            App.Error($"MainEpubReader: FindHtmlByIndex: can't find html index {searchIndex}");
            return mostRecent == null
                ? null
                : System.Text.Encoding.UTF8.GetString(mostRecent);
        }
Beispiel #5
0
        /// <summary>
        /// Reads the book file, publishes its chapters and images to the registered listeners and
        /// navigates to the requested location.
        /// </summary>
        /// <param name="fullFilePath">Path of the file to load. Files ending in ".zip" are never parsed as epub.</param>
        /// <param name="location">
        /// Location to navigate to once loaded. When null, <c>new BookLocation(0, 0)</c> is used provided
        /// the book has at least one ordered HTML resource.
        /// </param>
        /// <returns>
        /// <c>OpenResult.OK</c> on success, <c>OpenResult.RedownloadableError</c> when the file could not be
        /// read, <c>OpenResult.OtherError</c> when any exception occurred (a dialog is shown in both failure cases).
        /// </returns>
        private async Task <OpenResult> OpenFile(string fullFilePath, BookLocation location)
        {
            OpenResult retval;

            try
            {
                SetScreenToLoading();
                Logger.Log($"MainEpubReader: about to load book {fullFilePath}");
                var fileContents = await FileMethods.ReadBytesAsync(fullFilePath);

                bool isZip = fullFilePath.EndsWith(".ZIP", StringComparison.OrdinalIgnoreCase);
                if (fileContents == null)
                {
                    // Failure of some sort, but just kind of punt it.
                    retval   = OpenResult.RedownloadableError;
                    EpubBook = null;
                    App.Error($"ERROR: book: unable to load file {fullFilePath}");

                    SetScreenToLoading(LoadFailureScreen);
                    var md = new MessageDialog($"Error: book file is missing: {BookData.Title} ")
                    {
                        Title = "Atttempting to re-download book"
                    };
                    await md.ShowAsync();

                    return retval;
                }

                Logger.Log($"MainEpubReader: read raw array {fileContents.Length}");

                // There's a chance that the file is really a text file, not an epub,
                // so probe the bytes before handing them to the epub reader.
                bool      isEpub        = false;
                Exception epubException = null;
                try
                {
                    // All epub files start with PK\3\4 (because they are zip files).
                    // If it's not that, then it must be a text or html file.
                    if (EpubWizard.IsEpub(fileContents) && !isZip)
                    {
                        var inner = EpubReader.Read(fileContents);
                        EpubBook = new EpubBookExt(inner);
                        isEpub   = inner != null;
                    }
                }
                catch (Exception ex)
                {
                    isEpub        = false;
                    epubException = ex;
                }

                if (!isEpub)
                {
                    if (isZip)
                    {
                        // epubException is null here (zip files are never parsed above), and "throw null"
                        // would surface as a NullReferenceException; raise a meaningful error instead.
                        throw epubException ?? new NotSupportedException("zip files cannot be opened as a book");
                    }

                    var fileString = System.Text.Encoding.UTF8.GetString(fileContents);
                    if (fileString.IndexOf("<html", StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        // We only understand text file and epub, nothing else.
                        // Only the message is consumed by the handler below, so re-raising the captured
                        // exception object (when there is one) keeps the user-visible text unchanged.
                        throw epubException ?? new NotSupportedException("only text and epub files can be opened");
                    }

                    // NOTE(review): this result is overwritten with OpenResult.OK further down, as before.
                    // Presumably OpenFileAsText sets EpubBook -- confirm whether a non-OK result should abort here.
                    retval = await OpenFileAsText(fileString, location);
                }
                Logger.Log($"MainEpubReader: read book length {fileContents.Length}");

                if (SetChapters != null)
                {
                    SetChapters.SetChapters(EpubBook, EpubBook.TableOfContents);
                }
                else
                {
                    App.Error($"ISSUE: got new book but SetChapters is null for {fullFilePath}");
                }

                // "await x?.Foo()" awaits null when x is null and throws; guard each listener explicitly.
                if (SetImages != null)
                {
                    await SetImages.SetImagesAsync(EpubBook.Resources.Images);
                }
                else
                {
                    App.Error($"ISSUE: got new book but SetImages is null for {fullFilePath}");
                }

                if (SetImages2 != null)
                {
                    await SetImages2.SetImagesAsync(EpubBook.Resources.Images);
                }

                if (SetImages3 != null)
                {
                    await SetImages3.SetImagesAsync(EpubBook.Resources.Images);
                }

                Logger.Log($"MainEpubReader: about to navigate");

                // Old way: go to the first item in table of contents. New way is to go to file=0 percent=0
                // but only if there's any actual files. Often the first item is the cover page, which
                // isn't in the table of contents.
                if (location == null && EpubBook.ResourcesHtmlOrdered.Count > 0)
                {
                    location = new BookLocation(0, 0);
                }

                if (location != null)
                {
                    if (Logger.LogExtraTiming)
                    {
                        Logger.Log($"MainEpubReader: OpenFile: About to move to location as needed. {location}");
                    }
                    UserNavigatedToArgument = location;
                    NavigateTo(ControlId, location); // We won't get a hairpin navigation callback
                }
                retval = OpenResult.OK;
            }
            catch (Exception ex)
            {
                // Simple error recovery: keep the downloaded data, but report it as
                // not actually downloaded.
                retval   = OpenResult.OtherError;
                EpubBook = null;
                App.Error($"ERROR: book: exception {ex.Message} unable to load file {fullFilePath}");

                SetScreenToLoading(LoadFailureScreen);
                var md = new MessageDialog($"Error: unable to open that book. Internal error {ex.Message}")
                {
                    Title = "Unable to open book"
                };
                await md.ShowAsync();
            }
            return retval;
        }
 /// <summary>
 /// Replaces the currently shown chapter list with the chapters of a newly opened book.
 /// </summary>
 /// <param name="book">Book the chapters belong to; stored in <c>Book</c>.</param>
 /// <param name="chapters">Chapters handed on to <c>SetChaptersHelper</c> after the existing list is cleared.</param>
 public void SetChapters(EpubBookExt book, IList <EpubChapter> chapters)
 {
     Book = book;
     Chapters.Clear();
     // NOTE(review): the second argument is presumably the starting nesting level -- confirm against SetChaptersHelper.
     SetChaptersHelper(chapters, 1);
 }
Beispiel #7
0
        /// <summary>
        /// Searches the book's ordered HTML resources for the first one that contains any of the given ids.
        /// </summary>
        /// <param name="EpubBook">Book whose ResourcesHtmlOrdered list is searched.</param>
        /// <param name="idList">Candidate ids, tried in order; an earlier id wins over a later one.</param>
        /// <param name="preferredHtmlIndex">
        /// Index of the HTML resource to try first for each id. Negative or out-of-range values
        /// simply skip the preferred lookup.
        /// </param>
        /// <returns>
        /// The decoded HTML, its index, its file name and the id that matched; or
        /// <c>(null, -1, null, null)</c> when nothing matches (an error is logged unless the first id is "uiLog").
        /// </returns>
        public static (string value, int index, string filename, string foundId) FindHtmlContainingId(EpubBookExt EpubBook, List <string> idList, int preferredHtmlIndex)
        {
            if (idList == null || idList.Count == 0)
            {
                // Nothing to look for; previously this crashed on idList[0] (or on the foreach for null).
                App.Error($"ERROR: unable to find html containing id= in the ebook");
                return (null, -1, null, null);
            }

            var htmlList = EpubBook.ResourcesHtmlOrdered;
            // Guard the indexer: a stale index from a different book must not throw.
            var usePreferred = preferredHtmlIndex >= 0 && preferredHtmlIndex < htmlList.Count;

            foreach (var id in idList)
            {
                if (usePreferred)
                {
                    var html  = htmlList[preferredHtmlIndex];
                    var str   = System.Text.Encoding.UTF8.GetString(html.Content);
                    var found = FindHtmlContainingIdHelper(html, str, id);
                    if (found)
                    {
                        return (str, preferredHtmlIndex, html.FileName(), id);
                    }
                }

                var index = 0;
                foreach (var html in htmlList)
                {
                    var str   = System.Text.Encoding.UTF8.GetString(html.Content);
                    var found = FindHtmlContainingIdHelper(html, str, id);
                    if (found)
                    {
                        return (str, index, html.FileName(), id);
                    }
                    index++;
                }
            }

            if (idList[0] != "uiLog")
            {
                App.Error($"ERROR: unable to find html containing id={idList[0]} in the ebook");
            }
            return (null, -1, null, null);
        }
Beispiel #8
0
        /// <summary>
        /// Locates the HTML resource whose file name matches the requested name or one of its variants.
        /// </summary>
        /// <param name="epubBook">Book whose ResourcesHtmlOrdered list is searched in order.</param>
        /// <param name="htmlFileName">Requested file name; variants are produced by MakeHtmlFileNameVariants.</param>
        /// <returns>
        /// The decoded HTML, its index and its file name; or <c>(null, -1, null)</c> after logging an
        /// error when no resource matches.
        /// </returns>
        public static (string value, int index, string filename) FindHtmlContainingHtmlFileName(EpubBookExt epubBook, string htmlFileName)
        {
            // Variants exist because requests do not always match the index verbatim:
            //  - the name might be encoded: we get file%20space.xhml but need to find file<sp>space.xhml
            //  - e.g. UN epub from https://www.unescap.org/publications/accessibility-all-good-practices-accessibility-asia-and-pacific-promote-disability
            //    asks for ../Text/FrontCover.html while the index lists Text/FrontCover.html
            var candidates = MakeHtmlFileNameVariants(htmlFileName);
            var position = -1;

            foreach (var page in epubBook.ResourcesHtmlOrdered)
            {
                position++;
                if (!FileNameMatches(page, candidates))
                {
                    continue;
                }

                var text = System.Text.Encoding.UTF8.GetString(page.Content);
                return (text, position, page.FileName());
            }

            App.Error($"MainEpubReader: FindHtmlContainingHtmlFileName: can't find {htmlFileName}");
            return (null, -1, null);
        }
Beispiel #9
0
        /// <summary>
        /// Returns the id of the chapter that contains an anchor. This is used, for example, when
        /// selecting an image and wanting to shift the chapter display.
        /// </summary>
        /// <param name="epubBook">Book whose HTML resources and table of contents are searched.</param>
        /// <param name="id">Anchor id to locate. When null or empty the first chapter's HashLocation is returned.</param>
        /// <param name="preferredHtmlIndex">Index of the HTML resource to search first, passed through to FindHtmlContainingId.</param>
        /// <returns>
        /// The closest chapter anchor; a chapter file name when the chapters have no usable anchors;
        /// an empty string when <paramref name="id"/> is empty and no first chapter/anchor exists;
        /// or null when the id or a matching chapter cannot be found.
        /// </returns>
        public static string GetChapterContainingId(EpubBookExt epubBook, string id, int preferredHtmlIndex)
        {
            if (string.IsNullOrEmpty(id))
            {
                // Just return the first chapter. A book can have no chapters at all, so the
                // first chapter itself may be null.
                var firstChapter = GetFirstChapter(epubBook.TableOfContents);
                return firstChapter?.HashLocation ?? "";
            }

            // Step one: find the html with the id
            var idList = EpubWizard.GetIdVariants(id);

            // NOTE(review): foundId (the variant that actually matched) is not used below; the position
            // lookup uses the original id. Confirm HtmlStringIdIndexOf copes with the variants itself.
            var(foundHtml, foundIndex, foundHtmlName, foundId) = EpubWizard.FindHtmlContainingId(epubBook, idList, preferredHtmlIndex);
            if (foundHtml == null)
            {
                if (id != "uiLog")
                {
                    // uiLog isn't always findable for ... reasons
                    App.Error($"IMPOSSIBLE ERROR: completely unable to find id {id} ");
                }
                return null;
            }

            var    pos     = EpubWizard.HtmlStringIdIndexOf(foundHtml, id);
            string closest = null;

            FindClosestAnchorHelper(foundHtml, pos, epubBook.TableOfContents, 0, 3, ref closest);

            // Fixup #1: try the TOC directly
            // FAIL: the order of the fixups is really important. BAEN 2013 short stories doesn't include chapter id values
            // and they have one story with nested sections AND they have duplicate ID values (calibre_pb_1 etc.) AND each story is
            // in its own HTML page. If you select the first story after the story with sub-stories, then we really want to find
            // the story by chapter and don't want the previous story.
            if (closest == null)
            {
                // FAIL: All of me a small town romance: the chapters don't have any anchors at all.
                // Instead of looking for the chapter by id, look for it based on a matching
                // filename. If it matches, return the Filename as the id.
                // No, it's not quite an id, but it is close enough to work :-)

                foreach (var chapter in epubBook.TableOfContents)
                {
                    // The chapters here might have names like ../TextFiles/chapter.xhml
                    // while we're looking for plain TextFiles/chapter.xhml
                    // We have to return the raw chapter name because we'll use it later on,
                    // so the variants are built from the chapter name, not from foundHtmlName.
                    var htmlFileNameVariants = MakeHtmlFileNameVariants(chapter.FileName());
                    foreach (var fname in htmlFileNameVariants)
                    {
                        if (fname == foundHtmlName)
                        {
                            // No break: when several chapters match, the last one wins (existing behavior).
                            closest = chapter.FileName();
                        }
                    }
                }
            }

            // Fixup #2: maybe try the previous HTML
            if (closest == null)
            {
                // Didn't find one; that's probably because we're in a gap. We need to find the same thing for the
                // previous chapter, but with closest set to the end of the html.
                if (foundIndex > 0)
                {
                    foundHtml = FindHtmlByIndex(epubBook, foundIndex - 1);
                    FindClosestAnchorHelper(foundHtml, int.MaxValue, epubBook.TableOfContents, 0, 3, ref closest);
                }
                // First html, and still can't find anything? Give up, we're not going to find anything.
            }

            // All the fixups failed
            if (closest == null)
            {
                App.Error($"ERROR: when asked for matching chapter, can't find it for {id}. Possibly the chapters have no anchors.");
            }

            return closest;
        }