/// <summary>
/// Runs a single check of EpubWizard.HtmlStringIdIndexOf: the id counts as "found" when the
/// returned index is zero or greater, and that result is compared against the expectation.
/// </summary>
/// <param name="html">Html text to search.</param>
/// <param name="id">Id to look for in the html.</param>
/// <param name="expected">True when the id is expected to be found.</param>
/// <returns>0 when the result matches the expectation; 1 (after logging an error) otherwise.</returns>
private static int Test_HtmlStringIdIndexOf_One(string html, string id, bool expected)
{
    bool actual = EpubWizard.HtmlStringIdIndexOf(html, id) >= 0;
    if (actual == expected)
    {
        return 0;
    }

    App.Error($"ERROR: HtmlStringContainsId({html}, {id}) expected {expected} but got {actual}");
    return 1;
}
/// <summary>
/// Walks the chapter tree looking for chapters whose anchor (HashLocation) occurs in
/// <paramref name="foundHtml"/> at or before <paramref name="maxPosition"/>. Every match
/// overwrites <paramref name="closest"/>, so the last matching chapter in traversal order wins.
/// </summary>
/// <param name="foundHtml">Html text that is searched for each chapter anchor.</param>
/// <param name="maxPosition">Largest acceptable index of the anchor inside the html.</param>
/// <param name="chapterList">Chapters to examine at this level. A null list is treated as empty.</param>
/// <param name="currDepth">Depth of <paramref name="chapterList"/> in the chapter tree (0 for the top level).</param>
/// <param name="maxDepth">Sub-chapters are only visited while currDepth + 1 is less than this value.</param>
/// <param name="closest">Receives the anchor of the most recent match; left untouched when nothing matches.</param>
/// <returns>The final value of <paramref name="closest"/>.</returns>
public static string FindClosestAnchorHelper(string foundHtml, int maxPosition, IList<EpubChapter> chapterList, int currDepth, int maxDepth, ref string closest)
{
    if (chapterList == null)
    {
        return closest;
    }

    foreach (var chapter in chapterList)
    {
        // FAIL: for many ebooks, the anchors are complex values that are unlikely to be found in
        // any book. But for Fire at Red Lake, there's a set of transcriber's notes in their own
        // chapter. The chapter anchor for these is "tn". No surprise, there are plenty of words
        // that include a 'tn' and therefore the transcriber's notes are preferentially found.
        var anchor = chapter.HashLocation; // was .Anchor

        // Chapters without an anchor are skipped outright. Searching for a placeholder string
        // instead could (on an accidental hit) overwrite an earlier valid match with null.
        if (anchor != null)
        {
            var chpos = EpubWizard.HtmlStringIdIndexOf(foundHtml, anchor);
            if (chpos >= 0 && chpos <= maxPosition)
            {
                // Found a possible match. The same html might be referred to by multiple chapters,
                // so keep on going; there might be a better match.
                closest = anchor;
            }
        }

        var subChapters = chapter.SubChapters;
        if ((currDepth + 1 < maxDepth) && subChapters != null && subChapters.Count > 0)
        {
            FindClosestAnchorHelper(foundHtml, maxPosition, subChapters, currDepth + 1, maxDepth, ref closest);
        }
    }

    return closest;
}
/// <summary>
/// Returns the id of the chapter that contains an anchor. This is used, for example, when
/// selecting an image and wanting to shift the chapter display.
/// </summary>
/// <param name="epubBook">Book whose html files and table of contents are searched.</param>
/// <param name="id">Id to locate. When null or empty, the first chapter is used instead.</param>
/// <param name="preferredHtmlIndex">Passed through unchanged to EpubWizard.FindHtmlContainingId.</param>
/// <returns>
/// The HashLocation of the matching chapter; the chapter's file name when the match was made by
/// file name (Fixup #1); an empty string when <paramref name="id"/> is empty and no first-chapter
/// HashLocation is available; or null when no html or chapter could be found.
/// </returns>
public static string GetChapterContainingId(EpubBookExt epubBook, string id, int preferredHtmlIndex)
{
    if (string.IsNullOrEmpty(id))
    {
        // Just return the first chapter. The null-conditional covers a missing first chapter.
        return GetFirstChapter(epubBook.TableOfContents)?.HashLocation ?? "";
    }

    // Step one: find the html with the id
    var idList = EpubWizard.GetIdVariants(id);
    var (foundHtml, foundIndex, foundHtmlName, foundId) = EpubWizard.FindHtmlContainingId(epubBook, idList, preferredHtmlIndex);
    if (foundHtml == null)
    {
        if (id != "uiLog")
        {
            // uiLog isn't always findable for ... reasons
            App.Error($"IMPOSSIBLE ERROR: completely unable to find id {id} ");
        }
        return null;
    }

    var pos = EpubWizard.HtmlStringIdIndexOf(foundHtml, id);
    if (pos < 0 && !string.IsNullOrEmpty(foundId) && foundId != id)
    {
        // The html was located through the list of id variants, so the raw id may not be present
        // verbatim. foundId is presumably the variant that actually matched (confirm against
        // FindHtmlContainingId); without this fallback pos stays negative and no anchor can match.
        pos = EpubWizard.HtmlStringIdIndexOf(foundHtml, foundId);
    }

    string closest = null;
    FindClosestAnchorHelper(foundHtml, pos, epubBook.TableOfContents, 0, 3, ref closest);

    // Fixup #1: try the TOC directly
    // FAIL: the order of the fixups is really important. BAEN 2013 short stories doesn't include chapter id values
    // and they have one story with nested sections AND they have duplicate ID values (calibre_pb_1 etc.) AND each story is
    // in its own HTML page. If you select the first story after the story with sub-stories, then we really want to find
    // the story by chapter and don't want the previous story.
    if (closest == null)
    {
        // FAIL: All of me a small town romance: the chapters don't have any anchors at all.
        // Instead of looking for the chapter by id, look for it based on a matching
        // filename. If it matches, return the Filename as the id.
        // No, it's not quite an id, but it is close enough to work :-)
        foreach (var chapter in epubBook.TableOfContents)
        {
            // The chapters here might have names like ../TextFiles/chapter.xhtml
            // while we're looking for plain TextFiles/chapter.xhtml
            // We have to return the raw chapter name because we'll use it later on.
            //
            // An earlier version had a BUG: it built the variants from foundHtmlName (the wrong
            // name) instead of from the chapter's own file name.
            var htmlFileNameVariants = MakeHtmlFileNameVariants(chapter.FileName());
            foreach (var fname in htmlFileNameVariants)
            {
                if (fname == foundHtmlName)
                {
                    // No early exit: when several chapters match, the last one wins.
                    closest = chapter.FileName();
                }
            }
        }
    }

    // Fixup #2: maybe try the previous HTML
    if (closest == null)
    {
        // Didn't find one; that's probably because we're in a gap. We need to find the same thing for the
        // previous chapter, but with the position limit set to the end of the html.
        if (foundIndex > 0)
        {
            foundHtml = FindHtmlByIndex(epubBook, foundIndex - 1);
            if (foundHtml != null)
            {
                FindClosestAnchorHelper(foundHtml, int.MaxValue, epubBook.TableOfContents, 0, 3, ref closest);
            }
        }
        // First html, and still can't find anything? Give up, we're not going to find anything.
    }

    // All the fixups failed
    if (closest == null)
    {
        App.Error($"ERROR: when asked for matching chapter, can't find it for {id}. Possibly the chapters have no anchors.");
    }
    return closest;
}