/// <summary>
/// Runs PublishModel.RemoveUnwantedLanguageData over a simple text page while asking to keep
/// only "en". Each div carrying a lang attribute is tagged with testRemoves='true' or 'false'
/// to record whether the test expects it to be removed. The two textboxes in 'tl' and 'ceb'
/// are expected to vanish; the page div (lang=''), the page label and description (lang='en'),
/// the English textbox and the lang='z' editable are expected to survive.
/// </summary>
public void RemoveUnwantedLanguageData_TextPage_RemovesUnwantedDivs()
{
    var html = @"<!DOCTYPE html> <html> <body> <div class='bloom-page numberedPage customPage side-right A4Landscape bloom-monolingual' data-page='' id='ba82b94f-71ec-48f7-a1cc-a68d5765e255' data-page-number='2' testRemoves='false' lang=''> <div class='pageLabel' data-i18n='TemplateBooks.PageLabel.Just Text' testRemoves='false' lang='en'> Just Text </div> <div class='pageDescription' testRemoves='false' lang='en'></div> <div class='marginBox'> <div class='split-pane-component-inner'> <div aria-describedby='qtip-0' data-hasqtip='true' class='bloom-translationGroup bloom-trailingElement' data-default-languages='auto'> <div role='textbox' class='bloom-editable normal-style bloom-content1 bloom-visibility-code-on' contenteditable='true' testRemoves='false' lang='en'> <p>This is Robin.</p> </div> <div role='textbox' class='bloom-editable normal-style' contenteditable='true' testRemoves='true' lang='tl'> <p>Ako si Robin</p> </div> <div role='textbox' class='bloom-editable normal-style' contenteditable='true' testRemoves='true' lang='ceb'></div> <div class='bloom-editable normal-style' contenteditable='true' testRemoves='false' lang='z'></div> </div> </div> </div> </div> </body> </html>";

    // The queries are shared between the "before" and "after" passes, so name them once.
    const string pageDivXpath = "//div[@lang='' and contains(@class, 'bloom-page')]";
    const string pageLabelXpath = "//div[@lang='en' and @class='pageLabel']";
    const string pageDescriptionXpath = "//div[@lang='en' and @class='pageDescription']";
    const string englishTextboxXpath = "//div[@lang='en' and contains(@class, 'bloom-editable') and @role='textbox']";
    const string tagalogTextboxXpath = "//div[@lang='tl' and contains(@class, 'bloom-editable') and @role='textbox']";
    const string cebuanoTextboxXpath = "//div[@lang='ceb' and contains(@class, 'bloom-editable') and @role='textbox']";
    const string zEditableXpath = "//div[@lang='z' and contains(@class, 'bloom-editable') and @contenteditable='true']";
    const string anyLangDivXpath = "//div[@lang]";
    const string expectedToStayXpath = "//div[@lang and @testRemoves='false']";
    const string expectedToGoXpath = "//div[@lang and @testRemoves='true']";

    var dom = new HtmlDom(html);
    var assertThatDom = AssertThatXmlIn.Dom(dom.RawDom);

    // Baseline: every tagged div is present in the untouched HTML.
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(pageDivXpath, 1);
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(pageLabelXpath, 1);
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(pageDescriptionXpath, 1);
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(englishTextboxXpath, 1);
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(tagalogTextboxXpath, 1);
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(cebuanoTextboxXpath, 1);
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(zEditableXpath, 1);
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(anyLangDivXpath, 7);
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(expectedToStayXpath, 5);
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(expectedToGoXpath, 2);

    // SUT
    PublishModel.RemoveUnwantedLanguageData(dom, new[] { "en" });

    // Afterwards: the same assertion object is reused to inspect the modified DOM.
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(pageDivXpath, 1);          // unchanged
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(pageLabelXpath, 1);        // unchanged
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(pageDescriptionXpath, 1);  // unchanged
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(englishTextboxXpath, 1);   // unchanged
    assertThatDom.HasNoMatchForXpath(tagalogTextboxXpath);                       // removed editable textbox
    assertThatDom.HasNoMatchForXpath(cebuanoTextboxXpath);                       // removed editable textbox
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(zEditableXpath, 1);        // unchanged
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(anyLangDivXpath, 5);       // removed 2 editable textboxes
    assertThatDom.HasSpecifiedNumberOfMatchesForXpath(expectedToStayXpath, 5);
    assertThatDom.HasNoMatchForXpath(expectedToGoXpath);
}
/// <summary>
/// Applies PublishModel.RemoveUnwantedLanguageData to every file matching "*.htm" that lies
/// directly in <paramref name="destDirName"/>, saving each result back over the file it was
/// loaded from.
/// </summary>
/// <param name="destDirName">Folder whose HTML files are rewritten in place.</param>
/// <param name="languagesToInclude">Language codes passed through to
/// PublishModel.RemoveUnwantedLanguageData for each file.</param>
public void RemoveUnwantedLanguageData(string destDirName, IEnumerable<string> languagesToInclude)
{
    // There should be only one html file with the same name as the directory it's in, but let's
    // not make any assumptions here.
    // Take a snapshot of the matching files before touching any of them: the loop body rewrites
    // files inside the very directory being listed, and a lazy Directory.EnumerateFiles would
    // still be walking that directory while it changes.
    var htmlFilePaths = Directory.GetFiles(destDirName, "*.htm");
    foreach (var filepath in htmlFilePaths)
    {
        var xmlDomFromHtmlFile = XmlHtmlConverter.GetXmlDomFromHtmlFile(filepath, false);
        var dom = new HtmlDom(xmlDomFromHtmlFile);
        PublishModel.RemoveUnwantedLanguageData(dom, languagesToInclude);
        XmlHtmlConverter.SaveDOMAsHtml5(dom.RawDom, filepath);
    }
}
/// <summary>
/// The data-div fixture (kDataDivHtml) must pass VerifyDataDivValues both before and after
/// RemoveUnwantedLanguageData runs, even though a third language argument ("en") is supplied.
/// </summary>
public void RemoveUnwantedLanguageData_BloomDataDiv_RemovesNothingEvenWithN1()
{
    var dataDivDom = new HtmlDom(kDataDivHtml);

    // Baseline check on the untouched HTML.
    VerifyDataDivValues(dataDivDom);

    // SUT
    var languagesToKeep = new[] { "en" };
    PublishModel.RemoveUnwantedLanguageData(dataDivDom, languagesToKeep, "en");

    // The very same verification must still hold: nothing may have been removed.
    VerifyDataDivValues(dataDivDom);
}
/// <summary>
/// Xmatter fixture with embedded language divs, keeping "en" and passing "de" as the third
/// argument: German is expected to be preserved while French and Spanish are removed.
/// </summary>
public void RemoveUnwantedLanguageData_PreserveIfEmbeddedDivWantedWithXmatterN1()
{
    var xmatterDom = new HtmlDom(kEmbeddedLangDivsXMatterHtml);

    // Baseline: all embedded divs are present in the original HTML.
    VerifyOriginalEmbeddedDivsAreAllThere(xmatterDom);

    // SUT
    var languagesToKeep = new[] { "en" };
    PublishModel.RemoveUnwantedLanguageData(xmatterDom, languagesToKeep, "de");

    // Modified HTML: German should be preserved, French and Spanish removed.
    VerifyOnlyUnwantedEmbeddedDivsAreRemoved(xmatterDom, true);
}
/// <summary>
/// Xmatter fixture with embedded language divs, keeping "en" and supplying no national
/// language: every embedded div is expected to still be present afterwards.
/// </summary>
public void RemoveUnwantedLanguageData_PreserveIfEmbeddedDivWantedWithXmatter()
{
    var xmatterDom = new HtmlDom(kEmbeddedLangDivsXMatterHtml);

    // Baseline: all embedded divs are present in the original HTML.
    VerifyOriginalEmbeddedDivsAreAllThere(xmatterDom);

    // SUT
    var languagesToKeep = new[] { "en" };
    PublishModel.RemoveUnwantedLanguageData(xmatterDom, languagesToKeep);

    // Modified HTML: nothing is removed from xmatter unless a national language is specified,
    // so the baseline verification must still pass.
    VerifyOriginalEmbeddedDivsAreAllThere(xmatterDom);
}
/// <summary>
/// Content-page fixture with embedded language divs, keeping "en" and passing "de" as the
/// national language: the outcome is expected to match the run without a national language.
/// </summary>
public void RemoveUnwantedLanguageData_PreserveIfEmbeddedDivWantedWithN1()
{
    var contentDom = new HtmlDom(kEmbeddedLangDivsHtml);

    // Baseline: all embedded divs are present in the original HTML.
    VerifyOriginalEmbeddedDivsAreAllThere(contentDom);

    // SUT
    var languagesToKeep = new[] { "en" };
    PublishModel.RemoveUnwantedLanguageData(contentDom, languagesToKeep, "de");

    // Modified HTML: should be the same for a content page regardless of specifying
    // a national language.
    VerifyOnlyUnwantedEmbeddedDivsAreRemoved(contentDom, false);
}
/// <summary>
/// Content-page fixture with embedded language divs, keeping "en" only: after the call,
/// VerifyOnlyUnwantedEmbeddedDivsAreRemoved (with its flag set to false) must pass.
/// </summary>
public void RemoveUnwantedLanguageData_PreserveIfEmbeddedDivWanted()
{
    var contentDom = new HtmlDom(kEmbeddedLangDivsHtml);

    // Baseline: all embedded divs are present in the original HTML.
    VerifyOriginalEmbeddedDivsAreAllThere(contentDom);

    // SUT
    var languagesToKeep = new[] { "en" };
    PublishModel.RemoveUnwantedLanguageData(contentDom, languagesToKeep);

    // Check occurrences in the modified HTML.
    VerifyOnlyUnwantedEmbeddedDivsAreRemoved(contentDom, false);
}
/// <summary>
/// Credits-page fixture, keeping "en" and passing "en" as the third argument. The page is
/// checked with VerifyCreditsPageValues(dom, false) beforehand and VerifyCreditsPageValues(dom, true)
/// afterwards, i.e. the content is expected to have changed.
/// </summary>
public void RemoveUnwantedLanguageData_CreditsPage_RemovesRemovesUnwantedButKeepsN1()
{
    var creditsDom = new HtmlDom(kCreditsPageHtml);

    // Baseline check on the untouched HTML.
    VerifyCreditsPageValues(creditsDom, false);

    // SUT
    var languagesToKeep = new[] { "en" };
    PublishModel.RemoveUnwantedLanguageData(creditsDom, languagesToKeep, "en");

    // Modified HTML: this should NOT be exactly the same as before.
    VerifyCreditsPageValues(creditsDom, true);
}
/// <summary>
/// Front-cover fixture, keeping "en" with no national language: the same
/// VerifyFrontCoverValues(dom, false) check must pass both before and after the call.
/// </summary>
public void RemoveUnwantedLanguageData_FrontCoverPage_RemovesNothing()
{
    var frontCoverDom = new HtmlDom(kFrontCoverHtml);

    // Baseline check on the untouched HTML.
    VerifyFrontCoverValues(frontCoverDom, false);

    // SUT
    var languagesToKeep = new[] { "en" };
    PublishModel.RemoveUnwantedLanguageData(frontCoverDom, languagesToKeep);

    // Modified HTML: this should be exactly the same as before.
    VerifyFrontCoverValues(frontCoverDom, false);
}
/// <summary>
/// Builds a modified copy of the book under <paramref name="temp"/> (via
/// PublishHelper.MakeDeviceXmatterTempBook) and applies a fixed sequence of clean-up and
/// conversion steps to it, then saves and returns that copy. The order of the steps matters;
/// see the comments on the individual steps below.
/// </summary>
/// <param name="bookFolderPath">Folder of the source book; used to locate its html file and
/// passed to MakeDeviceXmatterTempBook.</param>
/// <param name="bookServer">Passed through to MakeDeviceXmatterTempBook.</param>
/// <param name="temp">Temporary folder inside which the working copy's folder is created.</param>
/// <param name="progress">Receives the batched warning messages and is passed to EmbedFonts.</param>
/// <param name="isTemplateBook">Passed through to MakeDeviceXmatterTempBook. Note that the later
/// template checks in this method read modifiedBook.IsTemplateBook, not this parameter.</param>
/// <param name="creator">Written to the "bloom-digital-creator" meta element and passed to
/// AddDistributionFile.</param>
/// <param name="settings">Optional publish settings; every use below tolerates null.</param>
/// <returns>The modified book, after it has been saved.</returns>
public static Book.Book PrepareBookForBloomReader(string bookFolderPath, BookServer bookServer, TemporaryFolder temp, IWebSocketProgress progress, bool isTemplateBook, string creator = kCreatorBloom, AndroidPublishSettings settings = null)
{
    // MakeDeviceXmatterTempBook needs to be able to copy customCollectionStyles.css etc into parent of bookFolderPath
    // And bloom-player expects folder name to match html file name.
    var htmPath = BookStorage.FindBookHtmlInFolder(bookFolderPath);
    var tentativeBookFolderPath = Path.Combine(temp.FolderPath,
        // Windows directory names cannot have trailing periods, but FileNameWithoutExtension can have these. (BH-6097)
        BookStorage.SanitizeNameForFileSystem(Path.GetFileNameWithoutExtension(htmPath)));
    Directory.CreateDirectory(tentativeBookFolderPath);
    var modifiedBook = PublishHelper.MakeDeviceXmatterTempBook(bookFolderPath, bookServer, tentativeBookFolderPath, isTemplateBook);
    // Motion is treated as off when no settings were supplied.
    modifiedBook.SetMotionAttributesOnBody(settings?.Motion ?? false);
    // Although usually tentativeBookFolderPath and modifiedBook.FolderPath are the same, there are some exceptions
    // In the process of bringing a book up-to-date (called by MakeDeviceXmatterTempBook), the folder path may change.
    // For example, it could change if the original folder path contains punctuation marks now deemed dangerous.
    // The book will be moved to the sanitized version of the file name instead.
    // It can also happen if we end up picking a different version of the title (i.e. in a different language)
    // than the one written to the .htm file.
    string modifiedBookFolderPath = modifiedBook.FolderPath;
    // Quizzes are only processed when the collection has enterprise features.
    if (modifiedBook.CollectionSettings.HaveEnterpriseFeatures)
    {
        ProcessQuizzes(modifiedBookFolderPath, modifiedBook.RawDom);
    }
    // Right here, let's maintain the history of what the BloomdVersion signifies to a reader.
    // Version 1 (as opposed to no BloomdVersion field): the bookFeatures property may be
    // used to report features analytics (with earlier bloompub's, the reader must use its own logic)
    modifiedBook.Storage.BookInfo.MetaData.BloomdVersion = 1;
    // The distribution tag is always updated (with null when there are no settings);
    // the bookshelf tag is only updated when a non-empty value was supplied.
    modifiedBook.Storage.BookInfo.UpdateOneSingletonTag("distribution", settings?.DistributionTag);
    if (!string.IsNullOrEmpty(settings?.BookshelfTag))
    {
        modifiedBook.Storage.BookInfo.UpdateOneSingletonTag("bookshelf", settings.BookshelfTag);
    }
    if (settings?.RemoveInteractivePages ?? false)
    {
        // Collect the activity pages into an array first so that removing them
        // does not disturb the enumeration of page elements.
        var activities = modifiedBook.GetPageElements().Cast <XmlNode>()
            .Where(x => x is XmlElement elt && HtmlDom.IsActivityPage(elt)).ToArray();
        foreach (var page in activities)
        {
            page.ParentNode.RemoveChild(page);
        }
    }
    if (settings?.LanguagesToInclude != null)
    {
        // An explicit language selection trims both the DOM and the language rules in the CSS files.
        PublishModel.RemoveUnwantedLanguageData(modifiedBook.OurHtmlDom, settings.LanguagesToInclude, modifiedBook.BookData.MetadataLanguage1IsoCode);
        PublishModel.RemoveUnwantedLanguageRulesFromCssFiles(modifiedBook.FolderPath, settings.LanguagesToInclude);
    }
    else if (Program.RunningHarvesterMode && modifiedBook.OurHtmlDom.SelectSingleNode(BookStorage.ComicalXpath) != null)
    {
        // This indicates that we are harvesting a book with comic speech bubbles or other overlays (Overlay Tool).
        // For books with overlays, we only publish a single language. It's not currently feasible to
        // allow the reader to switch language in a book with overlays, because typically that requires
        // adjusting the positions of the overlays, and we don't yet support having more than one
        // set of overlay locations in a single book. See BL-7912 for some ideas on how we might
        // eventually improve this. In the meantime, switching language would have bad effects,
        // and if you can't switch language, there's no point in the book containing more than one.
        var languagesToInclude = new string[1] { modifiedBook.BookData.Language1.Iso639Code };
        PublishModel.RemoveUnwantedLanguageData(modifiedBook.OurHtmlDom, languagesToInclude, modifiedBook.BookData.MetadataLanguage1IsoCode);
    }
    // Do this after processing interactive pages, as they can satisfy the criteria for being 'blank'
    HashSet <string> fontsUsed = null;
    using (var helper = new PublishHelper())
    {
        helper.ControlForInvoke = ControlForInvoke;
        ISet <string> warningMessages = new HashSet <string>();
        helper.RemoveUnwantedContent(modifiedBook.OurHtmlDom, modifiedBook, false, warningMessages, keepPageLabels: settings?.WantPageLabels ?? false);
        PublishHelper.SendBatchedWarningMessagesToProgress(warningMessages, progress);
        // Capture the fonts before the helper is disposed; they are needed by EmbedFonts below.
        fontsUsed = helper.FontsUsed;
    }
    // Blank pages are left in place for template books.
    if (!modifiedBook.IsTemplateBook)
    {
        modifiedBook.RemoveBlankPages(settings?.LanguagesToInclude);
    }
    // See https://issues.bloomlibrary.org/youtrack/issue/BL-6835.
    RemoveInvisibleImageElements(modifiedBook);
    modifiedBook.Storage.CleanupUnusedSupportFiles(/*isForPublish:*/ true, settings?.AudioLanguagesToExclude);
    // placeHolder.png is deleted from the output folder, except for template books.
    if (!modifiedBook.IsTemplateBook && RobustFile.Exists(Path.Combine(modifiedBookFolderPath, "placeHolder.png")))
    {
        RobustFile.Delete(Path.Combine(modifiedBookFolderPath, "placeHolder.png"));
    }
    modifiedBook.RemoveObsoleteAudioMarkup();
    // We want these to run after RemoveUnwantedContent() so that the metadata will more accurately reflect
    // the subset of contents that are included in the .bloompub
    // Note that we generally want to disable features here, but not enable them, especially while
    // running harvester! See https://issues.bloomlibrary.org/youtrack/issue/BL-8995.
    var enableBlind = modifiedBook.BookInfo.MetaData.Feature_Blind || !Program.RunningHarvesterMode;
    // BloomReader and BloomPlayer are not using the SignLanguage feature, and it's misleading to
    // assume the existence of videos implies sign language. There is a separate "Video" feature
    // now that gets set automatically. (Automated setting of the Blind feature is imperfect, but
    // more meaningful than trying to automate sign language just based on one video existing.)
    var enableSignLanguage = modifiedBook.BookInfo.MetaData.Feature_SignLanguage;
    modifiedBook.UpdateMetadataFeatures(
        isBlindEnabled: enableBlind,
        isSignLanguageEnabled: enableSignLanguage,
        isTalkingBookEnabled: true, // talkingBook is only ever set automatically as far as I can tell.
        allowedLanguages: null // allow all because we've already filtered out the unwanted ones from the dom above.
    );
    modifiedBook.SetAnimationDurationsFromAudioDurations();
    modifiedBook.OurHtmlDom.SetMedia("bloomReader");
    modifiedBook.OurHtmlDom.AddOrReplaceMetaElement("bloom-digital-creator", creator);
    EmbedFonts(modifiedBook, progress, fontsUsed, FontFileFinder.GetInstance(Program.RunningUnitTests));
    // Look the html file up again in the book's current folder rather than reusing htmPath,
    // which points into the original bookFolderPath.
    var bookFile = BookStorage.FindBookHtmlInFolder(modifiedBook.FolderPath);
    StripImgIfWeCannotFindFile(modifiedBook.RawDom, bookFile);
    StripContentEditableAndTabIndex(modifiedBook.RawDom);
    InsertReaderStylesheet(modifiedBook.RawDom);
    // Copy the distributed readerStyles.css into the book folder.
    RobustFile.Copy(FileLocationUtilities.GetFileDistributedWithApplication(BloomFileLocator.BrowserRoot, "publish", "ReaderPublish", "readerStyles.css"), Path.Combine(modifiedBookFolderPath, "readerStyles.css"));
    ConvertImagesToBackground(modifiedBook.RawDom);
    AddDistributionFile(modifiedBookFolderPath, creator, settings);
    modifiedBook.Save();
    return(modifiedBook);
}
/// <summary>
/// Builds a modified copy of the book under <paramref name="temp"/> (via
/// PublishHelper.MakeDeviceXmatterTempBook) and applies a fixed sequence of clean-up and
/// conversion steps to it, then saves and returns that copy. The order of the steps matters;
/// see the comments on the individual steps below.
/// </summary>
/// <param name="bookFolderPath">Folder of the source book; used to locate its html file and
/// passed to MakeDeviceXmatterTempBook.</param>
/// <param name="bookServer">Passed through to MakeDeviceXmatterTempBook.</param>
/// <param name="temp">Temporary folder inside which the working copy's folder is created.</param>
/// <param name="progress">Receives the batched warning messages and is passed to EmbedFonts.</param>
/// <param name="creator">Written to the "bloom-digital-creator" meta element.</param>
/// <param name="settings">Optional publish settings; every use below tolerates null.</param>
/// <returns>The modified book, after it has been saved.</returns>
public static Book.Book PrepareBookForBloomReader(string bookFolderPath, BookServer bookServer, TemporaryFolder temp, WebSocketProgress progress, string creator = "bloom", AndroidPublishSettings settings = null)
{
    // MakeDeviceXmatterTempBook needs to be able to copy customCollectionStyles.css etc into parent of bookFolderPath
    // And bloom-player expects folder name to match html file name.
    var htmPath = BookStorage.FindBookHtmlInFolder(bookFolderPath);
    var tentativeBookFolderPath = Path.Combine(temp.FolderPath,
        // Windows directory names cannot have trailing periods, but FileNameWithoutExtension can have these. (BH-6097)
        // Sanitize the name so that creating the directory behaves predictably for such titles.
        BookStorage.SanitizeNameForFileSystem(Path.GetFileNameWithoutExtension(htmPath)));
    Directory.CreateDirectory(tentativeBookFolderPath);
    var modifiedBook = PublishHelper.MakeDeviceXmatterTempBook(bookFolderPath, bookServer, tentativeBookFolderPath);

    // Although usually tentativeBookFolderPath and modifiedBook.FolderPath are the same, there are some exceptions
    // In the process of bringing a book up-to-date (called by MakeDeviceXmatterTempBook), the folder path may change.
    // For example, it could change if the original folder path contains punctuation marks now deemed dangerous.
    // The book will be moved to the sanitized version of the file name instead.
    // It can also happen if we end up picking a different version of the title (i.e. in a different language)
    // than the one written to the .htm file.
    string modifiedBookFolderPath = modifiedBook.FolderPath;

    // Quizzes are only processed when the collection has enterprise features.
    if (modifiedBook.CollectionSettings.HaveEnterpriseFeatures)
    {
        ProcessQuizzes(modifiedBookFolderPath, modifiedBook.RawDom);
    }

    // Right here, let's maintain the history of what the BloomdVersion signifies to a reader.
    // Version 1 (as opposed to no BloomdVersion field): the bookFeatures property may be
    // used to report features analytics (with earlier bloomd's, the reader must use its own logic)
    modifiedBook.Storage.BookInfo.MetaData.BloomdVersion = 1;

    if (settings?.LanguagesToInclude != null)
    {
        PublishModel.RemoveUnwantedLanguageData(modifiedBook.OurHtmlDom, settings.LanguagesToInclude, modifiedBook.CollectionSettings.Language2.Iso639Code);
    }
    else if (Program.RunningHarvesterMode && modifiedBook.OurHtmlDom.SelectSingleNode(BookStorage.ComicalXpath) != null)
    {
        // This indicates that we are harvesting a book with comic speech bubbles.
        // For comical books, we only publish a single language. It's not currently feasible to
        // allow the reader to switch language in a Comical book, because typically that requires
        // adjusting the positions of the bubbles, and we don't yet support having more than one
        // set of bubble locations in a single book. See BL-7912 for some ideas on how we might
        // eventually improve this. In the meantime, switching language would have bad effects,
        // and if you can't switch language, there's no point in the book containing more than one.
        var languagesToInclude = new string[1] { modifiedBook.CollectionSettings.Language1.Iso639Code };
        PublishModel.RemoveUnwantedLanguageData(modifiedBook.OurHtmlDom, languagesToInclude, modifiedBook.CollectionSettings.Language2.Iso639Code);
    }

    // Do this after processing interactive pages, as they can satisfy the criteria for being 'blank'
    HashSet<string> fontsUsed = null;
    using (var helper = new PublishHelper())
    {
        helper.ControlForInvoke = ControlForInvoke;
        ISet<string> warningMessages = new HashSet<string>();
        helper.RemoveUnwantedContent(modifiedBook.OurHtmlDom, modifiedBook, false, warningMessages);
        PublishHelper.SendBatchedWarningMessagesToProgress(warningMessages, progress);
        // Capture the fonts before the helper is disposed; they are needed by EmbedFonts below.
        fontsUsed = helper.FontsUsed;
    }

    modifiedBook.RemoveBlankPages(settings?.LanguagesToInclude);

    // See https://issues.bloomlibrary.org/youtrack/issue/BL-6835.
    RemoveInvisibleImageElements(modifiedBook);
    modifiedBook.Storage.CleanupUnusedImageFiles(keepFilesForEditing: false);

    // Delete placeHolder.png from the output folder if it is present.
    var placeHolderPath = Path.Combine(modifiedBookFolderPath, "placeHolder.png");
    if (RobustFile.Exists(placeHolderPath))
    {
        RobustFile.Delete(placeHolderPath);
    }

    modifiedBook.Storage.CleanupUnusedAudioFiles(isForPublish: true);
    modifiedBook.RemoveObsoleteAudioMarkup();
    modifiedBook.Storage.CleanupUnusedVideoFiles();

    // We want these to run after RemoveUnwantedContent() so that the metadata will more accurately reflect
    // the subset of contents that are included in the .bloomd
    modifiedBook.UpdateMetadataFeatures(
        isBlindEnabled: true,
        isSignLanguageEnabled: true,
        isTalkingBookEnabled: true);

    modifiedBook.SetAnimationDurationsFromAudioDurations();

    modifiedBook.OurHtmlDom.SetMedia("bloomReader");
    modifiedBook.OurHtmlDom.AddOrReplaceMetaElement("bloom-digital-creator", creator);
    EmbedFonts(modifiedBook, progress, fontsUsed, new FontFileFinder());

    // Look the html file up again in the book's current folder rather than reusing htmPath,
    // which points into the original bookFolderPath.
    var bookFile = BookStorage.FindBookHtmlInFolder(modifiedBook.FolderPath);
    StripImgIfWeCannotFindFile(modifiedBook.RawDom, bookFile);
    StripContentEditableAndTabIndex(modifiedBook.RawDom);
    InsertReaderStylesheet(modifiedBook.RawDom);
    // Copy the distributed readerStyles.css into the book folder.
    RobustFile.Copy(FileLocationUtilities.GetFileDistributedWithApplication(BloomFileLocator.BrowserRoot, "publish", "ReaderPublish", "readerStyles.css"),
        Path.Combine(modifiedBookFolderPath, "readerStyles.css"));
    ConvertImagesToBackground(modifiedBook.RawDom);

    modifiedBook.Save();

    return modifiedBook;
}