Ejemplo n.º 1
0
        /// <summary>
        /// Runs RemoveUnwantedLanguageData over a single text page while keeping only "en", and checks
        /// that exactly the divs marked testRemoves='true' in the fixture disappear.
        /// </summary>
        public void RemoveUnwantedLanguageData_TextPage_RemovesUnwantedDivs()
        {
            // XPath probes shared by the "before" and "after" checks.
            const string bloomPageXpath       = "//div[@lang='' and contains(@class, 'bloom-page')]";
            const string pageLabelXpath       = "//div[@lang='en' and @class='pageLabel']";
            const string pageDescriptionXpath = "//div[@lang='en' and @class='pageDescription']";
            const string enTextboxXpath       = "//div[@lang='en' and contains(@class, 'bloom-editable') and @role='textbox']";
            const string tlTextboxXpath       = "//div[@lang='tl' and contains(@class, 'bloom-editable') and @role='textbox']";
            const string cebTextboxXpath      = "//div[@lang='ceb' and contains(@class, 'bloom-editable') and @role='textbox']";
            const string zEditableXpath       = "//div[@lang='z' and contains(@class, 'bloom-editable') and @contenteditable='true']";
            const string anyLangDivXpath      = "//div[@lang]";
            const string expectedKeptXpath    = "//div[@lang and @testRemoves='false']";
            const string expectedGoneXpath    = "//div[@lang and @testRemoves='true']";

            // Fixture: every div carrying a lang attribute is tagged with testRemoves to record
            // whether the test expects it to be removed.
            var pageHtml = @"<!DOCTYPE html>
<html>
<body>
	<div class='bloom-page numberedPage customPage side-right A4Landscape bloom-monolingual' data-page='' id='ba82b94f-71ec-48f7-a1cc-a68d5765e255' data-page-number='2' testRemoves='false' lang=''>
		<div class='pageLabel' data-i18n='TemplateBooks.PageLabel.Just Text' testRemoves='false' lang='en'>
			Just Text
		</div>
		<div class='pageDescription' testRemoves='false' lang='en'></div>
		<div class='marginBox'>
			<div class='split-pane-component-inner'>
				<div aria-describedby='qtip-0' data-hasqtip='true' class='bloom-translationGroup bloom-trailingElement' data-default-languages='auto'>
					<div role='textbox' class='bloom-editable normal-style bloom-content1 bloom-visibility-code-on' contenteditable='true' testRemoves='false' lang='en'>
						<p>This is Robin.</p>
					</div>
					<div role='textbox' class='bloom-editable normal-style' contenteditable='true' testRemoves='true' lang='tl'>
						<p>Ako si Robin</p>
					</div>
					<div role='textbox' class='bloom-editable normal-style' contenteditable='true' testRemoves='true' lang='ceb'></div>
					<div class='bloom-editable normal-style' contenteditable='true' testRemoves='false' lang='z'></div>
				</div>
			</div>
		</div>
	</div>
</body>
</html>";
            var pageDom   = new HtmlDom(pageHtml);
            var domAssert = AssertThatXmlIn.Dom(pageDom.RawDom);

            // Before: each probed element is present exactly once.
            domAssert.HasSpecifiedNumberOfMatchesForXpath(bloomPageXpath, 1);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(pageLabelXpath, 1);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(pageDescriptionXpath, 1);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(enTextboxXpath, 1);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(tlTextboxXpath, 1);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(cebTextboxXpath, 1);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(zEditableXpath, 1);

            // Before: seven lang-bearing divs in total, five expected to stay and two expected to go.
            domAssert.HasSpecifiedNumberOfMatchesForXpath(anyLangDivXpath, 7);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(expectedKeptXpath, 5);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(expectedGoneXpath, 2);

            // SUT
            var languagesToKeep = new[] { "en" };
            PublishModel.RemoveUnwantedLanguageData(pageDom, languagesToKeep);

            // After: the page, label, description, "en" textbox and "z" editable are untouched...
            domAssert.HasSpecifiedNumberOfMatchesForXpath(bloomPageXpath, 1);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(pageLabelXpath, 1);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(pageDescriptionXpath, 1);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(enTextboxXpath, 1);
            // ...while the "tl" and "ceb" editable textboxes are gone...
            domAssert.HasNoMatchForXpath(tlTextboxXpath);
            domAssert.HasNoMatchForXpath(cebTextboxXpath);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(zEditableXpath, 1);

            // ...which leaves five lang-bearing divs, none of them marked for removal.
            domAssert.HasSpecifiedNumberOfMatchesForXpath(anyLangDivXpath, 5);
            domAssert.HasSpecifiedNumberOfMatchesForXpath(expectedKeptXpath, 5);
            domAssert.HasNoMatchForXpath(expectedGoneXpath);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Applies <c>PublishModel.RemoveUnwantedLanguageData</c> to every file matching "*.htm" directly
 /// inside <paramref name="destDirName"/> and saves each result back over the same file.
 /// </summary>
 /// <param name="destDirName">Directory whose HTML files are rewritten in place.</param>
 /// <param name="languagesToInclude">Languages handed on to <c>PublishModel.RemoveUnwantedLanguageData</c>.</param>
 public void RemoveUnwantedLanguageData(string destDirName, IEnumerable <string> languagesToInclude)
 {
     // There should be only one html file with the same name as the directory it's in, but let's
     // not make any assumptions here.
     // Take a complete snapshot of the matching paths first: the loop body writes back into this
     // same directory, and a lazy Directory.EnumerateFiles walk over a directory that is being
     // modified is not guaranteed to visit each file exactly once.
     var htmlFilePaths = Directory.GetFiles(destDirName, "*.htm");
     foreach (var filepath in htmlFilePaths)
     {
         var xmlDomFromHtmlFile = XmlHtmlConverter.GetXmlDomFromHtmlFile(filepath, false);
         var dom = new HtmlDom(xmlDomFromHtmlFile);
         PublishModel.RemoveUnwantedLanguageData(dom, languagesToInclude);
         XmlHtmlConverter.SaveDOMAsHtml5(dom.RawDom, filepath);
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// The data-div fixture must verify identically before and after the call,
        /// even when a third argument ("en") is supplied.
        /// </summary>
        public void RemoveUnwantedLanguageData_BloomDataDiv_RemovesNothingEvenWithN1()
        {
            // Arrange: confirm the fixture starts out in the expected state.
            var dataDivDom = new HtmlDom(kDataDivHtml);
            VerifyDataDivValues(dataDivDom);

            // Act (SUT)
            var languagesToKeep = new[] { "en" };
            PublishModel.RemoveUnwantedLanguageData(dataDivDom, languagesToKeep, "en");

            // Assert: the very same verification still passes, i.e. nothing was removed.
            VerifyDataDivValues(dataDivDom);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Xmatter fixture with embedded language divs, keeping "en" and passing "de" as the third
        /// argument: German is expected to survive while French and Spanish are removed.
        /// </summary>
        public void RemoveUnwantedLanguageData_PreserveIfEmbeddedDivWantedWithXmatterN1()
        {
            // Arrange: all embedded divs are present to begin with.
            var xmatterDom = new HtmlDom(kEmbeddedLangDivsXMatterHtml);
            VerifyOriginalEmbeddedDivsAreAllThere(xmatterDom);

            // Act (SUT)
            var languagesToKeep = new[] { "en" };
            PublishModel.RemoveUnwantedLanguageData(xmatterDom, languagesToKeep, "de");

            // Assert: German preserved; French and Spanish gone.
            VerifyOnlyUnwantedEmbeddedDivsAreRemoved(xmatterDom, true);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Xmatter fixture with embedded language divs, keeping "en" and giving no national language:
        /// nothing is expected to be removed from the xmatter.
        /// </summary>
        public void RemoveUnwantedLanguageData_PreserveIfEmbeddedDivWantedWithXmatter()
        {
            // Arrange: all embedded divs are present to begin with.
            var xmatterDom = new HtmlDom(kEmbeddedLangDivsXMatterHtml);
            VerifyOriginalEmbeddedDivsAreAllThere(xmatterDom);

            // Act (SUT)
            var languagesToKeep = new[] { "en" };
            PublishModel.RemoveUnwantedLanguageData(xmatterDom, languagesToKeep);

            // Assert: without a national language, the xmatter content is left exactly as it was.
            VerifyOriginalEmbeddedDivsAreAllThere(xmatterDom);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Content-page fixture with embedded language divs, keeping "en" and passing "de" as the
        /// third argument: the outcome is expected to match the run without a national language.
        /// </summary>
        public void RemoveUnwantedLanguageData_PreserveIfEmbeddedDivWantedWithN1()
        {
            // Arrange: all embedded divs are present to begin with.
            var contentDom = new HtmlDom(kEmbeddedLangDivsHtml);
            VerifyOriginalEmbeddedDivsAreAllThere(contentDom);

            // Act (SUT)
            var languagesToKeep = new[] { "en" };
            PublishModel.RemoveUnwantedLanguageData(contentDom, languagesToKeep, "de");

            // Assert: for a content page, specifying the national language makes no difference.
            VerifyOnlyUnwantedEmbeddedDivsAreRemoved(contentDom, false);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Content-page fixture with embedded language divs, keeping only "en":
        /// only the unwanted embedded divs are expected to be removed.
        /// </summary>
        public void RemoveUnwantedLanguageData_PreserveIfEmbeddedDivWanted()
        {
            // Arrange: all embedded divs are present to begin with.
            var contentDom = new HtmlDom(kEmbeddedLangDivsHtml);
            VerifyOriginalEmbeddedDivsAreAllThere(contentDom);

            // Act (SUT)
            var languagesToKeep = new[] { "en" };
            PublishModel.RemoveUnwantedLanguageData(contentDom, languagesToKeep);

            // Assert
            VerifyOnlyUnwantedEmbeddedDivsAreRemoved(contentDom, false);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Credits-page fixture, keeping "en" and passing "en" as the third argument:
        /// the page is expected to change, so the verification flag flips from false to true.
        /// </summary>
        public void RemoveUnwantedLanguageData_CreditsPage_RemovesRemovesUnwantedButKeepsN1()
        {
            // Arrange: verify the untouched credits page.
            var creditsDom = new HtmlDom(kCreditsPageHtml);
            VerifyCreditsPageValues(creditsDom, false);

            // Act (SUT)
            var languagesToKeep = new[] { "en" };
            PublishModel.RemoveUnwantedLanguageData(creditsDom, languagesToKeep, "en");

            // Assert: unlike the "removes nothing" tests, the result must differ from the original.
            VerifyCreditsPageValues(creditsDom, true);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Front-cover fixture, keeping only "en": the same verification must pass
        /// before and after the call, i.e. nothing is removed.
        /// </summary>
        public void RemoveUnwantedLanguageData_FrontCoverPage_RemovesNothing()
        {
            // Arrange: verify the untouched front cover.
            var frontCoverDom = new HtmlDom(kFrontCoverHtml);
            VerifyFrontCoverValues(frontCoverDom, false);

            // Act (SUT)
            var languagesToKeep = new[] { "en" };
            PublishModel.RemoveUnwantedLanguageData(frontCoverDom, languagesToKeep);

            // Assert: identical verification to the one run before the call.
            VerifyFrontCoverValues(frontCoverDom, false);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds a modified working copy of the book under <paramref name="temp"/> and runs it through
        /// the publish pipeline below (language filtering, content/page removal, file cleanup, metadata
        /// update, font embedding, reader stylesheet), then saves and returns that copy.
        /// The order of the steps matters; see the comments on the individual steps.
        /// </summary>
        /// <param name="bookFolderPath">Folder of the source book; its HTML file is located with BookStorage.FindBookHtmlInFolder.</param>
        /// <param name="bookServer">Passed through to PublishHelper.MakeDeviceXmatterTempBook.</param>
        /// <param name="temp">Temporary folder under which the working copy's directory is created.</param>
        /// <param name="progress">Receives the batched warning messages and is passed to EmbedFonts.</param>
        /// <param name="isTemplateBook">Passed through to PublishHelper.MakeDeviceXmatterTempBook.</param>
        /// <param name="creator">Written to the "bloom-digital-creator" meta element and passed to AddDistributionFile.</param>
        /// <param name="settings">Optional publish settings; every use is null-tolerant. Read here: Motion, DistributionTag,
        /// BookshelfTag, RemoveInteractivePages, LanguagesToInclude, WantPageLabels, AudioLanguagesToExclude.</param>
        /// <returns>The modified book, after it has been saved.</returns>
        public static Book.Book PrepareBookForBloomReader(string bookFolderPath, BookServer bookServer,
                                                          TemporaryFolder temp,
                                                          IWebSocketProgress progress, bool isTemplateBook,
                                                          string creator = kCreatorBloom,
                                                          AndroidPublishSettings settings = null)
        {
            // MakeDeviceXmatterTempBook needs to be able to copy customCollectionStyles.css etc into parent of bookFolderPath
            // And bloom-player expects folder name to match html file name.
            var htmPath = BookStorage.FindBookHtmlInFolder(bookFolderPath);
            var tentativeBookFolderPath = Path.Combine(temp.FolderPath,
                                                       // Windows directory names cannot have trailing periods, but FileNameWithoutExtension can have these.  (BH-6097)
                                                       BookStorage.SanitizeNameForFileSystem(Path.GetFileNameWithoutExtension(htmPath)));

            Directory.CreateDirectory(tentativeBookFolderPath);
            var modifiedBook = PublishHelper.MakeDeviceXmatterTempBook(bookFolderPath, bookServer,
                                                                       tentativeBookFolderPath, isTemplateBook);

            // Motion defaults to off when no settings were supplied.
            modifiedBook.SetMotionAttributesOnBody(settings?.Motion ?? false);

            // Although usually tentativeBookFolderPath and modifiedBook.FolderPath are the same, there are some exceptions
            // In the process of bringing a book up-to-date (called by MakeDeviceXmatterTempBook), the folder path may change.
            // For example, it could change if the original folder path contains punctuation marks now deemed dangerous.
            //    The book will be moved to the sanitized version of the file name instead.
            // It can also happen if we end up picking a different version of the title (i.e. in a different language)
            //    than the one written to the .htm file.
            string modifiedBookFolderPath = modifiedBook.FolderPath;

            // Quizzes are only processed when the collection has enterprise features.
            if (modifiedBook.CollectionSettings.HaveEnterpriseFeatures)
            {
                ProcessQuizzes(modifiedBookFolderPath, modifiedBook.RawDom);
            }

            // Right here, let's maintain the history of what the BloomdVersion signifies to a reader.
            // Version 1 (as opposed to no BloomdVersion field): the bookFeatures property may be
            // used to report features analytics (with earlier bloompub's, the reader must use its own logic)
            modifiedBook.Storage.BookInfo.MetaData.BloomdVersion = 1;

            // The distribution tag is always updated (possibly with null); the bookshelf tag only when one is set.
            modifiedBook.Storage.BookInfo.UpdateOneSingletonTag("distribution", settings?.DistributionTag);
            if (!string.IsNullOrEmpty(settings?.BookshelfTag))
            {
                modifiedBook.Storage.BookInfo.UpdateOneSingletonTag("bookshelf", settings.BookshelfTag);
            }

            if (settings?.RemoveInteractivePages ?? false)
            {
                // ToArray() snapshots the activity pages so they can be detached from their parents while iterating.
                var activities = modifiedBook.GetPageElements().Cast <XmlNode>()
                                 .Where(x => x is XmlElement elt && HtmlDom.IsActivityPage(elt)).ToArray();
                foreach (var page in activities)
                {
                    page.ParentNode.RemoveChild(page);
                }
            }

            // An explicit language list filters both the DOM and the CSS files.
            if (settings?.LanguagesToInclude != null)
            {
                PublishModel.RemoveUnwantedLanguageData(modifiedBook.OurHtmlDom, settings.LanguagesToInclude, modifiedBook.BookData.MetadataLanguage1IsoCode);
                PublishModel.RemoveUnwantedLanguageRulesFromCssFiles(modifiedBook.FolderPath, settings.LanguagesToInclude);
            }
            else if (Program.RunningHarvesterMode && modifiedBook.OurHtmlDom.SelectSingleNode(BookStorage.ComicalXpath) != null)
            {
                // This indicates that we are harvesting a book with comic speech bubbles or other overlays (Overlay Tool).
                // For books with overlays, we only publish a single language. It's not currently feasible to
                // allow the reader to switch language in a book with overlays, because typically that requires
                // adjusting the positions of the overlays, and we don't yet support having more than one
                // set of overlay locations in a single book. See BL-7912 for some ideas on how we might
                // eventually improve this. In the meantime, switching language would have bad effects,
                // and if you can't switch language, there's no point in the book containing more than one.
                // NOTE(review): unlike the branch above, this branch does not touch the CSS files — confirm that is intended.
                var languagesToInclude = new string[1] {
                    modifiedBook.BookData.Language1.Iso639Code
                };
                PublishModel.RemoveUnwantedLanguageData(modifiedBook.OurHtmlDom, languagesToInclude, modifiedBook.BookData.MetadataLanguage1IsoCode);
            }

            // Do this after processing interactive pages, as they can satisfy the criteria for being 'blank'
            // NOTE(review): this remark presumably applies to the RemoveBlankPages call further down,
            // not to the fontsUsed declaration it sits above — confirm.
            HashSet <string> fontsUsed = null;

            using (var helper = new PublishHelper())
            {
                helper.ControlForInvoke = ControlForInvoke;
                ISet <string> warningMessages = new HashSet <string>();
                helper.RemoveUnwantedContent(modifiedBook.OurHtmlDom, modifiedBook, false,
                                             warningMessages, keepPageLabels: settings?.WantPageLabels ?? false);
                PublishHelper.SendBatchedWarningMessagesToProgress(warningMessages, progress);
                // Capture the font set before the helper is disposed; it is needed by EmbedFonts below.
                fontsUsed = helper.FontsUsed;
            }
            // Blank pages are left alone in template books.
            if (!modifiedBook.IsTemplateBook)
            {
                modifiedBook.RemoveBlankPages(settings?.LanguagesToInclude);
            }

            // See https://issues.bloomlibrary.org/youtrack/issue/BL-6835.
            RemoveInvisibleImageElements(modifiedBook);
            modifiedBook.Storage.CleanupUnusedSupportFiles(/*isForPublish:*/ true, settings?.AudioLanguagesToExclude);
            // placeHolder.png is deleted from the output unless this is a template book.
            if (!modifiedBook.IsTemplateBook && RobustFile.Exists(Path.Combine(modifiedBookFolderPath, "placeHolder.png")))
            {
                RobustFile.Delete(Path.Combine(modifiedBookFolderPath, "placeHolder.png"));
            }
            modifiedBook.RemoveObsoleteAudioMarkup();

            // We want these to run after RemoveUnwantedContent() so that the metadata will more accurately reflect
            // the subset of contents that are included in the .bloompub
            // Note that we generally want to disable features here, but not enable them, especially while
            // running harvester!  See https://issues.bloomlibrary.org/youtrack/issue/BL-8995.
            var enableBlind = modifiedBook.BookInfo.MetaData.Feature_Blind || !Program.RunningHarvesterMode;
            // BloomReader and BloomPlayer are not using the SignLanguage feature, and it's misleading to
            // assume the existence of videos implies sign language.  There is a separate "Video" feature
            // now that gets set automatically.  (Automated setting of the Blind feature is imperfect, but
            // more meaningful than trying to automate sign language just based on one video existing.)
            var enableSignLanguage = modifiedBook.BookInfo.MetaData.Feature_SignLanguage;

            modifiedBook.UpdateMetadataFeatures(
                isBlindEnabled: enableBlind,
                isSignLanguageEnabled: enableSignLanguage,
                isTalkingBookEnabled: true,            // talkingBook is only ever set automatically as far as I can tell.
                allowedLanguages: null                 // allow all because we've already filtered out the unwanted ones from the dom above.
                );

            modifiedBook.SetAnimationDurationsFromAudioDurations();

            modifiedBook.OurHtmlDom.SetMedia("bloomReader");
            modifiedBook.OurHtmlDom.AddOrReplaceMetaElement("bloom-digital-creator", creator);
            EmbedFonts(modifiedBook, progress, fontsUsed, FontFileFinder.GetInstance(Program.RunningUnitTests));

            // Re-locate the HTML file in the book's current folder (which may differ from the tentative one; see above).
            var bookFile = BookStorage.FindBookHtmlInFolder(modifiedBook.FolderPath);

            StripImgIfWeCannotFindFile(modifiedBook.RawDom, bookFile);
            StripContentEditableAndTabIndex(modifiedBook.RawDom);
            InsertReaderStylesheet(modifiedBook.RawDom);
            // Copy the distributed readerStyles.css into the book folder.
            RobustFile.Copy(FileLocationUtilities.GetFileDistributedWithApplication(BloomFileLocator.BrowserRoot, "publish", "ReaderPublish", "readerStyles.css"),
                            Path.Combine(modifiedBookFolderPath, "readerStyles.css"));
            ConvertImagesToBackground(modifiedBook.RawDom);

            AddDistributionFile(modifiedBookFolderPath, creator, settings);

            // Persist all of the DOM and metadata changes made above.
            modifiedBook.Save();

            return(modifiedBook);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Builds a modified working copy of the book under <paramref name="temp"/> and runs it through
        /// the publish pipeline below (language filtering, content/page removal, file cleanup, metadata
        /// update, font embedding, reader stylesheet), then saves and returns that copy.
        /// The order of the steps matters; see the comments on the individual steps.
        /// </summary>
        /// <param name="bookFolderPath">Folder of the source book; its HTML file is located with BookStorage.FindBookHtmlInFolder.</param>
        /// <param name="bookServer">Passed through to PublishHelper.MakeDeviceXmatterTempBook.</param>
        /// <param name="temp">Temporary folder under which the working copy's directory is created.</param>
        /// <param name="progress">Receives the batched warning messages and is passed to EmbedFonts.</param>
        /// <param name="creator">Written to the "bloom-digital-creator" meta element.</param>
        /// <param name="settings">Optional publish settings; only LanguagesToInclude is read here, null-tolerantly.</param>
        /// <returns>The modified book, after it has been saved.</returns>
        public static Book.Book PrepareBookForBloomReader(string bookFolderPath, BookServer bookServer, TemporaryFolder temp,
                                                          WebSocketProgress progress, string creator = "bloom", AndroidPublishSettings settings = null)
        {
            // MakeDeviceXmatterTempBook needs to be able to copy customCollectionStyles.css etc into parent of bookFolderPath
            // And bloom-player expects folder name to match html file name.
            var htmPath = BookStorage.FindBookHtmlInFolder(bookFolderPath);
            var tentativeBookFolderPath = Path.Combine(temp.FolderPath, Path.GetFileNameWithoutExtension(htmPath));

            Directory.CreateDirectory(tentativeBookFolderPath);
            var modifiedBook = PublishHelper.MakeDeviceXmatterTempBook(bookFolderPath, bookServer, tentativeBookFolderPath);

            // Although usually tentativeBookFolderPath and modifiedBook.FolderPath are the same, there are some exceptions
            // In the process of bringing a book up-to-date (called by MakeDeviceXmatterTempBook), the folder path may change.
            // For example, it could change if the original folder path contains punctuation marks now deemed dangerous.
            //    The book will be moved to the sanitized version of the file name instead.
            // It can also happen if we end up picking a different version of the title (i.e. in a different language)
            //    than the one written to the .htm file.
            string modifiedBookFolderPath = modifiedBook.FolderPath;

            // Quizzes are only processed when the collection has enterprise features.
            if (modifiedBook.CollectionSettings.HaveEnterpriseFeatures)
            {
                ProcessQuizzes(modifiedBookFolderPath, modifiedBook.RawDom);
            }

            // Right here, let's maintain the history of what the BloomdVersion signifies to a reader.
            // Version 1 (as opposed to no BloomdVersion field): the bookFeatures property may be
            // used to report features analytics (with earlier bloomd's, the reader must use its own logic)
            modifiedBook.Storage.BookInfo.MetaData.BloomdVersion = 1;

            // An explicit language list wins; the collection's Language2 code is passed as the third argument.
            if (settings?.LanguagesToInclude != null)
            {
                PublishModel.RemoveUnwantedLanguageData(modifiedBook.OurHtmlDom, settings.LanguagesToInclude, modifiedBook.CollectionSettings.Language2.Iso639Code);
            }
            else if (Program.RunningHarvesterMode && modifiedBook.OurHtmlDom.SelectSingleNode(BookStorage.ComicalXpath) != null)
            {
                // This indicates that we are harvesting a book with comic speech bubbles.
                // For comical books, we only publish a single language. It's not currently feasible to
                // allow the reader to switch language in a Comical book, because typically that requires
                // adjusting the positions of the bubbles, and we don't yet support having more than one
                // set of bubble locations in a single book. See BL-7912 for some ideas on how we might
                // eventually improve this. In the meantime, switching language would have bad effects,
                // and if you can't switch language, there's no point in the book containing more than one.
                var languagesToInclude = new string[1] {
                    modifiedBook.CollectionSettings.Language1.Iso639Code
                };
                PublishModel.RemoveUnwantedLanguageData(modifiedBook.OurHtmlDom, languagesToInclude, modifiedBook.CollectionSettings.Language2.Iso639Code);
            }

            // Do this after processing interactive pages, as they can satisfy the criteria for being 'blank'
            // NOTE(review): this remark presumably applies to the RemoveBlankPages call further down,
            // not to the fontsUsed declaration it sits above — confirm.
            HashSet <string> fontsUsed = null;

            using (var helper = new PublishHelper())
            {
                helper.ControlForInvoke = ControlForInvoke;
                ISet <string> warningMessages = new HashSet <string>();
                helper.RemoveUnwantedContent(modifiedBook.OurHtmlDom, modifiedBook, false, warningMessages);
                PublishHelper.SendBatchedWarningMessagesToProgress(warningMessages, progress);
                // Capture the font set before the helper is disposed; it is needed by EmbedFonts below.
                fontsUsed = helper.FontsUsed;
            }
            modifiedBook.RemoveBlankPages(settings?.LanguagesToInclude);

            // See https://issues.bloomlibrary.org/youtrack/issue/BL-6835.
            RemoveInvisibleImageElements(modifiedBook);
            modifiedBook.Storage.CleanupUnusedImageFiles(keepFilesForEditing: false);
            // placeHolder.png is deleted from the output if it is present.
            if (RobustFile.Exists(Path.Combine(modifiedBookFolderPath, "placeHolder.png")))
            {
                RobustFile.Delete(Path.Combine(modifiedBookFolderPath, "placeHolder.png"));
            }

            modifiedBook.Storage.CleanupUnusedAudioFiles(isForPublish: true);
            modifiedBook.RemoveObsoleteAudioMarkup();
            modifiedBook.Storage.CleanupUnusedVideoFiles();

            // We want these to run after RemoveUnwantedContent() so that the metadata will more accurately reflect
            // the subset of contents that are included in the .bloomd
            modifiedBook.UpdateMetadataFeatures(
                isBlindEnabled: true,
                isSignLanguageEnabled: true,
                isTalkingBookEnabled: true);

            modifiedBook.SetAnimationDurationsFromAudioDurations();

            modifiedBook.OurHtmlDom.SetMedia("bloomReader");
            modifiedBook.OurHtmlDom.AddOrReplaceMetaElement("bloom-digital-creator", creator);
            EmbedFonts(modifiedBook, progress, fontsUsed, new FontFileFinder());

            // Re-locate the HTML file in the book's current folder (which may differ from the tentative one; see above).
            var bookFile = BookStorage.FindBookHtmlInFolder(modifiedBook.FolderPath);

            StripImgIfWeCannotFindFile(modifiedBook.RawDom, bookFile);
            StripContentEditableAndTabIndex(modifiedBook.RawDom);
            InsertReaderStylesheet(modifiedBook.RawDom);
            // Copy the distributed readerStyles.css into the book folder.
            RobustFile.Copy(FileLocationUtilities.GetFileDistributedWithApplication(BloomFileLocator.BrowserRoot, "publish", "ReaderPublish", "readerStyles.css"),
                            Path.Combine(modifiedBookFolderPath, "readerStyles.css"));
            ConvertImagesToBackground(modifiedBook.RawDom);

            // Persist all of the DOM and metadata changes made above.
            modifiedBook.Save();

            return(modifiedBook);
        }