/// <summary>
/// Collects the fonts mentioned in defaultLangStyles.css, skipping rules for languages
/// that have no content in the book.
/// defaultLangStyles.css accumulates a rule for every language seen by this book and its
/// ancestors, so it may name fonts for languages this version of the book does not use.
/// Those rules are stripped before the remaining css is scanned for fonts.
/// </summary>
/// <param name="bookHtmContent">Content of the book's htm file; nothing is done if null.</param>
/// <param name="defaultLangStylesContent">Content of defaultLangStyles.css; nothing is done if null.</param>
/// <param name="result">Set that receives the font names found.</param>
private static void ProcessDefaultLangStyles(string bookHtmContent, string defaultLangStylesContent, HashSet<string> result)
{
    if (bookHtmContent == null || defaultLangStylesContent == null)
    {
        return;
    }

    var bookDom = new HtmlDom(XmlHtmlConverter.GetXmlDomFromHtml(bookHtmContent, false));
    var activeLanguages = bookDom.GetLanguagesWithContent().ToArray();

    // Matches a complete per-language rule such as
    //   [lang='en']
    //   {
    //       font-family: 'Andika New Basic';
    //       direction: ltr;
    //   }
    // Group 1 captures the language tag; Singleline lets ".*?" span the line breaks in the rule body.
    var langRulePattern = new Regex(@"\[\s*lang\s*=\s*['""](.*?)['""]\s*\]\s*{.*?}", RegexOptions.Singleline | RegexOptions.Compiled);

    // Matching is done against the unmodified css; each rule whose language is not
    // in the book is then cut out of the working copy.
    var prunedCss = defaultLangStylesContent;
    foreach (Match rule in langRulePattern.Matches(defaultLangStylesContent))
    {
        if (!activeLanguages.Contains(rule.Groups[1].Value))
        {
            prunedCss = prunedCss.Replace(rule.Value, "");
        }
    }

    HtmlDom.FindFontsUsedInCss(prunedCss, result, false);
}
/// <summary>
/// Examine the stylesheets and collect the font families they mention.
/// Note that the process used by ePub and bloomPub publication to
/// determine fonts is more complicated, using the DOM in an actual browser.
/// </summary>
/// <param name="bookPath">Folder whose top-level .css, .htm and .html files are scanned.</param>
/// <returns>Enumerable of font names</returns>
internal static IEnumerable<string> GetFontsUsed(string bookPath)
{
    string bookHtmContent = null;
    string defaultLangStylesContent = null;
    var result = new HashSet<string>();

    // Css for styles are contained in the actual html, so html files are scanned as well as css files.
    foreach (var filePath in Directory.EnumerateFiles(bookPath, "*.*"))
    {
        // File-name suffixes are identifiers, not linguistic text: compare them ordinally so the
        // outcome does not depend on the current culture or on ignorable Unicode characters.
        var isHtm = filePath.EndsWith(".htm", StringComparison.Ordinal);
        var isWanted = isHtm
            || filePath.EndsWith(".css", StringComparison.Ordinal)
            || filePath.EndsWith(".html", StringComparison.Ordinal);
        if (!isWanted)
        {
            continue;
        }

        var fileContents = RobustFile.ReadAllText(filePath, Encoding.UTF8);
        if (isHtm)
        {
            bookHtmContent = fileContents;
        }
        else if (filePath.EndsWith("defaultLangStyles.css", StringComparison.Ordinal))
        {
            defaultLangStylesContent = fileContents;
            // Delay processing defaultLangStyles to the end when we know we have the htm content.
            continue;
        }

        HtmlDom.FindFontsUsedInCss(fileContents, result, false);
    }

    ProcessDefaultLangStyles(bookHtmContent, defaultLangStylesContent, result);
    return result;
}
/// <summary>
/// Scans the css and htm files directly inside <paramref name="destDirName"/> for font-family
/// values (including fallback fonts) and writes a group of red messages to
/// <paramref name="progress"/> for every font whose metadata marks it as unsuitable or invalid.
/// </summary>
/// <param name="destDirName">Folder containing the book's css and htm files.</param>
/// <param name="progress">Receives the messages describing problem fonts.</param>
public static void ReportInvalidFonts(string destDirName, IProgress progress)
{
    // For ePUB and BloomPub, we display the book to determine exactly which fonts are
    // actually used. We don't have a browser available to do that for uploads, so we scan
    // css files and the styles set in the html file to see what font-family values are present.
    // There's also the question of multilanguage books having data that isn't actively
    // displayed but could potentially be displayed.
    var fontsFound = new HashSet<string>();

    // Css files first, then htm files (FindFontsUsedInCss works on HTML files as well).
    // There should be only one html file with the same name as the directory it's in,
    // but let's not make any assumptions here.
    foreach (var pattern in new[] { "*.css", "*.htm" })
    {
        foreach (var path in Directory.EnumerateFiles(destDirName, pattern))
        {
            var text = RobustFile.ReadAllText(path);
            HtmlDom.FindFontsUsedInCss(text, fontsFound, includeFallbackFonts: true);
        }
    }

    // Fill the name -> metadata lookup the first time it is needed.
    if (_fontMetadataMap == null)
    {
        _fontMetadataMap = new Dictionary<string, FontMetadata>();
        foreach (var fontMeta in FontsApi.AvailableFontMetadata)
        {
            _fontMetadataMap.Add(fontMeta.name, fontMeta);
        }
    }

    var genericFamilies = new HashSet<string> { "serif", "sans-serif", "cursive", "fantasy", "monospace" };
    foreach (var font in fontsFound)
    {
        if (genericFamilies.Contains(font.ToLowerInvariant()))
        {
            continue;
        }

        if (!_fontMetadataMap.TryGetValue(font, out var metadata))
        {
            // No metadata is available for this font; nothing is reported for it.
            continue;
        }

        // Only unsuitable and invalid fonts get a message; every other suitability
        // value (including OK and unknown) is passed over silently.
        string problem = null;
        switch (metadata.determinedSuitability)
        {
            case FontMetadata.kUnsuitable:
                problem = LocalizationManager.GetString("PublishTab.FontProblem.License", "The metadata inside this font tells us that it may not be embedded for free in ebooks and the web.");
                break;
            case FontMetadata.kInvalid:
                if (metadata.determinedSuitabilityNotes.Contains("exception"))
                {
                    problem = LocalizationManager.GetString("PublishTab.FontProblem.Exception", "The font's file cannot be processed by Bloom and may be corrupted or not a font file.");
                }
                else
                {
                    problem = String.Format(LocalizationManager.GetString("PublishTab.FontProblem.Format", "Bloom cannot publish ePUBs and BloomPubs with this font's format ({0})."), metadata.fileExtension);
                }
                break;
        }

        if (problem == null)
        {
            continue;
        }

        var headingFormat = LocalizationManager.GetString("PublishTab.FontProblem", "This book has a font, \"{0}\", which has the following problem:");
        var consequence = LocalizationManager.GetString("PublishTab.FontProblem.Result", "BloomLibrary.org will display the PDF and allow downloads for translation, but cannot offer the “READ” button or downloads for BloomPUB or ePUB.");
        // progress.WriteError() uses Color.Red, but also exposes a link to "report error" which we don't want here.
        progress.WriteMessageWithColor("Red", headingFormat, font);
        progress.WriteMessageWithColor("Red", " \u2022 {0}", problem);
        progress.WriteMessageWithColor("Red", " \u2022 {0}", consequence);
    }
}