// Picks which font slot of w:rFonts (ascii, hAnsi, eastAsia or cs) applies to a single character.
//
// The rules come from the implementer notes in [MS-OI29500].pdf, section 2.1.87:
// http://msdn.microsoft.com/en-us/library/ee908652.aspx
//
// ch  - the character to classify.
// csa - character style attributes of the run; this method reads Rtl, EastAsiaFont, AsciiFont,
//       HAnsiFont, Hint and EastAsiaLang.
//
// Returns the FontType to use. Any character not covered by a rule below yields FontType.HAnsi.
public static FontType DetermineFontTypeFromCharacter(char ch, CharStyleAttributes csa)
{
    // Per the notes, a run with the cs element ("[ISO/IEC-29500-1] §17.3.2.7; cs") or the rtl element
    // ("[ISO/IEC-29500-1] §17.3.2.30; rtl") always uses the cs (or cstheme) font, whatever its content.
    // Only csa.Rtl is consulted here.
    if (csa.Rtl)
    {
        return FontType.CS;
    }

    int code = ch;

    // Basic Latin. Most characters are expected to take this exit.
    if (code <= 0x007F)
    {
        return FontType.Ascii;
    }

    // When the eastAsia font is "Times New Roman" and the ascii and hAnsi fonts are equal, the ascii
    // font is used for everything outside Basic Latin.
    if (csa.EastAsiaFont == "Times New Roman" && csa.AsciiFont == csa.HAnsiFont)
    {
        return FontType.Ascii;
    }

    // Ranges that always map to the ascii font, regardless of hint or language.
    bool alwaysAscii =
        (code >= 0x0590 && code <= 0x07BF) ||   // Hebrew, Arabic, Syriac, Arabic Supplement, Thaana
        (code >= 0xFB50 && code <= 0xFDFF) ||   // Arabic Presentation Forms-A
        (code >= 0xFE70 && code <= 0xFEFE);     // Arabic Presentation Forms-B
    if (alwaysAscii)
    {
        return FontType.Ascii;
    }

    // Ranges that always map to the eastAsia font, regardless of hint or language.
    bool alwaysEastAsia =
        (code >= 0x1100 && code <= 0x11FF) ||   // Hangul Jamo
        (code >= 0x2F00 && code <= 0x2FDF) ||   // Kangxi Radicals
        (code >= 0x2FF0 && code <= 0x2FFF) ||   // Ideographic Description Characters
        (code >= 0x3000 && code <= 0x319F) ||   // CJK Symbols and Punctuation, Hiragana, Katakana, Bopomofo,
                                                // Hangul Compatibility Jamo, Kanbun
        (code >= 0x3200 && code <= 0x4DBF) ||   // Enclosed CJK Letters and Months, CJK Compatibility,
                                                // CJK Unified Ideographs Extension A
        (code >= 0x4E00 && code <= 0x9FAF) ||   // CJK Unified Ideographs
        (code >= 0xA000 && code <= 0xA4CF) ||   // Yi Syllables, Yi Radicals
        (code >= 0xAC00 && code <= 0xD7AF) ||   // Hangul Syllables
        (code >= 0xD800 && code <= 0xDFFF) ||   // High Surrogates, High Private Use Surrogates, Low Surrogates
        (code >= 0xF900 && code <= 0xFAFF) ||   // CJK Compatibility Ideographs
        (code >= 0xFE30 && code <= 0xFE6F) ||   // CJK Compatibility Forms, Small Form Variants
        (code >= 0xFF00 && code <= 0xFFEF);     // Halfwidth and Fullwidth Forms
    if (alwaysEastAsia)
    {
        return FontType.EastAsia;
    }

    // Every remaining rule resolves to hAnsi unless the run carries the eastAsia hint.
    bool hintIsEastAsia = csa.Hint == "eastAsia";
    if (!hintIsEastAsia)
    {
        return FontType.HAnsi;
    }

    // Latin-1 Supplement: only specific characters switch to the eastAsia font.
    if (code >= 0x00A0 && code <= 0x00FF)
    {
        switch (code)
        {
            // Characters that use eastAsia whenever the hint is eastAsia.
            case 0xA1: case 0xA4: case 0xA7: case 0xA8: case 0xAA: case 0xAD: case 0xAF:
            case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4:
            case 0xB6: case 0xB7: case 0xB8: case 0xB9: case 0xBA:
            case 0xBC: case 0xBD: case 0xBE: case 0xBF:
            case 0xD7: case 0xF7:
                return FontType.EastAsia;

            // Characters that use eastAsia only when the east Asian language is Chinese.
            case 0xE0: case 0xE1:
            case 0xE8: case 0xE9: case 0xEA:
            case 0xEC: case 0xED:
            case 0xF2: case 0xF3:
            case 0xF9: case 0xFA:
            case 0xFC:
                if (csa.EastAsiaLang == "zh-hant" || csa.EastAsiaLang == "zh-hans")
                {
                    return FontType.EastAsia;
                }
                return FontType.HAnsi;

            default:
                return FontType.HAnsi;
        }
    }

    // Ranges that use the eastAsia font whenever the hint is eastAsia.
    bool eastAsiaWhenHinted =
        (code >= 0x02B0 && code <= 0x03CF) ||   // Spacing Modifier Letters, Combining Diacritical Marks, Greek
        (code >= 0x0400 && code <= 0x04FF) ||   // Cyrillic
        (code >= 0x2000 && code <= 0x27BF) ||   // General Punctuation, Superscripts and Subscripts, Currency Symbols,
                                                // Combining Diacritical Marks for Symbols, Letter-like Symbols,
                                                // Number Forms, Arrows, Mathematical Operators, Miscellaneous Technical,
                                                // Control Pictures, Optical Character Recognition, Enclosed Alphanumerics,
                                                // Box Drawing, Block Elements, Geometric Shapes, Miscellaneous Symbols,
                                                // Dingbats
        (code >= 0x2E80 && code <= 0x2EFF) ||   // CJK Radicals Supplement
        (code >= 0xE000 && code <= 0xF8FF) ||   // Private Use Area
        (code >= 0xFB00 && code <= 0xFB1C);     // Alphabetic Presentation Forms, first part
    if (eastAsiaWhenHinted)
    {
        return FontType.EastAsia;
    }

    // Alphabetic Presentation Forms, second part: ascii when the hint is eastAsia.
    if (code >= 0xFB1D && code <= 0xFB4F)
    {
        return FontType.Ascii;
    }

    // Ranges that use the eastAsia font only when the hint is eastAsia AND the east Asian language is Chinese.
    // TODO: the notes also select eastAsia when the character set of the east Asia (or east Asia theme)
    // font is Chinese5 or GB2312; that check is not implemented.
    bool eastAsiaWhenChinese =
        (code >= 0x0100 && code <= 0x02AF) ||   // Latin Extended-A, Latin Extended-B, IPA Extensions
        (code >= 0x1E00 && code <= 0x1EFF);     // Latin Extended Additional
    if (eastAsiaWhenChinese && (csa.EastAsiaLang == "zh-hant" || csa.EastAsiaLang == "zh-hans"))
    {
        return FontType.EastAsia;
    }

    return FontType.HAnsi;
}
// Resolves theme font references on the w:rFonts child of rPr into concrete typeface names, then
// annotates paraOrRun with the font name and language type selected from its text.
//
// wDoc      - document whose main part may have a theme part supplying major/minor fonts.
// paraOrRun - the paragraph or run element that receives the PtOpenXml.FontName and
//             PtOpenXml.LanguageType attributes.
// pPr, rPr  - paragraph and run properties; rPr's w:rFonts element is updated in place, and both are
//             passed to CharStyleAttributes.
// settings  - when RestrictToSupportedLanguages is set, east Asian and complex-script text causes an
//             UnsupportedLanguageException.
//
// Does nothing if rPr has no w:rFonts element. Theme elements or typeface attributes that are absent
// are treated as "no theme font" instead of causing a NullReferenceException.
private static void AdjustFontAttributes(WordprocessingDocument wDoc, XElement paraOrRun, XElement pPr,
    XElement rPr, FormattingAssemblerSettings settings)
{
    // Nothing to resolve without w:rFonts, so bail out before touching the theme part.
    var rFonts = rPr.Element(W.rFonts);
    if (rFonts == null)
    {
        return;
    }

    // Locate the theme's major/minor font collections, tolerating an incomplete theme part.
    XElement majorFont = null;
    XElement minorFont = null;
    var themePart = wDoc.MainDocumentPart.ThemePart;
    if (themePart != null)
    {
        XDocument themeXDoc = themePart.GetXDocument();
        if (themeXDoc != null && themeXDoc.Root != null)
        {
            XElement fontScheme = themeXDoc.Root
                .Elements(A.themeElements)
                .Elements(A.fontScheme)
                .FirstOrDefault();
            if (fontScheme != null)
            {
                majorFont = fontScheme.Element(A.majorFont);
                minorFont = fontScheme.Element(A.minorFont);
            }
        }
    }

    string minorLatinTypeface = GetThemeTypeface(minorFont, A.latin);
    string majorLatinTypeface = GetThemeTypeface(majorFont, A.latin);

    var asciiTheme = (string)rFonts.Attribute(W.asciiTheme);
    var hAnsiTheme = (string)rFonts.Attribute(W.hAnsiTheme);
    var eastAsiaTheme = (string)rFonts.Attribute(W.eastAsiaTheme);
    var cstheme = (string)rFonts.Attribute(W.cstheme);

    string ascii = ResolveThemeTypeface(asciiTheme, minorLatinTypeface, majorLatinTypeface);
    string hAnsi = ResolveThemeTypeface(hAnsiTheme, minorLatinTypeface, majorLatinTypeface);

    // NOTE(review): east Asian theme references are resolved to the theme's latin typeface, not to a
    // dedicated east Asian entry. This is kept as-is - confirm whether it is intended.
    string eastAsia = ResolveThemeTypeface(eastAsiaTheme, minorLatinTypeface, majorLatinTypeface);

    string cs = ResolveThemeTypeface(
        cstheme,
        GetThemeTypeface(minorFont, A.cs),
        GetThemeTypeface(majorFont, A.cs));

    // A resolved theme typeface overwrites the corresponding explicit font attribute.
    if (ascii != null)
    {
        rFonts.SetAttributeValue(W.ascii, ascii);
    }
    if (hAnsi != null)
    {
        rFonts.SetAttributeValue(W.hAnsi, hAnsi);
    }
    if (eastAsia != null)
    {
        rFonts.SetAttributeValue(W.eastAsia, eastAsia);
    }
    if (cs != null)
    {
        rFonts.SetAttributeValue(W.cs, cs);
    }

    var firstTextNode = paraOrRun.Descendants(W.t).FirstOrDefault(t => t.Value.Length > 0);

    // If there is a run with no text in it, then there is no need to do any of the rest of this method.
    if (firstTextNode == null && paraOrRun.Name == W.r)
    {
        return;
    }

    // Elements other than runs that have no text are classified as if they contained a single space.
    string str = firstTextNode != null ? firstTextNode.Value : " ";

    var csa = new CharStyleAttributes(pPr, rPr);

    // This module determines the font based on just one character.
    // Technically, a run can contain characters from different Unicode code blocks, and hence should be
    // rendered with different fonts. However, Word breaks up runs that use more than one font into
    // multiple runs. Other producers of WordprocessingML may not, so in that case, this routine may need
    // to be augmented to look at all characters in a run.

    // Use the first character that is not weak/neutral directional; fall back to the first character
    // when every character is weak/neutral (FirstOrDefault then yields '\0').
    var charToExamine = str.FirstOrDefault(c => !WeakAndNeutralDirectionalCharacters.Contains(c));
    if (charToExamine == '\0')
    {
        charToExamine = str[0];
    }

    var ft = DetermineFontTypeFromCharacter(charToExamine, csa);
    string fontType = null;
    string languageType = null;
    switch (ft)
    {
        case FontType.Ascii:
            fontType = (string)rFonts.Attribute(W.ascii);
            languageType = "western";
            break;
        case FontType.HAnsi:
            fontType = (string)rFonts.Attribute(W.hAnsi);
            languageType = "western";
            break;
        case FontType.EastAsia:
            if (settings.RestrictToSupportedLanguages)
            {
                throw new UnsupportedLanguageException("EastAsia languages are not supported");
            }
            fontType = (string)rFonts.Attribute(W.eastAsia);
            languageType = "eastAsia";
            break;
        case FontType.CS:
            if (settings.RestrictToSupportedLanguages)
            {
                throw new UnsupportedLanguageException("Complex script (RTL) languages are not supported");
            }
            fontType = (string)rFonts.Attribute(W.cs);
            languageType = "bidi";
            break;
    }

    // SetAttributeValue adds the attribute when absent and overwrites it when present.
    if (fontType != null)
    {
        paraOrRun.SetAttributeValue(PtOpenXml.FontName, fontType);
    }
    if (languageType != null)
    {
        paraOrRun.SetAttributeValue(PtOpenXml.LanguageType, languageType);
    }
}

// Returns the "typeface" attribute of the named child of a theme font collection, or null when the
// collection, the child element or the attribute is absent.
private static string GetThemeTypeface(XElement fontCollection, XName scriptElementName)
{
    if (fontCollection == null)
    {
        return null;
    }

    XElement scriptElement = fontCollection.Element(scriptElementName);
    if (scriptElement == null)
    {
        return null;
    }

    return (string)scriptElement.Attribute("typeface");
}

// Maps a theme font reference that starts with "minor" or "major" to the matching typeface, or returns
// null when the reference is absent, has another prefix, or the matching typeface is unavailable.
private static string ResolveThemeTypeface(string themeValue, string minorTypeface, string majorTypeface)
{
    if (themeValue == null)
    {
        return null;
    }

    // Theme values are fixed identifiers, so compare ordinally rather than by current culture.
    if (themeValue.StartsWith("minor", System.StringComparison.Ordinal) && minorTypeface != null)
    {
        return minorTypeface;
    }
    if (themeValue.StartsWith("major", System.StringComparison.Ordinal) && majorTypeface != null)
    {
        return majorTypeface;
    }

    return null;
}