/// <summary>
/// Constructs a transliterator.
/// </summary>
/// <param name="loc">
/// Locale kept on the instance; the case locale is derived from it via
/// <c>UCaseProperties.GetCaseLocale</c>.
/// </param>
public UppercaseTransliterator(ULocale loc)
    : base(_ID, null)
{
    // Locale first: the case locale below is computed from the stored value.
    this.locale = loc;
    this.caseLocale = UCaseProperties.GetCaseLocale(this.locale);

    // Shared case-property data plus the per-instance working objects.
    this.csp = UCaseProperties.Instance;
    this.result = new StringBuilder();
    this.iter = new ReplaceableContextEnumerator();
}
/// <summary>
/// Constructs a transliterator.
/// </summary>
/// <param name="loc">
/// Locale kept on the instance; the case locale is derived from it via
/// <c>UCaseProperties.GetCaseLocale</c>.
/// </param>
public TitlecaseTransliterator(ULocale loc)
    : base(_ID, null)
{
    this.locale = loc;

    // Two characters of preceding context are required,
    // e.g. to look back across the apostrophe in "can't".
    this.MaximumContextLength = 2;

    // Shared case-property data plus the per-instance working objects.
    this.csp = UCaseProperties.Instance;
    this.result = new StringBuilder();
    this.iter = new ReplaceableContextEnumerator();

    this.caseLocale = UCaseProperties.GetCaseLocale(this.locale);
}
/// <summary>
/// Gets the full titlecase mapping for <paramref name="c"/> by delegating to
/// <c>ToUpperOrTitle</c> with the "uppercase, not titlecase" flag cleared.
/// </summary>
/// <param name="c">Character to be mapped.</param>
/// <param name="iter">Context enumerator forwarded unchanged to <c>ToUpperOrTitle</c>.</param>
/// <param name="output">Receives the mapping when the result is a string.</param>
/// <param name="caseLocale">Case locale forwarded unchanged to <c>ToUpperOrTitle</c>.</param>
/// <returns>The value returned by <c>ToUpperOrTitle</c>.</returns>
public int ToFullTitle(int c, ICasePropertiesContextEnumerator iter, IAppendable output, CaseLocale caseLocale)
    => ToUpperOrTitle(c, iter, output, caseLocale, false /* upperNotTitle: titlecase requested */);
/// <summary>
/// Gets the full uppercase mapping for <paramref name="c"/> by delegating to
/// <c>ToUpperOrTitle</c> with the "uppercase, not titlecase" flag set.
/// </summary>
/// <param name="c">Character to be mapped.</param>
/// <param name="iter">Context enumerator forwarded unchanged to <c>ToUpperOrTitle</c>.</param>
/// <param name="output">Receives the mapping when the result is a string.</param>
/// <param name="caseLocale">Case locale forwarded unchanged to <c>ToUpperOrTitle</c>.</param>
/// <returns>The value returned by <c>ToUpperOrTitle</c>.</returns>
public int ToFullUpper(int c, ICasePropertiesContextEnumerator iter, StringBuilder output, CaseLocale caseLocale)
    => ToUpperOrTitle(c, iter, output, caseLocale, true /* upperNotTitle: uppercase requested */);
/// <summary>
/// Shared implementation of the full uppercase and full titlecase mappings.
/// </summary>
/// <param name="c">Character to be mapped.</param>
/// <param name="iter">
/// Context enumerator; only consulted (through <c>IsPrecededBySoftDotted</c>) for the
/// Lithuanian COMBINING DOT ABOVE rule.
/// </param>
/// <param name="output">Receives the mapping when the result is a full-mapping string.</param>
/// <param name="caseLocale">Case locale selecting the Turkish/Azeri and Lithuanian special rules.</param>
/// <param name="upperNotTitle"><c>true</c> for the uppercase mapping, <c>false</c> for the titlecase mapping.</param>
/// <returns>
/// <c>~c</c> when the mapping leaves <paramref name="c"/> unchanged; <c>0</c> when the character is
/// removed; the length of the string appended to <paramref name="output"/> when a full mapping
/// string is used; otherwise the mapped code point.
/// </returns>
/* internal */ private int ToUpperOrTitle(int c, ICasePropertiesContextEnumerator iter, IAppendable output, CaseLocale caseLocale, bool upperNotTitle)
{
    int props = trie.Get(c);

    // Fast path: no exception data, so the mapping is at most a simple delta
    // that applies to lowercase characters only.
    if (!PropsHasException(props))
    {
        int simple = c;
        if (GetCaseTypeFromProps(props) == CaseType.Lower)
        {
            simple = c + GetDelta(props);
        }
        return (simple == c) ? ~simple : simple;
    }

    int excCursor = GetExceptionsOffset(props);
    int excWord = exceptions[excCursor++];

    // Position right after the exception word; the simple-mapping slots are read from here.
    int slotsStart = excCursor;

    if ((excWord & EXC_CONDITIONAL_SPECIAL) != 0)
    {
        /* use hardcoded conditions and mappings */
        if (caseLocale == CaseLocale.Turkish && c == 0x69)
        {
            /*
             * Turkish and Azeri:
             * I and i-dotless; I-dot and i are case pairs in Turkish and Azeri.
             * When uppercasing, i turns into a dotted capital I.
             *
             * 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
             * 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
             */
            return 0x130;
        }

        if (caseLocale == CaseLocale.Lithuanian && c == 0x307 && IsPrecededBySoftDotted(iter))
        {
            /*
             * Lithuanian:
             * Lithuanian retains the dot in a lowercase i when followed by accents.
             * Remove DOT ABOVE after "i" with upper or titlecase.
             *
             * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
             */
            return 0; /* remove the dot (continue without output) */
        }

        /* no known conditional special case mapping, use a normal mapping below */
    }
    else if (HasSlot(excWord, EXC_FULL_MAPPINGS))
    {
        long slot = GetSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excCursor);
        int lengths = (int)slot & 0xffff;

        /* start of full case mapping strings */
        int stringStart = (int)(slot >> 32) + 1;

        /* skip the lowercase and case-folding result strings */
        stringStart += lengths & FULL_LOWER;
        lengths >>= 4;
        stringStart += lengths & 0xf;
        lengths >>= 4;

        int mappingLength;
        if (upperNotTitle)
        {
            mappingLength = lengths & 0xf;
        }
        else
        {
            /* skip the uppercase result string */
            stringStart += lengths & 0xf;
            mappingLength = (lengths >> 4) & 0xf;
        }

        if (mappingLength != 0)
        {
            try
            {
                // append the result string (start index + count)
                output.Append(exceptions, stringStart, mappingLength);

                /* return the string length */
                return mappingLength;
            }
            catch (IOException e)
            {
                throw new ICUUncheckedIOException(e);
            }
        }
    }

    // Fall back to the simple mapping slots.
    int slotIndex;
    if (!upperNotTitle && HasSlot(excWord, EXC_TITLE))
    {
        slotIndex = EXC_TITLE;
    }
    else if (HasSlot(excWord, EXC_UPPER))
    {
        /* here, titlecase is same as uppercase */
        slotIndex = EXC_UPPER;
    }
    else
    {
        return ~c;
    }

    int mapped = GetSlotValue(excWord, slotIndex, slotsStart);
    return (mapped == c) ? ~mapped : mapped;
}
/// <summary>
/// Get the full lowercase mapping for <paramref name="c"/>.
/// </summary>
/// <param name="c">Character to be mapped.</param>
/// <param name="iter">
/// Character iterator, used for context-sensitive mappings.
/// See <see cref="ICasePropertiesContextEnumerator"/> for details.
/// If iter==null then a context-independent result is returned.
/// </param>
/// <param name="output">If the mapping result is a string, then it is appended to <paramref name="output"/>.</param>
/// <param name="caseLocale">Case locale value from <see cref="GetCaseLocale(System.Globalization.CultureInfo)"/>.</param>
/// <returns>
/// Output code point or string length, see <see cref="MaxStringLength"/>.
/// The bitwise complement of <paramref name="c"/> is returned when the mapping leaves it unchanged,
/// and <c>0</c> when the character is removed.
/// </returns>
/// <seealso cref="ICasePropertiesContextEnumerator"/>
/// <seealso cref="MaxStringLength"/>
/// <internal/>
public int ToFullLower(int c, ICasePropertiesContextEnumerator iter, StringBuilder output, CaseLocale caseLocale)
{
    int props = trie.Get(c);

    // Fast path: no exception data, so the mapping is at most a simple delta.
    if (!PropsHasException(props))
    {
        int simple = c;
        if (GetCaseTypeFromProps(props) >= CaseType.Upper)
        {
            simple = c + GetDelta(props);
        }
        return (simple == c) ? ~simple : simple;
    }

    int excCursor = GetExceptionsOffset(props);
    int excWord = exceptions[excCursor++];

    // Position right after the exception word; the simple-mapping slot is read from here.
    int slotsStart = excCursor;

    if ((excWord & EXC_CONDITIONAL_SPECIAL) != 0)
    {
        /* use hardcoded conditions and mappings */

        /*
         * Test for conditional mappings first
         * (otherwise the unconditional default mappings are always taken),
         * then test for characters that have unconditional mappings in SpecialCasing.txt,
         * then get the UnicodeData.txt mappings.
         */

        /* base characters, find accents above */
        bool isLithuanianBase = c == 0x49 || c == 0x4a || c == 0x12e;
        /* precomposed with accent above, no need to find one */
        bool isLithuanianPrecomposed = c == 0xcc || c == 0xcd || c == 0x128;

        if (caseLocale == CaseLocale.Lithuanian &&
            ((isLithuanianBase && IsFollowedByMoreAbove(iter)) || isLithuanianPrecomposed))
        {
            /*
             * Lithuanian:
             * Lithuanian retains the dot in a lowercase i when followed by accents.
             *
             * Introduce an explicit dot above when lowercasing capital I's and J's
             * whenever there are more accents above.
             * (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
             *
             * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
             * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
             * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
             * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
             * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
             * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
             */
            try
            {
                switch (c)
                {
                    case 0x49: /* LATIN CAPITAL LETTER I */
                        output.Append(iDot);
                        return 2;
                    case 0x4a: /* LATIN CAPITAL LETTER J */
                        output.Append(jDot);
                        return 2;
                    case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
                        output.Append(iOgonekDot);
                        return 2;
                    case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
                        output.Append(iDotGrave);
                        return 3;
                    case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
                        output.Append(iDotAcute);
                        return 3;
                    case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
                        output.Append(iDotTilde);
                        return 3;
                    default:
                        return 0; /* will not occur */
                }
            }
            catch (IOException e)
            {
                throw new ICUUncheckedIOException(e);
            }
        }

        /* Turkish and Azeri */
        if (caseLocale == CaseLocale.Turkish)
        {
            if (c == 0x130)
            {
                /*
                 * I and i-dotless; I-dot and i are case pairs in Turkish and Azeri.
                 *
                 * 0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
                 * 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
                 */
                return 0x69;
            }

            if (c == 0x307 && IsPrecededBy_I(iter))
            {
                /*
                 * When lowercasing, remove dot_above in the sequence I + dot_above,
                 * which will turn into i.
                 * This matches the behavior of the canonically equivalent I-dot_above.
                 *
                 * 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
                 * 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
                 */
                return 0; /* remove the dot (continue without output) */
            }

            if (c == 0x49 && !IsFollowedByDotAbove(iter))
            {
                /*
                 * When lowercasing, unless an I is before a dot_above,
                 * it turns into a dotless i.
                 *
                 * 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
                 * 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
                 */
                return 0x131;
            }
        }

        if (c == 0x130)
        {
            /*
             * Preserve canonical equivalence for I with dot.
             * The Turkic case has already been handled above.
             *
             * 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
             */
            try
            {
                output.Append(iDot);
                return 2;
            }
            catch (IOException e)
            {
                throw new ICUUncheckedIOException(e);
            }
        }

        if (c == 0x3a3 &&
            !IsFollowedByCasedLetter(iter, forward: true) &&
            IsFollowedByCasedLetter(iter, forward: false) /* backward = preceded */)
        {
            /*
             * Greek capital sigma maps depending on surrounding cased letters
             * (see SpecialCasing.txt). Special case for final form of sigma:
             *
             * 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
             */
            return 0x3c2; /* greek small final sigma */
        }

        /* no known conditional special case mapping, use a normal mapping below */
    }
    else if (HasSlot(excWord, EXC_FULL_MAPPINGS))
    {
        long slot = GetSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excCursor);
        int lowerLength = (int)slot & FULL_LOWER;

        if (lowerLength != 0)
        {
            /* start of full case mapping strings */
            int stringStart = (int)(slot >> 32) + 1;

            try
            {
                // append the lowercase mapping (start index + count)
                output.Append(exceptions, stringStart, lowerLength);

                /* return the string length */
                return lowerLength;
            }
            catch (IOException e)
            {
                throw new ICUUncheckedIOException(e);
            }
        }
    }

    // Without a simple lowercase slot the character maps to itself.
    if (!HasSlot(excWord, EXC_LOWER))
    {
        return ~c;
    }

    int mapped = GetSlotValue(excWord, EXC_LOWER, slotsStart);
    return (mapped == c) ? ~mapped : mapped;
}