/// ------------------------------------------------------------------------------------
/// <summary>
/// Looks up the cached ICU character for a codepoint. When the codepoint falls inside
/// one of the known cached ranges, that range is filled on its first hit and the lookup
/// is answered from the cache. Otherwise a single character is created, refreshed from
/// ICU and added to the cache.
/// </summary>
/// <param name="codepoint">The integer codepoint of the character to look up.</param>
/// <returns>The matching PUACharacter, or <c>null</c> when the codepoint is outside every
/// cached range and refreshing it from ICU fails.</returns>
/// ------------------------------------------------------------------------------------
public PUACharacter FindCachedIcuEntry(int codepoint)
{
    CheckDisposed();

    // Upper-case hex key. NOTE: this is NOT zero-padded to four digits
    // (e.g. 0x41 becomes "41", not "0041").
    string hexKey = codepoint.ToString("x").ToUpper();

    foreach (int[] cachedRange in m_cachedIcuRanges)
    {
        if (!IsInRange(codepoint, cachedRange))
            continue;

        // The third slot of the range is bumped on every hit; a zero value before
        // the bump means the range has not been loaded into the cache yet.
        bool notYetLoaded = cachedRange[2] == 0;
        cachedRange[2]++;
        if (notYetLoaded)
            FillRange(cachedRange);

        return (PUACharacter)m_cachedIcu.Find(hexKey, new UCDComparer());
    }

    // The codepoint is outside every cached range: try to load just this one character.
    PUACharacter singleChar = new PUACharacter(hexKey);
    if (!singleChar.RefreshFromIcu(false))
        return null;

    // This can be called from PuaCharacterDlg where m_cachedIcu has not been initialized.
    InitializeIcuCache();
    m_cachedIcu.Insert(singleChar);
    return singleChar;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Loads every codepoint of the given range into the ICU cache, creating the cache
/// first if necessary.
/// </summary>
/// <param name="range">Range descriptor; element 0 is the first codepoint and element 1
/// is the last codepoint (inclusive).</param>
/// ------------------------------------------------------------------------------------
private void FillRange(int[] range)
{
    // Makes a new empty Icu Cache, if it doesn't already exist
    InitializeIcuCache();

    int current = range[0];
    while (current <= range[1])
    {
        // TODO: Make sure that leaving off leading zeros is okay.
        string hexKey = current.ToString("x").ToUpper();
        PUACharacter entry = new PUACharacter(hexKey);

        // Populate the character from the ICU database, then cache it.
        entry.RefreshFromIcu(true);
        m_cachedIcu.Insert(entry);

        // REVIEW (TimS/EberhardB): We don't think we should call GC.Collect().
        // If there seems to be a need for it, then we probably need to call
        // Dispose() on one of the variables we want disposed.
        // A collection is forced whenever the codepoint is a multiple of 0x100.
        if ((current % 0x100) == 0)
            GC.Collect();

        current++;
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Makes sure that if the specified codepoint is a custom PUA character, that
/// character is added to the language definition's PUA collection.
/// </summary>
/// <param name="codepoint">The codepoint to check.</param>
/// <param name="langDef">The language definition whose PUA collection is updated.</param>
/// <param name="customPuaCharacters">The custom PUA characters to compare against.</param>
/// ------------------------------------------------------------------------------------
private static void UpdateLangDefPUACollection(int codepoint, LanguageDefinition langDef,
    List<PUACharacter> customPuaCharacters)
{
    // The textual form of the codepoint (upper-case hex, at least four digits) does not
    // depend on the loop variable, so build it once instead of on every iteration.
    string sCodePoint = codepoint.ToString("x4").ToUpperInvariant();

    // Go through all the custom defined PUA characters in ICU and make sure that if
    // our codepoint is one of them, the language definition contains our codepoint
    // in its collection of PUA characters.
    foreach (PUACharacter customChar in customPuaCharacters)
    {
        if (sCodePoint != customChar.CodePoint)
            continue;

        // Our codepoint is one of the custom PUA characters: add it to the
        // language definition's PUA collection. The scan deliberately continues
        // so that every matching entry is processed, as before.
        PUACharacter puaChar = new PUACharacter(sCodePoint);
        puaChar.RefreshFromIcu(false);
        langDef.AddPuaDefinition(codepoint, puaChar.ToString());
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Returns a list of the user defined custom characters in ICU by scanning the Unicode
/// data file (data\unidata\UnicodeData.txt, located relative to the ICU directory).
/// Every line containing the text "User Added" contributes one character, whose
/// codepoint is the text before the first ';' on that line.
/// </summary>
/// <returns>The custom characters found; empty if the file contains none.</returns>
/// ------------------------------------------------------------------------------------
public static List<PUACharacter> GetDefinedCustomPUACharsFromICU()
{
    List<PUACharacter> definedChars = new List<PUACharacter>();

    // The ICU directory looks like "c:\\work\\fw60\\distfiles\\Icu40\\icudt40l\\";
    // strip the trailing "\icudt..." part to get to the ICU root.
    // NOTE(review): this assumes the path contains "\icudt"; if it does not,
    // Substring throws ArgumentOutOfRangeException (unchanged from before).
    string uniData = DirectoryFinder.GetIcuDirectory;
    uniData = uniData.Substring(0, uniData.LastIndexOf(@"\icudt"));
    uniData = Path.Combine(uniData, @"data\unidata\UnicodeData.txt");

    // 'using' guarantees the reader is released even if Peek() or a read throws.
    using (StreamReader reader = new StreamReader(uniData, System.Text.Encoding.ASCII))
    {
        reader.Peek(); // force autodetection of encoding.

        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Blank lines cannot contain the marker, so they are skipped here too.
            // Ordinal search: this is file-format text, not linguistic text.
            if (line.IndexOf("User Added", StringComparison.Ordinal) == -1)
                continue;

            // A marker line without a field separator has no codepoint field;
            // skip it rather than failing the whole scan.
            int separatorIndex = line.IndexOf(';');
            if (separatorIndex < 0)
                continue;

            PUACharacter puaChar = new PUACharacter("0000");
            puaChar.CodePoint = line.Substring(0, separatorIndex).Trim();
            puaChar.RefreshFromIcu(false);
            definedChars.Add(puaChar);
        }
    }

    // REVIEW (TimS/EberhardB): We don't think we should call GC.Collect().
    // If there seems to be a need for it, then we probably need to call
    // Dispose() on one of the variables we want disposed.
    GC.Collect();

    return definedChars;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Determines whether or not the specified codepoint is defined in ICU and usable as a
/// valid character. The codepoint is rejected when it cannot be refreshed from ICU,
/// when it has an empty name, or when its general category starts with 'C' (the
/// "Other" categories) and is not "Cf".
/// </summary>
/// <param name="codepoint">The codepoint to test.</param>
/// <returns><c>true</c> if the codepoint passes all checks; <c>false</c> otherwise,
/// including when any exception is raised while checking.</returns>
/// ------------------------------------------------------------------------------------
private static bool IsCodePointDefined(int codepoint)
{
    PUACharacter candidate = new PUACharacter(codepoint);
    try
    {
        // If the definition doesn't exist in ICU then it's undefined and
        // shouldn't be allowed in the valid characters list.
        if (!candidate.RefreshFromIcu(false))
            return false;

        // A character with no name is treated as undefined as well.
        if (candidate.Name.Length == 0)
            return false;

        string category = candidate.GeneralCategory.UcdRepresentation.ToUpperInvariant();

        // Accept anything outside the "Other" (C*) categories, plus "Cf".
        // TODO: When tabs are supported, then allow them (they are in category Cc).
        // See TE-3004.
        return category[0] != 'C' || category[1] == 'F';
    }
    catch
    {
        // Any failure while inspecting the character means "not defined".
        return false;
    }
}