/// <summary>
/// Initializes ICU for this process. If the ICU_DATA environment variable is not already
/// set, it is filled in from the "Icu{version}DataDir" registry value, looking first under
/// the per-user company key and then under the local-machine company key. Afterwards
/// <c>CustomIcu.InitIcuDataDir()</c> is always called.
/// </summary>
/// <remarks>
/// A registry value is used only when it is a non-empty string. A per-user value that is
/// missing, empty, or not a string no longer hides a usable local-machine value.
/// Any further setup (the original summary mentions adding the architecture-appropriate
/// ICU DLLs to the PATH) is delegated to <c>CustomIcu.InitIcuDataDir()</c>, which is not
/// visible from here.
/// </remarks>
public static void InitializeIcu()
{
	// Only derive ICU_DATA ourselves when the caller/environment has not provided it.
	if (string.IsNullOrEmpty(Environment.GetEnvironmentVariable("ICU_DATA")))
	{
		// We read the registry value and set an environment variable ICU_DATA here so that
		// FwKernelInterfaces.dll is independent of WinForms.
		// ENHANCE: store data directory somewhere else other than registry (user.config
		// file?) and use that.
		string icuDirValueName = string.Format("Icu{0}DataDir", CustomIcu.Version);
		using (var userKey = RegistryHelper.CompanyKey)
		using (var machineKey = RegistryHelper.CompanyKeyLocalMachine)
		{
			// Read each key exactly once; either key may be null when it does not exist.
			string dir = userKey?.GetValue(icuDirValueName) as string;
			if (string.IsNullOrEmpty(dir))
			{
				// The per-user setting is absent or unusable: fall back to the machine-wide one.
				dir = machineKey?.GetValue(icuDirValueName) as string;
			}

			if (!string.IsNullOrEmpty(dir))
			{
				Environment.SetEnvironmentVariable("ICU_DATA", dir);
			}
		}
	}

	// ICU_DATA should point to the directory that contains nfc_fw.nrm and nfkc_fw.nrm
	// (i.e. icudt54l).
	CustomIcu.InitIcuDataDir();
}
public bool UpdateWordform(IWfiWordform wordform, ParserPriority priority)
{
	// Re-parses one wordform and passes the result to the parse filer.
	// Returns false when the wordform has no non-empty default vernacular form, or when
	// the hash code of the new parse result equals the wordform's stored checksum;
	// otherwise returns whatever m_parseFiler.ProcessParse returns.
	CheckDisposed();

	int storedChecksum = 0;
	ITsString vernacularForm = null;

	// Wordform data is read while a WorkerThreadReadHelper is active.
	using (new WorkerThreadReadHelper(m_cache.ServiceLocator.GetInstance<IWorkerThreadReadHandler>()))
	{
		if (wordform.IsValidObject)
		{
			storedChecksum = wordform.Checksum;
			vernacularForm = wordform.Form.VernacularDefaultWritingSystem;
		}
	}

	// vernacularForm is still null (and storedChecksum still 0) when the wordform was
	// not a valid object.
	var surfaceText = vernacularForm == null ? null : vernacularForm.Text;
	if (string.IsNullOrEmpty(surfaceText))
	{
		return false;
	}

	CheckNeedsUpdate();

	// The parser receives the NFD-normalized form with every space replaced by a period.
	var normalizer = CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD);
	var parserInput = normalizer.Normalize(surfaceText.Replace(' ', '.'));
	ParseResult result = m_parser.ParseWord(parserInput);

	// An unchanged hash means there is nothing new to file.
	if (result.GetHashCode() == storedChecksum)
	{
		return false;
	}

	return m_parseFiler.ProcessParse(wordform, priority, result);
}
public static void Main(string[] args)
{
	// The only purpose of this TestHelper app is to output the ICU version
	// so that we can run unit tests that test loading of our custom ICU
	// or fallback to default ICU.
	//
	// Output, one item per line: the ICU version, the character type of U+F171,
	// and whether a custom ICU library is in use.

	// The first command-line argument, when present, overrides the default base directory.
	var baseDir = (args == null || args.Length == 0) ? CodeDir : args[0];

	SetIcuDataDirectory(baseDir, "IcuData");
	CustomIcu.InitIcuDataDir();

	var icuVersion = Wrapper.IcuVersion;
	Console.WriteLine(icuVersion);

	var charType = Character.GetCharType('\xF171');
	Console.WriteLine(charType);

	var hasCustomLibrary = CustomIcu.HaveCustomIcuLibrary;
	Console.WriteLine(hasCustomLibrary);

	Wrapper.Cleanup();
}
/// <summary>
/// Parses a single word form on request and stores the XML result in the details of a
/// task report. Depending on <paramref name="fDoTrace"/> the result is either a parse
/// trace or a plain parse.
/// </summary>
/// <param name="sForm">the word form to parse; must be neither null nor empty</param>
/// <param name="fDoTrace">true to produce a trace of the parse, false for a plain parse</param>
/// <param name="sSelectTraceMorphs">list of msa hvos to limit the trace to; only used when tracing</param>
/// <exception cref="ArgumentNullException"><paramref name="sForm"/> is null</exception>
/// <exception cref="ArgumentException"><paramref name="sForm"/> is empty</exception>
public void TryAWord(string sForm, bool fDoTrace, int[] sSelectTraceMorphs)
{
	CheckDisposed();

	if (sForm == null)
	{
		throw new ArgumentNullException("sForm", "TryAWord cannot trace a Null string.");
	}
	if (sForm.Length == 0)
	{
		throw new ArgumentException("Can't try a word with no content.", "sForm");
	}

	CheckNeedsUpdate();

	string caption = string.Format(ParserCoreStrings.ksTraceWordformX, sForm);
	using (var report = new TaskReport(caption, m_taskUpdateHandler))
	{
		// The parser is always given the NFD-normalized form of the word.
		var normalizer = CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD);
		string normalizedForm = normalizer.Normalize(sForm);

		if (fDoTrace)
		{
			report.Details = m_parser.TraceWordXml(normalizedForm, sSelectTraceMorphs);
		}
		else
		{
			report.Details = m_parser.ParseWordXml(normalizedForm);
		}
	}
}
public void InstallPUACharacters()
{
	// Checks the character properties before installation, installs our own
	// CustomChars.xml test data, then checks the properties again.

	// Use ICU to check out existing/nonexistent character properties.
	VerifyNonexistentChars();

	string[] privateUseCodes = { "E000", "E001" };
	const string fiveDigitCode = "DDDDD";

	// Only the two private-use codes count as custom use.
	foreach (var code in privateUseCodes)
	{
		Assert.IsTrue(CustomIcu.IsCustomUse(code));
	}
	Assert.IsFalse(CustomIcu.IsCustomUse(kChar3S));
	Assert.IsFalse(CustomIcu.IsCustomUse(fiveDigitCode));

	// The same split is expected for private use.
	foreach (var code in privateUseCodes)
	{
		Assert.IsTrue(CustomIcu.IsPrivateUse(code));
	}
	Assert.IsFalse(CustomIcu.IsPrivateUse(kChar3S));
	Assert.IsFalse(CustomIcu.IsPrivateUse(fiveDigitCode));

	// All four codes are valid codepoints.
	foreach (var code in privateUseCodes)
	{
		Assert.IsTrue(CustomIcu.IsValidCodepoint(code));
	}
	Assert.IsTrue(CustomIcu.IsValidCodepoint(kChar3S));
	Assert.IsTrue(CustomIcu.IsValidCodepoint(fiveDigitCode));

	// Create our own CustomChars.xml file with test data in it, and install it.
	CreateAndInstallOurCustomChars(m_sCustomCharsFile);

	// Use ICU to check out the newly installed character properties.
	VerifyNewlyCreatedChars();
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Replaces every non-empty entry of m_fileData with its Unicode NFD-normalized form.
/// Throws if any entry contains one of the control characters that should never
/// appear in plain Unicode data.
/// </summary>
/// ------------------------------------------------------------------------------------
private void NormalizeFileData()
{
	// Control characters that should never appear in plain Unicode data:
	// U+0001..U+0008, U+000E..U+001B and U+007F. Tab, line feed, carriage return
	// and the other codes missing from this list are accepted.
	char[] forbiddenChars =
	{
		'\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08',
		'\x0E', '\x0F', '\x10', '\x11', '\x12', '\x13', '\x14', '\x15',
		'\x16', '\x17', '\x18', '\x19', '\x1A', '\x1B', '\x7F'
	};

	for (int index = 0; index < m_fileData.Length; index++)
	{
		var entry = m_fileData[index];

		// Empty entries are left untouched.
		if (entry.Length == 0)
		{
			continue;
		}

		if (entry.IndexOfAny(forbiddenChars) != -1)
		{
			throw new Exception(FWCoreDlgsErrors.ksInvalidControlCharacterFound);
		}

		var normalizer = CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD);
		m_fileData[index] = normalizer.Normalize(entry);
	}
}
private static void VerifyNewlyCreatedChars()
{
	// Asserts the ICU character properties expected for kChar1..kChar4 once our custom
	// character definitions have been installed.
	FwUtils.InitializeIcu();

	var customChars = new[] { kChar1, kChar2, kChar3, kChar4 };

	// Disabled checks: IsAlphabetic, IsDiacritic, IsIdeographic and IsNumeric use
	// u_getIntPropertyValue(), which doesn't work reliably with the limited number of
	// data files that we modify. Once they can be enabled the expectations are:
	//   IsAlphabetic  - true for kChar1 and kChar2, false for kChar3 and kChar4
	//   IsDiacritic   - false for all four
	//   IsIdeographic - false for all four
	//   IsNumeric     - true for kChar4 only

	foreach (var codepoint in customChars)
	{
		Assert.IsFalse(Icu.Character.IsControl(codepoint));
	}

	// Only kChar3 is punctuation now.
	Assert.IsFalse(Icu.Character.IsPunct(kChar1));
	Assert.IsFalse(Icu.Character.IsPunct(kChar2));
	Assert.IsTrue(Icu.Character.IsPunct(kChar3));
	Assert.IsFalse(Icu.Character.IsPunct(kChar4));

	foreach (var codepoint in customChars)
	{
		Assert.IsFalse(Icu.Character.IsSpace(codepoint));
	}

	foreach (var codepoint in customChars)
	{
		Assert.IsFalse(Icu.Character.IsSymbol(codepoint));
	}

	// Each character now reports its own general category.
	Assert.AreEqual(Icu.Character.UCharCategory.LOWERCASE_LETTER, Icu.Character.GetCharType(kChar1));
	Assert.AreEqual(Icu.Character.UCharCategory.UPPERCASE_LETTER, Icu.Character.GetCharType(kChar2));
	Assert.AreEqual(Icu.Character.UCharCategory.OTHER_PUNCTUATION, Icu.Character.GetCharType(kChar3));
	Assert.AreEqual(Icu.Character.UCharCategory.DECIMAL_DIGIT_NUMBER, Icu.Character.GetCharType(kChar4));

	foreach (var codepoint in customChars)
	{
		Assert.AreEqual("[none]", CustomIcu.GetDecompositionTypeInfo(codepoint).Description);
	}

	Assert.AreEqual("[none]", CustomIcu.GetNumericTypeInfo(kChar1).Description);
	Assert.AreEqual("[none]", CustomIcu.GetNumericTypeInfo(kChar2).Description);
	Assert.AreEqual("[none]", CustomIcu.GetNumericTypeInfo(kChar3).Description);

	// Current implementation (as of ICU50) is not overriding numeric type since we don't use it anywhere.
	// Enhance silmods.c in icu patch if needed.
	//numericType = Icu.GetNumericType(kChar4);
	//Assert.AreEqual("Decimal Digit", numericType.Description);

	// Current implementation (as of ICU50) is not overriding character names since we don't use them anywhere.
	// Enhance silmods.c in icu patch if needed.
	//var prettyName = Icu.GetPrettyICUCharName("\xE000");
	//Assert.AreEqual("My Special Character", prettyName);
	//prettyName = Icu.GetPrettyICUCharName("\xE001");
	//Assert.AreEqual("My Uppercase Character", prettyName);
	//prettyName = Icu.GetPrettyICUCharName(kChar3S);
	//Assert.AreEqual("New Punctuation Mark", prettyName);
	//var rawName = Icu.GetCharName(kChar4);	// can't pass large character code as 16-bit char.
	//Assert.AreEqual("NEW DIGIT NINE", rawName);
}
private static void VerifyNonexistentChars()
{
	// Asserts the ICU character properties expected for kChar1..kChar4 while they have
	// no custom definitions: every boolean property is false, kChar1/kChar2 are
	// private-use, kChar3/kChar4 are unassigned, and no decomposition type, numeric type
	// or pretty name is reported.
	FwUtils.InitializeIcu();

	var testChars = new[] { kChar1, kChar2, kChar3, kChar4 };

	foreach (var codepoint in testChars)
	{
		Assert.IsFalse(Icu.Character.IsAlphabetic(codepoint));
	}
	foreach (var codepoint in testChars)
	{
		Assert.IsFalse(Icu.Character.IsControl(codepoint));
	}
	foreach (var codepoint in testChars)
	{
		Assert.IsFalse(Icu.Character.IsDiacritic(codepoint));
	}
	foreach (var codepoint in testChars)
	{
		Assert.IsFalse(Icu.Character.IsIdeographic(codepoint));
	}
	foreach (var codepoint in testChars)
	{
		Assert.IsFalse(Icu.Character.IsNumeric(codepoint));
	}
	foreach (var codepoint in testChars)
	{
		Assert.IsFalse(Icu.Character.IsPunct(codepoint));
	}
	foreach (var codepoint in testChars)
	{
		Assert.IsFalse(Icu.Character.IsSpace(codepoint));
	}
	foreach (var codepoint in testChars)
	{
		Assert.IsFalse(Icu.Character.IsSymbol(codepoint));
	}

	// General category: the first two are private-use, the last two unassigned.
	foreach (var privateUse in new[] { kChar1, kChar2 })
	{
		Assert.AreEqual(Icu.Character.UCharCategory.PRIVATE_USE_CHAR, Icu.Character.GetCharType(privateUse));
	}
	foreach (var unassigned in new[] { kChar3, kChar4 })
	{
		Assert.AreEqual(Icu.Character.UCharCategory.UNASSIGNED, Icu.Character.GetCharType(unassigned));
	}

	foreach (var codepoint in testChars)
	{
		Assert.AreEqual("[none]", CustomIcu.GetDecompositionTypeInfo(codepoint).Description);
	}
	foreach (var codepoint in testChars)
	{
		Assert.AreEqual("[none]", CustomIcu.GetNumericTypeInfo(codepoint).Description);
	}

	// None of the characters has a pretty name yet.
	Assert.IsNull(Icu.Character.GetPrettyICUCharName("\xE000"));
	Assert.IsNull(Icu.Character.GetPrettyICUCharName("\xE001"));
	Assert.IsNull(Icu.Character.GetPrettyICUCharName(kChar3S));
	// NOTE(review): a C# \x escape consumes at most four hex digits, so "\xDDDDD" is
	// U+DDDD followed by the letter 'D', not the single code point U+DDDDD (which would
	// be written "\U000DDDDD"). Confirm which input this check is meant to exercise.
	Assert.IsNull(Icu.Character.GetPrettyICUCharName("\xDDDDD"));
}