Beispiel #1
0
 /// <summary>
 /// Initializes the ICU data directory. When the ICU_DATA environment variable is not
 /// already set, it is populated from the "Icu{version}DataDir" registry value (the
 /// company key obtained from <c>RegistryHelper.CompanyKey</c> is consulted first, then
 /// <c>RegistryHelper.CompanyKeyLocalMachine</c>). Afterwards
 /// <c>CustomIcu.InitIcuDataDir</c> is always called.
 /// </summary>
 public static void InitializeIcu()
 {
     if (string.IsNullOrEmpty(Environment.GetEnvironmentVariable("ICU_DATA")))
     {
         // We read the registry value and set an environment variable ICU_DATA here so that
         // FwKernelInterfaces.dll is independent of WinForms.
         // ENHANCE: store data directory somewhere else other than registry (user.config
         // file?) and use that.
         string icuDirValueName = string.Format("Icu{0}DataDir", CustomIcu.Version);
         using (var userKey = RegistryHelper.CompanyKey)
         using (var machineKey = RegistryHelper.CompanyKeyLocalMachine)
         {
             // Read each registry value exactly once: the first key that has the value
             // wins, and the machine key is only consulted when the user key has none.
             object rawValue = userKey != null ? userKey.GetValue(icuDirValueName) : null;
             if (rawValue == null && machineKey != null)
             {
                 rawValue = machineKey.GetValue(icuDirValueName);
             }

             // A value that is not a string (or is empty) leaves ICU_DATA untouched.
             string dir = rawValue as string;
             if (!string.IsNullOrEmpty(dir))
             {
                 Environment.SetEnvironmentVariable("ICU_DATA", dir);
             }
         }
     }

     // ICU_DATA should point to the directory that contains nfc_fw.nrm and nfkc_fw.nrm
     // (i.e. icudt54l).
     CustomIcu.InitIcuDataDir();
 }
Beispiel #2
0
        /// <summary>
        /// Parses the given wordform again and passes the result to the parse filer, unless
        /// the result's hash code matches the checksum stored on the wordform.
        /// </summary>
        /// <param name="wordform">The wordform to parse.</param>
        /// <param name="priority">Priority handed through to <c>m_parseFiler.ProcessParse</c>.</param>
        /// <returns>
        /// <c>false</c> when the wordform is not a valid object, when its default vernacular
        /// form is missing or empty, or when the parse result's hash code equals the
        /// wordform's checksum; otherwise whatever <c>m_parseFiler.ProcessParse</c> returns.
        /// </returns>
        public bool UpdateWordform(IWfiWordform wordform, ParserPriority priority)
        {
            CheckDisposed();

            int storedChecksum = 0;
            ITsString vernacularForm = null;

            // The wordform is only touched while the worker-thread read helper is active.
            using (new WorkerThreadReadHelper(m_cache.ServiceLocator.GetInstance<IWorkerThreadReadHandler>()))
            {
                if (wordform.IsValidObject)
                {
                    storedChecksum = wordform.Checksum;
                    vernacularForm = wordform.Form.VernacularDefaultWritingSystem;
                }
            }

            // vernacularForm is still null (and storedChecksum still 0) when the wordform
            // was not a valid object; there is nothing to parse in that case.
            string surfaceText = vernacularForm == null ? null : vernacularForm.Text;
            if (string.IsNullOrEmpty(surfaceText))
            {
                return false;
            }

            CheckNeedsUpdate();

            // Spaces are replaced by '.' before the text is normalized and parsed.
            string parserInput = CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD)
                .Normalize(surfaceText.Replace(' ', '.'));
            ParseResult result = m_parser.ParseWord(parserInput);

            // An unchanged result does not need to be filed again.
            if (storedChecksum == result.GetHashCode())
            {
                return false;
            }

            return m_parseFiler.ProcessParse(wordform, priority, result);
        }
Beispiel #3
0
        /// <summary>
        /// Entry point of the TestHelper app. It prints the ICU version, the character type
        /// of U+F171 and whether a custom ICU library is in use, so that unit tests can
        /// check loading of our custom ICU versus the fallback to default ICU.
        /// </summary>
        /// <param name="args">
        /// Optional. When at least one argument is given, the first one is used as the base
        /// directory instead of <c>CodeDir</c>.
        /// </param>
        public static void Main(string[] args)
        {
            bool noBaseDirGiven = args == null || args.Length == 0;
            var baseDir = noBaseDirGiven ? CodeDir : args[0];

            SetIcuDataDirectory(baseDir, "IcuData");
            CustomIcu.InitIcuDataDir();

            // Output consumed by the unit tests, one value per line.
            Console.WriteLine(Wrapper.IcuVersion);
            Console.WriteLine(Character.GetCharType('\xF171'));
            Console.WriteLine(CustomIcu.HaveCustomIcuLibrary);

            Wrapper.Cleanup();
        }
Beispiel #4
0
        /// <summary>
        /// Parses a single word form, optionally tracing the parse, and stores the XML
        /// produced by the parser in the details of a task report.
        /// </summary>
        /// <param name="sForm">the word form to parse; must be neither null nor empty</param>
        /// <param name="fDoTrace">
        /// true to call <c>TraceWordXml</c>, false to call <c>ParseWordXml</c>
        /// </param>
        /// <param name="sSelectTraceMorphs">
        /// list of msa hvos to limit trace to (only used when tracing)
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="sForm"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="sForm"/> is empty.</exception>
        public void TryAWord(string sForm, bool fDoTrace, int[] sSelectTraceMorphs)
        {
            CheckDisposed();

            if (sForm == null)
            {
                throw new ArgumentNullException("sForm", "TryAWord cannot trace a Null string.");
            }
            if (sForm.Length == 0)
            {
                throw new ArgumentException("Can't try a word with no content.", "sForm");
            }

            CheckNeedsUpdate();

            string taskDescription = string.Format(ParserCoreStrings.ksTraceWordformX, sForm);
            using (var task = new TaskReport(taskDescription, m_taskUpdateHandler))
            {
                // The parser is given the NFD-normalized form of the word.
                string normForm = CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD).Normalize(sForm);
                var resultXml = fDoTrace
                    ? m_parser.TraceWordXml(normForm, sSelectTraceMorphs)
                    : m_parser.ParseWordXml(normForm);
                task.Details = resultXml;
            }
        }
Beispiel #5
0
        /// <summary>
        /// Checks what ICU reports for the test codepoints before any custom definitions
        /// exist, installs a CustomChars.xml file containing test data, and then checks the
        /// newly installed character properties.
        /// </summary>
        public void InstallPUACharacters()
        {
            // Baseline: ICU's view of the characters before our definitions are installed.
            VerifyNonexistentChars();

            // E000 and E001 are expected to be custom-use and private-use; kChar3S and
            // DDDDD are expected to be neither. All four must be valid codepoints.
            string[] privateUseCodes = { "E000", "E001" };

            foreach (string code in privateUseCodes)
            {
                Assert.IsTrue(CustomIcu.IsCustomUse(code));
            }
            Assert.IsFalse(CustomIcu.IsCustomUse(kChar3S));
            Assert.IsFalse(CustomIcu.IsCustomUse("DDDDD"));

            foreach (string code in privateUseCodes)
            {
                Assert.IsTrue(CustomIcu.IsPrivateUse(code));
            }
            Assert.IsFalse(CustomIcu.IsPrivateUse(kChar3S));
            Assert.IsFalse(CustomIcu.IsPrivateUse("DDDDD"));

            foreach (string code in privateUseCodes)
            {
                Assert.IsTrue(CustomIcu.IsValidCodepoint(code));
            }
            Assert.IsTrue(CustomIcu.IsValidCodepoint(kChar3S));
            Assert.IsTrue(CustomIcu.IsValidCodepoint("DDDDD"));

            // Create our own CustomChars.xml file with test data in it, and install it.
            CreateAndInstallOurCustomChars(m_sCustomCharsFile);

            // Ask ICU again now that the custom characters have been installed.
            VerifyNewlyCreatedChars();
        }
Beispiel #6
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Replaces every non-empty entry of <c>m_fileData</c> with its normalized form, using
 /// the ICU normalizer selected by <c>FwNormalizationMode.knmNFD</c> (NFD). Empty
 /// entries are left untouched.
 /// </summary>
 /// <exception cref="Exception">
 /// An entry contains one of the control characters that should never appear in plain
 /// Unicode data.
 /// </exception>
 /// ------------------------------------------------------------------------------------
 private void NormalizeFileData()
 {
     // Control characters that should never appear in plain Unicode data.
     char[] forbiddenControlChars =
     {
         '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08', '\x0E', '\x0F',
         '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', '\x18', '\x19',
         '\x1A', '\x1B', '\x7F'
     };

     for (int index = 0; index < m_fileData.Length; index++)
     {
         string entry = m_fileData[index];
         if (entry.Length == 0)
         {
             continue;
         }

         // Reject the data outright instead of normalizing text that contains junk.
         if (entry.IndexOfAny(forbiddenControlChars) >= 0)
         {
             throw new Exception(FWCoreDlgsErrors.ksInvalidControlCharacterFound);
         }

         var normalizer = CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD);
         m_fileData[index] = normalizer.Normalize(entry);
     }
 }
Beispiel #7
0
        /// <summary>
        /// Asserts the character properties that ICU is expected to report for kChar1..kChar4
        /// once the custom character definitions are in place: kChar1 is a lowercase letter,
        /// kChar2 an uppercase letter, kChar3 other punctuation and kChar4 a decimal digit.
        /// Checks that are commented out are kept as a record of what is deliberately not
        /// verified (see the comments next to them).
        /// </summary>
        private static void VerifyNewlyCreatedChars()
        {
            FwUtils.InitializeIcu();

            // The commented out methods below use u_getIntPropertyValue(), which doesn't
            // work reliably with the limited number of data files that we modify.
            //Assert.IsTrue(Icu.Character.IsAlphabetic(kChar1));	// now true
            //Assert.IsTrue(Icu.Character.IsAlphabetic(kChar2));	// now true
            //Assert.IsFalse(Icu.Character.IsAlphabetic(kChar3));
            //Assert.IsFalse(Icu.Character.IsAlphabetic(kChar4));
            Assert.IsFalse(Icu.Character.IsControl(kChar1));
            Assert.IsFalse(Icu.Character.IsControl(kChar2));
            Assert.IsFalse(Icu.Character.IsControl(kChar3));
            Assert.IsFalse(Icu.Character.IsControl(kChar4));
            //Assert.IsFalse(Icu.Character.IsDiacritic(kChar1));
            //Assert.IsFalse(Icu.Character.IsDiacritic(kChar2));
            //Assert.IsFalse(Icu.Character.IsDiacritic(kChar3));
            //Assert.IsFalse(Icu.Character.IsDiacritic(kChar4));
            //Assert.IsFalse(Icu.Character.IsIdeographic(kChar1));
            //Assert.IsFalse(Icu.Character.IsIdeographic(kChar2));
            //Assert.IsFalse(Icu.Character.IsIdeographic(kChar3));
            //Assert.IsFalse(Icu.Character.IsIdeographic(kChar4));
            //Assert.IsFalse(Icu.Character.IsNumeric(kChar1));
            //Assert.IsFalse(Icu.Character.IsNumeric(kChar2));
            //Assert.IsFalse(Icu.Character.IsNumeric(kChar3));
            //Assert.IsTrue(Icu.Character.IsNumeric(kChar4));		// now true
            // Only kChar3 is expected to be punctuation.
            Assert.IsFalse(Icu.Character.IsPunct(kChar1));
            Assert.IsFalse(Icu.Character.IsPunct(kChar2));
            Assert.IsTrue(Icu.Character.IsPunct(kChar3));                               // now true
            Assert.IsFalse(Icu.Character.IsPunct(kChar4));
            Assert.IsFalse(Icu.Character.IsSpace(kChar1));
            Assert.IsFalse(Icu.Character.IsSpace(kChar2));
            Assert.IsFalse(Icu.Character.IsSpace(kChar3));
            Assert.IsFalse(Icu.Character.IsSpace(kChar4));
            Assert.IsFalse(Icu.Character.IsSymbol(kChar1));
            Assert.IsFalse(Icu.Character.IsSymbol(kChar2));
            Assert.IsFalse(Icu.Character.IsSymbol(kChar3));
            Assert.IsFalse(Icu.Character.IsSymbol(kChar4));

            // General category expected for each character.
            var cat = Icu.Character.GetCharType(kChar1);

            Assert.AreEqual(Icu.Character.UCharCategory.LOWERCASE_LETTER, cat);
            cat = Icu.Character.GetCharType(kChar2);
            Assert.AreEqual(Icu.Character.UCharCategory.UPPERCASE_LETTER, cat);
            cat = Icu.Character.GetCharType(kChar3);
            Assert.AreEqual(Icu.Character.UCharCategory.OTHER_PUNCTUATION, cat);
            cat = Icu.Character.GetCharType(kChar4);
            Assert.AreEqual(Icu.Character.UCharCategory.DECIMAL_DIGIT_NUMBER, cat);

            // None of the four characters is expected to have a decomposition type.
            var decompositionType = CustomIcu.GetDecompositionTypeInfo(kChar1);

            Assert.AreEqual("[none]", decompositionType.Description);
            decompositionType = CustomIcu.GetDecompositionTypeInfo(kChar2);
            Assert.AreEqual("[none]", decompositionType.Description);
            decompositionType = CustomIcu.GetDecompositionTypeInfo(kChar3);
            Assert.AreEqual("[none]", decompositionType.Description);
            decompositionType = CustomIcu.GetDecompositionTypeInfo(kChar4);
            Assert.AreEqual("[none]", decompositionType.Description);

            // Numeric type is checked for kChar1..kChar3 only; the kChar4 check is disabled
            // below.
            var numericType = CustomIcu.GetNumericTypeInfo(kChar1);

            Assert.AreEqual("[none]", numericType.Description);
            numericType = CustomIcu.GetNumericTypeInfo(kChar2);
            Assert.AreEqual("[none]", numericType.Description);
            numericType = CustomIcu.GetNumericTypeInfo(kChar3);
            Assert.AreEqual("[none]", numericType.Description);

            // Current implementation (as of ICU50) is not overriding numeric type since we don't use it anywhere.
            // Enhance silmods.c in icu patch if needed.
            //numericType = Icu.GetNumericType(kChar4);
            //Assert.AreEqual("Decimal Digit", numericType.Description);

            // Current implementation (as of ICU50) is not overriding character names since we don't use them anywhere.
            // Enhance silmods.c in icu patch if needed.
            //var prettyName = Icu.GetPrettyICUCharName("\xE000");
            //Assert.AreEqual("My Special Character", prettyName);
            //prettyName = Icu.GetPrettyICUCharName("\xE001");
            //Assert.AreEqual("My Uppercase Character", prettyName);
            //prettyName = Icu.GetPrettyICUCharName(kChar3S);
            //Assert.AreEqual("New Punctuation Mark", prettyName);
            //var rawName = Icu.GetCharName(kChar4);	// can't pass large character code as 16-bit char.
            //Assert.AreEqual("NEW DIGIT NINE", rawName);
        }
Beispiel #8
0
        /// <summary>
        /// Asserts the baseline that ICU is expected to report for kChar1..kChar4 when no
        /// custom definitions exist for them: every boolean property is false, kChar1 and
        /// kChar2 are private-use characters, kChar3 and kChar4 are unassigned, there is no
        /// decomposition or numeric type, and no pretty character name is available.
        /// </summary>
        private static void VerifyNonexistentChars()
        {
            FwUtils.InitializeIcu();

            // None of the boolean character properties may be set for any of the four
            // characters.
            Assert.IsFalse(Icu.Character.IsAlphabetic(kChar1));
            Assert.IsFalse(Icu.Character.IsAlphabetic(kChar2));
            Assert.IsFalse(Icu.Character.IsAlphabetic(kChar3));
            Assert.IsFalse(Icu.Character.IsAlphabetic(kChar4));
            Assert.IsFalse(Icu.Character.IsControl(kChar1));
            Assert.IsFalse(Icu.Character.IsControl(kChar2));
            Assert.IsFalse(Icu.Character.IsControl(kChar3));
            Assert.IsFalse(Icu.Character.IsControl(kChar4));
            Assert.IsFalse(Icu.Character.IsDiacritic(kChar1));
            Assert.IsFalse(Icu.Character.IsDiacritic(kChar2));
            Assert.IsFalse(Icu.Character.IsDiacritic(kChar3));
            Assert.IsFalse(Icu.Character.IsDiacritic(kChar4));
            Assert.IsFalse(Icu.Character.IsIdeographic(kChar1));
            Assert.IsFalse(Icu.Character.IsIdeographic(kChar2));
            Assert.IsFalse(Icu.Character.IsIdeographic(kChar3));
            Assert.IsFalse(Icu.Character.IsIdeographic(kChar4));
            Assert.IsFalse(Icu.Character.IsNumeric(kChar1));
            Assert.IsFalse(Icu.Character.IsNumeric(kChar2));
            Assert.IsFalse(Icu.Character.IsNumeric(kChar3));
            Assert.IsFalse(Icu.Character.IsNumeric(kChar4));
            Assert.IsFalse(Icu.Character.IsPunct(kChar1));
            Assert.IsFalse(Icu.Character.IsPunct(kChar2));
            Assert.IsFalse(Icu.Character.IsPunct(kChar3));
            Assert.IsFalse(Icu.Character.IsPunct(kChar4));
            Assert.IsFalse(Icu.Character.IsSpace(kChar1));
            Assert.IsFalse(Icu.Character.IsSpace(kChar2));
            Assert.IsFalse(Icu.Character.IsSpace(kChar3));
            Assert.IsFalse(Icu.Character.IsSpace(kChar4));
            Assert.IsFalse(Icu.Character.IsSymbol(kChar1));
            Assert.IsFalse(Icu.Character.IsSymbol(kChar2));
            Assert.IsFalse(Icu.Character.IsSymbol(kChar3));
            Assert.IsFalse(Icu.Character.IsSymbol(kChar4));

            // General category: private use for kChar1/kChar2, unassigned for kChar3/kChar4.
            Assert.AreEqual(Icu.Character.UCharCategory.PRIVATE_USE_CHAR, Icu.Character.GetCharType(kChar1));
            Assert.AreEqual(Icu.Character.UCharCategory.PRIVATE_USE_CHAR, Icu.Character.GetCharType(kChar2));
            Assert.AreEqual(Icu.Character.UCharCategory.UNASSIGNED, Icu.Character.GetCharType(kChar3));
            Assert.AreEqual(Icu.Character.UCharCategory.UNASSIGNED, Icu.Character.GetCharType(kChar4));

            // No decomposition type is expected for any of the characters.
            var decompositionType = CustomIcu.GetDecompositionTypeInfo(kChar1);

            Assert.AreEqual("[none]", decompositionType.Description);
            decompositionType = CustomIcu.GetDecompositionTypeInfo(kChar2);
            Assert.AreEqual("[none]", decompositionType.Description);
            decompositionType = CustomIcu.GetDecompositionTypeInfo(kChar3);
            Assert.AreEqual("[none]", decompositionType.Description);
            decompositionType = CustomIcu.GetDecompositionTypeInfo(kChar4);
            Assert.AreEqual("[none]", decompositionType.Description);

            // No numeric type is expected for any of the characters.
            var numericType = CustomIcu.GetNumericTypeInfo(kChar1);

            Assert.AreEqual("[none]", numericType.Description);
            numericType = CustomIcu.GetNumericTypeInfo(kChar2);
            Assert.AreEqual("[none]", numericType.Description);
            numericType = CustomIcu.GetNumericTypeInfo(kChar3);
            Assert.AreEqual("[none]", numericType.Description);
            numericType = CustomIcu.GetNumericTypeInfo(kChar4);
            Assert.AreEqual("[none]", numericType.Description);

            // No pretty name is expected for any of the characters.
            var prettyName = Icu.Character.GetPrettyICUCharName("\xE000");

            Assert.IsNull(prettyName);
            prettyName = Icu.Character.GetPrettyICUCharName("\xE001");
            Assert.IsNull(prettyName);
            prettyName = Icu.Character.GetPrettyICUCharName(kChar3S);
            Assert.IsNull(prettyName);
            // NOTE(review): a C# \x escape consumes at most four hex digits, so "\xDDDDD" is
            // U+DDDD followed by the letter 'D', not the single codepoint U+DDDDD. If U+DDDDD
            // was intended, the literal would be "\U000DDDDD" - confirm before changing.
            prettyName = Icu.Character.GetPrettyICUCharName("\xDDDDD");
            Assert.IsNull(prettyName);
        }