/// <summary>
/// Checks the ICU character name and the general category reported for a codepoint.
/// Asserts if either the name or the general category differs from the expected value.
/// </summary>
/// <param name="puaIndex">Unicode codepoint</param>
/// <param name="puaName">Expected correct PUA codepoint name</param>
/// <param name="puaGenCat">The expected PUA General Category</param>
public static void Check_PUA(int puaIndex, string puaName, LgGeneralCharCategory puaGenCat)
{
	string name = "";
	LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;

	// Look up the character name (via ICU) and the general category (via the property engine).
	ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();
	try
	{
		string icuDataDir = GetIcuDataDir();
		Icu.SetDataDirectory(icuDataDir);
		Icu.UErrorCode error;
		Icu.UCharNameChoice choice = Icu.UCharNameChoice.U_UNICODE_CHAR_NAME;
		// The returned length is not needed; only the name itself is compared below.
		Icu.u_CharName(puaIndex, choice, out name, out error);
		genCategory = charPropEngine.get_GeneralCategory(puaIndex);
	}
	finally
	{
		// Must release pointer to free memory-mapping before we try to restore files.
		Marshal.ReleaseComObject(charPropEngine);
		charPropEngine = null;
		Icu.Cleanup();		// clean up the ICU files / data
	}

	// Format the codepoint once, in hexadecimal, for both failure messages.
	string hexCodepoint = puaIndex.ToString("x", new System.Globalization.NumberFormatInfo());

	// The name must match the expected name.
	Assert.AreEqual(puaName, name, "PUA Character " + hexCodepoint + " is incorrect");

	// The general category must match; the message names the property actually being compared.
	Assert.AreEqual(puaGenCat, genCategory,
		"PUA Character " + hexCodepoint + " has an incorrect general category");
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Checks the ICU character name and the general category reported for a codepoint.
/// Asserts if either the name or the general category differs from the expected value.
/// </summary>
/// <param name="puaIndex">Unicode codepoint</param>
/// <param name="puaName">Expected correct PUA codepoint name</param>
/// <param name="puaGenCat">The expected PUA General Category</param>
/// ------------------------------------------------------------------------------------
public static void Check_PUA(int puaIndex, string puaName, LgGeneralCharCategory puaGenCat)
{
	string name = string.Empty;
	LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;

	// Look up the character name (via ICU) and the general category (via the property engine).
	ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();
	try
	{
		Icu.UErrorCode error;
		Icu.UCharNameChoice choice = Icu.UCharNameChoice.U_UNICODE_CHAR_NAME;
		Icu.u_CharName(puaIndex, choice, out name, out error);
		genCategory = charPropEngine.get_GeneralCategory(puaIndex);
	}
	finally
	{
		// Must release pointer to free memory-mapping before we try to restore files.
		Marshal.ReleaseComObject(charPropEngine);
		charPropEngine = null;
		Icu.Cleanup();		// clean up the ICU files / data
	}

	// Format the codepoint once, in hexadecimal, for both failure messages.
	string hexCodepoint = puaIndex.ToString("x", new System.Globalization.NumberFormatInfo());

	// The name must match the expected name.
	Assert.AreEqual(puaName, name, "PUA Character " + hexCodepoint + " is incorrect");

	// The general category must match; the message names the property actually being compared.
	Assert.AreEqual(puaGenCat, genCategory,
		"PUA Character " + hexCodepoint + " has an incorrect general category");
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Looks up the UcdProperty registered for the given General Category.
/// </summary>
/// <param name="generalCategory">The UCD general category
/// see: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values</param>
/// <returns>
/// The instance stored in the general-category table under the requested value,
/// so repeated calls with the same value yield the same instance.
/// </returns>
/// ------------------------------------------------------------------------------------
public static UcdProperty GetInstance(LgGeneralCharCategory generalCategory)
{
	// The lookup tables must be initialized before they are indexed.
	InitializeHashTables();

	// Index the general-category table by the enum's integer value.
	return s_ucdPropertyDict[UcdCategories.generalCategory][(int)generalCategory];
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Decides whether a character that is not word-forming according to ICU may still
/// be overridden to be word-forming.
/// </summary>
/// <param name="chr">The character to test; must be exactly one UTF-16 code unit</param>
/// <returns><c>true</c> if the character is the zero-width joiner or non-joiner, or its
/// category (the language definition's override if there is one, otherwise the
/// property engine's) is a symbol or punctuation category;
/// <c>false</c> otherwise, including for null, empty or multi-character strings</returns>
/// ------------------------------------------------------------------------------------
public bool CanBeWordFormingOverride(string chr)
{
	// Only single-code-unit strings are considered.
	if (string.IsNullOrEmpty(chr) || chr.Length > 1)
	{
		return false;
	}

	char candidate = chr[0];
	int codepoint = candidate;

	// Special exceptions: zero-width non-joiner (U+200C) and zero-width joiner (U+200D).
	if (codepoint == 0x200C || codepoint == 0x200D)
	{
		return true;
	}

	// A category override from the language definition takes precedence over the engine.
	string overrideCategory = null;
	if (m_langDef != null)
	{
		overrideCategory = m_langDef.GetOverrideCharCategory(candidate);
	}
	if (overrideCategory != null)
	{
		char majorClass = overrideCategory[0];
		return majorClass == 'S' || majorClass == 'P';
	}

	switch (m_cpe.get_GeneralCategory(codepoint))
	{
		// symbol
		case LgGeneralCharCategory.kccSc:
		case LgGeneralCharCategory.kccSk:
		case LgGeneralCharCategory.kccSm:
		case LgGeneralCharCategory.kccSo:
		// punctuation
		case LgGeneralCharCategory.kccPc:
		case LgGeneralCharCategory.kccPd:
		case LgGeneralCharCategory.kccPe:
		case LgGeneralCharCategory.kccPf:
		case LgGeneralCharCategory.kccPi:
		case LgGeneralCharCategory.kccPo:
		case LgGeneralCharCategory.kccPs:
			return true;
		default:
			return false;
	}
}
/// <summary>
/// Builds the message to report for an exception raised while parsing.
/// </summary>
/// <param name="e">The exception that was raised</param>
/// <returns>For an InvalidShapeException, the invalid-wordform message filled in with the
/// word, the 1-based failure position, the unparsed remainder and the part before the
/// failure position; for any other exception, the default message with the exception's
/// own message text</returns>
private string ProcessParseException(Exception e)
{
	var ise = e as InvalidShapeException;
	if (ise == null)
	{
		return String.Format(ParserCoreStrings.ksHCDefaultErrorMsg, e.Message);
	}

	string phonemesFoundSoFar = ise.String.Substring(0, ise.Position);
	string rest = ise.String.Substring(ise.Position);

	// The remainder is empty when the failure position is the end of the string;
	// there is then no first character to inspect, and indexing it would throw.
	if (rest.Length > 0)
	{
		LgGeneralCharCategory cc = m_cache.ServiceLocator.UnicodeCharProps.get_GeneralCategory(rest[0]);
		if (cc == LgGeneralCharCategory.kccMn)
		{
			// the first character is a diacritic, combining type of character
			// insert a space so it does not show on top of a single quote in the message string
			rest = " " + rest;
		}
	}

	return string.Format(ParserCoreStrings.ksHCInvalidWordform, ise.String, ise.Position + 1, rest, phonemesFoundSoFar);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Determines whether the given character is used to end a sentence.
/// </summary>
/// <param name="ch">The character.</param>
/// <param name="cc">The general character category.</param>
/// <returns>
/// <c>true</c> if the character ends a sentence; otherwise, <c>false</c>.
/// </returns>
/// ------------------------------------------------------------------------------------
public static bool IsEndOfSentenceChar(int ch, LgGeneralCharCategory cc)
{
	// SECTION SIGN (used for forced segment breaks w/o punctuation).
	// It is accepted whatever category was passed in.
	if (ch == 0x00A7)
	{
		return true;
	}

	// Every other accepted character is only recognized together with category kccPo,
	// so anything else can be rejected without looking at the code point.
	if (cc != LgGeneralCharCategory.kccPo)
	{
		return false;
	}

	switch (ch)
	{
		case 0x0021: // EXCLAMATION MARK
		case 0x002E: // FULL STOP
		case 0x003F: // QUESTION MARK
		case 0x055C: // ARMENIAN EXCLAMATION MARK
		case 0x055E: // ARMENIAN QUESTION MARK
		case 0x0589: // ARMENIAN FULL STOP
		case 0x061F: // ARABIC QUESTION MARK
		case 0x06D4: // ARABIC FULL STOP
		case 0x0700: // SYRIAC END OF PARAGRAPH
		case 0x0701: // SYRIAC SUPRALINEAR FULL STOP
		case 0x0702: // SYRIAC SUBLINEAR FULL STOP
		case 0x0964: // DEVANAGARI DANDA
		case 0x0965: // DEVANAGARI DOUBLE DANDA
		case 0x104A: // MYANMAR SIGN LITTLE SECTION
		case 0x104B: // MYANMAR SIGN SECTION
		case 0x1362: // ETHIOPIC FULL STOP
		case 0x1367: // ETHIOPIC QUESTION MARK
		case 0x1368: // ETHIOPIC PARAGRAPH SEPARATOR
		case 0x166E: // CANADIAN SYLLABICS FULL STOP
		case 0x1803: // MONGOLIAN FULL STOP
		case 0x1809: // MONGOLIAN MANCHU FULL STOP
		case 0x1944: // LIMBU EXCLAMATION MARK
		case 0x1945: // LIMBU QUESTION MARK
		case 0x203C: // DOUBLE EXCLAMATION MARK
		case 0x203D: // INTERROBANG
		case 0x2047: // DOUBLE QUESTION MARK
		case 0x2048: // QUESTION EXCLAMATION MARK
		case 0x2049: // EXCLAMATION QUESTION MARK
		case 0x3002: // IDEOGRAPHIC FULL STOP
		case 0xFE52: // SMALL FULL STOP
		case 0xFE56: // SMALL QUESTION MARK
		case 0xFE57: // SMALL EXCLAMATION MARK
		case 0xFF01: // FULLWIDTH EXCLAMATION MARK
		case 0xFF0E: // FULLWIDTH FULL STOP
		case 0xFF1F: // FULLWIDTH QUESTION MARK
		case 0xFF61: // HALFWIDTH IDEOGRAPHIC FULL STOP
			return true;
		default:
			return false;
	}
}
/// <summary>
/// Creates a character property record from its four component values.
/// </summary>
/// <param name="generalCharCategory">Value stored as the general character category</param>
/// <param name="isLetter">Value stored as the "is letter" flag</param>
/// <param name="isWordforming">Value stored as the "is word-forming" flag</param>
/// <param name="isPunctuation">Value stored as the "is punctuation" flag</param>
public CharacterProperty(
	LgGeneralCharCategory generalCharCategory,
	bool isLetter,
	bool isWordforming,
	bool isPunctuation)
{
	// Plain field captures: the arguments are stored as given, with no validation.
	m_isPunctuation = isPunctuation;
	m_isWordforming = isWordforming;
	m_isLetter = isLetter;
	m_generalCharCategory = generalCharCategory;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Determines whether the specified character is a diacritic, i.e. whether the
/// character property engine reports its general category as kccMc or kccMn.
/// </summary>
/// <param name="cc">The character to test.</param>
/// <returns>
/// <c>true</c> if the specified character is a diacritic; otherwise, <c>false</c>.
/// </returns>
/// ------------------------------------------------------------------------------------
public override bool IsDiacritic(char cc)
{
	switch (m_charPropEngine.get_GeneralCategory(cc))
	{
		case LgGeneralCharCategory.kccMc:
		case LgGeneralCharCategory.kccMn:
			return true;
		default:
			return false;
	}
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Retrieves the UcdProperty that corresponds to the given General Category.
/// </summary>
/// <param name="generalCategory">The UCD general category
/// see: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values</param>
/// <returns>
/// The single instance held in the general-category table for the requested value;
/// two calls with the same value get the same instance.
/// </returns>
/// ------------------------------------------------------------------------------------
public static UcdProperty GetInstance(LgGeneralCharCategory generalCategory)
{
	// Make sure the lookup tables exist before reading from them.
	InitializeHashTables();

	// The table is keyed by the integer value of the category enum.
	int categoryKey = (int)generalCategory;
	Dictionary<int, UcdProperty> generalCategoryProperties =
		s_ucdPropertyDict[UcdCategories.generalCategory];
	return generalCategoryProperties[categoryKey];
}
/// <summary>
/// Checks all the values of a character in the UnicodeData.txt.
/// Checks: fields 1-8,11-14
/// (Skips 9 and 10, the "Bidi Mirrored" and "Unicode Version 1" fields.)
/// </summary>
/// <param name="puaIndex">Codepoint to check</param>
/// <param name="puaName">Expected character name [1]</param>
/// <param name="puaGenCat">Expected general category [2]</param>
/// <param name="puaCombiningClass">Expected combining class [3]</param>
/// <param name="puaBidiClass">Expected bidi class [4]</param>
/// <param name="puaDecomposition">Expected decomposition [5]</param>
/// <param name="puaNumeric">Whether the character is expected to be reported as a number</param>
/// <param name="puaNumericValue">Expected numeric value; only checked when
/// <paramref name="puaNumeric"/> is true</param>
/// <param name="puaComment">Expected ISO comment [11]</param>
/// <param name="puaToUpper">Expected uppercase mapping [12]</param>
/// <param name="puaToLower">Expected lowercase mapping [13]</param>
/// <param name="puaToTitle">Expected titlecase mapping [14]</param>
public static void Check_PUA(
	int puaIndex,
	string puaName,
	LgGeneralCharCategory puaGenCat,
	int puaCombiningClass,
	LgBidiCategory puaBidiClass,
	string puaDecomposition,
	bool puaNumeric,
	int puaNumericValue,
	string puaComment,
	int puaToUpper,
	int puaToLower,
	int puaToTitle
	)
{
	string name = "";
	LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;
	int combiningClass = 0;
	string decomposition = "None";
	LgBidiCategory bidiCategory = LgBidiCategory.kbicL;
	bool isNumber = false;
	int numericValue = -1;
	int upper = -1;
	int lower = -1;
	int title = -1;
	string comment = "<none>";

	// Read every property first, so the engine can be released before anything is asserted.
	ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();
	try
	{
		string icuDataDir = GetIcuDataDir();
		Icu.SetDataDirectory(icuDataDir);
		Icu.UErrorCode error;
		Icu.UCharNameChoice choice = Icu.UCharNameChoice.U_UNICODE_CHAR_NAME;
		// The returned length is not needed; only the name itself is compared below.
		Icu.u_CharName(puaIndex, choice, out name, out error);
		genCategory = charPropEngine.get_GeneralCategory(puaIndex);
		combiningClass = charPropEngine.get_CombiningClass(puaIndex);
		bidiCategory = charPropEngine.get_BidiCategory(puaIndex);
		decomposition = charPropEngine.get_Decomposition(puaIndex);
		// The full decomposition is not compared against anything; the call is kept so
		// that a failure inside the engine still surfaces in this check.
		charPropEngine.get_FullDecomp(puaIndex);
		// Note: isNumber merely checks the General category, it doesn't check to
		// see if there is a valid numeric value.
		isNumber = charPropEngine.get_IsNumber(puaIndex);
		if (isNumber)
		{
			numericValue = charPropEngine.get_NumericValue(puaIndex);
		}
		comment = charPropEngine.get_Comment(puaIndex);
		upper = charPropEngine.get_ToUpperCh(puaIndex);
		lower = charPropEngine.get_ToLowerCh(puaIndex);
		title = charPropEngine.get_ToTitleCh(puaIndex);
	}
	finally
	{
		// Must release pointer to free memory-mapping before we try to restore files.
		Marshal.ReleaseComObject(charPropEngine);
		charPropEngine = null;
		Icu.Cleanup();		// clean up the ICU files / data
	}

	// Format provider used to print hexadecimal values in the error messages.
	System.Globalization.NumberFormatInfo hexFormat = new System.Globalization.NumberFormatInfo();
	string errorMessage = "PUA Character " + puaIndex.ToString("x", hexFormat) + " has an incorrect ";

	//Check Name [1]
	Assert.AreEqual(puaName, name, errorMessage + "name.");

	//Check general category [2]
	Assert.AreEqual(puaGenCat, genCategory, errorMessage + "general category.");

	//Check combining class [3]
	Assert.AreEqual(puaCombiningClass, combiningClass, errorMessage + "combining class.");

	//Check Bidi class [4]
	Assert.AreEqual(puaBidiClass, bidiCategory, errorMessage + "bidi class value.");

	//Check Decomposition [5]
	using (StringWriter decompositionMessage = new StringWriter(hexFormat))
	{
		decompositionMessage.WriteLine(errorMessage + "decomposition.");
		if (string.IsNullOrEmpty(decomposition))
		{
			// There is no first character to print; indexing would throw before the
			// assertion below had a chance to report the mismatch.
			decompositionMessage.WriteLine("Decomposition is empty");
		}
		else
		{
			decompositionMessage.WriteLine("Decomposition, {0:x}, is incorrect", (int)decomposition[0]);
		}
		Assert.AreEqual(puaDecomposition, decomposition, decompositionMessage.ToString());
	}

	//Check Numeric Value [6,7,8]
	Assert.AreEqual(puaNumeric, isNumber, errorMessage + "numeric type (i.e. does or doesn't have a numeric value when it should be the other).");
	if (puaNumeric)
	{
		Assert.AreEqual(puaNumericValue, numericValue, errorMessage + "numeric value.");
	}

	//Check ISO Comment [11]
	Assert.AreEqual(puaComment, comment, errorMessage + "ISO commment");

	//Check uppercase [12]
	// A separate writer is used because Flush() does not discard text already written,
	// so a shared writer would still carry the decomposition message.
	using (StringWriter upperCaseMessage = new StringWriter(hexFormat))
	{
		upperCaseMessage.WriteLine(errorMessage + "upper case.");
		upperCaseMessage.WriteLine("Found uppercase value: {0:x}", upper);
		Assert.AreEqual(puaToUpper, upper, upperCaseMessage.ToString());
	}

	//Check lowercase [13]
	Assert.AreEqual(puaToLower, lower, errorMessage + "lower case.");

	//Check titlecase [14]
	Assert.AreEqual(puaToTitle, title, errorMessage + "title case.");
}
/// <summary>
/// Checks all the values of a character in the UnicodeData.txt.
/// Checks: fields 1-8,11-14
/// (Skips 9 and 10, the "Bidi Mirrored" and "Unicode Version 1" fields.)
/// </summary>
/// <param name="puaIndex">Codepoint to check</param>
/// <param name="puaName">Expected character name [1]</param>
/// <param name="puaGenCat">Expected general category [2]</param>
/// <param name="puaCombiningClass">Expected combining class [3]</param>
/// <param name="puaBidiClass">Expected bidi class [4]</param>
/// <param name="puaDecomposition">Expected decomposition [5]</param>
/// <param name="puaNumeric">Whether the character is expected to be reported as a number</param>
/// <param name="puaNumericValue">Expected numeric value; only checked when
/// <paramref name="puaNumeric"/> is true</param>
/// <param name="puaComment">Expected ISO comment [11]</param>
/// <param name="puaToUpper">Expected uppercase mapping [12]</param>
/// <param name="puaToLower">Expected lowercase mapping [13]</param>
/// <param name="puaToTitle">Expected titlecase mapping [14]</param>
public static void Check_PUA(
	int puaIndex,
	string puaName,
	LgGeneralCharCategory puaGenCat,
	int puaCombiningClass,
	LgBidiCategory puaBidiClass,
	string puaDecomposition,
	bool puaNumeric,
	int puaNumericValue,
	string puaComment,
	int puaToUpper,
	int puaToLower,
	int puaToTitle
	)
{
	string name = "";
	LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;
	int combiningClass = 0;
	string decomposition = "None";
	LgBidiCategory bidiCategory = LgBidiCategory.kbicL;
	bool isNumber = false;
	int numericValue = -1;
	int upper = -1;
	int lower = -1;
	int title = -1;
	string comment = "<none>";

	// Collect all properties up front; assertions run only after the engine is released.
	ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();
	try
	{
		string icuDataDir = GetIcuDataDir();
		Icu.SetDataDirectory(icuDataDir);
		Icu.UErrorCode error;
		Icu.UCharNameChoice choice = Icu.UCharNameChoice.U_UNICODE_CHAR_NAME;
		// Only the name is used; the length returned by the call is ignored.
		Icu.u_CharName(puaIndex, choice, out name, out error);
		genCategory = charPropEngine.get_GeneralCategory(puaIndex);
		combiningClass = charPropEngine.get_CombiningClass(puaIndex);
		bidiCategory = charPropEngine.get_BidiCategory(puaIndex);
		decomposition = charPropEngine.get_Decomposition(puaIndex);
		// Nothing is asserted about the full decomposition; the call remains so that
		// an engine failure while computing it is still noticed here.
		charPropEngine.get_FullDecomp(puaIndex);
		// Note: isNumber merely checks the General category, it doesn't check to
		// see if there is a valid numeric value.
		isNumber = charPropEngine.get_IsNumber(puaIndex);
		if (isNumber)
		{
			numericValue = charPropEngine.get_NumericValue(puaIndex);
		}
		comment = charPropEngine.get_Comment(puaIndex);
		upper = charPropEngine.get_ToUpperCh(puaIndex);
		lower = charPropEngine.get_ToLowerCh(puaIndex);
		title = charPropEngine.get_ToTitleCh(puaIndex);
	}
	finally
	{
		// Must release pointer to free memory-mapping before we try to restore files.
		Marshal.ReleaseComObject(charPropEngine);
		charPropEngine = null;
		Icu.Cleanup();		// clean up the ICU files / data
	}

	// Format provider used to print hexadecimal values in the error messages.
	System.Globalization.NumberFormatInfo hexFormat = new System.Globalization.NumberFormatInfo();
	string errorMessage = "PUA Character " + puaIndex.ToString("x", hexFormat) + " has an incorrect ";

	//Check Name [1]
	Assert.AreEqual(puaName, name, errorMessage + "name.");

	//Check general category [2]
	Assert.AreEqual(puaGenCat, genCategory, errorMessage + "general category.");

	//Check combining class [3]
	Assert.AreEqual(puaCombiningClass, combiningClass, errorMessage + "combining class.");

	//Check Bidi class [4]
	Assert.AreEqual(puaBidiClass, bidiCategory, errorMessage + "bidi class value.");

	//Check Decomposition [5]
	using (StringWriter decompositionMessage = new StringWriter(hexFormat))
	{
		decompositionMessage.WriteLine(errorMessage + "decomposition.");
		if (string.IsNullOrEmpty(decomposition))
		{
			// Without a first character there is nothing to print in hex, and indexing
			// would throw before the mismatch could be reported by the assertion.
			decompositionMessage.WriteLine("Decomposition is empty");
		}
		else
		{
			decompositionMessage.WriteLine("Decomposition, {0:x}, is incorrect", (int)decomposition[0]);
		}
		Assert.AreEqual(puaDecomposition, decomposition, decompositionMessage.ToString());
	}

	//Check Numeric Value [6,7,8]
	Assert.AreEqual(puaNumeric, isNumber, errorMessage + "numeric type (i.e. does or doesn't have a numeric value when it should be the other).");
	if (puaNumeric)
	{
		Assert.AreEqual(puaNumericValue, numericValue, errorMessage + "numeric value.");
	}

	//Check ISO Comment [11]
	Assert.AreEqual(puaComment, comment, errorMessage + "ISO commment");

	//Check uppercase [12]
	// Flush() leaves previously written text in the writer, so the uppercase message
	// gets a writer of its own instead of reusing the decomposition one.
	using (StringWriter upperCaseMessage = new StringWriter(hexFormat))
	{
		upperCaseMessage.WriteLine(errorMessage + "upper case.");
		upperCaseMessage.WriteLine("Found uppercase value: {0:x}", upper);
		Assert.AreEqual(puaToUpper, upper, upperCaseMessage.ToString());
	}

	//Check lowercase [13]
	Assert.AreEqual(puaToLower, lower, errorMessage + "lower case.");

	//Check titlecase [14]
	Assert.AreEqual(puaToTitle, title, errorMessage + "title case.");
}
/// <summary>
/// Determines whether a character ends a sentence, with special handling for the full stop.
/// </summary>
/// <param name="ch">The character.</param>
/// <param name="cc">The general character category of <paramref name="ch"/>.</param>
/// <param name="ich">Value handed to IsSpecialPeriod when the character is a full stop.</param>
/// <returns>For a full stop (U+002E), <c>true</c> unless IsSpecialPeriod reports it as
/// special; for any other character, the result of TsStringUtils.IsEndOfSentenceChar.</returns>
private bool IsEosChar(int ch, LgGeneralCharCategory cc, int ich)
{
	bool isFullStop = ch == 0x002E;

	// A full stop is decided solely by IsSpecialPeriod; the shared end-of-sentence
	// table is consulted only for every other character.
	return isFullStop
		? !IsSpecialPeriod(ich)
		: TsStringUtils.IsEndOfSentenceChar(ch, cc);
}
/// <summary>
/// Determines whether the character property engine reports the general category
/// kccZs for the given character.
/// </summary>
/// <param name="c">The character to test.</param>
/// <returns><c>true</c> if the category is kccZs; otherwise, <c>false</c>.</returns>
bool IsWhite(char c)
{
	return m_cpe.get_GeneralCategory(c) == LgGeneralCharCategory.kccZs;
}