Exemplo n.º 1
0
        /// <summary>
        /// Looks up the ICU character name and the general category of a code point and
        /// asserts that both match the expected values.
        /// </summary>
        /// <param name="puaIndex">Unicode codepoint to look up</param>
        /// <param name="puaName">Expected character name for the codepoint</param>
        /// <param name="puaGenCat">Expected general category for the codepoint</param>
        public static void Check_PUA(int puaIndex, string puaName, LgGeneralCharCategory puaGenCat)
        {
            string name = "";
            LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;

            ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();

            try
            {
                Icu.SetDataDirectory(GetIcuDataDir());

                // NOTE(review): the ICU error code is not inspected; presumably a failed lookup
                // shows up as a name mismatch in the assertion below - confirm.
                Icu.UErrorCode error;
                // Only the name is used; the length returned by u_CharName is not needed.
                Icu.u_CharName(puaIndex, Icu.UCharNameChoice.U_UNICODE_CHAR_NAME, out name, out error);
                genCategory = charPropEngine.get_GeneralCategory(puaIndex);
            }
            finally
            {
                // Must release pointer to free memory-mapping before we try to restore files.
                Marshal.ReleaseComObject(charPropEngine);
                charPropEngine = null;
                Icu.Cleanup();                          // clean up the ICU files / data
            }

            // Lower-case hexadecimal form of the codepoint, shared by both failure messages.
            string hexCodepoint = puaIndex.ToString("x", new System.Globalization.NumberFormatInfo());

            // The name reported by ICU must match the expected name.
            Assert.AreEqual(puaName, name, "PUA Character " + hexCodepoint + " is incorrect");

            // This assertion compares general categories, so the failure message says so
            // (it previously claimed an incorrect "digit value").
            Assert.AreEqual(puaGenCat, genCategory, "PUA Character " + hexCodepoint +
                            " has an incorrect general category");
        }
Exemplo n.º 2
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Looks up the ICU character name and the general category of a code point and
		/// asserts that both match the expected values.
		/// </summary>
		/// <param name="puaIndex">Unicode codepoint to look up</param>
		/// <param name="puaName">Expected character name for the codepoint</param>
		/// <param name="puaGenCat">Expected general category for the codepoint</param>
		/// ------------------------------------------------------------------------------------
		public static void Check_PUA(int puaIndex, string puaName, LgGeneralCharCategory puaGenCat)
		{
			string name = string.Empty;
			LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;

			ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();
			try
			{
				// NOTE(review): the ICU error code is not inspected; presumably a failed lookup
				// shows up as a name mismatch in the assertion below - confirm.
				Icu.UErrorCode error;
				Icu.u_CharName(puaIndex, Icu.UCharNameChoice.U_UNICODE_CHAR_NAME, out name, out error);
				genCategory = charPropEngine.get_GeneralCategory(puaIndex);
			}
			finally
			{
				// Must release pointer to free memory-mapping before we try to restore files.
				Marshal.ReleaseComObject(charPropEngine);
				charPropEngine = null;
				Icu.Cleanup();		// clean up the ICU files / data
			}

			// Lower-case hexadecimal form of the codepoint, shared by both failure messages.
			string hexCodepoint = puaIndex.ToString("x", new System.Globalization.NumberFormatInfo());

			// The name reported by ICU must match the expected name.
			Assert.AreEqual(puaName, name, "PUA Character " + hexCodepoint + " is incorrect");

			// This assertion compares general categories, so the failure message says so
			// (it previously claimed an incorrect "digit value").
			Assert.AreEqual(puaGenCat, genCategory, "PUA Character " + hexCodepoint +
				" has an incorrect general category");
		}
Exemplo n.º 3
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Returns the <c>UcdProperty</c> registered for the given general category.
        /// </summary>
        /// <param name="generalCategory">The UCD general category
        /// see: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values</param>
        /// <returns>
        /// The entry stored in the general-category table under the integer value of
        /// <paramref name="generalCategory"/>; repeated calls with the same value read the
        /// same table entry.
        /// </returns>
        /// ------------------------------------------------------------------------------------
        public static UcdProperty GetInstance(LgGeneralCharCategory generalCategory)
        {
            // The lookup tables are initialized before every read.
            InitializeHashTables();

            int key = (int)generalCategory;

            return(s_ucdPropertyDict[UcdCategories.generalCategory][key]);
        }
Exemplo n.º 4
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Decides whether a character may be overridden to be word-forming: it must be a
        /// zero-width (non-)joiner, or be classified as a symbol or punctuation character.
        /// </summary>
        /// <param name="chr">The character to test, as a one-character string</param>
        /// <returns><c>true</c> if the character is U+200C or U+200D, if its override category
        /// (when the language definition supplies one) starts with 'S' or 'P', or if the
        /// character property engine reports a symbol or punctuation general category;
        /// <c>false</c> otherwise, including for null, empty or multi-character strings</returns>
        /// ------------------------------------------------------------------------------------
        public bool CanBeWordFormingOverride(string chr)
        {
            // Exactly one UTF-16 code unit is required.
            if (chr == null || chr.Length != 1)
            {
                return(false);
            }

            int code = chr[0];

            switch (code)
            {
                case 0x200C:        // zero-width non-joiner
                case 0x200D:        // zero-width joiner
                    return(true);
            }

            // An override category from the language definition, when present, decides the
            // answer on its own; the character property engine is not consulted in that case.
            string overrideCategory = null;

            if (m_langDef != null)
            {
                overrideCategory = m_langDef.GetOverrideCharCategory(chr[0]);
            }

            if (overrideCategory != null)
            {
                char majorClass = overrideCategory[0];

                return(majorClass == 'S' || majorClass == 'P');
            }

            switch (m_cpe.get_GeneralCategory(code))
            {
                // symbol
                case LgGeneralCharCategory.kccSc:
                case LgGeneralCharCategory.kccSk:
                case LgGeneralCharCategory.kccSm:
                case LgGeneralCharCategory.kccSo:
                    return(true);

                // punctuation
                case LgGeneralCharCategory.kccPc:
                case LgGeneralCharCategory.kccPd:
                case LgGeneralCharCategory.kccPe:
                case LgGeneralCharCategory.kccPf:
                case LgGeneralCharCategory.kccPi:
                case LgGeneralCharCategory.kccPo:
                case LgGeneralCharCategory.kccPs:
                    return(true);

                default:
                    return(false);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Builds the message text for an exception raised while parsing a word form.
        /// </summary>
        /// <param name="e">The exception to describe</param>
        /// <returns>For an <c>InvalidShapeException</c>, the invalid-wordform message formatted
        /// with the whole string, the 1-based position, the text from that position onward and
        /// the text before it; for any other exception, the default error message formatted
        /// with the exception's message</returns>
        private string ProcessParseException(Exception e)
        {
            var ise = e as InvalidShapeException;

            if (ise == null)
            {
                return(String.Format(ParserCoreStrings.ksHCDefaultErrorMsg, e.Message));
            }

            string phonemesFoundSoFar = ise.String.Substring(0, ise.Position);
            string rest = ise.String.Substring(ise.Position);

            // When the position is at the very end of the string, rest is empty and has no
            // first character to inspect; indexing it would throw instead of producing a message.
            if (rest.Length > 0)
            {
                LgGeneralCharCategory cc = m_cache.ServiceLocator.UnicodeCharProps.get_GeneralCategory(rest[0]);
                if (cc == LgGeneralCharCategory.kccMn)
                {
                    // the first character is a diacritic, combining type of character
                    // insert a space so it does not show on top of a single quote in the message string
                    rest = " " + rest;
                }
            }

            return(string.Format(ParserCoreStrings.ksHCInvalidWordform, ise.String, ise.Position + 1, rest, phonemesFoundSoFar));
        }
Exemplo n.º 6
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Determines whether the given character is used to end a sentence.
		/// </summary>
		/// <param name="ch">The character (code point).</param>
		/// <param name="cc">The general character category supplied for <paramref name="ch"/>.</param>
		/// <returns>
		/// 	<c>true</c> if <paramref name="ch"/> is U+00A7, or if <paramref name="cc"/> is
		/// 	<c>kccPo</c> and <paramref name="ch"/> is one of the listed sentence-final
		/// 	characters; otherwise, <c>false</c>.
		/// </returns>
		/// ------------------------------------------------------------------------------------
		public static bool IsEndOfSentenceChar(int ch, LgGeneralCharCategory cc)
		{
			// SECTION SIGN (used for forced segment breaks w/o punctuation).
			// It is not a normal punctuation character, so it is accepted whatever cc is.
			if (ch == 0x00A7)
				return true;

			// The check of cc is a quick reject: per the original author's note, all of the
			// characters listed below have this category.
			if (cc != LgGeneralCharCategory.kccPo)
				return false;

			switch (ch)
			{
				case 0x0021: // EXCLAMATION MARK
				case 0x002E: // FULL STOP
				case 0x003F: // QUESTION MARK
				case 0x055C: // ARMENIAN EXCLAMATION MARK
				case 0x055E: // ARMENIAN QUESTION MARK
				case 0x0589: // ARMENIAN FULL STOP
				case 0x061F: // ARABIC QUESTION MARK
				case 0x06D4: // ARABIC FULL STOP
				case 0x0700: // SYRIAC END OF PARAGRAPH
				case 0x0701: // SYRIAC SUPRALINEAR FULL STOP
				case 0x0702: // SYRIAC SUBLINEAR FULL STOP
				case 0x0964: // DEVANAGARI DANDA
				case 0x0965: // DEVANAGARI DOUBLE DANDA
				case 0x104A: // MYANMAR SIGN LITTLE SECTION
				case 0x104B: // MYANMAR SIGN SECTION
				case 0x1362: // ETHIOPIC FULL STOP
				case 0x1367: // ETHIOPIC QUESTION MARK
				case 0x1368: // ETHIOPIC PARAGRAPH SEPARATOR
				case 0x166E: // CANADIAN SYLLABICS FULL STOP
				case 0x1803: // MONGOLIAN FULL STOP
				case 0x1809: // MONGOLIAN MANCHU FULL STOP
				case 0x1944: // LIMBU EXCLAMATION MARK
				case 0x1945: // LIMBU QUESTION MARK
				case 0x203C: // DOUBLE EXCLAMATION MARK
				case 0x203D: // INTERROBANG
				case 0x2047: // DOUBLE QUESTION MARK
				case 0x2048: // QUESTION EXCLAMATION MARK
				case 0x2049: // EXCLAMATION QUESTION MARK
				case 0x3002: // IDEOGRAPHIC FULL STOP
				case 0xFE52: // SMALL FULL STOP
				case 0xFE56: // SMALL QUESTION MARK
				case 0xFE57: // SMALL EXCLAMATION MARK
				case 0xFF01: // FULLWIDTH EXCLAMATION MARK
				case 0xFF0E: // FULLWIDTH FULL STOP
				case 0xFF1F: // FULLWIDTH QUESTION MARK
				case 0xFF61: // HALFWIDTH IDEOGRAPHIC FULL STOP
					return true;

				default:
					return false;
			}
		}
Exemplo n.º 7
0
		/// <summary>
		/// Creates a character property record from the supplied values.
		/// </summary>
		/// <param name="generalCharCategory">General character category to store</param>
		/// <param name="isLetter">Value stored as the "is letter" flag</param>
		/// <param name="isWordforming">Value stored as the "is word-forming" flag</param>
		/// <param name="isPunctuation">Value stored as the "is punctuation" flag</param>
		public CharacterProperty(LgGeneralCharCategory generalCharCategory, bool isLetter, bool isWordforming, bool isPunctuation)
		{
			// Plain field initialization; the arguments are stored as given.
			this.m_generalCharCategory = generalCharCategory;
			this.m_isLetter = isLetter;
			this.m_isWordforming = isWordforming;
			this.m_isPunctuation = isPunctuation;
		}
Exemplo n.º 8
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Determines whether the specified character is a diacritic, judged by the general
        /// category reported by the character property engine.
        /// </summary>
        /// <param name="cc">The character to test.</param>
        /// <returns>
        ///     <c>true</c> if the character's general category is <c>kccMc</c> or
        ///     <c>kccMn</c>; otherwise, <c>false</c>.
        /// </returns>
        /// ------------------------------------------------------------------------------------
        public override bool IsDiacritic(char cc)
        {
            switch (m_charPropEngine.get_GeneralCategory(cc))
            {
                // presumably the Unicode "Mc" / "Mn" mark categories - confirm against the enum
                case LgGeneralCharCategory.kccMc:
                case LgGeneralCharCategory.kccMn:
                    return(true);

                default:
                    return(false);
            }
        }
Exemplo n.º 9
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Returns the <c>UcdProperty</c> registered for the given general category.
		/// </summary>
		/// <param name="generalCategory">The UCD general category
		/// see: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values</param>
		/// <returns>
		/// The entry stored in the general-category table under the integer value of
		/// <paramref name="generalCategory"/>; repeated calls with the same value read the
		/// same table entry.
		/// </returns>
		/// ------------------------------------------------------------------------------------
		public static UcdProperty GetInstance(LgGeneralCharCategory generalCategory)
		{
			// The lookup tables are initialized before every read.
			InitializeHashTables();

			int key = (int)generalCategory;
			return s_ucdPropertyDict[UcdCategories.generalCategory][key];
		}
Exemplo n.º 10
0
		/// <summary>
		/// Checks all the values of a character in the UnicodeData.txt.
		/// Checks: fields 1-8,11-14
		/// (Skips 9 and 10, the "Bidi Mirrored" and "Unicode Version 1" fields.)
		/// Each value is read through ICU / the character property engine and compared with
		/// the expected value; the first mismatch fails the assertion.
		/// </summary>
		/// <param name="puaIndex">Unicode codepoint to check</param>
		/// <param name="puaName">Expected character name</param>
		/// <param name="puaGenCat">Expected general category</param>
		/// <param name="puaCombiningClass">Expected combining class</param>
		/// <param name="puaBidiClass">Expected bidi category</param>
		/// <param name="puaDecomposition">Expected decomposition</param>
		/// <param name="puaNumeric">Whether the character is expected to be reported as a number</param>
		/// <param name="puaNumericValue">Expected numeric value; only compared when
		/// <paramref name="puaNumeric"/> is true</param>
		/// <param name="puaComment">Expected ISO comment</param>
		/// <param name="puaToUpper">Expected upper case codepoint</param>
		/// <param name="puaToLower">Expected lower case codepoint</param>
		/// <param name="puaToTitle">Expected title case codepoint</param>
		public static void Check_PUA(
			int puaIndex,
			string puaName,
			LgGeneralCharCategory puaGenCat,
			int puaCombiningClass,
			LgBidiCategory puaBidiClass,
			string puaDecomposition,
			bool puaNumeric,
			int puaNumericValue,
			string puaComment,
			int puaToUpper,
			int puaToLower,
			int puaToTitle
			)
		{
			string name = "";
			LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;
			int combiningClass = 0;
			string decomposition = "None";
			LgBidiCategory bidiCategory = LgBidiCategory.kbicL;
			bool isNumber = false;
			int numericValue = -1;
			int upper = -1;
			int lower = -1;
			int title = -1;
			string comment = "<none>";

			ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();
			try
			{
				string icuDataDir = GetIcuDataDir();
				Icu.SetDataDirectory(icuDataDir);
				Icu.UErrorCode error;
				Icu.UCharNameChoice choice = Icu.UCharNameChoice.U_UNICODE_CHAR_NAME;
				// Only the name is used; the length returned by u_CharName is not needed.
				Icu.u_CharName(puaIndex, choice, out name, out error);
				genCategory = charPropEngine.get_GeneralCategory(puaIndex);
				combiningClass = charPropEngine.get_CombiningClass(puaIndex);
				bidiCategory = charPropEngine.get_BidiCategory(puaIndex);
				decomposition = charPropEngine.get_Decomposition(puaIndex);
				// The full decomposition is not compared with anything below; the call is kept
				// so that the lookup itself is still exercised.
				charPropEngine.get_FullDecomp(puaIndex);
				// Note: isNumber merely checks the General category, it doesn't check to see if there is a valid numeric value.
				isNumber = charPropEngine.get_IsNumber(puaIndex);
				if (isNumber)
				{
					numericValue = charPropEngine.get_NumericValue(puaIndex);
				}
				comment = charPropEngine.get_Comment(puaIndex);

				upper = charPropEngine.get_ToUpperCh(puaIndex);
				lower = charPropEngine.get_ToLowerCh(puaIndex);
				title = charPropEngine.get_ToTitleCh(puaIndex);
			}
			finally
			{
				// Must release pointer to free memory-mapping before we try to restore files.
				Marshal.ReleaseComObject(charPropEngine);
				charPropEngine = null;
				Icu.Cleanup();		// clean up the ICU files / data
			}

			string errorMessage = "PUA Character " +
				puaIndex.ToString("x", new System.Globalization.NumberFormatInfo()) +
				" has an incorrect ";

			//Check Name [1]
			Assert.AreEqual(puaName, name, errorMessage + "name.");

			//Check general category [2]
			Assert.AreEqual(puaGenCat, genCategory, errorMessage + "general category.");

			//Check combining class [3]
			Assert.AreEqual(puaCombiningClass, combiningClass, errorMessage + "combining class.");

			//Check Bidi class [4]
			Assert.AreEqual(puaBidiClass, bidiCategory, errorMessage + "bidi class value.");

			//Check Decomposition [5]
			// The message is built before the assertion runs, so a null or empty decomposition
			// must not be indexed here.
			string decompositionDetail;
			if (string.IsNullOrEmpty(decomposition))
			{
				decompositionDetail = "Decomposition is null or empty";
			}
			else
			{
				decompositionDetail = string.Format(System.Globalization.CultureInfo.InvariantCulture,
					"Decomposition, {0:x}, is incorrect", (int)decomposition[0]);
			}
			Assert.AreEqual(puaDecomposition, decomposition,
				errorMessage + "decomposition." + System.Environment.NewLine + decompositionDetail);

			//Check Numeric Value [6,7,8]
			Assert.AreEqual(puaNumeric, isNumber, errorMessage +
				"numeric type (i.e. does or doesn't have a numeric value when it should be the other).");
			if (puaNumeric)
			{
				Assert.AreEqual(puaNumericValue, numericValue, errorMessage + "numeric value.");
			}

			//Check ISO Comment [11]
			Assert.AreEqual(puaComment, comment, errorMessage + "ISO commment");

			//Check uppercase [12]
			// Built as its own string so that it does not carry over the decomposition text.
			string upperCaseMessage = errorMessage + "upper case." + System.Environment.NewLine +
				string.Format(System.Globalization.CultureInfo.InvariantCulture,
					"Found uppercase value: {0:x}", upper);
			Assert.AreEqual(puaToUpper, upper, upperCaseMessage);

			//Check lowercase [13]
			Assert.AreEqual(puaToLower, lower, errorMessage + "lower case.");

			//Check titlecase [14]
			Assert.AreEqual(puaToTitle, title, errorMessage + "title case.");
		}
Exemplo n.º 11
0
        /// <summary>
        /// Checks all the values of a character in the UnicodeData.txt.
        /// Checks: fields 1-8,11-14
        /// (Skips 9 and 10, the "Bidi Mirrored" and "Unicode Version 1" fields.)
        /// Each value is read through ICU / the character property engine and compared with
        /// the expected value; the first mismatch fails the assertion.
        /// </summary>
        /// <param name="puaIndex">Unicode codepoint to check</param>
        /// <param name="puaName">Expected character name</param>
        /// <param name="puaGenCat">Expected general category</param>
        /// <param name="puaCombiningClass">Expected combining class</param>
        /// <param name="puaBidiClass">Expected bidi category</param>
        /// <param name="puaDecomposition">Expected decomposition</param>
        /// <param name="puaNumeric">Whether the character is expected to be reported as a number</param>
        /// <param name="puaNumericValue">Expected numeric value; only compared when
        /// <paramref name="puaNumeric"/> is true</param>
        /// <param name="puaComment">Expected ISO comment</param>
        /// <param name="puaToUpper">Expected upper case codepoint</param>
        /// <param name="puaToLower">Expected lower case codepoint</param>
        /// <param name="puaToTitle">Expected title case codepoint</param>
        public static void Check_PUA(
            int puaIndex,
            string puaName,
            LgGeneralCharCategory puaGenCat,
            int puaCombiningClass,
            LgBidiCategory puaBidiClass,
            string puaDecomposition,
            bool puaNumeric,
            int puaNumericValue,
            string puaComment,
            int puaToUpper,
            int puaToLower,
            int puaToTitle
            )
        {
            string name = "";
            LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;
            int            combiningClass     = 0;
            string         decomposition      = "None";
            LgBidiCategory bidiCategory       = LgBidiCategory.kbicL;
            bool           isNumber           = false;
            int            numericValue       = -1;
            int            upper   = -1;
            int            lower   = -1;
            int            title   = -1;
            string         comment = "<none>";

            ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();

            try
            {
                string icuDataDir = GetIcuDataDir();
                Icu.SetDataDirectory(icuDataDir);
                Icu.UErrorCode      error;
                Icu.UCharNameChoice choice = Icu.UCharNameChoice.U_UNICODE_CHAR_NAME;
                // Only the name is used; the length returned by u_CharName is not needed.
                Icu.u_CharName(puaIndex, choice, out name, out error);
                genCategory    = charPropEngine.get_GeneralCategory(puaIndex);
                combiningClass = charPropEngine.get_CombiningClass(puaIndex);
                bidiCategory   = charPropEngine.get_BidiCategory(puaIndex);
                decomposition  = charPropEngine.get_Decomposition(puaIndex);
                // The full decomposition is not compared with anything below; the call is kept
                // so that the lookup itself is still exercised.
                charPropEngine.get_FullDecomp(puaIndex);
                // Note: isNumber merely checks the General category, it doesn't check to see if there is a valid numeric value.
                isNumber = charPropEngine.get_IsNumber(puaIndex);
                if (isNumber)
                {
                    numericValue = charPropEngine.get_NumericValue(puaIndex);
                }
                comment = charPropEngine.get_Comment(puaIndex);

                upper = charPropEngine.get_ToUpperCh(puaIndex);
                lower = charPropEngine.get_ToLowerCh(puaIndex);
                title = charPropEngine.get_ToTitleCh(puaIndex);
            }
            finally
            {
                // Must release pointer to free memory-mapping before we try to restore files.
                Marshal.ReleaseComObject(charPropEngine);
                charPropEngine = null;
                Icu.Cleanup();                          // clean up the ICU files / data
            }

            string errorMessage = "PUA Character " +
                                  puaIndex.ToString("x", new System.Globalization.NumberFormatInfo()) +
                                  " has an incorrect ";

            //Check Name [1]
            Assert.AreEqual(puaName, name, errorMessage + "name.");

            //Check general category [2]
            Assert.AreEqual(puaGenCat, genCategory, errorMessage + "general category.");

            //Check combining class [3]
            Assert.AreEqual(puaCombiningClass, combiningClass, errorMessage + "combining class.");

            //Check Bidi class [4]
            Assert.AreEqual(puaBidiClass, bidiCategory, errorMessage + "bidi class value.");

            //Check Decomposition [5]
            // The message is built before the assertion runs, so a null or empty decomposition
            // must not be indexed here.
            string decompositionDetail;
            if (string.IsNullOrEmpty(decomposition))
            {
                decompositionDetail = "Decomposition is null or empty";
            }
            else
            {
                decompositionDetail = string.Format(System.Globalization.CultureInfo.InvariantCulture,
                                                    "Decomposition, {0:x}, is incorrect", (int)decomposition[0]);
            }
            Assert.AreEqual(puaDecomposition, decomposition,
                            errorMessage + "decomposition." + System.Environment.NewLine + decompositionDetail);

            //Check Numeric Value [6,7,8]
            Assert.AreEqual(puaNumeric, isNumber, errorMessage +
                            "numeric type (i.e. does or doesn't have a numeric value when it should be the other).");
            if (puaNumeric)
            {
                Assert.AreEqual(puaNumericValue, numericValue, errorMessage + "numeric value.");
            }

            //Check ISO Comment [11]
            Assert.AreEqual(puaComment, comment, errorMessage + "ISO commment");

            //Check uppercase [12]
            // Built as its own string so that it does not carry over the decomposition text.
            string upperCaseMessage = errorMessage + "upper case." + System.Environment.NewLine +
                                      string.Format(System.Globalization.CultureInfo.InvariantCulture,
                                                    "Found uppercase value: {0:x}", upper);
            Assert.AreEqual(puaToUpper, upper, upperCaseMessage);

            //Check lowercase [13]
            Assert.AreEqual(puaToLower, lower, errorMessage + "lower case.");

            //Check titlecase [14]
            Assert.AreEqual(puaToTitle, title, errorMessage + "title case.");
        }
Exemplo n.º 12
0
		/// <summary>
		/// Determines whether a character ends a sentence. A full stop (U+002E) counts unless
		/// <c>IsSpecialPeriod</c> reports it as special; every other character is decided by
		/// <c>TsStringUtils.IsEndOfSentenceChar</c>.
		/// </summary>
		/// <param name="ch">The character (code point).</param>
		/// <param name="cc">The general character category supplied for <paramref name="ch"/>.</param>
		/// <param name="ich">Value handed to <c>IsSpecialPeriod</c>; presumably the index of
		/// the character in the text being examined - confirm.</param>
		/// <returns><c>true</c> if the character ends a sentence; otherwise, <c>false</c>.</returns>
		private bool IsEosChar(int ch, LgGeneralCharCategory cc, int ich)
		{
			const int fullStop = 0x002E;

			return ch == fullStop
				? !IsSpecialPeriod(ich)
				: TsStringUtils.IsEndOfSentenceChar(ch, cc);
		}
Exemplo n.º 13
0
        /// <summary>
        /// Tests whether the character property engine reports the general category
        /// <c>kccZs</c> for a character (presumably the Unicode "Zs" space separator
        /// category - confirm against the enum).
        /// </summary>
        /// <param name="c">The character to test</param>
        /// <returns><c>true</c> if the character's general category is <c>kccZs</c>;
        /// <c>false</c> otherwise</returns>
        bool IsWhite(char c)
        {
            return(m_cpe.get_GeneralCategory(c) == LgGeneralCharCategory.kccZs);
        }