コード例 #1
0
ファイル: StringUtils.cs プロジェクト: sillsdev/WorldPad
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Determines whether the specified character is a mark, either as overridden by the
		/// language definition or, when no override exists, as reported by the character
		/// property engine.
		/// </summary>
		/// <param name="chr">The character to test.</param>
		/// <param name="langDef">The language definition (can be null, in which case no
		/// override is consulted).</param>
		/// <param name="cpe">The character property engine; it is only consulted when the
		/// language definition supplies no override category.</param>
		/// <returns><c>true</c> if the override category starts with 'M' or, lacking an
		/// override, the character property engine reports a mark; otherwise <c>false</c>.
		/// </returns>
		/// ------------------------------------------------------------------------------------
		private static bool IsMark(char chr, LanguageDefinition langDef, ILgCharacterPropertyEngine cpe)
		{
			string overrideCategory = null;
			if (langDef != null)
				overrideCategory = langDef.GetOverrideCharCategory(chr);

			// An override from the language definition takes precedence over the engine.
			if (overrideCategory != null)
				return overrideCategory[0] == 'M';

			return cpe.get_IsMark(chr);
		}
コード例 #2
0
ファイル: StringUtils.cs プロジェクト: sillsdev/WorldPad
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Updates the language definition's Private Use Area (PUA) definitions to match the
		/// given character list: a definition is removed when its character is neither in the
		/// list nor overridden to a mark category, and then every UTF-16 code unit of every
		/// listed string is passed to UpdateLangDefPUACollection.
		/// </summary>
		/// <param name="langDef">The language definition whose PUA definitions are updated.
		/// </param>
		/// <param name="chars">The list of characters to keep/add (one string per character).
		/// </param>
		/// ------------------------------------------------------------------------------------
		public static void UpdatePUACollection(LanguageDefinition langDef, List<string> chars)
		{
			List<PUACharacter> icuPuaChars = GetDefinedCustomPUACharsFromICU();

			// First pass: drop PUA definitions that are no longer wanted. The definitions are
			// visited from last to first, presumably so that removing one does not shift the
			// indexes still to be visited.
			for (int iDef = langDef.PuaDefinitionCount - 1; iDef >= 0; iDef--)
			{
				int puaCodePoint;
				string puaData;
				langDef.GetPuaDefinition(iDef, out puaCodePoint, out puaData);

				// NOTE(review): the cast to char would truncate a value above U+FFFF -- confirm
				// what PUACharacter.Character returns for supplementary-plane PUA code points.
				char puaChar = (char)new PUACharacter(puaCodePoint).Character;
				string overrideCategory = langDef.GetOverrideCharCategory(puaChar);

				// Keep the definition if the character is still in the list...
				if (chars.Contains(puaChar.ToString()))
					continue;

				// ...or if the language definition overrides it to a mark category (M*).
				if (overrideCategory != null && overrideCategory[0] == 'M')
					continue;

				langDef.RemovePuaDefinition(new CharDef(puaCodePoint, puaData));
			}

			// Second pass: hand every code unit of every listed string to
			// UpdateLangDefPUACollection together with the custom PUA characters from ICU.
			// NOTE(review): this walks UTF-16 code units, so a surrogate pair is passed as two
			// separate values rather than as one code point -- confirm that is intended.
			foreach (string validChar in chars)
			{
				for (int ich = 0; ich < validChar.Length; ich++)
					UpdateLangDefPUACollection((int)validChar[ich], langDef, icuPuaChars);
			}
		}
コード例 #3
0
ファイル: StringUtils.cs プロジェクト: sillsdev/WorldPad
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Finds the first base character/combining diacritic combination and removes any
		/// remaining characters. A single space (Zs), line separator (Zl) or format (Cf)
		/// character is returned unchanged. Otherwise, leading characters that are not a
		/// letter, number, private-use (Co), punctuation or symbol character are skipped, the
		/// first such character is kept as the base, and the marks that follow it are kept
		/// (a ZWNJ/ZWJ is kept only when it sits between two marks).
		/// </summary>
		/// <param name="origChars">The original string of characters.</param>
		/// <param name="langDef">The language definition which could override some of the
		/// properties defined by the <paramref name="cpe"/> (can be null).</param>
		/// <param name="cpe">The character property engine.</param>
		/// <returns>The validated sequence: the lone Zs/Zl/Cf character, or the single
		/// character the whole input normalizes to under NFKC when a non-mark follows the base,
		/// or the base character with its trailing marks. The result is empty when no base
		/// character is found.</returns>
		/// ------------------------------------------------------------------------------------
		public static string ValidateCharacterSequence(string origChars,
			LanguageDefinition langDef, ILgCharacterPropertyEngine cpe)
		{
			// Allow spaces (Zs), hard line breaks (Zl), and other formatting characters (Cf) in
			// isolation only.
			if (origChars.Length == 1)
			{
				string category = (langDef == null) ? null : langDef.GetOverrideCharCategory(origChars[0]);
				if (category == null)
				{
					// No override: ask the engine once rather than once per comparison.
					LgGeneralCharCategory generalCategory = cpe.get_GeneralCategory(origChars[0]);
					if (generalCategory == LgGeneralCharCategory.kccZl ||
						generalCategory == LgGeneralCharCategory.kccZs ||
						generalCategory == LgGeneralCharCategory.kccCf)
					{
						return origChars;
					}
				}
				else if (category == "Zl" || category == "Zs" || category == "Cf")
				{
					return origChars;
				}
			}

			StringBuilder newChars = new StringBuilder();
			bool baseFound = false;
			bool fPrecedingCharWasMark = false;
			// Extract first base character and any following diacritics
			for (int ich = 0; ich < origChars.Length; ich++)
			{
				char chr = origChars[ich];

				if (!baseFound)
				{
					string category = (langDef == null) ? null : langDef.GetOverrideCharCategory(chr);
					if (category == null)
					{
						// If this is not a valid base character, keep looking.
						if (!cpe.get_IsLetter(chr) && !cpe.get_IsNumber(chr) &&
							cpe.get_GeneralCategory(chr) != LgGeneralCharCategory.kccCo &&
							!cpe.get_IsPunctuation(chr) && !cpe.get_IsSymbol(chr))
							continue;
					}
					else
					{
						// If this is not a valid base character, keep looking.
						if (category[0] != 'L' && category[0] != 'N' &&
							category != "Co" && category[0] != 'P' && category[0] != 'S')
							continue;
					}

					baseFound = true;
				}
				else if (IsMark(chr, langDef, cpe))
				{
					fPrecedingCharWasMark = true;
				}
				else if ((chr == '\u200C' || chr == '\u200D') && fPrecedingCharWasMark &&
					origChars.Length > ich + 1 && IsMark(origChars[ich + 1], langDef, cpe))
				{
					// A ZWNJ/ZWJ is only accepted between two marks. Clearing the flag means a
					// second joiner directly after this one is not accepted.
					fPrecedingCharWasMark = false;
				}
				else
				{
					// Anything else ends the sequence and the rest of the string is discarded.
					// This handles special situations like Korean, where multiple base letters
					// (representing phonemes) can compose into a single base letter (representing a
					// syllable).
					string composed = Icu.Normalize(origChars, Icu.UNormalizationMode.UNORM_NFKC);
					if (composed.Length == 1)
						return composed;
					break;
				}

				// Every path that reaches this point has a base character, so the current
				// character (the base itself, a mark, or an accepted joiner) is kept.
				newChars.Append(chr);
			}

			return newChars.ToString();
		}