/// <summary>
/// Initializes a SurrogatePairProperty from the "Surrogates" group of the Unicode range database
/// and from the portion of the expected ranges that GetRange reports against
/// 0x10000 - TextUtil.MaxUnicodePoint.
/// <a href="http://www.unicode.org/charts/PDF/UD800.pdf">Unicode chart UD800</a>
/// <a href="http://www.unicode.org/charts/PDF/UDC00.pdf">Unicode chart UDC00</a>
/// </summary>
/// <param name="unicodeDb">Unicode range database searched for groups whose GroupName is "Surrogates".</param>
/// <param name="expectedRanges">Ranges the caller wants covered; each one is checked in turn.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// None of the expected ranges produced surrogate data or a range at or above 0x10000.
/// </exception>
public SurrogatePairProperty(UnicodeRangeDatabase unicodeDb, Collection<UnicodeRange> expectedRanges)
{
    bool isValid = false;

    foreach (UnicodeRange range in expectedRanges)
    {
        if (RangePropertyCollector.BuildPropertyDataList(
            unicodeDb, range, surrogatePairRangeList, "Surrogates", GroupAttributes.GroupName))
        {
            // Pick the high/low surrogate bounds out of whatever has been collected so far.
            foreach (UnicodeRangeProperty data in surrogatePairRangeList)
            {
                if (data.Name.Equals("High Surrogates", StringComparison.OrdinalIgnoreCase))
                {
                    highMin = data.Range.StartOfUnicodeRange;
                    highMax = data.Range.EndOfUnicodeRange;
                }
                else if (data.Name.Equals("Low Surrogates", StringComparison.OrdinalIgnoreCase))
                {
                    lowMin = data.Range.StartOfUnicodeRange;
                    lowMax = data.Range.EndOfUnicodeRange;
                }
            }

            isValid = true;
        }

        // Only store a non-null result: assigning unconditionally let a later expected range
        // with no overlap wipe out a valid range found for an earlier one, leaving
        // surrogateRange null even though the instance was considered valid.
        UnicodeRange supplementary = RangePropertyCollector.GetRange(
            new UnicodeRange(0x10000, TextUtil.MaxUnicodePoint), range);
        if (null != supplementary)
        {
            surrogateRange = supplementary;
            isValid = true;
        }
    }

    if (!isValid)
    {
        throw new ArgumentOutOfRangeException(
            "expectedRanges",
            "SurrogatePairProperty, SurrogatePair ranges are beyond expected range. " +
            "Refert to Surrogates range and UTF32.");
    }
}
/// <summary>
/// Looks up the Unicode range that the database associates with a Unicode chart.
/// Script groups (each followed by its sub-groups) are searched first, then the
/// symbols-and-punctuation groups in the same manner; the first match wins.
/// </summary>
/// <param name="unicodeDb">Database whose Scripts and SymbolsAndPunctuation groups are searched.</param>
/// <param name="chart">Chart to look for.</param>
/// <returns>The UnicodeRange of the first group or sub-group whose UnicodeChart equals <paramref name="chart"/>.</returns>
/// <exception cref="ArgumentException">No group or sub-group in the database matches the chart.</exception>
public static UnicodeRange GetUnicodeChartRange(UnicodeRangeDatabase unicodeDb, UnicodeChart chart)
{
    // Pass 1: scripts.
    foreach (Group scriptGroup in unicodeDb.Scripts)
    {
        if (scriptGroup.UnicodeChart == chart)
        {
            return scriptGroup.UnicodeRange;
        }

        if (scriptGroup.SubGroups == null)
        {
            continue;
        }

        foreach (SubGroup scriptChild in scriptGroup.SubGroups)
        {
            if (scriptChild.UnicodeChart == chart)
            {
                return scriptChild.UnicodeRange;
            }
        }
    }

    // Pass 2: symbols and punctuation.
    foreach (Group symbolGroup in unicodeDb.SymbolsAndPunctuation)
    {
        if (symbolGroup.UnicodeChart == chart)
        {
            return symbolGroup.UnicodeRange;
        }

        if (symbolGroup.SubGroups == null)
        {
            continue;
        }

        foreach (SubGroup symbolChild in symbolGroup.SubGroups)
        {
            if (symbolChild.UnicodeChart == chart)
            {
                return symbolChild.UnicodeRange;
            }
        }
    }

    // Nothing matched anywhere in the database.
    string chartName = Enum.GetName(typeof(UnicodeChart), chart);
    throw new ArgumentException(@"Invalid UnicodeChart, " + chartName + ". No match in the database.");
}
/// <summary>
/// Initializes a BidiProperty for a collection of expected ranges: collects the "Arabic" and
/// "Hebrew" ranges from the database, initializes the Bidi dictionary, and records which parts
/// of the ranges 0x0030 - 0x0039, 0x0041 - 0x005A and 0x0061 - 0x007A fall inside the expected ranges.
/// <a href="http://unicode.org/reports/tr9/">unicode.org/reports/tr9</a>
/// </summary>
/// <param name="unicodeDb">Unicode range database searched by group name.</param>
/// <param name="expectedRanges">Ranges the caller wants covered.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Neither the Bidi lookups nor the dictionary initialization succeeded, or none of the three
/// Latin ranges is reachable from the expected ranges.
/// </exception>
public BidiProperty(UnicodeRangeDatabase unicodeDb, Collection<UnicodeRange> expectedRanges)
{
    bool foundBidi = false;

    foreach (UnicodeRange candidate in expectedRanges)
    {
        // '|=' does not short-circuit, so both lookups always run and may append to the list.
        foundBidi |= RangePropertyCollector.BuildPropertyDataList(
            unicodeDb, candidate, bidiPropertyRangeList, "Arabic", GroupAttributes.Name);
        foundBidi |= RangePropertyCollector.BuildPropertyDataList(
            unicodeDb, candidate, bidiPropertyRangeList, "Hebrew", GroupAttributes.Name);
    }

    foundBidi |= InitializeBidiDictionary(expectedRanges);

    if (!foundBidi)
    {
        throw new ArgumentOutOfRangeException(
            "expectedRanges",
            "BidiProperty, Bidi ranges are beyond expected range. " +
            "Refer to Arabic and Hebrew ranges.");
    }

    // The Latin ranges are validated separately from the Bidi ranges.
    bool foundLatin = false;

    foreach (UnicodeRange candidate in expectedRanges)
    {
        // Fresh range objects per expected range: digits, upper-case and lower-case ASCII letters.
        UnicodeRange[] latinBlocks =
        {
            new UnicodeRange(0x0030, 0x0039),
            new UnicodeRange(0x0041, 0x005A),
            new UnicodeRange(0x0061, 0x007A)
        };

        foreach (UnicodeRange block in latinBlocks)
        {
            UnicodeRange overlap = RangePropertyCollector.GetRange(block, candidate);
            if (overlap != null)
            {
                latinRangeList.Add(overlap);
                foundLatin = true;
            }
        }
    }

    if (!foundLatin)
    {
        throw new ArgumentOutOfRangeException(
            "expectedRanges",
            "BidiProperty, Bidi ranges are beyond expected range. " +
            "0x0030 - 0x0039, 0x0041 - 0x005A, and 0x0061 - 0x007A ranges are needed to construct Bidi string.");
    }
}
/// <summary>
/// Walks the Unicode range database and, for every script group and every symbols-and-punctuation
/// group whose selected attribute equals <paramref name="name"/> (ordinal, ignoring case), appends
/// a UnicodeRangeProperty for the group and for each of its sub-groups whenever GetRange returns
/// a non-null range for it against <paramref name="expectedRange"/>.
/// </summary>
/// <param name="unicodeDb">Database whose Scripts and SymbolsAndPunctuation groups are walked.</param>
/// <param name="expectedRange">Range passed to GetRange together with each group's own range.</param>
/// <param name="dataList">List that receives the properties; it is appended to, never cleared.</param>
/// <param name="name">Value the selected group attribute must equal.</param>
/// <param name="attribute">
/// Which attribute to compare: Name, Ids, or (for any other value) GroupName.
/// </param>
/// <returns>true when at least one entry was appended to <paramref name="dataList"/>.</returns>
public static bool BuildPropertyDataList(
    UnicodeRangeDatabase unicodeDb,
    UnicodeRange expectedRange,
    List<UnicodeRangeProperty> dataList,
    string name,
    GroupAttributes attribute)
{
    bool isAdded = false;

    foreach (Group script in unicodeDb.Scripts)
    {
        if (!GroupAttributeMatches(script, attribute, name))
        {
            continue;
        }

        if (AppendGroupRanges(script, TextUtil.UnicodeChartType.Script, expectedRange, dataList))
        {
            isAdded = true;
        }
    }

    foreach (Group symbol in unicodeDb.SymbolsAndPunctuation)
    {
        if (!GroupAttributeMatches(symbol, attribute, name))
        {
            continue;
        }

        if (AppendGroupRanges(symbol, ClassifySymbolGroup(symbol), expectedRange, dataList))
        {
            isAdded = true;
        }
    }

    return isAdded;
}

// Compares the attribute selected by 'attribute' with 'name'; a group whose attribute is
// missing (null) is treated as a non-match instead of causing a NullReferenceException.
private static bool GroupAttributeMatches(Group group, GroupAttributes attribute, string name)
{
    string value = group.GroupName;
    if (attribute == GroupAttributes.Name)
    {
        value = group.Name;
    }
    else if (attribute == GroupAttributes.Ids)
    {
        value = group.Ids;
    }

    return value != null && value.Equals(name, StringComparison.OrdinalIgnoreCase);
}

// Chooses Symbol or Punctuation when the group's GroupName or Name mentions it, else Other.
private static TextUtil.UnicodeChartType ClassifySymbolGroup(Group symbol)
{
    if (ContainsIgnoreCase(symbol.GroupName, "symbols") || ContainsIgnoreCase(symbol.Name, "symbols"))
    {
        return TextUtil.UnicodeChartType.Symbol;
    }

    if (ContainsIgnoreCase(symbol.GroupName, "punctuation") || ContainsIgnoreCase(symbol.Name, "punctuation"))
    {
        return TextUtil.UnicodeChartType.Punctuation;
    }

    return TextUtil.UnicodeChartType.Other;
}

// Null-safe, allocation-free case-insensitive substring test.
private static bool ContainsIgnoreCase(string text, string value)
{
    return text != null && text.IndexOf(value, StringComparison.OrdinalIgnoreCase) >= 0;
}

// Appends the group's range and then each sub-group's range when GetRange yields one;
// returns true when anything was appended.
private static bool AppendGroupRanges(
    Group group,
    TextUtil.UnicodeChartType type,
    UnicodeRange expectedRange,
    List<UnicodeRangeProperty> dataList)
{
    bool appended = false;

    UnicodeRange range = GetRange(group.UnicodeRange, expectedRange);
    if (null != range)
    {
        dataList.Add(new UnicodeRangeProperty(type, group.Name, group.Ids, range));
        appended = true;
    }

    if (null != group.SubGroups)
    {
        foreach (SubGroup subGroup in group.SubGroups)
        {
            range = GetRange(subGroup.UnicodeRange, expectedRange);
            if (null != range)
            {
                dataList.Add(new UnicodeRangeProperty(type, subGroup.SubGroupName, subGroup.SubIds, range));
                appended = true;
            }
        }
    }

    return appended;
}
/// <summary>
/// Initializes a TextNormalizationProperty for a collection of expected ranges: collects the
/// ranges of the named groups listed below from the database and initializes the
/// text-normalization dictionary.
/// <a href="http://www.unicode.org/reports/tr15/">unicode.org/reports/tr15</a>
/// <a href="http://www.unicode.org/charts/normalization/">unicode.org/charts/normalization</a>
/// </summary>
/// <param name="unicodeDb">Unicode range database searched by group name.</param>
/// <param name="expectedRanges">Ranges the caller wants covered.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// No group lookup added data and the dictionary initialization reported false.
/// </exception>
public TextNormalizationProperty(UnicodeRangeDatabase unicodeDb, Collection<UnicodeRange> expectedRanges)
{
    // Group names are looked up in this order for every expected range.
    string[] groupNames =
    {
        "Latin",
        "CJK Unified Ideographs (Han)",
        "CJK Compatibility Ideographs",
        "Katakana",
        "Hangul Jamo",
        "Hangul Syllables",
        "Arabic",
        "Greek"
    };

    bool anyMatch = false;

    foreach (UnicodeRange candidate in expectedRanges)
    {
        foreach (string groupName in groupNames)
        {
            bool added = RangePropertyCollector.BuildPropertyDataList(
                unicodeDb, candidate, textNormalizationRangeList, groupName, GroupAttributes.Name);
            if (added)
            {
                anyMatch = true;
            }
        }
    }

    // Always runs, regardless of the lookups above.
    if (InitializeTextNormalizationPropertyDictionary(expectedRanges))
    {
        anyMatch = true;
    }

    if (anyMatch)
    {
        return;
    }

    throw new ArgumentOutOfRangeException(
        "expectedRanges",
        "TextNormalizationProperty, " +
        "code points for text normalization ranges are beyond expected range. " +
        "Refert to Latin, CJK Unified Ideographs (Han) " +
        "CJK Compatibility Ideographs, Katakana, Hangul Jamo, Hangul Syllables, Arabic, and Greek ranges.");
}
/// <summary>
/// Creates one property object for every feature requested in <paramref name="properties"/>.
/// For each requested feature the property is constructed, minNumOfCodePoint is increased by
/// that property's MINNUMOFCODEPOINT (multiplied by the requested minimum count where one
/// exists), and the property is registered in propertyDictionary under its PropertyName.
/// </summary>
/// <param name="properties">Requested string features; null members are ignored.</param>
/// <param name="unicodeDb">Unicode range database handed to the property constructors.</param>
/// <param name="expectedRange">Range handed to the property constructors.</param>
private void CreateProperties(StringProperties properties, UnicodeRangeDatabase unicodeDb, UnicodeRange expectedRange)
{
    // Numbers: only when the flag is present and true.
    if (properties.HasNumbers != null && (bool)properties.HasNumbers)
    {
        numberProperty = new NumberProperty(unicodeDb, expectedRange);
        minNumOfCodePoint += NumberProperty.MINNUMOFCODEPOINT;
        propertyDictionary.Add(PropertyName.Number, numberProperty);
    }

    // Bidirectional text: only when the flag is present and true.
    if (properties.IsBidirectional != null && (bool)properties.IsBidirectional)
    {
        bidiProperty = new BidiProperty(unicodeDb, expectedRange);
        minNumOfCodePoint += BidiProperty.MINNUMOFCODEPOINT;
        propertyDictionary.Add(PropertyName.Bidi, bidiProperty);
    }

    // Normalization: any non-null form requests the property.
    if (properties.NormalizationForm != null)
    {
        textNormalizationProperty = new TextNormalizationProperty(unicodeDb, expectedRange);
        minNumOfCodePoint += TextNormalizationProperty.MINNUMOFCODEPOINT;
        propertyDictionary.Add(PropertyName.TextNormalization, textNormalizationProperty);
    }

    // The remaining features are requested by a non-null, non-zero minimum count.
    if (properties.MinNumberOfCombiningMarks != null && properties.MinNumberOfCombiningMarks != 0)
    {
        combiningMarksProperty = new CombiningMarksProperty(unicodeDb, expectedRange);
        minNumOfCodePoint +=
            CombiningMarksProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfCombiningMarks;
        propertyDictionary.Add(PropertyName.CombiningMarks, combiningMarksProperty);
    }

    if (properties.MinNumberOfEndUserDefinedCodePoints != null && properties.MinNumberOfEndUserDefinedCodePoints != 0)
    {
        eudcProperty = new EudcProperty(unicodeDb, expectedRange);
        minNumOfCodePoint +=
            EudcProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfEndUserDefinedCodePoints;
        propertyDictionary.Add(PropertyName.Eudc, eudcProperty);
    }

    if (properties.MinNumberOfLineBreaks != null && properties.MinNumberOfLineBreaks != 0)
    {
        // LineBreakProperty is built from the expected range alone.
        lineBreakProperty = new LineBreakProperty(expectedRange);
        minNumOfCodePoint +=
            LineBreakProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfLineBreaks;
        propertyDictionary.Add(PropertyName.LineBreak, lineBreakProperty);
    }

    if (properties.MinNumberOfSurrogatePairs != null && properties.MinNumberOfSurrogatePairs != 0)
    {
        surrogatePairProperty = new SurrogatePairProperty(unicodeDb, expectedRange);
        minNumOfCodePoint +=
            SurrogatePairProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfSurrogatePairs;
        propertyDictionary.Add(PropertyName.Surrogate, surrogatePairProperty);
    }

    if (properties.MinNumberOfTextSegmentationCodePoints != null && properties.MinNumberOfTextSegmentationCodePoints != 0)
    {
        textSegmentationProperty = new TextSegmentationProperty(unicodeDb, expectedRange);
        minNumOfCodePoint +=
            TextSegmentationProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfTextSegmentationCodePoints;
        propertyDictionary.Add(PropertyName.TextSegmentation, textSegmentationProperty);
    }
}
/// <summary>
/// Initializes a BidiProperty for a single expected range: collects the "Arabic" and "Hebrew"
/// ranges from the database, initializes the Bidi dictionary, and records which parts of the
/// ranges 0x0030 - 0x0039, 0x0041 - 0x005A and 0x0061 - 0x007A fall inside the expected range.
/// <a href="http://unicode.org/reports/tr9/">unicode.org/reports/tr9</a>
/// </summary>
/// <param name="unicodeDb">Unicode range database searched by group name.</param>
/// <param name="expectedRange">Range the caller wants covered.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Neither the Bidi lookups nor the dictionary initialization succeeded, or none of the three
/// Latin ranges is reachable from the expected range.
/// </exception>
public BidiProperty(UnicodeRangeDatabase unicodeDb, UnicodeRange expectedRange)
{
    // Hexadecimal rendering of the expected range, shared by both error messages.
    string expectedText =
        String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.StartOfUnicodeRange) +
        " - " +
        String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.EndOfUnicodeRange);

    bool isValid = false;

    if (RangePropertyCollector.BuildPropertyDataList(
        unicodeDb, expectedRange, bidiPropertyRangeList, "Arabic", GroupAttributes.Name))
    {
        isValid = true;
    }

    if (RangePropertyCollector.BuildPropertyDataList(
        unicodeDb, expectedRange, bidiPropertyRangeList, "Hebrew", GroupAttributes.Name))
    {
        isValid = true;
    }

    if (InitializeBidiDictionary(expectedRange))
    {
        isValid = true;
    }

    if (!isValid)
    {
        // The two-argument overload is required: the single-string overload interprets its
        // argument as the parameter name, not as the message.
        throw new ArgumentOutOfRangeException(
            "expectedRange",
            "BidiProperty, Bidi ranges are beyond expected range, " + expectedText +
            ". Refer to Arabic and Hebrew ranges.");
    }

    // Reset isValid to validate the Latin ranges on their own, matching the collection overload;
    // without the reset the check below could never fail.
    isValid = false;

    UnicodeRange range = RangePropertyCollector.GetRange(new UnicodeRange(0x0030, 0x0039), expectedRange);
    if (null != range)
    {
        latinRangeList.Add(range);
        isValid = true;
    }

    range = RangePropertyCollector.GetRange(new UnicodeRange(0x0041, 0x005A), expectedRange);
    if (null != range)
    {
        latinRangeList.Add(range);
        isValid = true;
    }

    range = RangePropertyCollector.GetRange(new UnicodeRange(0x0061, 0x007A), expectedRange);
    if (null != range)
    {
        latinRangeList.Add(range);
        isValid = true;
    }

    if (!isValid)
    {
        throw new ArgumentOutOfRangeException(
            "expectedRange",
            "BidiProperty, Bidi ranges are beyond expected range, " + expectedText +
            ". 0x0030 - 0x0039, 0x0041 - 0x005A, and 0x0061 - 0x007A ranges are needed to construct Bidi string.");
    }
}
/// <summary>
/// Initializes a TextNormalizationProperty for a single expected range: collects the ranges of
/// the named groups listed below from the database and initializes the text-normalization
/// dictionary.
/// <a href="http://www.unicode.org/reports/tr15/">unicode.org/reports/tr15</a>
/// <a href="http://www.unicode.org/charts/normalization/">unicode.org/charts/normalization</a>
/// </summary>
/// <param name="unicodeDb">Unicode range database searched by group name.</param>
/// <param name="expectedRange">Range the caller wants covered.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// No group lookup added data and the dictionary initialization reported false.
/// </exception>
public TextNormalizationProperty(UnicodeRangeDatabase unicodeDb, UnicodeRange expectedRange)
{
    // Group names are looked up in this order; every lookup runs even after an earlier success
    // because each one may append to textNormalizationRangeList.
    string[] groupNames =
    {
        "Latin",
        "CJK Unified Ideographs (Han)",
        "CJK Compatibility Ideographs",
        "Katakana",
        "Hangul Jamo",
        "Hangul Syllables",
        "Arabic",
        "Greek"
    };

    bool isValid = false;

    foreach (string groupName in groupNames)
    {
        if (RangePropertyCollector.BuildPropertyDataList(
            unicodeDb, expectedRange, textNormalizationRangeList, groupName, GroupAttributes.Name))
        {
            isValid = true;
        }
    }

    if (InitializeTextNormalizationPropertyDictionary(expectedRange))
    {
        isValid = true;
    }

    if (!isValid)
    {
        // The two-argument overload is required: the single-string overload interprets its
        // argument as the parameter name, not as the message.
        throw new ArgumentOutOfRangeException(
            "expectedRange",
            "TextNormalizationProperty, code points for text normalization ranges are beyond expected range, " +
            String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.StartOfUnicodeRange) +
            " - " +
            String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.EndOfUnicodeRange) +
            ". Refert to Latin, CJK Unified Ideographs (Han) " +
            "CJK Compatibility Ideographs, Katakana, Hangul Jamo, Hangul Syllables, Arabic, and Greek ranges.");
    }
}