예제 #1
0
파일: Group.cs 프로젝트: yeungxh/TestApi
 /// <summary>
 /// Creates a group from its code point range, its naming attributes and its chart.
 /// </summary>
 /// <param name="range">Code point range of the group; a copy of it is stored.</param>
 /// <param name="groupName">Value stored in <c>GroupName</c>.</param>
 /// <param name="name">Value stored in <c>Name</c>.</param>
 /// <param name="ids">Value stored in <c>Ids</c>.</param>
 /// <param name="chart">Value stored in <c>UnicodeChart</c>.</param>
 public Group(UnicodeRange range, string groupName, string name, string ids, UnicodeChart chart)
 {
     // Store a copy so the group does not share the caller's range instance.
     UnicodeRange rangeCopy = new UnicodeRange(range);

     this.UnicodeRange = rangeCopy;
     this.GroupName    = groupName;
     this.Name         = name;
     this.Ids          = ids;
     this.UnicodeChart = chart;

     // A newly constructed group has no sub groups yet.
     this.SubGroups    = null;
 }
예제 #2
0
 /// <summary>
 /// Creates the line break property for the given code point range.
 /// See <a href="http://unicode.org/reports/tr13/tr13-5.html">Unicode Newline Guidelines</a>.
 /// </summary>
 /// <param name="expectedRange">Range that must contain at least one line break code point.</param>
 /// <exception cref="ArgumentOutOfRangeException">
 /// Thrown when no line break code point lies inside <paramref name="expectedRange"/>.
 /// </exception>
 public LineBreakProperty(UnicodeRange expectedRange)
 {
     if (!InitializeLineBreakCharDictionary(expectedRange))
     {
         // The single-string ArgumentOutOfRangeException constructor interprets its argument as the
         // parameter name, so the parameter name and the message are passed separately.
         throw new ArgumentOutOfRangeException(
                   "expectedRange",
                   "LineBreakProperty, Linebreak ranges are beyond expected range, " +
                   String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.StartOfUnicodeRange) + " - " +
                   String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.EndOfUnicodeRange) +
                   ". Refer to CR, LF, CRLF, NEL, VT, FF, LS, and PS.");
     }
 }
예제 #3
0
        /// <summary>
        /// Builds a string of randomly chosen code points from the given range. A code point above
        /// 0xFFFF is appended as a surrogate pair (two chars), any other code point as a single char.
        /// </summary>
        /// <param name="range">Range the code points are drawn from; both ends can be drawn.</param>
        /// <param name="iterations">Number of code points to generate.</param>
        /// <param name="exclusions">Code points that must not be produced; may be null. The array is not modified.</param>
        /// <param name="seed">Seed of the random number generator.</param>
        /// <returns>The generated string; empty when <paramref name="iterations"/> is not positive.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when MAXNUMITERATION redraws were needed for a single code point because of the exclusions.
        /// </exception>
        public static string GetRandomCodePoint(UnicodeRange range, int iterations, int [] exclusions, int seed)
        {
            if (iterations <= 0)
            {
                return(string.Empty);
            }

            Random rand = new Random(seed);
            System.Text.StringBuilder builder = new System.Text.StringBuilder();

            // Random.Next excludes its upper bound; widen it by one so that EndOfUnicodeRange itself
            // can be drawn. The guard keeps the increment from overflowing int.
            int lowerBound = range.StartOfUnicodeRange;
            int upperBound = range.EndOfUnicodeRange < int.MaxValue ? range.EndOfUnicodeRange + 1 : range.EndOfUnicodeRange;

            // Binary search needs sorted data; sort a private copy so the caller's array keeps its order.
            int [] sortedExclusions = null;
            if (null != exclusions)
            {
                sortedExclusions = (int [])exclusions.Clone();
                Array.Sort(sortedExclusions);
            }

            for (int i = 0; i < iterations; i++)
            {
                int codePoint = rand.Next(lowerBound, upperBound);
                if (null != sortedExclusions)
                {
                    int ctr = 0;
                    while (Array.BinarySearch(sortedExclusions, codePoint) >= 0)
                    {
                        // Excluded code point: draw again, giving up after MAXNUMITERATION redraws.
                        codePoint = rand.Next(lowerBound, upperBound);
                        ctr++;
                        if (MAXNUMITERATION == ctr)
                        {
                            // Pass the parameter name and the message separately; the single-string
                            // constructor would treat the message as the parameter name.
                            throw new ArgumentOutOfRangeException(
                                      "exclusions",
                                      "TextUtil, " + ctr + " loop has been reached. GetRandomCodePoint may have infinite loop." +
                                      " Range " + String.Format(CultureInfo.InvariantCulture, "0x{0:X}", range.StartOfUnicodeRange) + " - " +
                                      String.Format(CultureInfo.InvariantCulture, "0x{0:X}", range.EndOfUnicodeRange) + " are likely excluded ");
                        }
                    }
                }

                if (codePoint > 0xFFFF)
                {
                    // Outside the BMP: encode as high surrogate followed by low surrogate.
                    builder.Append(Convert.ToChar((codePoint - 0x10000) / 0x400 + 0xD800));
                    builder.Append(Convert.ToChar((codePoint - 0x10000) % 0x400 + 0xDC00));
                }
                else
                {
                    builder.Append(Convert.ToChar(codePoint));
                }
            }

            return(builder.ToString());
        }
예제 #4
0
        /// <summary>
        /// Clips <paramref name="range"/> to <paramref name="expectedRange"/>.
        /// </summary>
        /// <param name="range">Range to be clipped.</param>
        /// <param name="expectedRange">Range to clip against.</param>
        /// <returns>
        /// A new range: a copy of <paramref name="range"/> when <paramref name="expectedRange"/> runs from 0 to
        /// TextUtil.MaxUnicodePoint, otherwise the overlap of the two ranges; null when they do not overlap.
        /// </returns>
        public static UnicodeRange GetRange(UnicodeRange range, UnicodeRange expectedRange)
        {
            int expectedFirst = expectedRange.StartOfUnicodeRange;
            int expectedLast  = expectedRange.EndOfUnicodeRange;
            int rangeFirst    = range.StartOfUnicodeRange;
            int rangeLast     = range.EndOfUnicodeRange;

            // The whole Unicode range as expectation means "no restriction": hand back a copy.
            bool expectsEverything = (0 == expectedFirst) && (TextUtil.MaxUnicodePoint == expectedLast);
            if (expectsEverything)
            {
                return new UnicodeRange(rangeFirst, rangeLast);
            }

            // The ranges overlap only if each one starts no later than the other one ends.
            bool overlaps = (expectedFirst <= rangeLast) && (expectedLast >= rangeFirst);
            if (!overlaps)
            {
                return null;
            }

            // The overlap starts at the later start and ends at the earlier end.
            return new UnicodeRange(Math.Max(expectedFirst, rangeFirst), Math.Min(expectedLast, rangeLast));
        }
예제 #5
0
        /// <summary>
        /// Creates the surrogate pair property from the ranges the caller expects.
        /// See the Unicode charts for
        /// <a href="http://www.unicode.org/charts/PDF/UD800.pdf">High Surrogates</a> and
        /// <a href="http://www.unicode.org/charts/PDF/UDC00.pdf">Low Surrogates</a>.
        /// </summary>
        /// <param name="unicodeDb">Database searched for the "Surrogates" group.</param>
        /// <param name="expectedRanges">Ranges of which at least one must yield surrogate data.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when no expected range contains a "Surrogates" entry or overlaps 0x10000 - TextUtil.MaxUnicodePoint.
        /// </exception>
        public SurrogatePairProperty(UnicodeRangeDatabase unicodeDb, Collection <UnicodeRange> expectedRanges)
        {
            bool anyRangeUsable = false;

            foreach (UnicodeRange expected in expectedRanges)
            {
                bool surrogateGroupFound = RangePropertyCollector.BuildPropertyDataList(
                    unicodeDb,
                    expected,
                    surrogatePairRangeList,
                    "Surrogates",
                    GroupAttributes.GroupName);

                if (surrogateGroupFound)
                {
                    // Pick up the bounds of the high and low surrogate blocks from the collected entries.
                    foreach (UnicodeRangeProperty entry in surrogatePairRangeList)
                    {
                        if (entry.Name.Equals("High Surrogates", StringComparison.OrdinalIgnoreCase))
                        {
                            highMin = entry.Range.StartOfUnicodeRange;
                            highMax = entry.Range.EndOfUnicodeRange;
                            continue;
                        }

                        if (entry.Name.Equals("Low Surrogates", StringComparison.OrdinalIgnoreCase))
                        {
                            lowMin = entry.Range.StartOfUnicodeRange;
                            lowMax = entry.Range.EndOfUnicodeRange;
                        }
                    }
                    anyRangeUsable = true;
                }

                // Part of the expected range that lies above the BMP, or null if there is none.
                // NOTE(review): surrogateRange is overwritten for every expected range, so only the
                // result for the last range is kept - confirm this is intended.
                surrogateRange = RangePropertyCollector.GetRange(new UnicodeRange(0x10000, TextUtil.MaxUnicodePoint), expected);
                anyRangeUsable = anyRangeUsable || (null != surrogateRange);
            }

            if (anyRangeUsable)
            {
                return;
            }

            throw new ArgumentOutOfRangeException("expectedRanges", "SurrogatePairProperty, SurrogatePair ranges are beyond expected range. " +
                                                  "Refert to Surrogates range and UTF32.");
        }
예제 #6
0
        /// <summary>
        /// Registers the line break sequences in lineBreakCharDictionary and stores in
        /// lineBreakCodePoints every one of their code units that lies inside the expected range.
        /// </summary>
        /// <param name="expectedRange">Range (both ends included) a code unit must lie in to be stored.</param>
        /// <returns>true when at least one code unit was stored; otherwise false.</returns>
        private bool InitializeLineBreakCharDictionary(UnicodeRange expectedRange)
        {
            // Parallel tables: the name of each line break and the code units it consists of.
            string [] names = { "CR", "LF", "CRLF", "NEL", "VT", "FF", "LS", "PS" };
            char [][] sequences =
            {
                new char [] { '\u000D' },
                new char [] { '\u000A' },
                new char [] { '\u000D', '\u000A' },
                new char [] { '\u0085' },
                new char [] { '\u000B' },
                new char [] { '\u000C' },
                new char [] { '\u2028' },
                new char [] { '\u2029' }
            };

            int capacity = 0;
            for (int entry = 0; entry < names.Length; entry++)
            {
                lineBreakCharDictionary.Add(names[entry], sequences[entry]);
                capacity += sequences[entry].Length;
            }

            // Allocate for the case that every code unit is accepted; trimmed to the real count below.
            lineBreakCodePoints = new int [capacity];

            int accepted = 0;
            foreach (char[] sequence in lineBreakCharDictionary.Values)
            {
                foreach (char unit in sequence)
                {
                    int value = unit;
                    bool insideRange = value >= expectedRange.StartOfUnicodeRange && value <= expectedRange.EndOfUnicodeRange;
                    if (insideRange)
                    {
                        lineBreakCodePoints[accepted++] = value;
                    }
                }
            }

            Array.Resize(ref lineBreakCodePoints, accepted);
            return accepted > 0;
        }
예제 #7
0
        /// <summary>
        /// Walks the scripts and the symbols/punctuation of the Unicode range database and appends a
        /// property entry for every group (and each of its sub groups) whose selected attribute equals
        /// <paramref name="name"/> and whose range overlaps <paramref name="expectedRange"/>.
        /// </summary>
        /// <param name="unicodeDb">Database providing the Scripts and SymbolsAndPunctuation groups.</param>
        /// <param name="expectedRange">Range every added entry is clipped to.</param>
        /// <param name="dataList">List the entries are appended to.</param>
        /// <param name="name">Attribute value to look for; compared ignoring case.</param>
        /// <param name="attribute">Selects the attribute to compare: Name, Ids, or otherwise GroupName.</param>
        /// <returns>true when at least one entry was appended; otherwise false.</returns>
        public static bool BuildPropertyDataList(
            UnicodeRangeDatabase unicodeDb,
            UnicodeRange expectedRange,
            List <UnicodeRangeProperty> dataList,
            string name,
            GroupAttributes attribute)
        {
            bool isAdded = false;

            foreach (Group script in unicodeDb.Scripts)
            {
                if (MatchesGroupAttribute(script, attribute, name))
                {
                    // |= does not short-circuit, so the ranges are appended even when isAdded is already true.
                    isAdded |= AppendGroupRanges(script, TextUtil.UnicodeChartType.Script, expectedRange, dataList);
                }
            }

            foreach (Group symbol in unicodeDb.SymbolsAndPunctuation)
            {
                if (MatchesGroupAttribute(symbol, attribute, name))
                {
                    isAdded |= AppendGroupRanges(symbol, ClassifySymbolGroup(symbol), expectedRange, dataList);
                }
            }

            return(isAdded);
        }

        // Returns true when the attribute of the group selected by 'attribute' equals 'name', ignoring case.
        private static bool MatchesGroupAttribute(Group group, GroupAttributes attribute, string name)
        {
            string value = group.GroupName;
            if (attribute == GroupAttributes.Name)
            {
                value = group.Name;
            }
            else if (attribute == GroupAttributes.Ids)
            {
                value = group.Ids;
            }

            // The selected attribute may be null (the Group constructor stores its strings unchecked);
            // a missing attribute is simply no match.
            return(null != value && value.Equals(name, StringComparison.OrdinalIgnoreCase));
        }

        // Derives the chart type of a symbol group from its group name and name:
        // "symbols" wins over "punctuation", anything else is Other.
        private static TextUtil.UnicodeChartType ClassifySymbolGroup(Group symbol)
        {
            if (ContainsLowered(symbol.GroupName, "symbols") || ContainsLowered(symbol.Name, "symbols"))
            {
                return(TextUtil.UnicodeChartType.Symbol);
            }

            if (ContainsLowered(symbol.GroupName, "punctuation") || ContainsLowered(symbol.Name, "punctuation"))
            {
                return(TextUtil.UnicodeChartType.Punctuation);
            }

            return(TextUtil.UnicodeChartType.Other);
        }

        // Returns true when the invariant-lowercased text contains the (lowercase) word; null text never matches.
        private static bool ContainsLowered(string text, string word)
        {
            return(null != text && text.ToLower(CultureInfo.InvariantCulture).Contains(word));
        }

        // Appends the group itself and then each of its sub groups, every range clipped to expectedRange.
        // Returns true when at least one entry was appended.
        private static bool AppendGroupRanges(
            Group group,
            TextUtil.UnicodeChartType type,
            UnicodeRange expectedRange,
            List <UnicodeRangeProperty> dataList)
        {
            bool isAdded = false;

            UnicodeRange range = GetRange(group.UnicodeRange, expectedRange);
            if (null != range)
            {
                dataList.Add(new UnicodeRangeProperty(type, group.Name, group.Ids, range));
                isAdded = true;
            }

            if (null != group.SubGroups)
            {
                foreach (SubGroup subGroup in group.SubGroups)
                {
                    range = GetRange(subGroup.UnicodeRange, expectedRange);
                    if (null != range)
                    {
                        dataList.Add(new UnicodeRangeProperty(type, subGroup.SubGroupName, subGroup.SubIds, range));
                        isAdded = true;
                    }
                }
            }

            return(isAdded);
        }
예제 #8
0
 /// <summary>
 /// Copy constructor
 /// </summary>
 /// <param name="range">A UnicodeRange object to be copied; must not be null</param>
 /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="range"/> is null.</exception>
 public UnicodeRange(UnicodeRange range)
 {
     // Report a null argument by name instead of failing with a NullReferenceException
     // on the first property read below.
     if (null == range)
     {
         throw new System.ArgumentNullException("range");
     }

     startOfUnicodeRange = range.StartOfUnicodeRange;
     endOfUnicodeRange   = range.EndOfUnicodeRange;
 }
예제 #9
0
 /// <summary>
 /// Creates a property entry from a chart type, a name, culture ids and a code point range.
 /// </summary>
 /// <param name="type">Value stored in <c>Type</c>.</param>
 /// <param name="name">Value stored in <c>Name</c>.</param>
 /// <param name="ids">Value stored in <c>CultureIDs</c>.</param>
 /// <param name="range">Range whose start and end are copied into a new range stored in <c>Range</c>.</param>
 public UnicodeRangeProperty(TextUtil.UnicodeChartType type, string name, string ids, UnicodeRange range)
 {
     this.Type       = type;
     this.Name       = name;
     this.CultureIDs = ids;

     // Build a separate range object rather than keeping a reference to the caller's instance.
     int firstCodePoint = range.StartOfUnicodeRange;
     int lastCodePoint  = range.EndOfUnicodeRange;
     this.Range         = new UnicodeRange(firstCodePoint, lastCodePoint);
 }
예제 #10
0
        /// <summary>
        /// Creates the text normalization property for the given code point range.
        /// See <a href="http://www.unicode.org/reports/tr15/">Unicode Normalization Forms</a> and the
        /// <a href="http://www.unicode.org/charts/normalization/">Normalization Charts</a>.
        /// </summary>
        /// <param name="unicodeDb">Database searched for the groups relevant to normalization.</param>
        /// <param name="expectedRange">Range that must yield at least one normalization range or code point.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when neither a group lookup nor the dictionary initialization finds anything inside
        /// <paramref name="expectedRange"/>.
        /// </exception>
        public TextNormalizationProperty(UnicodeRangeDatabase unicodeDb, UnicodeRange expectedRange)
        {
            // Group names (matched against Group.Name) whose ranges are collected, in lookup order.
            string [] groupNames =
            {
                "Latin",
                "CJK Unified Ideographs (Han)",
                "CJK Compatibility Ideographs",
                "Katakana",
                "Hangul Jamo",
                "Hangul Syllables",
                "Arabic",
                "Greek"
            };

            bool isValid = false;

            foreach (string groupName in groupNames)
            {
                // |= does not short-circuit: every group has to be collected into the range list,
                // also after an earlier group has already made the property valid.
                isValid |= RangePropertyCollector.BuildPropertyDataList(
                    unicodeDb,
                    expectedRange,
                    textNormalizationRangeList,
                    groupName,
                    GroupAttributes.Name);
            }

            isValid |= InitializeTextNormalizationPropertyDictionary(expectedRange);

            if (!isValid)
            {
                // The single-string ArgumentOutOfRangeException constructor interprets its argument as the
                // parameter name, so the parameter name and the message are passed separately.
                throw new ArgumentOutOfRangeException(
                          "expectedRange",
                          "TextNormalizationProperty, code points for text normalization ranges are beyond expected range, " +
                          String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.StartOfUnicodeRange) + " - " +
                          String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.EndOfUnicodeRange) +
                          ". Refer to Latin, CJK Unified Ideographs (Han), CJK Compatibility Ideographs, " +
                          "Katakana, Hangul Jamo, Hangul Syllables, Arabic, and Greek ranges.");
            }
        }
예제 #11
0
        /// <summary>
        /// Creates a property object for every feature requested in <paramref name="properties"/>, adds
        /// the code points it needs to minNumOfCodePoint and registers it in propertyDictionary.
        /// </summary>
        /// <param name="properties">Requested string features; a null member means "not requested".</param>
        /// <param name="unicodeDb">Database handed to the property constructors that take one.</param>
        /// <param name="expectedRange">Range handed to every property constructor.</param>
        private void CreateProperties(StringProperties properties, UnicodeRangeDatabase unicodeDb, UnicodeRange expectedRange)
        {
            // Flag style features: created only when the flag is set and true.
            if (null != properties.HasNumbers && (bool)properties.HasNumbers)
            {
                numberProperty     = new NumberProperty(unicodeDb, expectedRange);
                minNumOfCodePoint += NumberProperty.MINNUMOFCODEPOINT;
                propertyDictionary.Add(PropertyName.Number, numberProperty);
            }

            if (null != properties.IsBidirectional && (bool)properties.IsBidirectional)
            {
                bidiProperty       = new BidiProperty(unicodeDb, expectedRange);
                minNumOfCodePoint += BidiProperty.MINNUMOFCODEPOINT;
                propertyDictionary.Add(PropertyName.Bidi, bidiProperty);
            }

            // Any normalization form that is set requests the feature.
            if (null != properties.NormalizationForm)
            {
                textNormalizationProperty = new TextNormalizationProperty(unicodeDb, expectedRange);
                minNumOfCodePoint        += TextNormalizationProperty.MINNUMOFCODEPOINT;
                propertyDictionary.Add(PropertyName.TextNormalization, textNormalizationProperty);
            }

            // Count style features: created only when the count is set and not zero; the requested
            // count scales the number of code points the feature needs.
            if (null != properties.MinNumberOfCombiningMarks && 0 != properties.MinNumberOfCombiningMarks)
            {
                combiningMarksProperty = new CombiningMarksProperty(unicodeDb, expectedRange);
                minNumOfCodePoint     += CombiningMarksProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfCombiningMarks;
                propertyDictionary.Add(PropertyName.CombiningMarks, combiningMarksProperty);
            }

            if (null != properties.MinNumberOfEndUserDefinedCodePoints && 0 != properties.MinNumberOfEndUserDefinedCodePoints)
            {
                eudcProperty       = new EudcProperty(unicodeDb, expectedRange);
                minNumOfCodePoint += EudcProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfEndUserDefinedCodePoints;
                propertyDictionary.Add(PropertyName.Eudc, eudcProperty);
            }

            if (null != properties.MinNumberOfLineBreaks && 0 != properties.MinNumberOfLineBreaks)
            {
                lineBreakProperty  = new LineBreakProperty(expectedRange);
                minNumOfCodePoint += LineBreakProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfLineBreaks;
                propertyDictionary.Add(PropertyName.LineBreak, lineBreakProperty);
            }

            if (null != properties.MinNumberOfSurrogatePairs && 0 != properties.MinNumberOfSurrogatePairs)
            {
                surrogatePairProperty = new SurrogatePairProperty(unicodeDb, expectedRange);
                minNumOfCodePoint    += SurrogatePairProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfSurrogatePairs;
                propertyDictionary.Add(PropertyName.Surrogate, surrogatePairProperty);
            }

            if (null != properties.MinNumberOfTextSegmentationCodePoints && 0 != properties.MinNumberOfTextSegmentationCodePoints)
            {
                textSegmentationProperty = new TextSegmentationProperty(unicodeDb, expectedRange);
                minNumOfCodePoint       += TextSegmentationProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfTextSegmentationCodePoints;
                propertyDictionary.Add(PropertyName.TextSegmentation, textSegmentationProperty);
            }
        }
예제 #12
0
        /// <summary>
        /// Fills the sample grapheme cluster, grapheme cluster break, word break and sentence break
        /// dictionaries, then stores in textSegmentationCodePoints (sorted ascending) every code unit of
        /// those dictionaries that lies inside the expected range.
        /// </summary>
        /// <param name="expectedRange">Range (both ends included) a code unit must lie in to be stored.</param>
        /// <returns>true when at least one code unit was stored; otherwise false.</returns>
        private bool InitializeDictionaries(UnicodeRange expectedRange)
        {
            char [] ko = { '\u1100', '\u1161', '\u11A8' };
            sampleGraphemeClusterDictionary.Add("Ko", ko);
            char [] ta = { '\u0BA8', '\u0BBF' };
            sampleGraphemeClusterDictionary.Add("ta", ta);
            char [] th = { '\u0E40', '\u0E01' };
            sampleGraphemeClusterDictionary.Add("th", th);
            char [] devanagari = { '\u0937', '\u093F', '\u0915', '\u094D', '\u0937', '\u093F' };
            sampleGraphemeClusterDictionary.Add("devanagari", devanagari);
            char [] sk = { '\u0063', '\u0068' };
            sampleGraphemeClusterDictionary.Add("sk", sk);
            char [] other = { '\u0067', '\u0308', '\u006B', '\u02B7' };
            sampleGraphemeClusterDictionary.Add("other", other);

            char [] all = { '\u000D', '\u000A', '\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', '\u0009', '\u000B',
                            '\u000C', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019','\u001A',
                            '\u001B', '\u001C', '\u001D', '\u001E', '\u001F', '\u0020', '\u007F', '\u0080', '\u0081', '\u0082', '\u0083', '\u0084', '\u0085','\u0086',
                            '\u0087', '\u0088', '\u0089', '\u008A', '\u008B', '\u008C', '\u008D', '\u008E', '\u008F', '\u0090', '\u0091', '\u0092', '\u0093','\u0094',
                            '\u0095', '\u0096', '\u0097', '\u0098', '\u0099', '\u009A', '\u009B', '\u009C', '\u009D', '\u009E', '\u009F', '\u00A0', '\u00AD','\u2000',
                            '\u2001', '\u2002', '\u2003', '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A' };
            graphemeClusterBreakPropertyValuesDictionary.Add("all", all);
            char [] th1 = { '\u0E30', '\u0E32', '\u0E33', '\u0E40', '\u0E41', '\u0E42', '\u0E43', '\u0E44', '\u0E45' };
            graphemeClusterBreakPropertyValuesDictionary.Add("th", th1);
            char [] lao = { '\u0EB0', '\u0EB2', '\u0EB3', '\u0EC0', '\u0EC1', '\u0EC2', '\u0EC3', '\u0EC4' };
            graphemeClusterBreakPropertyValuesDictionary.Add("lao", lao);
            // Hangul jamo and syllable samples. U+115F (choseong filler) follows U+1159; the table
            // previously repeated U+111F at that position.
            char [] ko1 = { '\u1100', '\u1101', '\u1102', '\u1103', '\u1104', '\u1105', '\u1106', '\u1107', '\u1108', '\u1109', '\u110A', '\u110B', '\u110C',
                            '\u110D', '\u110E', '\u110F', '\u1110', '\u1111', '\u1112', '\u1113', '\u1114', '\u1115', '\u1116', '\u1117', '\u1118', '\u1119','\u111A',
                            '\u111B', '\u111C', '\u111D', '\u111E', '\u111F', '\u1120', '\u1121', '\u1122', '\u1123', '\u1124', '\u1125', '\u1126', '\u1127','\u1128',
                            '\u1129', '\u112A', '\u112B', '\u112C', '\u112D', '\u112E', '\u112F', '\u1130', '\u1131', '\u1132', '\u1133', '\u1134', '\u1135','\u1136',
                            '\u1137', '\u1138', '\u1139', '\u1140', '\u1141', '\u1142', '\u1143', '\u1144', '\u1145', '\u1146', '\u1147', '\u1148', '\u1149','\u114A',
                            '\u114B', '\u114C', '\u114D', '\u114E', '\u114F', '\u1150', '\u1151', '\u1152', '\u1153', '\u1154', '\u1155', '\u1156', '\u1157','\u1158',
                            '\u1159', '\u115F', '\u1160', '\u1161', '\u1162', '\u1163', '\u1164', '\u1165', '\u1166', '\u1167', '\u1168', '\u1169', '\u116A','\u116B',
                            '\u116C', '\u116D', '\u116E', '\u116F', '\u1170', '\u1171', '\u1172', '\u1173', '\u1174', '\u1175', '\u1176', '\u1177', '\u1178','\u1179',
                            '\u117A', '\u117B', '\u117C', '\u117D', '\u117E', '\u117F', '\u1180', '\u1181', '\u1182', '\u1183', '\u1184', '\u1185', '\u1186','\u1187',
                            '\u1188', '\u1189', '\u118A', '\u118B', '\u118C', '\u118D', '\u118E', '\u118F', '\u1190', '\u1191', '\u1192', '\u1193', '\u1194','\u1195',
                            '\u1196', '\u1197', '\u1198', '\u1199', '\u119A', '\u119B', '\u119C', '\u119E', '\u119F', '\u11A0', '\u11A1', '\u11A2', '\u11A8','\u11A9',
                            '\u11AA', '\u11AB', '\u11AC', '\u11AD', '\u11AE', '\u11AF', '\u11B0', '\u11B1', '\u11B2', '\u11B3', '\u11B4', '\u11B5', '\u11B6','\u11B7',
                            '\u11B8', '\u11B9', '\u11BA', '\u11BB', '\u11BC', '\u11BD', '\u11BE', '\u11BF', '\u11C0', '\u11C1', '\u11C2', '\u11C3', '\u11C4','\u11C5',
                            '\u11C6', '\u11C7', '\u11C8', '\u11C9', '\u11CA', '\u11CB', '\u11CC', '\u11CE', '\u11CF', '\u11D0', '\u11D1', '\u11D2', '\u11D3','\u11D4',
                            '\u11D5', '\u11D6', '\u11D7', '\u11D8', '\u11D9', '\u11DA', '\u11DB', '\u11DC', '\u11DE', '\u11DF', '\u11F0', '\u11F1', '\u11F2','\u11F3',
                            '\u11F4', '\u11F5', '\u11F7', '\u11F8', '\u11F9', '\uAC00', '\uAC1C', '\uAC38', '\uAC01', '\uAC02', '\uAC03', '\uAc04' };
            graphemeClusterBreakPropertyValuesDictionary.Add("ko", ko1);

            char [] all1 = { '\u000A', '\u000D', '\u000B', '\u000C', '\u0020', '\u0027', '\u0085', '\u002D', '\u002E', '\u202F', '\u00A0', '\u2028', '\u2029',
                             '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', '\u2010', '\u2011','\u2018',
                             '\u2019', '\u201B', '\u2024', '\uFE52', '\uFF07', '\uFF0E', '\u00B7', '\u05F4', '\u2027', '\u003A', '\u0387', '\uFE13', '\uFE55','\uFF1A',
                             '\u066C', '\uFE50', '\uFE54', '\uFE63', '\uFF0D', '\uFF0C', '\uFF1B' };
            wordBreakPropertyValuesDictionary.Add("all", all1);
            char [] katakana = { '\u3031', '\u3032', '\u3033', '\u3034', '\u3035', '\u309B', '\u309C', '\u30A0', '\u30FC', '\uFF70' };
            wordBreakPropertyValuesDictionary.Add("ja", katakana);
            char [] he = { '\u05F3' };
            wordBreakPropertyValuesDictionary.Add("he", he);
            char [] hy = { '\u055A', '\u058A' };
            wordBreakPropertyValuesDictionary.Add("hy", hy);
            char [] tibet = { '\u0F0B' };
            wordBreakPropertyValuesDictionary.Add("tibet", tibet);
            char [] mongolia = { '\u1806' };
            wordBreakPropertyValuesDictionary.Add("mongolia", mongolia);

            char [] all2 = { '\u000A', '\u000D', '\u0085', '\u00A0', '\u05F3', '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005', '\u2006', '\u2007',
                             '\u2008', '\u2009', '\u200A', '\u2028', '\u2029', '\u002E', '\u2024', '\uFE52', '\uFF0E', '\u002D', '\u003A', '\u055D', '\u060C','\u060D',
                             '\u07F8', '\u1802', '\u1808', '\u2013', '\u2014', '\u3001', '\uFE10', '\uFE11', '\uFE13', '\uFE31', '\uFE32', '\uFE50', '\uFE51','\uFE55',
                             '\uFE58', '\uFE63', '\uFF0C', '\uFF0D', '\uFF1A', '\uFF64' };
            sentenceBreakPropertyValuesDictionary.Add("all", all2);

            // Allocate for the case that every code unit is accepted; trimmed to the real count below.
            textSegmentationCodePoints = new int [ko.Length + ta.Length + th.Length + devanagari.Length + sk.Length + other.Length + all.Length + th1.Length
                                                  + lao.Length + ko1.Length + all1.Length + katakana.Length + he.Length + hy.Length + tibet.Length + mongolia.Length + all2.Length];

            // The four dictionaries are filtered the same way, in this order.
            Dictionary <string, char[]> [] sources =
            {
                sampleGraphemeClusterDictionary,
                graphemeClusterBreakPropertyValuesDictionary,
                wordBreakPropertyValuesDictionary,
                sentenceBreakPropertyValuesDictionary
            };

            int accepted = 0;
            foreach (Dictionary <string, char[]> source in sources)
            {
                foreach (char [] values in source.Values)
                {
                    foreach (char codePoint in values)
                    {
                        if (codePoint >= expectedRange.StartOfUnicodeRange && codePoint <= expectedRange.EndOfUnicodeRange)
                        {
                            textSegmentationCodePoints[accepted++] = (int)codePoint;
                        }
                    }
                }
            }

            Array.Resize(ref textSegmentationCodePoints, accepted);
            Array.Sort(textSegmentationCodePoints);

            return(accepted > 0);
        }
예제 #13
0
        /// <summary>
        /// Derives the working state from the static <c>properties</c>: the Unicode range, the property
        /// factory, and the minimum and maximum number of code points of the generated string.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when the resulting minimum exceeds the maximum, when a requested minimum exceeds
        /// TextUtil.MAXNUMOFCODEPOINT, or when a requested maximum is below the minimum the
        /// property factory needs.
        /// </exception>
        private static void InitializeProperties()
        {
            // No range requested means the whole Unicode range.
            if (null != properties.UnicodeRange)
            {
                range = new UnicodeRange(properties.UnicodeRange.StartOfUnicodeRange, properties.UnicodeRange.EndOfUnicodeRange);
            }
            else
            {
                range = new UnicodeRange(0, TextUtil.MaxUnicodePoint);
            }

            // Validation for Unicode range provided against each property is done when each property is created
            propertyFactory = new PropertyFactory(properties, database, range);

            // Combining mark property needs latin alphabet
            if (propertyFactory.HasProperty(PropertyFactory.PropertyName.CombiningMarks))
            {
                InitializeAlphabetRangeList();
            }

            // Start from the minimum number of code points the requested properties need.
            minNumCodePoints = propertyFactory.MinNumOfCodePoint;

            bool hasMin = null != properties.MinNumberOfCodePoints;
            bool hasMax = null != properties.MaxNumberOfCodePoints;

            // In the throws below the parameter name and the message are passed separately; the
            // single-string ArgumentOutOfRangeException constructor would treat the message as the name.
            if (!hasMin && !hasMax)
            {
                // Neither bound requested: minimum stays at what the factory needs, maximum is the global limit.
                maxNumCodePoints = TextUtil.MAXNUMOFCODEPOINT;
                if (minNumCodePoints > maxNumCodePoints)
                {
                    throw new ArgumentOutOfRangeException(
                              "properties",
                              "StringFactory, maximum number of code points is greater than maximum allowed " + maxNumCodePoints + ".");
                }
            }
            else if (hasMin && !hasMax)
            {
                // NOTE(review): a requested minimum is not checked against the minimum the factory
                // needs - confirm this is intended.
                minNumCodePoints = (int)properties.MinNumberOfCodePoints;
                if (minNumCodePoints > TextUtil.MAXNUMOFCODEPOINT)
                {
                    throw new ArgumentOutOfRangeException(
                              "MinNumberOfCodePoints",
                              "StringFactory, maximum number of code points allowed is " + TextUtil.MAXNUMOFCODEPOINT + ".");
                }
                maxNumCodePoints = TextUtil.MAXNUMOFCODEPOINT;
            }
            else if (!hasMin && hasMax)
            {
                maxNumCodePoints = (int)properties.MaxNumberOfCodePoints;
                if (maxNumCodePoints < propertyFactory.MinNumOfCodePoint)
                {
                    throw new ArgumentOutOfRangeException(
                              "MaxNumberOfCodePoints",
                              "StringFactory, minimum number of code points needed is " + propertyFactory.MinNumOfCodePoint + ".");
                }
                minNumCodePoints = propertyFactory.MinNumOfCodePoint;
            }
            else
            {
                minNumCodePoints = (int)properties.MinNumberOfCodePoints;
                maxNumCodePoints = (int)properties.MaxNumberOfCodePoints;
                if (minNumCodePoints > maxNumCodePoints)
                {
                    throw new ArgumentOutOfRangeException(
                              "MinNumberOfCodePoints",
                              "StringFactory, MinNumberOfCodePoints, " + minNumCodePoints + " cannot be bigger than " +
                              "MaxNumberOfCodePoints, " + maxNumCodePoints + ".");
                }
            }
        }
예제 #14
0
        /// <summary>
        /// Creates the Bidi property for a single expected range; see
        /// <a href="http://unicode.org/reports/tr9/">Unicode Bidirectional Algorithm</a>.
        /// </summary>
        /// <param name="unicodeDb">Unicode range database queried for the "Arabic" and "Hebrew" groups.</param>
        /// <param name="expectedRange">Range that the collected Bidi and Latin ranges are checked against.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// No Bidi data (Arabic group, Hebrew group or Bidi dictionary) was found for <paramref name="expectedRange"/>,
        /// or none of 0x0030 - 0x0039, 0x0041 - 0x005A, 0x0061 - 0x007A yields a range against it.
        /// </exception>
        public BidiProperty(UnicodeRangeDatabase unicodeDb, UnicodeRange expectedRange)
        {
            bool isValid = false;

            if (RangePropertyCollector.BuildPropertyDataList(
                    unicodeDb,
                    expectedRange,
                    bidiPropertyRangeList,
                    "Arabic",
                    GroupAttributes.Name))
            {
                isValid = true;
            }

            if (RangePropertyCollector.BuildPropertyDataList(
                    unicodeDb,
                    expectedRange,
                    bidiPropertyRangeList,
                    "Hebrew",
                    GroupAttributes.Name))
            {
                isValid = true;
            }

            if (InitializeBidiDictionary(expectedRange))
            {
                isValid = true;
            }

            if (!isValid)
            {
                throw new ArgumentOutOfRangeException("expectedRange",
                                                      "BidiProperty, Bidi ranges are beyond expected range, " +
                                                      String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.StartOfUnicodeRange) + " - " +
                                                      String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.EndOfUnicodeRange) +
                                                      ". Refer to Arabic and Hebrew ranges.");
            }

            // Reset isValid to validate Latin range; without the reset the check below could never fail
            isValid = false;

            // Digits 0x0030 - 0x0039
            UnicodeRange range = RangePropertyCollector.GetRange(new UnicodeRange(0x0030, 0x0039), expectedRange);

            if (null != range)
            {
                latinRangeList.Add(range);
                isValid = true;
            }

            // Upper-case letters 0x0041 - 0x005A
            range = RangePropertyCollector.GetRange(new UnicodeRange(0x0041, 0x005A), expectedRange);
            if (null != range)
            {
                latinRangeList.Add(range);
                isValid = true;
            }

            // Lower-case letters 0x0061 - 0x007A
            range = RangePropertyCollector.GetRange(new UnicodeRange(0x0061, 0x007A), expectedRange);
            if (null != range)
            {
                latinRangeList.Add(range);
                isValid = true;
            }

            if (!isValid)
            {
                throw new ArgumentOutOfRangeException("expectedRange",
                                                      "BidiProperty, Bidi ranges are beyond expected range, " +
                                                      String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.StartOfUnicodeRange) + " - " +
                                                      String.Format(CultureInfo.InvariantCulture, "0x{0:X}", expectedRange.EndOfUnicodeRange) +
                                                      ". 0x0030 - 0x0039,  0x0041 - 0x005A, and 0x0061 - 0x007A ranges are needed to construct Bidi string.");
            }
        }
예제 #15
0
        /// <summary>
        /// Fills <c>combiningMarksDictionary</c> with combining-mark code points keyed by a culture or
        /// category tag, then stores every listed code point that lies inside
        /// <paramref name="expectedRange"/> in <c>combiningMarks</c>.
        /// </summary>
        /// <param name="expectedRange">Inclusive range a code point must fall in to be kept.</param>
        /// <returns>true when at least one listed code point lies inside <paramref name="expectedRange"/>.</returns>
        private bool InitializeCombiningMarksDictionary(UnicodeRange expectedRange)
        {
            // Marks not attributed to a specific culture tag
            combiningMarksDictionary.Add("other", new char [] {
                '\u0302', '\u0307', '\u030A', '\u0315', '\u0316', '\u0317', '\u0318', '\u0319', '\u031A', '\u031C', '\u031D',
                '\u031E', '\u031F', '\u0320', '\u0321', '\u0322', '\u0324', '\u032A', '\u032B', '\u032C', '\u032E', '\u0330', '\u0332',
                '\u0333', '\u0334', '\u0335', '\u0336', '\u0337', '\u0338', '\u0339', '\u033A', '\u033B', '\u033C', '\u033D', '\u033F',
                '\u0346', '\u0347', '\u0348', '\u0349', '\u034A', '\u034B', '\u034C', '\u034D', '\u034E', '\u034F', '\u0358', '\u0359',
                '\u035A', '\u035B', '\u035C', '\u035D', '\u035E', '\u0360', '\u0361', '\u0362', '\u0323', '\u0328', '\u032D', '\u032F',
                '\u1DC8', '\u1DC9', '\u1DCA', '\u1DCE', '\u1DCF', '\u1DD0', '\u1DD1', '\u1DD2', '\u1DD3', '\u1DD4', '\u1DD5', '\u1DD6',
                '\u1DD7', '\u1DD8', '\u1DD9', '\u1DDA', '\u1DDB', '\u1DDC', '\u1DDD', '\u1DDE', '\u1DDF', '\u1DE0', '\u1DE1', '\u1DE2',
                '\u1DE3', '\u1DE4', '\u1DE5', '\u1DE6', '\uFE20', '\uFE21', '\uFE22', '\uFE23' });
            combiningMarksDictionary.Add("vi", new char [] { '\u0303', '\u0308', '\u031B', '\u0323', '\u0340', '\u0341' });
            combiningMarksDictionary.Add("el", new char [] {
                '\u0300', '\u0301', '\u0304', '\u0305', '\u0306', '\u0308', '\u0313', '\u0314', '\u0331', '\u0342', '\u0343',
                '\u0344', '\u0345', '\u1DC0', '\u1DC1', '\u1DC4', '\u1DC5', '\u1DC6', '\u1DC7', '\uFE24', '\uFE25', '\uFE26' });
            combiningMarksDictionary.Add("hu", new char [] { '\u030B', '\u0350', '\u0351', '\u0352', '\u0353', '\u0354', '\u0355', '\u0356', '\u0357' });
            combiningMarksDictionary.Add("cs", new char [] { '\u030C' });
            combiningMarksDictionary.Add("id", new char [] { '\u030D', '\u030E', '\u0325' });
            combiningMarksDictionary.Add("ms", new char [] { '\u030D', '\u030E' });
            combiningMarksDictionary.Add("sr-sp", new char [] {
                '\u030F', '\u0311', '\u0313', '\u0314', '\u033E', '\u0351', '\u0352', '\u0353', '\u0354', '\u0355', '\u0356',
                '\u0357', '\u1DC3' });
            combiningMarksDictionary.Add("hr", new char [] { '\u030F', '\u1DC3' });
            combiningMarksDictionary.Add("hi", new char [] { '\u0310', '\u0325' });
            combiningMarksDictionary.Add("az-az", new char [] { '\u0311', '\u0313', '\u0314', '\u033E', '\u0327' });
            combiningMarksDictionary.Add("uz-uz", new char [] { '\u0311', '\u0313', '\u0314', '\u033E' });
            combiningMarksDictionary.Add("lv", new char [] { '\u0312', '\u0326' });
            combiningMarksDictionary.Add("fi", new char [] { '\u0326', '\u0350', '\u0351', '\u0352', '\u0353', '\u0354', '\u0355', '\u0356', '\u0357' });
            combiningMarksDictionary.Add("hy", new char [] { '\u0313', '\u0314' });
            combiningMarksDictionary.Add("he", new char [] { '\u0323' });
            combiningMarksDictionary.Add("ar", new char [] { '\u0323' });
            combiningMarksDictionary.Add("ro", new char [] { '\u0326', '\u0350', '\u0351', '\u0352', '\u0353', '\u0354', '\u0355', '\u0356', '\u0357' });
            combiningMarksDictionary.Add("fr", new char [] { '\u0327' });
            combiningMarksDictionary.Add("tr", new char [] { '\u0327' });
            combiningMarksDictionary.Add("pl", new char [] { '\u0328' });
            combiningMarksDictionary.Add("lt", new char [] { '\u0328', '\u035B', '\u1DCB', '\u1DCC' });
            combiningMarksDictionary.Add("yoruba", new char [] { '\u0329' });
            combiningMarksDictionary.Add("de", new char [] {
                '\u0329', '\u0363', '\u0364', '\u0365', '\u0366', '\u0367', '\u0368', '\u0369', '\u036A', '\u036B', '\u036C', '\u036D',
                '\u036E', '\u036F' });
            combiningMarksDictionary.Add("et", new char [] { '\u0350', '\u0351', '\u0352', '\u0353', '\u0354', '\u0355', '\u0356', '\u0357' });
            combiningMarksDictionary.Add("ru", new char [] { '\u030B', '\u0350', '\u0351', '\u0352', '\u0353', '\u0354', '\u0355', '\u0356', '\u0357', '\u1DC3' });
            combiningMarksDictionary.Add("sk", new char [] { '\u0351', '\u0352', '\u0353', '\u0354', '\u0355', '\u0356', '\u0357', '\u1DC3' });
            combiningMarksDictionary.Add("be", new char [] { '\u1DC3' });
            // "bg" now gets its own array; it used to be registered with the "be" array by mistake
            combiningMarksDictionary.Add("bg", new char [] { '\u1DC3' });
            combiningMarksDictionary.Add("mk", new char [] { '\u1DC3' });
            combiningMarksDictionary.Add("sl", new char [] { '\u1DC3' });
            combiningMarksDictionary.Add("uk", new char [] { '\u1DC3' });
            combiningMarksDictionary.Add("symbol", new char [] {
                '\u20D0', '\u20D1', '\u20D2', '\u20D3', '\u20D4', '\u20D5', '\u20D6', '\u20D7', '\u20D8', '\u20D9', '\u20DA',
                '\u20DF', '\u20E0', '\u20E1', '\u20E2', '\u20E3', '\u20E4', '\u20E5', '\u20E6', '\u20E7', '\u20E8', '\u20E9', '\u20EA', '\u20EB',
                '\u20EC', '\u20ED', '\u20EF', '\u20F0' });

            // Collect into a growable list so the buffer size no longer has to be kept in
            // sync by hand with the tables above. A code point listed under several tags is
            // kept once per listing, and the result is left in dictionary enumeration order.
            List<int> inRange = new List<int>();
            foreach (char [] values in combiningMarksDictionary.Values)
            {
                foreach (char codePoint in values)
                {
                    if (codePoint >= expectedRange.StartOfUnicodeRange && codePoint <= expectedRange.EndOfUnicodeRange)
                    {
                        inRange.Add((int)codePoint);
                    }
                }
            }

            combiningMarks = inRange.ToArray();
            return(inRange.Count > 0);
        }
예제 #16
0
 /// <summary>
 /// Initializes the factory state and then creates the property objects via <c>CreateProperties</c>.
 /// </summary>
 /// <param name="properties">String properties forwarded to <c>CreateProperties</c>.</param>
 /// <param name="unicodeDb">Unicode range database forwarded to <c>CreateProperties</c>.</param>
 /// <param name="expectedRange">Unicode range forwarded to <c>CreateProperties</c>.</param>
 public PropertyFactory(StringProperties properties, UnicodeRangeDatabase unicodeDb, UnicodeRange expectedRange)
 {
     // Bookkeeping state is set up before CreateProperties runs
     propertyDictionary = new Dictionary<PropertyFactory.PropertyName, IStringProperty>();
     minNumOfCodePoint = 0;

     // Every individual property starts out unset
     textSegmentationProperty = null;
     textNormalizationProperty = null;
     surrogatePairProperty = null;
     numberProperty = null;
     lineBreakProperty = null;
     eudcProperty = null;
     combiningMarksProperty = null;
     bidiProperty = null;

     CreateProperties(properties, unicodeDb, expectedRange);
 }
        /// <summary>
        /// Creates the Bidi property for a collection of expected ranges; see
        /// <a href="http://unicode.org/reports/tr9/">Unicode Bidirectional Algorithm</a>.
        /// </summary>
        /// <param name="unicodeDb">Unicode range database queried for the "Arabic" and "Hebrew" groups.</param>
        /// <param name="expectedRanges">Ranges that the collected Bidi and Latin ranges are checked against.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// No Bidi data was found for any expected range, or none of 0x0030 - 0x0039, 0x0041 - 0x005A,
        /// 0x0061 - 0x007A yields a range against any expected range.
        /// </exception>
        public BidiProperty(UnicodeRangeDatabase unicodeDb, Collection <UnicodeRange> expectedRanges)
        {
            bool hasBidiData = false;

            foreach (UnicodeRange candidate in expectedRanges)
            {
                // Both lookups always run; each one may append to bidiPropertyRangeList
                bool foundArabic = RangePropertyCollector.BuildPropertyDataList(
                    unicodeDb, candidate, bidiPropertyRangeList, "Arabic", GroupAttributes.Name);
                bool foundHebrew = RangePropertyCollector.BuildPropertyDataList(
                    unicodeDb, candidate, bidiPropertyRangeList, "Hebrew", GroupAttributes.Name);

                if (foundArabic || foundHebrew)
                {
                    hasBidiData = true;
                }
            }

            // The dictionary is initialized regardless of what the group lookups found
            bool dictionaryHasData = InitializeBidiDictionary(expectedRanges);

            if (!hasBidiData && !dictionaryHasData)
            {
                throw new ArgumentOutOfRangeException("expectedRanges", "BidiProperty, Bidi ranges are beyond expected range. " +
                                                      "Refer to Arabic and Hebrew ranges.");
            }

            // Latin ranges are validated separately from the Bidi data above.
            // Bounds, in order: 0x0030 - 0x0039, 0x0041 - 0x005A, 0x0061 - 0x007A.
            int [,] latinBounds = { { 0x0030, 0x0039 }, { 0x0041, 0x005A }, { 0x0061, 0x007A } };
            bool    hasLatin    = false;

            foreach (UnicodeRange expectedRange in expectedRanges)
            {
                for (int row = 0; row < latinBounds.GetLength(0); row++)
                {
                    UnicodeRange latin = RangePropertyCollector.GetRange(
                        new UnicodeRange(latinBounds[row, 0], latinBounds[row, 1]), expectedRange);
                    if (null == latin)
                    {
                        continue;
                    }

                    latinRangeList.Add(latin);
                    hasLatin = true;
                }
            }

            if (!hasLatin)
            {
                throw new ArgumentOutOfRangeException("expectedRanges", "BidiProperty, Bidi ranges are beyond expected range. " +
                                                      "0x0030 - 0x0039,  0x0041 - 0x005A, and 0x0061 - 0x007A ranges are needed to construct Bidi string.");
            }
        }
예제 #18
0
        /// <summary>
        /// Fills <c>textNormalizationPropertyDictionary</c> with code point tables keyed by a category,
        /// script or culture tag, then stores every listed code point that lies inside
        /// <paramref name="expectedRange"/> in <c>codePointsWithDifferentNormalizationForms</c>, sorted ascending.
        /// </summary>
        /// <param name="expectedRange">Inclusive range a code point must fall in to be kept.</param>
        /// <returns>true when at least one listed code point lies inside <paramref name="expectedRange"/>.</returns>
        private bool InitializeTextNormalizationPropertyDictionary(UnicodeRange expectedRange)
        {
            int [] othersymbols = { 0xFFE4,   0x21CD,  0xFFE8,  0xFFED,  0xFFEE,  0x3036, 0x1D15E, 0x1D15F, 0x1D160, 0x1D161, 0x1D162,
                                    0x1D163, 0x1D164, 0x1D1BB, 0x1D1BD, 0x1D1BF, 0x1D1BC, 0x1D1BE, 0x1D1C0 };
            textNormalizationPropertyDictionary.Add("othersymbols", othersymbols);

            int [] modifiersymbols = { 0x00B4, 0x0384, 0x1FFD, 0x02DC, 0x00AF, 0xFFE3, 0x02D8, 0x02D9, 0x00A8, 0x1FED, 0x0385,
                                       0x1FEE, 0x1FC1, 0x02DA, 0x02DD, 0x1FBD, 0x1FBF, 0x1FCD, 0x1FCE, 0x1FCF, 0x1FFE, 0x1FDD, 0x1FDE, 0x1FDF, 0x00B8,
                                       0x02DB, 0x1FC0, 0x309B, 0x309C, 0xFF3E, 0x1FEF, 0xFF40 };
            textNormalizationPropertyDictionary.Add("modifiersymbols", modifiersymbols);

            int [] currencysymbols = { 0xFE69, 0xFF04, 0xFFE0, 0xFFE1, 0xFFE5, 0xFFE6 };
            textNormalizationPropertyDictionary.Add("currencysymbols", currencysymbols);

            int [] mathsymbols = { 0x207A,   0x208A,  0xFB29, 0xFE62, 0xFF0B,  0x2A74,  0xFE64,  0xFF1C,  0x226E,  0x207C, 0x208C,  0xFE66,
                                   0xFF1D,   0x2A75,  0x2A76, 0x2260, 0xFE65,  0xFF1E,  0x226F,  0xFF5C,  0xFF5E,  0xFFE2, 0xFFE9,  0x219A,  0xFFEA, 0xFFEB,
                                   0x219B,   0xFFEC,  0x21AE, 0x21CF, 0x21CE, 0x1D6DB, 0x1D715, 0x1D74F, 0x1D789, 0x1D7C3, 0x2204, 0x1D6C1, 0x1D6FB,
                                   0x1D735, 0x1D76F, 0x1D7A9, 0x2209, 0x220C,  0x2140,  0x207B,  0x208B,  0x2224,  0x2226, 0x222C,  0x222D,  0x2A0C, 0x222F,
                                   0x2230,   0x2241,  0x2244, 0x2247, 0x2249,  0x226D,  0x2262,  0x2270,  0x2271,  0x2274, 0x2275,  0x2278,  0x2279, 0x2280,
                                   0x2281,   0x22E0,  0x22E1, 0x2284, 0x2285,  0x2288,  0x2289,  0x22E2,  0x22E3,  0x22AC, 0x22AD,  0x22AE,  0x22AF, 0x22EA,
                                   0x22EB,   0x22EC,  0x22ED, 0x2ADC };
            textNormalizationPropertyDictionary.Add("mathsymbols", mathsymbols);

            int [] modifierletter = { 0x037A, 0x0374, 0xFF9E, 0xFF9F, 0xFF70 };
            textNormalizationPropertyDictionary.Add("modifierletter", modifierletter);

            int [] otherletter = { 0xFE70, 0xFE72, 0xFC5E, 0xFE74, 0xFC5F, 0xFE76, 0xFC60, 0xFE78, 0xFC61, 0xFE7A, 0xFC62, 0xFE7C,
                                   0xFC63, 0xFE7E, 0xFE71, 0xFE77, 0xFCF2, 0xFE79, 0xFCF3, 0xFE7B, 0xFCF4, 0xFE7D, 0xFE7F };
            textNormalizationPropertyDictionary.Add("otherletter", otherletter);

            int [] nonspacingmark = { 0x0340, 0x0341, 0x0344, 0x0343 };
            textNormalizationPropertyDictionary.Add("nonspacingmark", nonspacingmark);

            int [] spaceseparator = { 0x00A0, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
                                      0x202F, 0x205F, 0x3000 };
            textNormalizationPropertyDictionary.Add("spaceseparator", spaceseparator);

            // NOTE(review): 0x200A below is also listed in spaceseparator above, so it ends up twice in the
            // result array; it looks out of place among the digit forms - confirm whether it is a stray entry.
            int [] decimalnumber = { 0xFF10,  0x1D7CE, 0x1D7D8, 0x1D7E2, 0x1D7EC, 0x1D7F6,  0xFF11, 0x1D7CF, 0x1D7D9, 0x1D7E3, 0x1D7ED,
                                     0x200A,  0x1D7F7,  0xFF12, 0x1D7D0, 0x1D7DA, 0x1D7E4, 0x1D7EE, 0x1D7F8,  0xFF13, 0x1D7D1, 0x1D7DB, 0x1D7E5, 0x1D7EF,
                                     0x1D7F9,  0xFF14, 0x1D7D2, 0x1D7DC, 0x1D7E6, 0x1D7F0, 0x1D7FA,  0xFF15, 0x1D7D3, 0x1D7DD, 0x1D7E7, 0x1D7F1, 0x1D7FB,
                                     0xFF16,  0x1D7D4, 0x1D7DE, 0x1D7E8, 0x1D7F2, 0x1D7FC,  0xFF17, 0x1D7D5, 0x1D7DF, 0x1D7E9, 0x1D7F3, 0x1D7FD,  0xFF18,
                                     0x1D7D6, 0x1D7E0, 0x1D7EA, 0x1D7F4, 0x1D7FE,  0xFF19, 0x1D7D7, 0x1D7E1, 0x1D7EB, 0x1D7F5, 0x1D7FF };
            textNormalizationPropertyDictionary.Add("decimalnumber", decimalnumber);

            int [] othernumber = { 0x2474,  0x247D,  0x247E,  0x247F, 0x2480,  0x2481, 0x2482, 0x2483,  0x2484,  0x2485,  0x2486, 0x2475, 0x2487,  0x2476,
                                   0x2477,  0x2478,  0x2479,  0x247A, 0x247B,  0x247C, 0x2070, 0x2080,  0x24EA, 0x1F101, 0x1F100, 0x2189, 0x00B9,  0x2081, 0x2460, 0x1F102,
                                   0x2488,  0x2469,  0x2491,  0x246A, 0x2492,  0x246B, 0x2493, 0x246C,  0x2494,  0x246D,  0x2495, 0x246E, 0x2496,  0x246F, 0x2497,  0x2470,
                                   0x2498,  0x2499,  0x2472,  0x249A, 0x215F,  0x2152, 0x00BD, 0x2153,  0x00BC,  0x2155,  0x2159, 0x2150, 0x215B,  0x2151, 0x00B2,  0x2082,
                                   0x2461, 0x1F103,  0x2489,  0x2473, 0x249B,  0x3251, 0x3252, 0x3253,  0x3254,  0x3255,  0x3256, 0x3257, 0x3258,  0x3259, 0x2154,  0x2156,
                                   0x00B3,  0x2083,  0x2462, 0x1F104, 0x248A,  0x325A, 0x325B, 0x325C,  0x325D,  0x325E,  0x325F, 0x32B1, 0x32B2,  0x32B3, 0x32B4,  0x00BE,
                                   0x2157,  0x215C,  0x2074,  0x2084, 0x2463, 0x1F105, 0x248B, 0x32B5,  0x32B6,  0x32B7,  0x32B8, 0x32B9, 0x32BA,  0x32BB, 0x32BC,  0x32BD,
                                   0x32BE,  0x2158,  0x2075,  0x2085, 0x2464, 0x1F106, 0x248C, 0x32BF,  0x215A,  0x215D,  0x2076, 0x2086, 0x2465, 0x1F107, 0x248D,  0x2077,
                                   0x2087,  0x2466, 0x1F108,  0x248E, 0x215E,  0x2078, 0x2088, 0x2467, 0x1F109,  0x248F,  0x2079, 0x2089, 0x2468, 0x1F10A, 0x2490 };
            textNormalizationPropertyDictionary.Add("othernumber", othernumber);

            int [] kaithi = { 0x1109A, 0x1109C, 0x110AB };
            textNormalizationPropertyDictionary.Add("kaithi", kaithi);

            int [] balinese = { 0x1B06, 0x1B08, 0x1B0A, 0x1B0C, 0x1B0E, 0x1B12, 0x1B3B, 0x1B3D, 0x1B40, 0x1B41, 0x1B43 };
            textNormalizationPropertyDictionary.Add("balinese", balinese);

            int [] tifinagh = { 0x2D6F };
            textNormalizationPropertyDictionary.Add("tifinagh", tifinagh);

            int [] hiragana = { 0x3094, 0x304C, 0x304E, 0x3050, 0x3052, 0x3054, 0x3056, 0x3058, 0x305A, 0x305C,  0x305E, 0x3060, 0x3062, 0x3065,
                                0x3067, 0x3069, 0x3070, 0x3071, 0x3073, 0x3074, 0x3076, 0x3077, 0x3079, 0x307A, 0x1F200, 0x307C, 0x307D, 0x309F, 0x309E };
            textNormalizationPropertyDictionary.Add("hiragana", hiragana);

            int [] georgian = { 0x10FC };
            textNormalizationPropertyDictionary.Add("georgian", georgian);

            int [] myanmar = { 0x1026 };
            textNormalizationPropertyDictionary.Add("myanmar", myanmar);

            int [] tibetan = { 0x0F0C, 0x0F69, 0x0F43, 0x0F4D, 0x0F52, 0x0F57, 0x0F5C, 0x0F73, 0x0F75, 0x0F81, 0x0FB9, 0x0F93, 0x0F9D, 0x0FA2, 0x0FA7,
                               0x0FAC, 0x0F77, 0x0F76, 0x0F79, 0x0F78 };
            textNormalizationPropertyDictionary.Add("tibetan", tibetan);

            int [] lao = { 0x0EDC, 0x0EDD, 0x0EB3 };
            textNormalizationPropertyDictionary.Add("lao", lao);

            int [] th = { 0x0E33 };
            textNormalizationPropertyDictionary.Add("th", th);

            int [] sinhala = { 0x0DDA, 0x0DDC, 0x0DDD, 0x0DDE };
            textNormalizationPropertyDictionary.Add("sinhala", sinhala);

            int [] malayalam = { 0x0D4A, 0x0D4C, 0x0D4B };
            textNormalizationPropertyDictionary.Add("malayalam", malayalam);

            int [] kannada = { 0x0CC0, 0x0CCA, 0x0CCB, 0x0CC7, 0x0CC8 };
            textNormalizationPropertyDictionary.Add("kannada", kannada);

            int [] telugu = { 0x0C48 };
            textNormalizationPropertyDictionary.Add("telugu", telugu);

            int [] ta = { 0x0B94, 0x0BCA, 0x0BCC, 0x0BCB };
            textNormalizationPropertyDictionary.Add("ta", ta);

            int [] oriya = { 0x0B5C, 0x0B5D, 0x0B4B, 0x0B48, 0x0B4C };
            textNormalizationPropertyDictionary.Add("oriya", oriya);

            int [] gurmukhi = { 0x0A59, 0x0A5A, 0x0A5B, 0x0A5E, 0x0A33, 0x0A36 };
            textNormalizationPropertyDictionary.Add("gurmukhi", gurmukhi);

            int [] bengali = { 0x09DC, 0x09DD, 0x09DF, 0x09CB, 0x09CC };
            textNormalizationPropertyDictionary.Add("bengali", bengali);

            // NOTE(review): 0x095E is listed twice below - possibly one of them was meant to be a
            // different code point; confirm against the Unicode data.
            int [] devanagari = { 0x0958, 0x0959, 0x095A, 0x095B, 0x095C, 0x095D, 0x0929, 0x095E, 0x095E, 0x0931, 0x0934 };
            textNormalizationPropertyDictionary.Add("devanagari", devanagari);

            // NOTE(review): 0xFB2D is listed twice below, and 0xFB20 appears both here and in the
            // cyrillic table further down - confirm the intended values.
            int [] he = { 0x2135, 0xFB21, 0xFB2E, 0xFB2F, 0xFB30, 0xFB4F, 0x2136, 0xFB31, 0xFB4C, 0x2137, 0xFB32, 0x2138, 0xFB22, 0xFB33, 0xFB23, 0xFB34,
                          0xFB4B, 0xFB35, 0xFB36, 0xFB38, 0xFB1D, 0xFB39, 0xFB3A, 0xFB24, 0xFB3B, 0xFB4D, 0xFB25, 0xFB3C, 0xFB26, 0xFB3E, 0xFB40, 0xFB41, 0xFB20,
                          0xFB43, 0xFB44, 0xFB4E, 0xFB46, 0xFB47, 0xFB27, 0xFB48, 0xFB49, 0xFB2C, 0xFB2D, 0xFB2D, 0xFB2B, 0xFB28, 0xFB4A, 0xFB1F };
            textNormalizationPropertyDictionary.Add("he", he);

            int [] hy = { 0x0587, 0xFB14, 0xFB15, 0xFB17, 0xFB13, 0xFB16 };
            textNormalizationPropertyDictionary.Add("hy", hy);

            // NOTE(review): 0x1D78 and 0xFB20 lie well outside the 0x04xx values that make up the
            // rest of this table - confirm they belong here.
            int [] cyrillic = { 0x04D0, 0x04D1, 0x04D2, 0x04D3, 0x0403, 0x0453, 0x0400, 0x0450, 0x04D6, 0x04D7, 0x0401, 0x0451, 0x04C1, 0x04C2, 0x04DC,
                                0x04DD, 0x04DE, 0x04DF, 0x040D, 0x045D, 0x04E2, 0x04E3, 0x0419, 0x0439, 0x04E4, 0x04E5, 0x040C, 0x045C, 0x1D78, 0x04E6, 0x04E7, 0x04EE,
                                0xFB20, 0x04EF, 0x040E, 0x045E, 0x04F0, 0x04F1, 0x04F2, 0x04F3, 0x04F4, 0x04F5, 0x04F6, 0x04F7, 0x04F8, 0x04F9, 0x04EC, 0x04ED, 0x0407,
                                0x0457, 0x0476, 0x0477, 0x04DA, 0x04DB, 0x04EA, 0x04EB };
            textNormalizationPropertyDictionary.Add("cyrillic", cyrillic);

            // i counts how many code points were copied into the result buffer
            int  i       = 0;
            bool isValid = false;

            // Worst-case buffer: the sum of the lengths of every table declared above.
            // This sum must list each local array; a table added above without being added
            // here can overflow the buffer in the copy loop below.
            codePointsWithDifferentNormalizationForms = new int [othersymbols.Length + modifiersymbols.Length + currencysymbols.Length + mathsymbols.Length +
                                                                 modifierletter.Length + otherletter.Length + nonspacingmark.Length + spaceseparator.Length + decimalnumber.Length + othernumber.Length +
                                                                 kaithi.Length + balinese.Length + tifinagh.Length + hiragana.Length + georgian.Length + myanmar.Length + tibetan.Length + lao.Length +
                                                                 th.Length + sinhala.Length + malayalam.Length + kannada.Length + telugu.Length + ta.Length + oriya.Length + gurmukhi.Length + bengali.Length
                                                                 + devanagari.Length + he.Length + hy.Length + cyrillic.Length];

            // Copy every code point inside the inclusive expected range; duplicates are not removed
            Dictionary <string, int[]> .ValueCollection valueColl = textNormalizationPropertyDictionary.Values;
            foreach (int [] values in valueColl)
            {
                foreach (int codePoint in values)
                {
                    if (codePoint >= expectedRange.StartOfUnicodeRange && codePoint <= expectedRange.EndOfUnicodeRange)
                    {
                        codePointsWithDifferentNormalizationForms[i++] = codePoint;
                        isValid = true;
                    }
                }
            }
            // Trim the unused tail, then sort ascending
            Array.Resize(ref codePointsWithDifferentNormalizationForms, i);
            Array.Sort(codePointsWithDifferentNormalizationForms);
            return(isValid);
        }