Ejemplo n.º 1
0
        /// <summary>
        /// Creates the surrogate pair property from the parts of <paramref name="expectedRanges"/> that
        /// intersect the "Surrogates" group of the database or the range 0x10000 - TextUtil.MaxUnicodePoint.
        /// <a href="http://www.unicode.org/charts/PDF/UD800.pdf">Unicode chart U+D800 (High Surrogates)</a>
        /// <a href="http://www.unicode.org/charts/PDF/UDC00.pdf">Unicode chart U+DC00 (Low Surrogates)</a>
        /// </summary>
        /// <param name="unicodeDb">Unicode range database searched for the "Surrogates" group.</param>
        /// <param name="expectedRanges">Ranges the caller allows code points to be taken from.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// None of the expected ranges intersects the surrogate group or the range starting at 0x10000.
        /// </exception>
        public SurrogatePairProperty(UnicodeRangeDatabase unicodeDb, Collection <UnicodeRange> expectedRanges)
        {
            bool isValid = false;

            // Last non-null intersection with 0x10000 - MaxUnicodePoint seen so far. Tracking it in a
            // local prevents a later, non-intersecting expected range from wiping out an earlier hit.
            UnicodeRange supplementaryRange = null;

            foreach (UnicodeRange range in expectedRanges)
            {
                if (RangePropertyCollector.BuildPropertyDataList(
                        unicodeDb,
                        range,
                        surrogatePairRangeList,
                        "Surrogates",
                        GroupAttributes.GroupName))
                {
                    // Pick the high and low surrogate bounds out of everything collected so far.
                    foreach (UnicodeRangeProperty data in surrogatePairRangeList)
                    {
                        if (data.Name.Equals("High Surrogates", StringComparison.OrdinalIgnoreCase))
                        {
                            highMin = data.Range.StartOfUnicodeRange;
                            highMax = data.Range.EndOfUnicodeRange;
                        }
                        else if (data.Name.Equals("Low Surrogates", StringComparison.OrdinalIgnoreCase))
                        {
                            lowMin = data.Range.StartOfUnicodeRange;
                            lowMax = data.Range.EndOfUnicodeRange;
                        }
                    }
                    isValid = true;
                }

                UnicodeRange intersection = RangePropertyCollector.GetRange(
                    new UnicodeRange(0x10000, TextUtil.MaxUnicodePoint),
                    range);
                if (null != intersection)
                {
                    supplementaryRange = intersection;
                    isValid = true;
                }
            }

            // When several expected ranges intersect, the last intersection wins (as before);
            // when none does, the field ends up null (as before).
            surrogateRange = supplementaryRange;

            if (!isValid)
            {
                throw new ArgumentOutOfRangeException("expectedRanges", "SurrogatePairProperty, SurrogatePair ranges are beyond expected range. " +
                                                      "Refer to Surrogates range and UTF32.");
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Looks up the Unicode range that the database records for a given Unicode chart.
        /// </summary>
        /// <param name="unicodeDb">Database whose Scripts and SymbolsAndPunctuation groups are searched, in that order.</param>
        /// <param name="chart">Chart to look for.</param>
        /// <returns>The range of the first group or sub-group whose chart equals <paramref name="chart"/>.</returns>
        /// <exception cref="ArgumentException">No group or sub-group in the database uses the chart.</exception>
        public static UnicodeRange GetUnicodeChartRange(UnicodeRangeDatabase unicodeDb, UnicodeChart chart)
        {
            // Pass 1: script groups, each followed by its sub-groups.
            foreach (Group scriptGroup in unicodeDb.Scripts)
            {
                if (scriptGroup.UnicodeChart == chart)
                {
                    return scriptGroup.UnicodeRange;
                }

                if (null == scriptGroup.SubGroups)
                {
                    continue;
                }

                foreach (SubGroup scriptChild in scriptGroup.SubGroups)
                {
                    if (scriptChild.UnicodeChart == chart)
                    {
                        return scriptChild.UnicodeRange;
                    }
                }
            }

            // Pass 2: symbol and punctuation groups, same traversal.
            foreach (Group symbolGroup in unicodeDb.SymbolsAndPunctuation)
            {
                if (symbolGroup.UnicodeChart == chart)
                {
                    return symbolGroup.UnicodeRange;
                }

                if (null == symbolGroup.SubGroups)
                {
                    continue;
                }

                foreach (SubGroup symbolChild in symbolGroup.SubGroups)
                {
                    if (symbolChild.UnicodeChart == chart)
                    {
                        return symbolChild.UnicodeRange;
                    }
                }
            }

            // Nothing matched: report the chart by its enum name.
            string chartName = Enum.GetName(typeof(UnicodeChart), chart);
            throw new ArgumentException(@"Invalid UnicodeChart, " + chartName + ". No match in the database.");
        }
        /// <summary>
        /// Creates the Bidi property from a collection of expected ranges. Collects the parts of the
        /// "Arabic" and "Hebrew" groups, and of the three ASCII ranges 0x0030 - 0x0039, 0x0041 - 0x005A
        /// and 0x0061 - 0x007A, that fall inside the expected ranges.
        /// <a href="http://unicode.org/reports/tr9/">Unicode Standard Annex #9 (tr9)</a>
        /// </summary>
        /// <param name="unicodeDb">Unicode range database searched for the Arabic and Hebrew groups.</param>
        /// <param name="expectedRanges">Ranges the caller allows code points to be taken from.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// No Bidi range, or none of the three ASCII ranges, intersects the expected ranges.
        /// </exception>
        public BidiProperty(UnicodeRangeDatabase unicodeDb, Collection <UnicodeRange> expectedRanges)
        {
            string[] bidiGroupNames = { "Arabic", "Hebrew" };
            bool hasBidiRange = false;

            foreach (UnicodeRange candidate in expectedRanges)
            {
                foreach (string groupName in bidiGroupNames)
                {
                    // The call always runs so that every matching range lands in bidiPropertyRangeList.
                    bool added = RangePropertyCollector.BuildPropertyDataList(
                        unicodeDb,
                        candidate,
                        bidiPropertyRangeList,
                        groupName,
                        GroupAttributes.Name);
                    hasBidiRange = hasBidiRange | added;
                }
            }

            if (InitializeBidiDictionary(expectedRanges))
            {
                hasBidiRange = true;
            }

            if (!hasBidiRange)
            {
                throw new ArgumentOutOfRangeException("expectedRanges", "BidiProperty, Bidi ranges are beyond expected range. " +
                                                      "Refer to Arabic and Hebrew ranges.");
            }

            // The Latin ranges are validated independently of the Bidi ranges above.
            bool hasLatinRange = false;
            foreach (UnicodeRange candidate in expectedRanges)
            {
                // Fresh range objects per expected range, as in a straight-line version of this code.
                UnicodeRange[] asciiBlocks =
                {
                    new UnicodeRange(0x0030, 0x0039),
                    new UnicodeRange(0x0041, 0x005A),
                    new UnicodeRange(0x0061, 0x007A)
                };

                foreach (UnicodeRange asciiBlock in asciiBlocks)
                {
                    UnicodeRange overlap = RangePropertyCollector.GetRange(asciiBlock, candidate);
                    if (null != overlap)
                    {
                        latinRangeList.Add(overlap);
                        hasLatinRange = true;
                    }
                }
            }

            if (!hasLatinRange)
            {
                throw new ArgumentOutOfRangeException("expectedRanges", "BidiProperty, Bidi ranges are beyond expected range. " +
                                                      "0x0030 - 0x0039,  0x0041 - 0x005A, and 0x0061 - 0x007A ranges are needed to construct Bidi string.");
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Walks the Scripts and SymbolsAndPunctuation groups of the Unicode range database and, for every
        /// group whose selected attribute equals <paramref name="name"/> (ordinal, ignoring case), appends
        /// the parts of the group and its sub-groups that fall inside <paramref name="expectedRange"/>.
        /// </summary>
        /// <param name="unicodeDb">Database to walk.</param>
        /// <param name="expectedRange">Range the collected entries are clipped to via GetRange.</param>
        /// <param name="dataList">List that receives one entry per non-null intersection.</param>
        /// <param name="name">Attribute value to look for.</param>
        /// <param name="attribute">
        /// Selects the compared attribute: Name, Ids, or (for any other value) GroupName.
        /// </param>
        /// <returns>true when at least one entry was appended to <paramref name="dataList"/>.</returns>
        public static bool BuildPropertyDataList(
            UnicodeRangeDatabase unicodeDb,
            UnicodeRange expectedRange,
            List <UnicodeRangeProperty> dataList,
            string name,
            GroupAttributes attribute)
        {
            bool isAdded = false;

            foreach (Group script in unicodeDb.Scripts)
            {
                if (!GroupAttributeEquals(script, attribute, name))
                {
                    continue;
                }

                if (AppendClippedGroupRanges(script, TextUtil.UnicodeChartType.Script, expectedRange, dataList))
                {
                    isAdded = true;
                }
            }

            foreach (Group symbol in unicodeDb.SymbolsAndPunctuation)
            {
                if (!GroupAttributeEquals(symbol, attribute, name))
                {
                    continue;
                }

                // Classify by keyword in either name; "symbols" takes precedence over "punctuation".
                TextUtil.UnicodeChartType type = TextUtil.UnicodeChartType.Other;
                if (ContainsOrdinalIgnoreCase(symbol.GroupName, "symbols") ||
                    ContainsOrdinalIgnoreCase(symbol.Name, "symbols"))
                {
                    type = TextUtil.UnicodeChartType.Symbol;
                }
                else if (ContainsOrdinalIgnoreCase(symbol.GroupName, "punctuation") ||
                         ContainsOrdinalIgnoreCase(symbol.Name, "punctuation"))
                {
                    type = TextUtil.UnicodeChartType.Punctuation;
                }

                if (AppendClippedGroupRanges(symbol, type, expectedRange, dataList))
                {
                    isAdded = true;
                }
            }

            return isAdded;
        }

        /// <summary>
        /// Compares the attribute of <paramref name="entry"/> selected by <paramref name="attribute"/>
        /// with <paramref name="name"/>. A missing (null) attribute never matches instead of throwing.
        /// </summary>
        private static bool GroupAttributeEquals(Group entry, GroupAttributes attribute, string name)
        {
            string value = entry.GroupName;
            if (attribute == GroupAttributes.Name)
            {
                value = entry.Name;
            }
            else if (attribute == GroupAttributes.Ids)
            {
                value = entry.Ids;
            }

            return null != value && value.Equals(name, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Appends the intersection of <paramref name="entry"/>, and of each of its sub-groups, with
        /// <paramref name="expectedRange"/> to <paramref name="dataList"/>; returns true if anything was added.
        /// </summary>
        private static bool AppendClippedGroupRanges(
            Group entry,
            TextUtil.UnicodeChartType chartType,
            UnicodeRange expectedRange,
            List <UnicodeRangeProperty> dataList)
        {
            bool appended = false;

            UnicodeRange range = GetRange(entry.UnicodeRange, expectedRange);
            if (null != range)
            {
                dataList.Add(new UnicodeRangeProperty(chartType, entry.Name, entry.Ids, range));
                appended = true;
            }

            if (null != entry.SubGroups)
            {
                foreach (SubGroup child in entry.SubGroups)
                {
                    range = GetRange(child.UnicodeRange, expectedRange);
                    if (null != range)
                    {
                        dataList.Add(new UnicodeRangeProperty(chartType, child.SubGroupName, child.SubIds, range));
                        appended = true;
                    }
                }
            }

            return appended;
        }

        /// <summary>
        /// Case-insensitive ordinal substring test that treats a null <paramref name="text"/> as "no match".
        /// </summary>
        private static bool ContainsOrdinalIgnoreCase(string text, string token)
        {
            return null != text && text.IndexOf(token, StringComparison.OrdinalIgnoreCase) >= 0;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Creates the text normalization property from a collection of expected ranges. Collects the parts
        /// of the Latin, CJK, Katakana, Hangul, Arabic and Greek groups that fall inside the expected ranges
        /// and initializes the normalization dictionary for them.
        /// <a href="http://www.unicode.org/reports/tr15/">Unicode Standard Annex #15 (tr15)</a>
        /// <a href="http://www.unicode.org/charts/normalization/">Unicode normalization charts</a>
        /// </summary>
        /// <param name="unicodeDb">Unicode range database searched for the groups.</param>
        /// <param name="expectedRanges">Ranges the caller allows code points to be taken from.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// No listed group intersects the expected ranges and the dictionary initialization reports no data.
        /// </exception>
        public TextNormalizationProperty(UnicodeRangeDatabase unicodeDb, Collection <UnicodeRange> expectedRanges)
        {
            // Group names are looked up in this order for every expected range.
            string[] normalizationGroupNames =
            {
                "Latin",
                "CJK Unified Ideographs (Han)",
                "CJK Compatibility Ideographs",
                "Katakana",
                "Hangul Jamo",
                "Hangul Syllables",
                "Arabic",
                "Greek"
            };

            bool foundAny = false;

            foreach (UnicodeRange candidate in expectedRanges)
            {
                foreach (string groupName in normalizationGroupNames)
                {
                    // The call always runs so that every matching range lands in textNormalizationRangeList.
                    bool added = RangePropertyCollector.BuildPropertyDataList(
                        unicodeDb,
                        candidate,
                        textNormalizationRangeList,
                        groupName,
                        GroupAttributes.Name);
                    foundAny = foundAny | added;
                }
            }

            if (InitializeTextNormalizationPropertyDictionary(expectedRanges))
            {
                foundAny = true;
            }

            if (foundAny)
            {
                return;
            }

            throw new ArgumentOutOfRangeException("expectedRanges", "TextNormalizationProperty, " +
                                                  "code points for text normalization ranges are beyond expected range. " + "Refert to Latin,  CJK Unified Ideographs (Han) " +
                                                  "CJK Compatibility Ideographs, Katakana, Hangul Jamo, Hangul Syllables, Arabic, and  Greek ranges.");
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Creates a property object for every feature requested in <paramref name="properties"/>, adds its
        /// minimum code point requirement to minNumOfCodePoint and registers it in propertyDictionary.
        /// </summary>
        /// <param name="properties">Requested string properties; unset (null) members are skipped.</param>
        /// <param name="unicodeDb">Unicode range database handed to the property constructors.</param>
        /// <param name="expectedRange">Range handed to the property constructors.</param>
        private void CreateProperties(StringProperties properties, UnicodeRangeDatabase unicodeDb, UnicodeRange expectedRange)
        {
            // Boolean switches: created only when explicitly set to true.
            if (properties.HasNumbers != null && (bool)properties.HasNumbers)
            {
                numberProperty = new NumberProperty(unicodeDb, expectedRange);
                minNumOfCodePoint += NumberProperty.MINNUMOFCODEPOINT;
                propertyDictionary.Add(PropertyName.Number, numberProperty);
            }

            if (properties.IsBidirectional != null && (bool)properties.IsBidirectional)
            {
                bidiProperty = new BidiProperty(unicodeDb, expectedRange);
                minNumOfCodePoint += BidiProperty.MINNUMOFCODEPOINT;
                propertyDictionary.Add(PropertyName.Bidi, bidiProperty);
            }

            // Any normalization form at all enables the normalization property.
            if (properties.NormalizationForm != null)
            {
                textNormalizationProperty = new TextNormalizationProperty(unicodeDb, expectedRange);
                minNumOfCodePoint += TextNormalizationProperty.MINNUMOFCODEPOINT;
                propertyDictionary.Add(PropertyName.TextNormalization, textNormalizationProperty);
            }

            // Counted features: created when the count is set and non-zero; the requirement scales with the count.
            if (properties.MinNumberOfCombiningMarks != null && properties.MinNumberOfCombiningMarks != 0)
            {
                combiningMarksProperty = new CombiningMarksProperty(unicodeDb, expectedRange);
                minNumOfCodePoint += CombiningMarksProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfCombiningMarks;
                propertyDictionary.Add(PropertyName.CombiningMarks, combiningMarksProperty);
            }

            if (properties.MinNumberOfEndUserDefinedCodePoints != null && properties.MinNumberOfEndUserDefinedCodePoints != 0)
            {
                eudcProperty = new EudcProperty(unicodeDb, expectedRange);
                minNumOfCodePoint += EudcProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfEndUserDefinedCodePoints;
                propertyDictionary.Add(PropertyName.Eudc, eudcProperty);
            }

            if (properties.MinNumberOfLineBreaks != null && properties.MinNumberOfLineBreaks != 0)
            {
                // LineBreakProperty is built from the expected range alone.
                lineBreakProperty = new LineBreakProperty(expectedRange);
                minNumOfCodePoint += LineBreakProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfLineBreaks;
                propertyDictionary.Add(PropertyName.LineBreak, lineBreakProperty);
            }

            if (properties.MinNumberOfSurrogatePairs != null && properties.MinNumberOfSurrogatePairs != 0)
            {
                surrogatePairProperty = new SurrogatePairProperty(unicodeDb, expectedRange);
                minNumOfCodePoint += SurrogatePairProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfSurrogatePairs;
                propertyDictionary.Add(PropertyName.Surrogate, surrogatePairProperty);
            }

            if (properties.MinNumberOfTextSegmentationCodePoints != null && properties.MinNumberOfTextSegmentationCodePoints != 0)
            {
                textSegmentationProperty = new TextSegmentationProperty(unicodeDb, expectedRange);
                minNumOfCodePoint += TextSegmentationProperty.MINNUMOFCODEPOINT * (int)properties.MinNumberOfTextSegmentationCodePoints;
                propertyDictionary.Add(PropertyName.TextSegmentation, textSegmentationProperty);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Creates the Bidi property from a single expected range. Collects the parts of the "Arabic" and
        /// "Hebrew" groups, and of the three ASCII ranges 0x0030 - 0x0039, 0x0041 - 0x005A and
        /// 0x0061 - 0x007A, that fall inside <paramref name="expectedRange"/>.
        /// <a href="http://unicode.org/reports/tr9/">Unicode Standard Annex #9 (tr9)</a>
        /// </summary>
        /// <param name="unicodeDb">Unicode range database searched for the Arabic and Hebrew groups.</param>
        /// <param name="expectedRange">Range the caller allows code points to be taken from.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// No Bidi range, or none of the three ASCII ranges, intersects <paramref name="expectedRange"/>.
        /// </exception>
        public BidiProperty(UnicodeRangeDatabase unicodeDb, UnicodeRange expectedRange)
        {
            bool isValid = false;

            foreach (string groupName in new string[] { "Arabic", "Hebrew" })
            {
                // Both groups are always queried so every matching range lands in bidiPropertyRangeList.
                if (RangePropertyCollector.BuildPropertyDataList(
                        unicodeDb,
                        expectedRange,
                        bidiPropertyRangeList,
                        groupName,
                        GroupAttributes.Name))
                {
                    isValid = true;
                }
            }

            if (InitializeBidiDictionary(expectedRange))
            {
                isValid = true;
            }

            if (!isValid)
            {
                // Two-argument overload: the single-string overload would treat the text as the parameter name.
                throw new ArgumentOutOfRangeException(
                          "expectedRange",
                          String.Format(
                              CultureInfo.InvariantCulture,
                              "BidiProperty, Bidi ranges are beyond expected range, 0x{0:X} - 0x{1:X}. Refer to Arabic and Hebrew ranges.",
                              expectedRange.StartOfUnicodeRange,
                              expectedRange.EndOfUnicodeRange));
            }

            // Reset isValid to validate the Latin ranges; without the reset the check below can never fail.
            isValid = false;

            UnicodeRange[] asciiBlocks =
            {
                new UnicodeRange(0x0030, 0x0039),
                new UnicodeRange(0x0041, 0x005A),
                new UnicodeRange(0x0061, 0x007A)
            };

            foreach (UnicodeRange asciiBlock in asciiBlocks)
            {
                UnicodeRange range = RangePropertyCollector.GetRange(asciiBlock, expectedRange);
                if (null != range)
                {
                    latinRangeList.Add(range);
                    isValid = true;
                }
            }

            if (!isValid)
            {
                throw new ArgumentOutOfRangeException(
                          "expectedRange",
                          String.Format(
                              CultureInfo.InvariantCulture,
                              "BidiProperty, Bidi ranges are beyond expected range, 0x{0:X} - 0x{1:X}. " +
                              "0x0030 - 0x0039,  0x0041 - 0x005A, and 0x0061 - 0x007A ranges are needed to construct Bidi string.",
                              expectedRange.StartOfUnicodeRange,
                              expectedRange.EndOfUnicodeRange));
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Creates the text normalization property from a single expected range. Collects the parts of the
        /// Latin, CJK, Katakana, Hangul, Arabic and Greek groups that fall inside
        /// <paramref name="expectedRange"/> and initializes the normalization dictionary for it.
        /// <a href="http://www.unicode.org/reports/tr15/">Unicode Standard Annex #15 (tr15)</a>
        /// <a href="http://www.unicode.org/charts/normalization/">Unicode normalization charts</a>
        /// </summary>
        /// <param name="unicodeDb">Unicode range database searched for the groups.</param>
        /// <param name="expectedRange">Range the caller allows code points to be taken from.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// No listed group intersects <paramref name="expectedRange"/> and the dictionary initialization
        /// reports no data.
        /// </exception>
        public TextNormalizationProperty(UnicodeRangeDatabase unicodeDb, UnicodeRange expectedRange)
        {
            // Group names are looked up in this order.
            string[] normalizationGroupNames =
            {
                "Latin",
                "CJK Unified Ideographs (Han)",
                "CJK Compatibility Ideographs",
                "Katakana",
                "Hangul Jamo",
                "Hangul Syllables",
                "Arabic",
                "Greek"
            };

            bool isValid = false;

            foreach (string groupName in normalizationGroupNames)
            {
                // Every group is always queried so each matching range lands in textNormalizationRangeList.
                if (RangePropertyCollector.BuildPropertyDataList(
                        unicodeDb,
                        expectedRange,
                        textNormalizationRangeList,
                        groupName,
                        GroupAttributes.Name))
                {
                    isValid = true;
                }
            }

            if (InitializeTextNormalizationPropertyDictionary(expectedRange))
            {
                isValid = true;
            }

            if (!isValid)
            {
                // Two-argument overload: the single-string overload would treat the text as the parameter name.
                throw new ArgumentOutOfRangeException(
                          "expectedRange",
                          String.Format(
                              CultureInfo.InvariantCulture,
                              "TextNormalizationProperty, code points for text normalization ranges are beyond expected range, " +
                              "0x{0:X} - 0x{1:X}. Refer to Latin, CJK Unified Ideographs (Han), " +
                              "CJK Compatibility Ideographs, Katakana, Hangul Jamo, Hangul Syllables, Arabic, and Greek ranges.",
                              expectedRange.StartOfUnicodeRange,
                              expectedRange.EndOfUnicodeRange));
            }
        }