public void SingleCodePointRangeShouldHaveExpectedResults(int codePoint)
        {
            // Build the range through the single-argument constructor.
            var singleRange = new UnicodeCodePointRange(codePoint);

            // Both ends of the range must be the code point it was built from...
            Assert.Equal(codePoint, singleRange.FirstCodePoint);
            Assert.Equal(codePoint, singleRange.LastCodePoint);

            // ...and the range must report itself as a single code point.
            Assert.True(singleRange.IsSingleCodePoint);
        }
        public void MultiCodePointRangeShouldHaveExpectedResults(int firstCodePoint, int lastCodePoint)
        {
            // Build the range through the two-argument (first, last) constructor.
            var multiRange = new UnicodeCodePointRange(firstCodePoint, lastCodePoint);

            // The bounds must be exposed exactly as they were supplied...
            Assert.Equal(firstCodePoint, multiRange.FirstCodePoint);
            Assert.Equal(lastCodePoint, multiRange.LastCodePoint);

            // ...and the range must not report itself as a single code point.
            Assert.False(multiRange.IsSingleCodePoint);
        }
Beispiel #3
0
 private static async Task ProcessBlocksFile(IDataSource ucdSource, UnicodeInfoBuilder builder)
 {
     // Open the blocks data file and read it as ';'-separated fields.
     var blocksStream = await ucdSource.OpenDataFileAsync(BlocksFileName).ConfigureAwait(false);

     using (var blocksReader = new UnicodeDataFileReader(blocksStream, ';'))
     {
         while (blocksReader.MoveToNextLine())
         {
             // First field: the code point range. Second field (trimmed): passed as the
             // second UnicodeBlock constructor argument (presumably the block name).
             var blockRange = UnicodeCodePointRange.Parse(blocksReader.ReadField());
             var blockLabel = blocksReader.ReadTrimmedField();

             builder.AddBlockEntry(new UnicodeBlock(blockRange, blockLabel));
         }
     }
 }
Beispiel #4
0
 private static async Task ProcessEmojiDataFile(IDataSource emojiSource, UnicodeInfoBuilder builder)
 {
     // Open the emoji data file and read it as ';'-separated fields.
     var emojiStream = await emojiSource.OpenDataFileAsync(EmojiDataFileName).ConfigureAwait(false);

     using (var emojiReader = new UnicodeDataFileReader(emojiStream, ';'))
     {
         while (emojiReader.MoveToNextLine())
         {
             // First field: the affected code point range. Second field: the property name.
             var affectedRange = UnicodeCodePointRange.Parse(emojiReader.ReadTrimmedField());
             var propertyName = emojiReader.ReadTrimmedField();

             // Lines whose property name does not map to an EmojiProperties value are skipped.
             if (!EnumHelper<EmojiProperties>.TryGetNamedValue(propertyName, out var emojiProperty))
             {
                 continue;
             }

             builder.SetProperties(emojiProperty, affectedRange);
         }
     }
 }
Beispiel #5
0
        /// <summary>
        /// Sets the given emoji property on every entry located between the entries found for the
        /// first and last code points of <paramref name="codePointRange"/>. When neither end of the
        /// range is found, a new entry carrying only this property is inserted instead.
        /// </summary>
        /// <param name="property">The emoji property flag(s) to set.</param>
        /// <param name="codePointRange">The code point range the property applies to.</param>
        /// <exception cref="InvalidOperationException">
        /// Only one end of the range was found, or the entries found extend beyond the requested range.
        /// </exception>
        public void SetProperties(EmojiProperties property, UnicodeCodePointRange codePointRange)
        {
            // A negative index from FindUcdCodePoint is treated as "no entry found".
            int startIndex = FindUcdCodePoint(codePointRange.FirstCodePoint);
            int endIndex = FindUcdCodePoint(codePointRange.LastCodePoint);

            bool startFound = startIndex >= 0;
            bool endFound = endIndex >= 0;

            if (!startFound && !endFound)
            {
                // Neither end is known yet: create a fresh entry for the whole range.
                var newEntry = new UnicodeCharacterDataBuilder(codePointRange)
                {
                    EmojiProperties = property
                };
                Insert(newEntry);
                return;
            }

            // Both ends must be found, and the entries at each end must not stick out of the
            // requested range. Short-circuiting keeps the indexers from seeing a negative index.
            bool rangeIsUsable =
                startFound &&
                endFound &&
                ucdEntries[startIndex].CodePointRange.FirstCodePoint >= codePointRange.FirstCodePoint &&
                ucdEntries[endIndex].CodePointRange.LastCodePoint <= codePointRange.LastCodePoint;

            if (!rangeIsUsable)
            {
                throw new InvalidOperationException("Unable to find code point for setting emoji property.");
            }

            // Walk the entries from startIndex up to and including endIndex.
            for (int entryIndex = startIndex; ; ++entryIndex)
            {
                ucdEntries[entryIndex].EmojiProperties |= property;

                if (entryIndex == endIndex)
                {
                    break;
                }
            }
        }
Beispiel #6
0
        /// <summary>
        /// Reads the ';'-separated Unicode data file from <paramref name="ucdSource"/> and inserts one
        /// <see cref="UnicodeCharacterDataBuilder"/> per code point (or per "First"/"Last" range pair)
        /// into <paramref name="builder"/>.
        /// </summary>
        /// <param name="ucdSource">The data source used to open the file named by UnicodeDataFileName.</param>
        /// <param name="builder">The builder receiving the parsed character data.</param>
        /// <exception cref="InvalidDataException">
        /// The range markers are inconsistent (including a range start left open at the end of the file),
        /// a name tag is unexpected, the Bidi_Class field is not recognized, or the numeric fields disagree.
        /// </exception>
        private static async Task ProcessUnicodeDataFile(IDataSource ucdSource, UnicodeInfoBuilder builder)
        {
            using (var reader = new UnicodeDataFileReader(await ucdSource.OpenDataFileAsync(UnicodeDataFileName).ConfigureAwait(false), ';'))
            {
                // Start of a pending "<..., First>" range, or -1 when no range is open.
                int rangeStartCodePoint = -1;

                while (reader.MoveToNextLine())
                {
                    // The file is machine-readable data: parse with the invariant culture.
                    var codePoint = new UnicodeCodePointRange(int.Parse(reader.ReadField(), NumberStyles.HexNumber, CultureInfo.InvariantCulture));

                    string name = reader.ReadField();

                    if (!string.IsNullOrEmpty(name) && name[0] == '<' && name[name.Length - 1] == '>')
                    {
                        if (name.EndsWith(", First>", StringComparison.OrdinalIgnoreCase))
                        {
                            if (rangeStartCodePoint >= 0)
                            {
                                throw new InvalidDataException("Invalid range data in UnicodeData.txt.");
                            }

                            rangeStartCodePoint = codePoint.FirstCodePoint;

                            // The remaining fields of the "First" line are not read; the matching "Last" line provides them.
                            continue;
                        }
                        else if (name.EndsWith(", Last>", StringComparison.OrdinalIgnoreCase))
                        {
                            if (rangeStartCodePoint < 0)
                            {
                                throw new InvalidDataException("Invalid range data in UnicodeData.txt.");
                            }

                            codePoint = new UnicodeCodePointRange(rangeStartCodePoint, codePoint.LastCodePoint);

                            // Strip the leading "<" and the trailing ", Last>" (8 characters in total).
                            // Upper-case the name in order to respect unicode naming scheme. (Spec says all names are uppercase ASCII)
                            name = name.Substring(1, name.Length - 8).ToUpperInvariant();

                            rangeStartCodePoint = -1;
                        }
                        else if (name == "<control>") // Ignore the name of the property for these code points, as it should really be empty by the spec.
                        {
                            // For control characters, we can derive a character label of the form <control-NNNN>, which is not the character name.
                            name = null;
                        }
                        else
                        {
                            throw new InvalidDataException("Unexpected code point name tag: " + name + ".");
                        }
                    }
                    else if (rangeStartCodePoint >= 0)
                    {
                        // A "First" line must be immediately followed by its "Last" line.
                        throw new InvalidDataException("Invalid range data in UnicodeData.txt.");
                    }

                    // NB: Fields 10 and 11 are deemed obsolete. Field 11 should always be empty, and will be ignored here.
                    var characterData = new UnicodeCharacterDataBuilder(codePoint)
                    {
                        Name = NullIfEmpty(name),
                        Category = UnicodeCategoryInfo.FromShortName(reader.ReadField()).Category,
                        CanonicalCombiningClass = (CanonicalCombiningClass)byte.Parse(reader.ReadField(), CultureInfo.InvariantCulture),
                    };

                    if (EnumHelper<BidirectionalClass>.TryGetNamedValue(reader.ReadField(), out var bidirectionalClass))
                    {
                        characterData.BidirectionalClass = bidirectionalClass;
                    }
                    else
                    {
                        throw new InvalidDataException(string.Format(CultureInfo.InvariantCulture, "Missing Bidi_Class property for code point(s) {0}.", codePoint));
                    }

                    characterData.CharacterDecompositionMapping = CharacterDecompositionMapping.Parse(NullIfEmpty(reader.ReadField()));

                    // Fields 6, 7 and 8 are read now (field order matters) and interpreted further below.
                    string numericDecimalField = NullIfEmpty(reader.ReadField());
                    string numericDigitField = NullIfEmpty(reader.ReadField());
                    string numericNumericField = NullIfEmpty(reader.ReadField());

                    characterData.BidirectionalMirrored = reader.ReadField() == "Y";
                    characterData.OldName = NullIfEmpty(reader.ReadField());
                    reader.SkipField();
                    characterData.SimpleUpperCaseMapping = ParseSimpleCaseMapping(reader.ReadField());
                    characterData.SimpleLowerCaseMapping = ParseSimpleCaseMapping(reader.ReadField());
                    characterData.SimpleTitleCaseMapping = ParseSimpleCaseMapping(reader.ReadField());

                    // Handle Numeric_Type & Numeric_Value:
                    // If field 6 is set, fields 7 and 8 should have the same value, and Numeric_Type is Decimal.
                    // If field 6 is not set but field 7 is set, field 8 should be set and have the same value. Then, the type is Digit.
                    // If field 6 and 7 are not set, but field 8 is set, then Numeric_Type is Numeric.
                    if (numericNumericField != null)
                    {
                        characterData.NumericValue = UnicodeRationalNumber.Parse(numericNumericField);

                        if (numericDigitField != null)
                        {
                            if (numericDigitField != numericNumericField)
                            {
                                throw new InvalidDataException("Invalid value for field 7 of code point " + characterData.CodePointRange.ToString() + ".");
                            }

                            if (numericDecimalField != null)
                            {
                                if (numericDecimalField != numericDigitField)
                                {
                                    throw new InvalidDataException("Invalid value for field 6 of code point " + characterData.CodePointRange.ToString() + ".");
                                }

                                characterData.NumericType = UnicodeNumericType.Decimal;
                            }
                            else
                            {
                                characterData.NumericType = UnicodeNumericType.Digit;
                            }
                        }
                        else
                        {
                            characterData.NumericType = UnicodeNumericType.Numeric;
                        }
                    }

                    builder.Insert(characterData);
                }

                // A "<..., First>" line that never got its "<..., Last>" counterpart would otherwise be
                // dropped silently; treat it like every other range inconsistency.
                if (rangeStartCodePoint >= 0)
                {
                    throw new InvalidDataException("Invalid range data in UnicodeData.txt.");
                }
            }
        }